115 files changed, 10102 insertions, 4918 deletions
diff --git a/Makefile b/Makefile
index 51b998f4..ee2700af 100644
--- a/Makefile
+++ b/Makefile
@@ -18,8 +18,10 @@ TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS)
 WEB_URL=http://www.rabbitmq.com/
 MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml))
 WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml)
-USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml $(DOCS_DIR)/rabbitmq-multi.1.xml
+USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml
 USAGES_ERL=$(foreach XML, $(USAGES_XML), $(call usage_xml_to_erl, $(XML)))
+QC_MODULES := rabbit_backing_queue_qc
+QC_TRIALS ?= 100
 
 ifeq ($(shell python -c 'import simplejson' 2>/dev/null && echo yes),yes)
 PYTHON=python
@@ -41,12 +43,18 @@ RABBIT_PLT=rabbit.plt
 
 ifndef USE_SPECS
 # our type specs rely on features and bug fixes in dialyzer that are
-# only available in R14A upwards (R14A is erts 5.8)
-USE_SPECS:=$(shell erl -noshell -eval 'io:format([list_to_integer(X) || X <- string:tokens(erlang:system_info(version), ".")] >= [5,8]), halt().')
+# only available in R14B03 upwards (R14B03 is erts 5.8.4)
+USE_SPECS:=$(shell erl -noshell -eval 'io:format([list_to_integer(X) || X <- string:tokens(erlang:system_info(version), ".")] >= [5,8,4]), halt().')
+endif
+
+ifndef USE_PROPER_QC
+# PropEr needs to be installed for property checking
+# http://proper.softlab.ntua.gr/
+USE_PROPER_QC:=$(shell erl -noshell -eval 'io:format({module, proper} =:= code:ensure_loaded(proper)), halt().')
 endif
 
 #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests
-ERLC_OPTS=-I $(INCLUDE_DIR) -o $(EBIN_DIR) -Wall -v +debug_info $(if $(filter true,$(USE_SPECS)),-Duse_specs)
+ERLC_OPTS=-I $(INCLUDE_DIR) -o $(EBIN_DIR) -Wall -v +debug_info $(call boolean_macro,$(USE_SPECS),use_specs) $(call boolean_macro,$(USE_PROPER_QC),use_proper_qc)
 
 VERSION=0.0.0
 TARBALL_NAME=rabbitmq-server-$(VERSION)
@@ -69,6 +77,10 @@ define usage_dep
   $(call usage_xml_to_erl, $(1)): $(1) $(DOCS_DIR)/usage.xsl
 endef
 
+define boolean_macro
+$(if $(filter true,$(1)),-D$(2))
+endef
+
 ifneq "$(SBIN_DIR)" ""
 ifneq "$(TARGET_DIR)" ""
 SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR))
@@ -93,8 +105,8 @@ $(DEPS_FILE): $(SOURCES) $(INCLUDES)
 	rm -f $@
 	echo $(subst : ,:,$(foreach FILE,$^,$(FILE):)) | escript generate_deps $@ $(EBIN_DIR)
 
-$(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app
-	escript generate_app $(EBIN_DIR) $@ < $<
+$(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(SOURCES) generate_app
+	escript generate_app $< $@ $(SOURCE_DIR)
 
 $(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl | $(DEPS_FILE)
 	erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $<
@@ -162,7 +174,11 @@ run-node: all
 		./scripts/rabbitmq-server
 
 run-tests: all
-	echo "rabbit_tests:all_tests()." | $(ERL_CALL)
+	OUT=$$(echo "rabbit_tests:all_tests()." | $(ERL_CALL)) ; \
+	  echo $$OUT ; echo $$OUT | grep '^{ok, passed}$$' > /dev/null
+
+run-qc: all # one ./quickcheck run per QC_MODULES entry; '&&' separates the runs and fails the rule on the first error
+	$(foreach MOD,$(QC_MODULES),./quickcheck $(RABBITMQ_NODENAME) $(MOD) $(QC_TRIALS) &&) true
 
 start-background-node:
 	$(BASIC_SCRIPT_ENVIRONMENT_SETTINGS) \
@@ -177,11 +193,11 @@ stop-rabbit-on-node: all
 	echo "rabbit:stop()." | $(ERL_CALL)
 
 set-memory-alarm: all
-	echo "alarm_handler:set_alarm({vm_memory_high_watermark, []})." | \
+	echo "alarm_handler:set_alarm({{vm_memory_high_watermark, node()}, []})." | \
 	$(ERL_CALL)
 
 clear-memory-alarm: all
-	echo "alarm_handler:clear_alarm(vm_memory_high_watermark)." | \
+	echo "alarm_handler:clear_alarm({vm_memory_high_watermark, node()})." | \
 	$(ERL_CALL)
 
 stop-node:
@@ -222,7 +238,7 @@ srcdist: distclean
 	chmod 0755 $(TARGET_SRC_DIR)/scripts/*
 
 	(cd dist; tar -zcf $(TARBALL_NAME).tar.gz $(TARBALL_NAME))
-	(cd dist; zip -r $(TARBALL_NAME).zip $(TARBALL_NAME))
+	(cd dist; zip -q -r $(TARBALL_NAME).zip $(TARBALL_NAME))
 	rm -rf $(TARGET_SRC_DIR)
 
 distclean: clean
@@ -233,7 +249,7 @@ distclean: clean
 # xmlto can not read from standard input, so we mess with a tmp file.
 %.gz: %.xml $(DOCS_DIR)/examples-to-end.xsl
 	xmlto --version | grep -E '^xmlto version 0\.0\.([0-9]|1[1-8])$$' >/dev/null || opt='--stringparam man.indent.verbatims=0' ; \
-	    xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \
+	    xsltproc --novalid $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \
 	    xmlto -o $(DOCS_DIR) $$opt man $<.tmp && \
 	    gzip -f $(DOCS_DIR)/`basename $< .xml`
 	rm -f $<.tmp
@@ -242,7 +258,7 @@ distclean: clean
 # Do not fold the cp into previous line, it's there to stop the file being
 # generated but empty if we fail
 $(SOURCE_DIR)/%_usage.erl:
-	xsltproc --stringparam modulename "`basename $@ .erl`" \
+	xsltproc --novalid --stringparam modulename "`basename $@ .erl`" \
 		$(DOCS_DIR)/usage.xsl $< > $@.tmp
 	sed -e 's/"/\\"/g' -e 's/%QUOTE%/"/g' $@.tmp > $@.tmp2
 	fold -s $@.tmp2 > $@.tmp3
@@ -256,7 +272,7 @@ $(SOURCE_DIR)/%_usage.erl:
 		xmlto xhtml-nochunks `basename $< .xml`.xml ; rm `basename $< .xml`.xml
 	cat `basename $< .xml`.html | \
 	    xsltproc --novalid $(DOCS_DIR)/remove-namespaces.xsl - | \
-		xsltproc --stringparam original `basename $<` $(DOCS_DIR)/html-to-website-xml.xsl - | \
+		xsltproc --novalid --stringparam original `basename $<` $(DOCS_DIR)/html-to-website-xml.xsl - | \
 		xmllint --format - > $@
 	rm `basename $< .xml`.html
 
@@ -268,7 +284,7 @@ install_bin: all install_dirs
 	cp -r ebin include LICENSE LICENSE-MPL-RabbitMQ INSTALL $(TARGET_DIR)
 
 	chmod 0755 scripts/*
-	for script in rabbitmq-env rabbitmq-server rabbitmqctl rabbitmq-multi; do \
+	for script in rabbitmq-env rabbitmq-server rabbitmqctl; do \
 		cp scripts/$$script $(TARGET_DIR)/sbin; \
 		[ -e $(SBIN_DIR)/$$script ] || ln -s $(SCRIPTS_REL_PATH)/$$script $(SBIN_DIR)/$$script; \
 	done
@@ -313,3 +329,4 @@ ifneq "$(strip $(patsubst clean%,,$(patsubst %clean,,$(TESTABLEGOALS))))" ""
 -include $(DEPS_FILE)
 endif
 
+.PHONY: run-qc
diff --git a/codegen.py b/codegen.py
index 1fd5bc69..8cd9dab8 100644
--- a/codegen.py
+++ b/codegen.py
@@ -324,7 +324,7 @@ def genErl(spec):
 -type(amqp_field_type() ::
       'longstr' | 'signedint' | 'decimal' | 'timestamp' |
       'table' | 'byte' | 'double' | 'float' | 'long' |
-      'short' | 'bool' | 'binary' | 'void').
+      'short' | 'bool' | 'binary' | 'void' | 'array').
 -type(amqp_property_type() ::
       'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' |
       'longlongint' | 'timestamp' | 'bit' | 'table').
diff --git a/docs/examples-to-end.xsl b/docs/examples-to-end.xsl
index d9686ada..a0a74178 100644
--- a/docs/examples-to-end.xsl
+++ b/docs/examples-to-end.xsl
@@ -1,9 +1,5 @@
 <?xml version='1.0'?>
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-                xmlns:exsl="http://exslt.org/common"
-                xmlns:ng="http://docbook.org/docbook-ng"
-                xmlns:db="http://docbook.org/ns/docbook"
-                exclude-result-prefixes="exsl ng db"
                 version='1.0'>
 
 <xsl:output doctype-public="-//OASIS//DTD DocBook XML V4.5//EN" doctype-system="http://www.docbook.org/xml/4.5/docbookx.dtd" />
diff --git a/docs/rabbitmq.conf.5.xml b/docs/rabbitmq-env.conf.5.xml
index 31de7164..c887596c 100644
--- a/docs/rabbitmq.conf.5.xml
+++ b/docs/rabbitmq-env.conf.5.xml
@@ -9,20 +9,20 @@
     </refentryinfo>
 
     <refmeta>
-        <refentrytitle>rabbitmq.conf</refentrytitle>
+        <refentrytitle>rabbitmq-env.conf</refentrytitle>
         <manvolnum>5</manvolnum>
         <refmiscinfo class="manual">RabbitMQ Server</refmiscinfo>
     </refmeta>
 
     <refnamediv>
-        <refname>rabbitmq.conf</refname>
+        <refname>rabbitmq-env.conf</refname>
         <refpurpose>default settings for RabbitMQ AMQP server</refpurpose>
     </refnamediv>
 
     <refsect1>
         <title>Description</title>
         <para>
-<filename>/etc/rabbitmq/rabbitmq.conf</filename> contains variable settings that override the
+<filename>/etc/rabbitmq/rabbitmq-env.conf</filename> contains variable settings that override the
 defaults built in to the RabbitMQ startup scripts.
         </para>
         <para>
@@ -33,7 +33,7 @@ operator), including line comments starting with "#".
         </para>
         <para>
 In order of preference, the startup scripts get their values from the
-environment, from <filename>/etc/rabbitmq/rabbitmq.conf</filename> and finally from the
+environment, from <filename>/etc/rabbitmq/rabbitmq-env.conf</filename> and finally from the
 built-in default values. For example, for the <envar>RABBITMQ_NODENAME</envar>
 setting,
         </para>
@@ -48,26 +48,26 @@ empty string, then
           <envar>NODENAME</envar>
         </para>
         <para>
-from <filename>/etc/rabbitmq/rabbitmq.conf</filename> is checked. If it is also absent
+from <filename>/etc/rabbitmq/rabbitmq-env.conf</filename> is checked. If it is also absent
 or set equal to the empty string then the default value from the
 startup script is used.
         </para>
         <para>
-The variable names in /etc/rabbitmq/rabbitmq.conf are always equal to the
+The variable names in /etc/rabbitmq/rabbitmq-env.conf are always equal to the
 environment variable names, with the <envar>RABBITMQ_</envar> prefix removed:
 <envar>RABBITMQ_NODE_PORT</envar> from the environment becomes <envar>NODE_PORT</envar> in the
-<filename>/etc/rabbitmq/rabbitmq.conf</filename> file, etc.
+<filename>/etc/rabbitmq/rabbitmq-env.conf</filename> file, etc.
         </para>
         <para role="example-prefix">For example:</para>
         <screen role="example-multiline">
-# I am a complete /etc/rabbitmq/rabbitmq.conf file.
+# I am a complete /etc/rabbitmq/rabbitmq-env.conf file.
 # Comment lines start with a hash character.
 # This is a /bin/sh script file - use ordinary envt var syntax
 NODENAME=hare
         </screen>
         <para role="example">
             This is an example of a complete
-            <filename>/etc/rabbitmq/rabbitmq.conf</filename> file that overrides the default Erlang
+            <filename>/etc/rabbitmq/rabbitmq-env.conf</filename> file that overrides the default Erlang
             node name from "rabbit" to "hare".
         </para>
 
@@ -76,7 +76,6 @@ NODENAME=hare
     <refsect1>
         <title>See also</title>
         <para>
-            <citerefentry><refentrytitle>rabbitmq-multi</refentrytitle><manvolnum>1</manvolnum></citerefentry>
             <citerefentry><refentrytitle>rabbitmq-server</refentrytitle><manvolnum>1</manvolnum></citerefentry>
             <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
         </para>
diff --git a/docs/rabbitmq-multi.1.xml b/docs/rabbitmq-multi.1.xml
deleted file mode 100644
index 6586890a..00000000
--- a/docs/rabbitmq-multi.1.xml
+++ /dev/null
@@ -1,100 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.docbook.org/xml/4.5/docbookx.dtd">
-<refentry lang="en">
-    <refentryinfo>
-        <productname>RabbitMQ Server</productname>
-        <authorgroup>
-            <corpauthor>The RabbitMQ Team &lt;<ulink url="mailto:info@rabbitmq.com"><email>info@rabbitmq.com</email></ulink>&gt;</corpauthor>
-        </authorgroup>
-    </refentryinfo>
-
-    <refmeta>
-        <refentrytitle>rabbitmq-multi</refentrytitle>
-        <manvolnum>1</manvolnum>
-        <refmiscinfo class="manual">RabbitMQ Server</refmiscinfo>
-    </refmeta>
-
-    <refnamediv>
-        <refname>rabbitmq-multi</refname>
-        <refpurpose>start/stop local cluster RabbitMQ nodes</refpurpose>
-    </refnamediv>
-
-    <refsynopsisdiv>
-        <cmdsynopsis>
-          <command>rabbitmq-multi</command>
-          <arg choice="req"><replaceable>command</replaceable></arg>
-          <arg choice="opt" rep="repeat"><replaceable>command options</replaceable></arg>
-        </cmdsynopsis>
-    </refsynopsisdiv>
-
-    <refsect1>
-        <title>Description</title>
-        <para>
-           RabbitMQ is an implementation of AMQP, the emerging standard for high
-performance enterprise messaging. The RabbitMQ server is a robust and
-scalable implementation of an AMQP broker.
-        </para>
-        <para>
-rabbitmq-multi scripts allows for easy set-up of a cluster on a single
-machine.
-        </para>
-    </refsect1>
-
-    <refsect1>
-      <title>Commands</title>
-      <variablelist>
-        <varlistentry>
-          <term><cmdsynopsis><command>start_all</command> <arg choice="req"><replaceable>count</replaceable></arg></cmdsynopsis></term>
-          <listitem>
-            <para>
-Start count nodes with unique names, listening on all IP addresses and
-on sequential ports starting from 5672.
-            </para>
-            <para role="example-prefix">For example:</para>
-            <screen role="example">rabbitmq-multi start_all 3</screen>
-            <para role="example">
-              Starts 3 local RabbitMQ nodes with unique, sequential port numbers.
-            </para>
-          </listitem>
-        </varlistentry>
-
-        <varlistentry>
-          <term><cmdsynopsis><command>status</command></cmdsynopsis></term>
-          <listitem>
-            <para>
-Print the status of all running RabbitMQ nodes.
-            </para>
-          </listitem>
-        </varlistentry>
-
-        <varlistentry>
-          <term><cmdsynopsis><command>stop_all</command></cmdsynopsis></term>
-          <listitem>
-            <para>
-Stop all local RabbitMQ nodes,
-            </para>
-          </listitem>
-        </varlistentry>
-
-        <varlistentry>
-          <term><cmdsynopsis><command>rotate_logs</command></cmdsynopsis></term>
-          <listitem>
-            <para>
-Rotate log files for all local and running RabbitMQ nodes.
-            </para>
-          </listitem>
-        </varlistentry>
-
-      </variablelist>
-    </refsect1>
-
-
-    <refsect1>
-        <title>See also</title>
-        <para>
-            <citerefentry><refentrytitle>rabbitmq.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-            <citerefentry><refentrytitle>rabbitmq-server</refentrytitle><manvolnum>1</manvolnum></citerefentry>
-            <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
-        </para>
-    </refsect1>
-</refentry>
diff --git a/docs/rabbitmq-server.1.xml b/docs/rabbitmq-server.1.xml
index f161a291..ca63927c 100644
--- a/docs/rabbitmq-server.1.xml
+++ b/docs/rabbitmq-server.1.xml
@@ -124,8 +124,7 @@ Defaults to 5672.
     <refsect1>
         <title>See also</title>
         <para>
-            <citerefentry><refentrytitle>rabbitmq.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-            <citerefentry><refentrytitle>rabbitmq-multi</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+            <citerefentry><refentrytitle>rabbitmq-env.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
             <citerefentry><refentrytitle>rabbitmqctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
         </para>
     </refsect1>
diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml
index bd9fee7d..ee000215 100644
--- a/docs/rabbitmqctl.1.xml
+++ b/docs/rabbitmqctl.1.xml
@@ -59,6 +59,11 @@
       RabbitMQ broker. It performs all actions by connecting to one of the
       broker's nodes.
     </para>
+    <para>
+      Diagnostic information is displayed if the broker is not
+      running, cannot be reached, or rejects the connection due to
+      mismatching Erlang cookies.
+    </para>
   </refsect1>
 
   <refsect1>
@@ -158,19 +163,23 @@
         </varlistentry>
 
         <varlistentry>
-          <term><cmdsynopsis><command>status</command></cmdsynopsis></term>
+          <term><cmdsynopsis><command>wait</command></cmdsynopsis></term>
           <listitem>
             <para>
-              Displays various information about the RabbitMQ broker,
-              such as whether the RabbitMQ application on the current
-              node, its version number, what nodes are part of the
-              broker, which of these are running.
+              Wait for the RabbitMQ application to start.
+            </para>
+            <para>
+              This command will wait for the RabbitMQ application to
+              start at the node. As long as the Erlang node is up but
+              the RabbitMQ application is down, it will wait
+              indefinitely. If the node itself goes down, or takes
+              more than five seconds to come up, it will fail.
             </para>
             <para role="example-prefix">For example:</para>
-            <screen role="example">rabbitmqctl status</screen>
+            <screen role="example">rabbitmqctl wait</screen>
             <para role="example">
-              This command displays information about the RabbitMQ
-              broker.
+              This command will return when the RabbitMQ node has
+              started up.
             </para>
           </listitem>
         </varlistentry>
@@ -355,6 +364,20 @@
             </para>
           </listitem>
         </varlistentry>
+        <varlistentry>
+          <term><cmdsynopsis><command>cluster_status</command></cmdsynopsis></term>
+          <listitem>
+            <para>
+              Displays all the nodes in the cluster grouped by node type,
+              together with the currently running nodes.
+            </para>
+            <para role="example-prefix">For example:</para>
+            <screen role="example">rabbitmqctl cluster_status</screen>
+            <para role="example">
+              This command displays the nodes in the cluster.
+            </para>
+          </listitem>
+        </varlistentry>
       </variablelist>
     </refsect2>
 
@@ -490,17 +513,22 @@
         </varlistentry>
 
         <varlistentry>
-          <term><cmdsynopsis><command>set_admin</command> <arg choice="req"><replaceable>username</replaceable></arg></cmdsynopsis></term>
+          <term><cmdsynopsis><command>set_user_tags</command> <arg choice="req"><replaceable>username</replaceable></arg> <arg choice="req"><replaceable>tag</replaceable> ...</arg></cmdsynopsis></term>
           <listitem>
             <variablelist>
               <varlistentry>
                 <term>username</term>
-                <listitem><para>The name of the user whose administrative
-                status is to be set.</para></listitem>
+                <listitem><para>The name of the user whose tags are to
+                be set.</para></listitem>
+              </varlistentry>
+              <varlistentry>
+                <term>tag</term>
+                <listitem><para>Zero, one or more tags to set. Any
+                existing tags will be removed.</para></listitem>
               </varlistentry>
             </variablelist>
             <para role="example-prefix">For example:</para>
-            <screen role="example">rabbitmqctl set_admin tonyg</screen>
+            <screen role="example">rabbitmqctl set_user_tags tonyg administrator</screen>
             <para role="example">
               This command instructs the RabbitMQ broker to ensure the user
                named <command>tonyg</command> is an administrator. This has no
@@ -509,24 +537,10 @@
                user logs in via some other means (for example with the
                management plugin).
             </para>
-          </listitem>
-        </varlistentry>
-
-        <varlistentry>
-          <term><cmdsynopsis><command>clear_admin</command> <arg choice="req"><replaceable>username</replaceable></arg></cmdsynopsis></term>
-          <listitem>
-            <variablelist>
-              <varlistentry>
-                <term>username</term>
-                <listitem><para>The name of the user whose administrative
-                status is to be cleared.</para></listitem>
-              </varlistentry>
-            </variablelist>
-            <para role="example-prefix">For example:</para>
-            <screen role="example">rabbitmqctl clear_admin tonyg</screen>
+            <screen role="example">rabbitmqctl set_user_tags tonyg</screen>
             <para role="example">
-              This command instructs the RabbitMQ broker to ensure the user
-               named <command>tonyg</command> is not an administrator.
+              This command instructs the RabbitMQ broker to remove any
+              tags from the user named <command>tonyg</command>.
             </para>
           </listitem>
         </varlistentry>
@@ -602,14 +616,35 @@
           </listitem>
         </varlistentry>
 
-        <varlistentry>
-          <term><cmdsynopsis><command>list_vhosts</command></cmdsynopsis></term>
+        <varlistentry role="usage-has-option-list">
+          <term><cmdsynopsis><command>list_vhosts</command> <arg choice="opt" role="usage-option-list"><replaceable>vhostinfoitem</replaceable> ...</arg></cmdsynopsis></term>
           <listitem>
             <para>
               Lists virtual hosts.
             </para>
+            <para>
+              The <command>vhostinfoitem</command> parameter is used to indicate which
+              virtual host information items to include in the results. The column order in the
+              results will match the order of the parameters.
+              <command>vhostinfoitem</command> can take any value from
+              the list that follows:
+            </para>
+            <variablelist>
+              <varlistentry>
+                <term>name</term>
+                <listitem><para>The name of the virtual host with non-ASCII characters escaped as in C.</para></listitem>
+              </varlistentry>
+              <varlistentry>
+                <term>tracing</term>
+                <listitem><para>Whether tracing is enabled for this virtual host.</para></listitem>
+              </varlistentry>
+            </variablelist>
+            <para>
+              If no <command>vhostinfoitem</command>s are specified
+              then the vhost name is displayed.
+            </para>
             <para role="example-prefix">For example:</para>
-            <screen role="example">rabbitmqctl list_vhosts</screen>
+            <screen role="example">rabbitmqctl list_vhosts name tracing</screen>
             <para role="example">
               This command instructs the RabbitMQ broker to list all
               virtual hosts.
@@ -1165,6 +1200,10 @@
                 <listitem><para>True if the channel is in transactional mode, false otherwise.</para></listitem>
               </varlistentry>
               <varlistentry>
+                <term>confirm</term>
+                <listitem><para>True if the channel is in confirm mode, false otherwise.</para></listitem>
+              </varlistentry>
+              <varlistentry>
                 <term>consumer_count</term>
                 <listitem><para>Number of logical AMQP consumers retrieving messages via
                   the channel.</para></listitem>
@@ -1175,11 +1214,22 @@
                   yet acknowledged.</para></listitem>
               </varlistentry>
               <varlistentry>
+                <term>messages_uncommitted</term>
+                <listitem><para>Number of messages received in an as yet
+                  uncommitted transaction.</para></listitem>
+              </varlistentry>
+              <varlistentry>
                 <term>acks_uncommitted</term>
                 <listitem><para>Number of acknowledgements received in an as yet
                   uncommitted transaction.</para></listitem>
               </varlistentry>
               <varlistentry>
+                <term>messages_unconfirmed</term>
+                <listitem><para>Number of published messages not yet
+                confirmed.  On channels not in confirm mode, this
+                remains 0.</para></listitem>
+              </varlistentry>
+              <varlistentry>
                 <term>prefetch_count</term>
                 <listitem><para>QoS prefetch count limit in force, 0 if unlimited.</para></listitem>
               </varlistentry>
@@ -1191,21 +1241,10 @@
                     messages to the channel's consumers.
                     </para></listitem>
               </varlistentry>
-              <varlistentry>
-                <term>confirm</term>
-                <listitem><para>True if the channel is in confirm mode, false otherwise.</para></listitem>
-              </varlistentry>
-              <varlistentry>
-                <term>messages_unconfirmed</term>
-                <listitem><para>Number of published messages not yet
-                confirmed.  On channels not in confirm mode, this
-                remains 0.</para></listitem>
-              </varlistentry>
             </variablelist>
             <para>
               If no <command>channelinfoitem</command>s are specified then pid,
-              user, transactional, consumer_count, and
-              messages_unacknowledged are assumed.
+              user, consumer_count, and messages_unacknowledged are assumed.
             </para>
 
             <para role="example-prefix">
@@ -1220,7 +1259,7 @@
         </varlistentry>
 
         <varlistentry>
-          <term><cmdsynopsis><command>list_consumers</command></cmdsynopsis></term>
+          <term><cmdsynopsis><command>list_consumers</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg></cmdsynopsis></term>
           <listitem>
             <para>
               List consumers, i.e. subscriptions to a queue's message
@@ -1232,14 +1271,98 @@
               indicating whether acknowledgements are expected for
               messages delivered to this consumer.
             </para>
-            <para role="usage">
-              The output format for "list_consumers" is a list of rows containing,
-              in order, the queue name, channel process id, consumer tag, and a
-              boolean indicating whether acknowledgements are expected from the
-              consumer.
+            <para>
+              The output is a list of rows containing, in order, the queue name,
+              channel process id, consumer tag, and a boolean indicating whether
+              acknowledgements are expected from the consumer.
+            </para>
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><cmdsynopsis><command>status</command></cmdsynopsis></term>
+          <listitem>
+            <para>
+              Displays broker status information such as the running
+              applications on the current Erlang node, RabbitMQ and
+              Erlang versions and OS name.  (See
+              the <command>cluster_status</command> command to find
+              out which nodes are clustered and running.)
+            </para>
+            <para role="example-prefix">For example:</para>
+            <screen role="example">rabbitmqctl status</screen>
+            <para role="example">
+              This command displays information about the RabbitMQ
+              broker.
             </para>
           </listitem>
         </varlistentry>
+
+        <varlistentry>
+          <term><cmdsynopsis><command>environment</command></cmdsynopsis></term>
+          <listitem>
+            <para>
+              Display the name and value of each variable in the
+              application environment.
+            </para>
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><cmdsynopsis><command>report</command></cmdsynopsis></term>
+          <listitem>
+            <para>
+              Generate a server status report containing a
+              concatenation of all server status information for
+              support purposes. The output should be redirected to a
+              file when accompanying a support request.
+            </para>
+            <para role="example-prefix">
+              For example:
+            </para>
+            <screen role="example">rabbitmqctl report > server_report.txt</screen>
+            <para role="example">
+              This command creates a server report which may be
+              attached to a support request email.
+            </para>
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
+    <refsect2>
+      <title>Message Tracing</title>
+      <variablelist>
+        <varlistentry>
+          <term><cmdsynopsis><command>trace_on</command> <arg choice="opt">-p <replaceable>vhost</replaceable></arg></cmdsynopsis></term>
+          <listitem>
+            <variablelist>
+              <varlistentry>
+                <term>vhost</term>
+                <listitem><para>The name of the virtual host for which to start tracing.</para></listitem>
+              </varlistentry>
+            </variablelist>
+            <para>
+              Starts tracing.
+            </para>
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><cmdsynopsis><command>trace_off</command> <arg choice="opt">-p <replaceable>vhost</replaceable></arg></cmdsynopsis></term>
+          <listitem>
+            <variablelist>
+              <varlistentry>
+                <term>vhost</term>
+                <listitem><para>The name of the virtual host for which to stop tracing.</para></listitem>
+              </varlistentry>
+            </variablelist>
+            <para>
+              Stops tracing.
+            </para>
+          </listitem>
+        </varlistentry>
+
       </variablelist>
     </refsect2>
   </refsect1>
diff --git a/docs/usage.xsl b/docs/usage.xsl
index a6cebd93..586f8303 100644
--- a/docs/usage.xsl
+++ b/docs/usage.xsl
@@ -1,9 +1,5 @@
 <?xml version='1.0'?>
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-                xmlns:exsl="http://exslt.org/common"
-                xmlns:ng="http://docbook.org/docbook-ng"
-                xmlns:db="http://docbook.org/ns/docbook"
-                exclude-result-prefixes="exsl"
                 version='1.0'>
 
 <xsl:param name="modulename"/>
diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in
index cc7221d6..65a3269a 100644
--- a/ebin/rabbit_app.in
+++ b/ebin/rabbit_app.in
@@ -20,18 +20,26 @@
          {vm_memory_high_watermark, 0.4},
          {msg_store_index_module, rabbit_msg_store_ets_index},
          {backing_queue_module, rabbit_variable_queue},
-         {persister_max_wrap_entries, 500},
-         {persister_hibernate_after, 10000},
+         {frame_max, 131072},
          {msg_store_file_size_limit, 16777216},
          {queue_index_max_journal_entries, 262144},
          {default_user, <<"guest">>},
          {default_pass, <<"guest">>},
-         {default_user_is_admin, true},
+         {default_user_tags, [administrator]},
          {default_vhost, <<"/">>},
          {default_permissions, [<<".*">>, <<".*">>, <<".*">>]},
          {cluster_nodes, []},
          {server_properties, []},
          {collect_statistics, none},
+         {collect_statistics_interval, 5000},
          {auth_mechanisms, ['PLAIN', 'AMQPLAIN']},
          {auth_backends, [rabbit_auth_backend_internal]},
-         {delegate_count, 16}]}]}.
+         {delegate_count, 16},
+         {trace_vhosts, []},
+         {tcp_listen_options, [binary,
+                               {packet,        raw},
+                               {reuseaddr,     true},
+                               {backlog,       128},
+                               {nodelay,       true},
+                               {exit_on_close, false}]}
+         ]}]}.
diff --git a/generate_app b/generate_app
index 576b485e..fb0eb1ea 100644
--- a/generate_app
+++ b/generate_app
@@ -1,12 +1,16 @@
 #!/usr/bin/env escript
 %% -*- erlang -*-
 
-main([BeamDir, TargetFile]) ->
-    Modules = [list_to_atom(filename:basename(F, ".beam")) ||
-                  F <- filelib:wildcard("*.beam", BeamDir)],
-    {ok, {application, Application, Properties}} = io:read(''),
-    NewProperties = lists:keyreplace(modules, 1, Properties,
-                                     {modules, Modules}),
+main([InFile, OutFile | SrcDirs]) ->
+    Modules = [list_to_atom(filename:basename(F, ".erl")) ||
+                  SrcDir <- SrcDirs,
+                  F <- filelib:wildcard("*.erl", SrcDir)],
+    {ok, [{application, Application, Properties}]} = file:consult(InFile),
+    NewProperties =
+        case proplists:get_value(modules, Properties) of
+            [] -> lists:keyreplace(modules, 1, Properties, {modules, Modules});
+            _  -> Properties
+        end,
     file:write_file(
-      TargetFile,
+      OutFile,
       io_lib:format("~p.~n", [{application, Application, NewProperties}])).
diff --git a/include/gm_specs.hrl b/include/gm_specs.hrl
new file mode 100644
index 00000000..ee29706e
--- /dev/null
+++ b/include/gm_specs.hrl
@@ -0,0 +1,28 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-ifdef(use_specs).
+
+-type(callback_result() :: 'ok' | {'stop', any()} | {'become', atom(), args()}).
+-type(args() :: any()).
+-type(members() :: [pid()]).
+
+-spec(joined/2          :: (args(), members())            -> callback_result()).
+-spec(members_changed/3 :: (args(), members(), members()) -> callback_result()).
+-spec(handle_msg/3      :: (args(), pid(), any())         -> callback_result()).
+-spec(terminate/2       :: (args(), term())               -> any()).
+
+-endif.
diff --git a/include/rabbit.hrl b/include/rabbit.hrl
index 15f5d7c5..ac6399c6 100644
--- a/include/rabbit.hrl
+++ b/include/rabbit.hrl
@@ -15,12 +15,12 @@
 %%
 
 -record(user, {username,
-               is_admin,
+               tags,
                auth_backend, %% Module this user came from
                impl          %% Scratch space for that module
               }).
 
--record(internal_user, {username, password_hash, is_admin}).
+-record(internal_user, {username, password_hash, tags}).
 -record(permission, {configure, write, read}).
 -record(user_vhost, {username, virtual_host}).
 -record(user_permission, {user_vhost, permission}).
@@ -28,7 +28,7 @@
 -record(vhost, {virtual_host, dummy}).
 
 -record(connection, {protocol, user, timeout_sec, frame_max, vhost,
-                     client_properties}).
+                     client_properties, capabilities}).
 
 -record(content,
         {class_id,
@@ -42,10 +42,12 @@
 
 -record(resource, {virtual_host, kind, name}).
 
--record(exchange, {name, type, durable, auto_delete, internal, arguments}).
+-record(exchange, {name, type, durable, auto_delete, internal, arguments,
+                   scratch}).
+-record(exchange_serial, {name, next}).
 
 -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none,
-                   arguments, pid}).
+                   arguments, pid, slave_pids, mirror_nodes}).
 
 %% mnesia doesn't like unary records, so we add a dummy 'value' field
 -record(route, {binding, value = const}).
@@ -54,14 +56,19 @@
 -record(binding, {source, key, destination, args = []}).
 -record(reverse_binding, {destination, key, source, args = []}).
 
+-record(topic_trie_edge, {trie_edge, node_id}).
+-record(topic_trie_binding, {trie_binding, value = const}).
+
+-record(trie_edge, {exchange_name, node_id, word}).
+-record(trie_binding, {exchange_name, node_id, destination}).
+
 -record(listener, {node, protocol, host, ip_address, port}).
 
--record(basic_message, {exchange_name, routing_key, content, guid,
+-record(basic_message, {exchange_name, routing_keys = [], content, id,
                         is_persistent}).
 
 -record(ssl_socket, {tcp, ssl}).
--record(delivery, {mandatory, immediate, txn, sender, message,
-                   msg_seq_no}).
+-record(delivery, {mandatory, immediate, sender, message, msg_seq_no}).
 -record(amqp_error, {name, explanation = "", method = none}).
 
 -record(event, {type, props, timestamp}).
@@ -79,7 +86,9 @@
 
 -define(HIBERNATE_AFTER_MIN,        1000).
 -define(DESIRED_HIBERNATE,         10000).
--define(STATS_INTERVAL,             5000).
+
+-define(ROUTING_HEADERS, [<<"CC">>, <<"BCC">>]).
+-define(DELETED_HEADER, <<"BCC">>).
 
 -ifdef(debug).
 -define(LOGDEBUG0(F), rabbit_log:debug(F)).
diff --git a/include/rabbit_auth_backend_spec.hrl b/include/rabbit_auth_backend_spec.hrl
index e26d44ea..803bb75c 100644
--- a/include/rabbit_auth_backend_spec.hrl
+++ b/include/rabbit_auth_backend_spec.hrl
@@ -22,8 +22,7 @@
                                  {'ok', rabbit_types:user()} |
                                  {'refused', string(), [any()]} |
                                  {'error', any()}).
--spec(check_vhost_access/3 :: (rabbit_types:user(), rabbit_types:vhost(),
-                               rabbit_access_control:vhost_permission_atom()) ->
+-spec(check_vhost_access/2 :: (rabbit_types:user(), rabbit_types:vhost()) ->
                                    boolean() | {'error', any()}).
 -spec(check_resource_access/3 :: (rabbit_types:user(),
                                   rabbit_types:r(atom()),
diff --git a/include/rabbit_auth_mechanism_spec.hrl b/include/rabbit_auth_mechanism_spec.hrl
index 49614d5f..614a3eed 100644
--- a/include/rabbit_auth_mechanism_spec.hrl
+++ b/include/rabbit_auth_mechanism_spec.hrl
@@ -17,6 +17,7 @@
 -ifdef(use_specs).
 
 -spec(description/0 :: () -> [{atom(), any()}]).
+-spec(should_offer/1 :: (rabbit_net:socket()) -> boolean()).
 -spec(init/1 :: (rabbit_net:socket()) -> any()).
 -spec(handle_response/2 :: (binary(), any()) ->
                                 {'ok', rabbit_types:user()} |
diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl
index accb2c0e..ee102f5e 100644
--- a/include/rabbit_backing_queue_spec.hrl
+++ b/include/rabbit_backing_queue_spec.hrl
@@ -25,43 +25,44 @@
 -type(message_properties_transformer() ::
         fun ((rabbit_types:message_properties())
              -> rabbit_types:message_properties())).
+-type(async_callback() :: fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')).
 
 -spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok').
 -spec(stop/0 :: () -> 'ok').
--spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) ->
-                     state()).
--spec(terminate/1 :: (state()) -> state()).
--spec(delete_and_terminate/1 :: (state()) -> state()).
+-spec(init/3 :: (rabbit_types:amqqueue(), attempt_recovery(),
+                 async_callback()) -> state()).
+-spec(terminate/2 :: (any(), state()) -> state()).
+-spec(delete_and_terminate/2 :: (any(), state()) -> state()).
 -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}).
--spec(publish/3 :: (rabbit_types:basic_message(),
-                    rabbit_types:message_properties(), state()) -> state()).
--spec(publish_delivered/4 :: (true, rabbit_types:basic_message(),
-                              rabbit_types:message_properties(), state())
+-spec(publish/4 :: (rabbit_types:basic_message(),
+                    rabbit_types:message_properties(), pid(), state()) ->
+                        state()).
+-spec(publish_delivered/5 :: (true, rabbit_types:basic_message(),
+                              rabbit_types:message_properties(), pid(), state())
                              -> {ack(), state()};
                              (false, rabbit_types:basic_message(),
-                              rabbit_types:message_properties(), state())
+                              rabbit_types:message_properties(), pid(), state())
                              -> {undefined, state()}).
+-spec(drain_confirmed/1 :: (state()) -> {[rabbit_guid:guid()], state()}).
 -spec(dropwhile/2 ::
         (fun ((rabbit_types:message_properties()) -> boolean()), state())
         -> state()).
 -spec(fetch/2 :: (true,  state()) -> {fetch_result(ack()), state()};
                  (false, state()) -> {fetch_result(undefined), state()}).
--spec(ack/2 :: ([ack()], state()) -> state()).
--spec(tx_publish/4 :: (rabbit_types:txn(), rabbit_types:basic_message(),
-                       rabbit_types:message_properties(), state()) -> state()).
--spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()).
--spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}).
--spec(tx_commit/4 ::
-        (rabbit_types:txn(), fun (() -> any()),
-         message_properties_transformer(), state()) -> {[ack()], state()}).
+-spec(ack/2 :: ([ack()], state()) -> {[rabbit_guid:guid()], state()}).
 -spec(requeue/3 :: ([ack()], message_properties_transformer(), state())
-                   -> state()).
+                   -> {[rabbit_guid:guid()], state()}).
 -spec(len/1 :: (state()) -> non_neg_integer()).
 -spec(is_empty/1 :: (state()) -> boolean()).
 -spec(set_ram_duration_target/2 ::
       (('undefined' | 'infinity' | number()), state()) -> state()).
 -spec(ram_duration/1 :: (state()) -> {number(), state()}).
--spec(needs_idle_timeout/1 :: (state()) -> boolean()).
--spec(idle_timeout/1 :: (state()) -> state()).
+-spec(needs_timeout/1 :: (state()) -> 'false' | 'timed' | 'idle').
+-spec(timeout/1 :: (state()) -> state()).
 -spec(handle_pre_hibernate/1 :: (state()) -> state()).
 -spec(status/1 :: (state()) -> [{atom(), any()}]).
+-spec(invoke/3 :: (atom(), fun ((atom(), A) -> A), state()) -> state()).
+-spec(is_duplicate/2 ::
+        (rabbit_types:basic_message(), state()) ->
+                             {'false'|'published'|'discarded', state()}).
+-spec(discard/3 :: (rabbit_types:basic_message(), pid(), state()) -> state()).
diff --git a/include/rabbit_exchange_type_spec.hrl b/include/rabbit_exchange_type_spec.hrl
index 45c475d8..f6283ef7 100644
--- a/include/rabbit_exchange_type_spec.hrl
+++ b/include/rabbit_exchange_type_spec.hrl
@@ -16,18 +16,20 @@
 
 -ifdef(use_specs).
 
+-type(tx() :: 'transaction' | 'none').
+-type(serial() :: pos_integer() | tx()).
+
 -spec(description/0 :: () -> [{atom(), any()}]).
+-spec(serialise_events/0 :: () -> boolean()).
 -spec(route/2 :: (rabbit_types:exchange(), rabbit_types:delivery())
                  -> rabbit_router:match_result()).
 -spec(validate/1 :: (rabbit_types:exchange()) -> 'ok').
--spec(create/2 :: (boolean(), rabbit_types:exchange()) -> 'ok').
--spec(recover/2 :: (rabbit_types:exchange(),
-                    [rabbit_types:binding()]) -> 'ok').
--spec(delete/3 :: (boolean(), rabbit_types:exchange(),
+-spec(create/2 :: (tx(), rabbit_types:exchange()) -> 'ok').
+-spec(delete/3 :: (tx(), rabbit_types:exchange(),
                    [rabbit_types:binding()]) -> 'ok').
--spec(add_binding/3 :: (boolean(), rabbit_types:exchange(),
+-spec(add_binding/3 :: (serial(), rabbit_types:exchange(),
                         rabbit_types:binding()) -> 'ok').
--spec(remove_bindings/3 :: (boolean(), rabbit_types:exchange(),
+-spec(remove_bindings/3 :: (serial(), rabbit_types:exchange(),
                             [rabbit_types:binding()]) -> 'ok').
 -spec(assert_args_equivalence/2 ::
         (rabbit_types:exchange(), rabbit_framing:amqp_table())
diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl
index 9d704f65..e9150a97 100644
--- a/include/rabbit_msg_store.hrl
+++ b/include/rabbit_msg_store.hrl
@@ -22,5 +22,4 @@
 
 -endif.
 
--record(msg_location,
-        {guid, ref_count, file, offset, total_size}).
+-record(msg_location, {msg_id, ref_count, file, offset, total_size}).
diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl
index 289f8f60..2ae5b000 100644
--- a/include/rabbit_msg_store_index.hrl
+++ b/include/rabbit_msg_store_index.hrl
@@ -29,13 +29,13 @@
 -spec(new/1 :: (dir()) -> index_state()).
 -spec(recover/1 :: (dir()) -> rabbit_types:ok_or_error2(index_state(), any())).
 -spec(lookup/2 ::
-        (rabbit_guid:guid(), index_state()) -> ('not_found' | keyvalue())).
+        (rabbit_types:msg_id(), index_state()) -> ('not_found' | keyvalue())).
 -spec(insert/2 :: (keyvalue(), index_state()) -> 'ok').
 -spec(update/2 :: (keyvalue(), index_state()) -> 'ok').
--spec(update_fields/3 :: (rabbit_guid:guid(), ({fieldpos(), fieldvalue()} |
-                                               [{fieldpos(), fieldvalue()}]),
+-spec(update_fields/3 :: (rabbit_types:msg_id(), ({fieldpos(), fieldvalue()} |
+                                                  [{fieldpos(), fieldvalue()}]),
                           index_state()) -> 'ok').
--spec(delete/2 :: (rabbit_guid:guid(), index_state()) -> 'ok').
+-spec(delete/2 :: (rabbit_types:msg_id(), index_state()) -> 'ok').
 -spec(delete_object/2 :: (keyvalue(), index_state()) -> 'ok').
 -spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok').
 -spec(terminate/1 :: (index_state()) -> any()).
diff --git a/packaging/RPMS/Fedora/Makefile b/packaging/RPMS/Fedora/Makefile
index 74a1800a..c67d8fd6 100644
--- a/packaging/RPMS/Fedora/Makefile
+++ b/packaging/RPMS/Fedora/Makefile
@@ -12,7 +12,7 @@ ifndef RPM_OS
 RPM_OS=fedora
 endif
 
-ifeq "x$(RPM_OS)" "xsuse"
+ifeq "$(RPM_OS)" "suse"
 REQUIRES=/sbin/chkconfig /sbin/service
 OS_DEFINES=--define '_initrddir /etc/init.d' --define 'dist .suse'
 else
@@ -31,9 +31,13 @@ prepare:
 
 	cp ${COMMON_DIR}/* SOURCES/
 	sed -i \
-	    -e 's|^DEFAULTS_FILE=.*$$|DEFAULTS_FILE=/etc/sysconfig/rabbitmq|' \
 	    -e 's|^LOCK_FILE=.*$$|LOCK_FILE=/var/lock/subsys/$$NAME|' \
 	    SOURCES/rabbitmq-server.init
+ifeq "$(RPM_OS)" "fedora"
+# Fedora says that only vital services should have Default-Start
+	sed -i -e '/^# Default-Start:/d;/^# Default-Stop:/d' \
+	    SOURCES/rabbitmq-server.init
+endif
 	sed -i -e 's|@SU_RABBITMQ_SH_C@|su rabbitmq -s /bin/sh -c|' \
 	    SOURCES/rabbitmq-script-wrapper
 	cp rabbitmq-server.logrotate SOURCES/rabbitmq-server.logrotate
@@ -41,5 +45,5 @@ prepare:
 server: prepare
 	rpmbuild -ba --nodeps SPECS/rabbitmq-server.spec $(DEFINES) $(OS_DEFINES)
 
-clean:	
+clean:
 	rm -rf SOURCES SPECS RPMS SRPMS BUILD tmp
diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec
index 47316864..ffc826eb 100644
--- a/packaging/RPMS/Fedora/rabbitmq-server.spec
+++ b/packaging/RPMS/Fedora/rabbitmq-server.spec
@@ -55,7 +55,6 @@ mkdir -p %{buildroot}%{_localstatedir}/log/rabbitmq
 install -p -D -m 0755 %{S:1} %{buildroot}%{_initrddir}/rabbitmq-server
 install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmqctl
 install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmq-server
-install -p -D -m 0755 %{_rabbit_wrapper} %{buildroot}%{_sbindir}/rabbitmq-multi
 install -p -D -m 0755 %{_rabbit_server_ocf} %{buildroot}%{_exec_prefix}/lib/ocf/resource.d/rabbitmq/rabbitmq-server
 
 install -p -D -m 0644 %{S:3} %{buildroot}%{_sysconfdir}/logrotate.d/rabbitmq-server
@@ -65,12 +64,8 @@ mkdir -p %{buildroot}%{_sysconfdir}/rabbitmq
 rm %{_maindir}/LICENSE %{_maindir}/LICENSE-MPL-RabbitMQ %{_maindir}/INSTALL
 
 #Build the list of files
-rm -f %{_builddir}/%{name}.files
-echo '%defattr(-,root,root, -)' >> %{_builddir}/%{name}.files 
-(cd %{buildroot}; \
-    find . -type f ! -regex '\.%{_sysconfdir}.*' \
-        ! -regex '\.\(%{_rabbit_erllibdir}\|%{_rabbit_libdir}\).*' \
-        | sed -e 's/^\.//' >> %{_builddir}/%{name}.files)
+echo '%defattr(-,root,root, -)' >%{_builddir}/%{name}.files
+find %{buildroot} -path %{buildroot}%{_sysconfdir} -prune -o '!' -type d -printf "/%%P\n" >>%{_builddir}/%{name}.files
 
 %pre
 
@@ -92,6 +87,9 @@ fi
 
 %post
 /sbin/chkconfig --add %{name}
+if [ -f %{_sysconfdir}/rabbitmq/rabbitmq.conf ] && [ ! -f %{_sysconfdir}/rabbitmq/rabbitmq-env.conf ]; then
+    mv %{_sysconfdir}/rabbitmq/rabbitmq.conf %{_sysconfdir}/rabbitmq/rabbitmq-env.conf
+fi
 
 %preun
 if [ $1 = 0 ]; then
@@ -114,8 +112,6 @@ done
 %attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/lib/rabbitmq
 %attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/log/rabbitmq
 %dir %{_sysconfdir}/rabbitmq
-%{_rabbit_erllibdir}
-%{_rabbit_libdir}
 %{_initrddir}/rabbitmq-server
 %config(noreplace) %{_sysconfdir}/logrotate.d/rabbitmq-server
 %doc LICENSE LICENSE-MPL-RabbitMQ
@@ -124,6 +120,15 @@ done
 rm -rf %{buildroot}
 
 %changelog
+* Thu Jun 9 2011 jerryk@vmware.com 2.5.0-1
+- New Upstream Release
+
+* Thu Apr 7 2011 Alexandru Scvortov <alexandru@rabbitmq.com> 2.4.1-1
+- New Upstream Release
+
+* Tue Mar 22 2011 Alexandru Scvortov <alexandru@rabbitmq.com> 2.4.0-1
+- New Upstream Release
+
 * Thu Feb 3 2011 simon@rabbitmq.com 2.3.1-1
 - New Upstream Release
 
diff --git a/packaging/common/rabbitmq-server.init b/packaging/common/rabbitmq-server.init
index 39d23983..d8a7a94d 100644
--- a/packaging/common/rabbitmq-server.init
+++ b/packaging/common/rabbitmq-server.init
@@ -10,97 +10,110 @@
 # Provides:          rabbitmq-server
 # Required-Start:    $remote_fs $network
 # Required-Stop:     $remote_fs $network
-# Default-Start:
-# Default-Stop:
+# Default-Start:     3 4 5
+# Default-Stop:      0 1 2 6
 # Description:       RabbitMQ broker
 # Short-Description: Enable AMQP service provided by RabbitMQ broker
 ### END INIT INFO
 
 PATH=/sbin:/usr/sbin:/bin:/usr/bin
-DAEMON=/usr/sbin/rabbitmq-multi
 NAME=rabbitmq-server
+DAEMON=/usr/sbin/${NAME}
+CONTROL=/usr/sbin/rabbitmqctl
 DESC=rabbitmq-server
 USER=rabbitmq
-NODE_COUNT=1
 ROTATE_SUFFIX=
 INIT_LOG_DIR=/var/log/rabbitmq
 
-DEFAULTS_FILE= # This is filled in when building packages
 LOCK_FILE= # This is filled in when building packages
 
 test -x $DAEMON || exit 0
-
-# Include rabbitmq defaults if available
-if [ -f "$DEFAULTS_FILE" ] ; then
-	. $DEFAULTS_FILE
-fi
+test -x $CONTROL || exit 0
 
 RETVAL=0
 set -e
 
 start_rabbitmq () {
-    set +e
-    $DAEMON start_all ${NODE_COUNT} > ${INIT_LOG_DIR}/startup_log 2> ${INIT_LOG_DIR}/startup_err
-    case "$?" in
-      0)
-        echo SUCCESS
-        [ -n "$LOCK_FILE" ] && touch $LOCK_FILE
+    status_rabbitmq quiet
+    if [ $RETVAL = 0 ] ; then
+        echo RabbitMQ is currently running
+    else
         RETVAL=0
-        ;;
-      1)
-        echo TIMEOUT - check ${INIT_LOG_DIR}/startup_\{log,err\}
-        RETVAL=1
-        ;;
-      *)
-        echo FAILED - check ${INIT_LOG_DIR}/startup_log, _err
-        RETVAL=1
-        ;;
-    esac
-    set -e
+        set +e
+        setsid sh -c "$DAEMON > ${INIT_LOG_DIR}/startup_log \
+            2> ${INIT_LOG_DIR}/startup_err" &
+        $CONTROL wait >/dev/null 2>&1
+        RETVAL=$?
+        set -e
+        case "$RETVAL" in
+            0)
+                echo SUCCESS
+                if [ -n "$LOCK_FILE" ] ; then
+                    touch $LOCK_FILE
+                fi
+                ;;
+            *)
+                echo FAILED - check ${INIT_LOG_DIR}/startup_\{log,err\}
+                RETVAL=1  # collapse every non-zero wait status into a generic failure
+                ;;
+        esac
+    fi
 }
 
 stop_rabbitmq () {
-    set +e
     status_rabbitmq quiet
     if [ $RETVAL = 0 ] ; then
-        $DAEMON stop_all > ${INIT_LOG_DIR}/shutdown_log 2> ${INIT_LOG_DIR}/shutdown_err
+        set +e
+        $CONTROL stop > ${INIT_LOG_DIR}/shutdown_log 2> ${INIT_LOG_DIR}/shutdown_err
         RETVAL=$?
+        set -e
         if [ $RETVAL = 0 ] ; then
-            [ -n "$LOCK_FILE" ] && rm -rf $LOCK_FILE
+            if [ -n "$LOCK_FILE" ] ; then
+                rm -f $LOCK_FILE
+            fi
         else
             echo FAILED - check ${INIT_LOG_DIR}/shutdown_log, _err
         fi
     else
-        echo No nodes running 
+        echo RabbitMQ is not running
         RETVAL=0
     fi
-    set -e
 }
 
 status_rabbitmq() {
     set +e
     if [ "$1" != "quiet" ] ; then
-        $DAEMON status 2>&1
+        $CONTROL status 2>&1
     else
-        $DAEMON status > /dev/null 2>&1
+        $CONTROL status > /dev/null 2>&1
     fi
     if [ $? != 0 ] ; then
-        RETVAL=1
+        RETVAL=3
     fi
     set -e
 }
 
 rotate_logs_rabbitmq() {
     set +e
-    $DAEMON rotate_logs ${ROTATE_SUFFIX}
+    $CONTROL rotate_logs ${ROTATE_SUFFIX}
     if [ $? != 0 ] ; then
         RETVAL=1
     fi
     set -e
 }
 
+restart_running_rabbitmq () {
+    status_rabbitmq quiet  # sets RETVAL=3 when rabbitmqctl status fails
+    if [ $RETVAL = 0 ] ; then
+        restart_rabbitmq
+    else
+        echo RabbitMQ is not running
+        RETVAL=0  # a stopped broker is not an error for a conditional restart
+    fi
+}
+
 restart_rabbitmq() {
-    stop_rabbitmq	    
+    stop_rabbitmq
     start_rabbitmq
 }
 
@@ -122,11 +135,16 @@ case "$1" in
         echo -n "Rotating log files for $DESC: "
         rotate_logs_rabbitmq
         ;;
-    force-reload|reload|restart|condrestart|try-restart)
+    force-reload|reload|restart)
         echo -n "Restarting $DESC: "
         restart_rabbitmq
         echo "$NAME."
         ;;
+    condrestart|try-restart)  # conditional restart: only acts when the broker is running
+        echo -n "Restarting $DESC: "
+        restart_running_rabbitmq
+        echo "$NAME."
+        ;;
     *)
         echo "Usage: $0 {start|stop|status|rotate-logs|restart|condrestart|try-restart|reload|force-reload}" >&2
         RETVAL=1
diff --git a/packaging/common/rabbitmq-server.ocf b/packaging/common/rabbitmq-server.ocf
index dc0521dd..d58c48ed 100755
--- a/packaging/common/rabbitmq-server.ocf
+++ b/packaging/common/rabbitmq-server.ocf
@@ -20,7 +20,7 @@
 ##
 
 ## OCF instance parameters
-##   OCF_RESKEY_multi
+##   OCF_RESKEY_server
 ##   OCF_RESKEY_ctl
 ##   OCF_RESKEY_nodename
 ##   OCF_RESKEY_ip
@@ -38,11 +38,11 @@
 
 #######################################################################
 
-OCF_RESKEY_multi_default="/usr/sbin/rabbitmq-multi"
+OCF_RESKEY_server_default="/usr/sbin/rabbitmq-server"
 OCF_RESKEY_ctl_default="/usr/sbin/rabbitmqctl"
 OCF_RESKEY_nodename_default="rabbit@localhost"
 OCF_RESKEY_log_base_default="/var/log/rabbitmq"
-: ${OCF_RESKEY_multi=${OCF_RESKEY_multi_default}}
+: ${OCF_RESKEY_server=${OCF_RESKEY_server_default}}
 : ${OCF_RESKEY_ctl=${OCF_RESKEY_ctl_default}}
 : ${OCF_RESKEY_nodename=${OCF_RESKEY_nodename_default}}
 : ${OCF_RESKEY_log_base=${OCF_RESKEY_log_base_default}}
@@ -61,12 +61,12 @@ Resource agent for RabbitMQ-server
 <shortdesc lang="en">Resource agent for RabbitMQ-server</shortdesc>
 
 <parameters>
-<parameter name="multi" unique="0" required="0">
+<parameter name="server" unique="0" required="0">
 <longdesc lang="en">
-The path to the rabbitmq-multi script
+The path to the rabbitmq-server script
 </longdesc>
-<shortdesc lang="en">Path to rabbitmq-multi</shortdesc>
-<content type="string" default="${OCF_RESKEY_multi_default}" />
+<shortdesc lang="en">Path to rabbitmq-server</shortdesc>
+<content type="string" default="${OCF_RESKEY_server_default}" />
 </parameter>
 
 <parameter name="ctl" unique="0" required="0">
@@ -103,9 +103,9 @@ The IP Port for rabbitmq-server to listen on
 
 <parameter name="config_file" unique="0" required="0">
 <longdesc lang="en">
-Location of the config file
+Location of the config file (without the .config suffix)
 </longdesc>
-<shortdesc lang="en">Config file path</shortdesc>
+<shortdesc lang="en">Config file path (without the .config suffix)</shortdesc>
 <content type="string" default="" />
 </parameter>
 
@@ -155,7 +155,7 @@ Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
-RABBITMQ_MULTI=$OCF_RESKEY_multi
+RABBITMQ_SERVER=$OCF_RESKEY_server
 RABBITMQ_CTL=$OCF_RESKEY_ctl
 RABBITMQ_NODENAME=$OCF_RESKEY_nodename
 RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip
@@ -177,8 +177,8 @@ export_vars() {
 }
 
 rabbit_validate_partial() {
-    if [ ! -x $RABBITMQ_MULTI ]; then
-        ocf_log err "rabbitmq-server multi $RABBITMQ_MULTI does not exist or is not executable";
+    if [ ! -x $RABBITMQ_SERVER ]; then
+        ocf_log err "rabbitmq-server server $RABBITMQ_SERVER does not exist or is not executable";
         exit $OCF_ERR_INSTALLED;
     fi
 
@@ -189,8 +189,8 @@ rabbit_validate_partial() {
 }
 
 rabbit_validate_full() {
-    if [ ! -z $RABBITMQ_CONFIG_FILE ] && [ ! -e $RABBITMQ_CONFIG_FILE ]; then
-        ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file";
+    if [ -n "$RABBITMQ_CONFIG_FILE" ] && [ ! -e "${RABBITMQ_CONFIG_FILE}.config" ]; then
+        ocf_log err "rabbitmq-server config_file ${RABBITMQ_CONFIG_FILE}.config does not exist or is not a file";
         exit $OCF_ERR_INSTALLED;
     fi
 
@@ -210,8 +210,18 @@ rabbit_validate_full() {
 }
 
 rabbit_status() {
+    rabbitmqctl_action "status"
+}
+
+rabbit_wait() {
+    rabbitmqctl_action "wait"
+}
+
+rabbitmqctl_action() {
     local rc
-    $RABBITMQ_CTL $NODENAME_ARG status > /dev/null 2> /dev/null
+    local action
+    action=$1
+    $RABBITMQ_CTL $NODENAME_ARG $action > /dev/null 2> /dev/null
     rc=$?
     case "$rc" in
         0)
@@ -223,7 +233,7 @@ rabbit_status() {
             return $OCF_NOT_RUNNING
             ;;
         *)
-            ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG status: $rc"
+            ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG $action: $rc"
             exit $OCF_ERR_GENERIC
     esac
 }
@@ -238,28 +248,16 @@ rabbit_start() {
 
     export_vars
 
-    $RABBITMQ_MULTI start_all 1 > ${RABBITMQ_LOG_BASE}/startup_log 2> ${RABBITMQ_LOG_BASE}/startup_err &
-    rc=$?
-
-    if [ "$rc" != 0 ]; then
-        ocf_log err "rabbitmq-server start command failed: $RABBITMQ_MULTI start_all 1, $rc"
-        return $rc
-    fi
+    setsid sh -c "$RABBITMQ_SERVER > ${RABBITMQ_LOG_BASE}/startup_log 2> ${RABBITMQ_LOG_BASE}/startup_err" &
 
-    # Spin waiting for the server to come up.
+    # Wait for the server to come up.
     # Let the CRM/LRM time us out if required
-    start_wait=1
-    while [ $start_wait = 1 ]; do
-        rabbit_status
-        rc=$?
-        if [ "$rc" = $OCF_SUCCESS ]; then
-            start_wait=0
-        elif [ "$rc" != $OCF_NOT_RUNNING ]; then
-            ocf_log info "rabbitmq-server start failed: $rc"
-            exit $OCF_ERR_GENERIC
-        fi
-        sleep 1
-    done
+    rabbit_wait  # blocks on "rabbitmqctl wait"; maps its exit status to an OCF code
+    rc=$?
+    if [ "$rc" != $OCF_SUCCESS ]; then
+        ocf_log err "rabbitmq-server start failed: $rc"
+        exit $OCF_ERR_GENERIC
+    fi
 
     return $OCF_SUCCESS
 }
@@ -272,11 +270,11 @@ rabbit_stop() {
         return $OCF_SUCCESS
     fi
 
-    $RABBITMQ_MULTI stop_all &
+    $RABBITMQ_CTL stop
     rc=$?
 
     if [ "$rc" != 0 ]; then
-        ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_MULTI stop_all, $rc"
+        ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_CTL stop, $rc"
         return $rc
     fi
 
diff --git a/packaging/debs/Debian/Makefile b/packaging/debs/Debian/Makefile
index ab05f732..38c81134 100644
--- a/packaging/debs/Debian/Makefile
+++ b/packaging/debs/Debian/Makefile
@@ -19,12 +19,15 @@ all:
 
 package: clean
 	cp $(TARBALL_DIR)/$(TARBALL) $(DEBIAN_ORIG_TARBALL)
-	tar -zxvf $(DEBIAN_ORIG_TARBALL)
+	tar -zxf $(DEBIAN_ORIG_TARBALL)
 	cp -r debian $(UNPACKED_DIR)
 	cp $(COMMON_DIR)/* $(UNPACKED_DIR)/debian/
+# Debian and descendants differ from most other distros in that
+# runlevel 2 should start network services.
 	sed -i \
-	    -e 's|^DEFAULTS_FILE=.*$$|DEFAULTS_FILE=/etc/default/rabbitmq|' \
 	    -e 's|^LOCK_FILE=.*$$|LOCK_FILE=|' \
+	    -e 's|^\(# Default-Start:\).*$$|\1 2 3 4 5|' \
+	    -e 's|^\(# Default-Stop:\).*$$|\1 0 1 6|' \
 	    $(UNPACKED_DIR)/debian/rabbitmq-server.init
 	sed -i -e 's|@SU_RABBITMQ_SH_C@|su rabbitmq -s /bin/sh -c|' \
 	    $(UNPACKED_DIR)/debian/rabbitmq-script-wrapper
diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog
index 12165dc0..1cab4235 100644
--- a/packaging/debs/Debian/debian/changelog
+++ b/packaging/debs/Debian/debian/changelog
@@ -1,3 +1,21 @@
+rabbitmq-server (2.5.0-1) lucid; urgency=low
+
+  * New Upstream Release
+
+ --  <jerryk@vmware.com>  Thu, 09 Jun 2011 07:20:29 -0700
+
+rabbitmq-server (2.4.1-1) lucid; urgency=low
+
+  * New Upstream Release
+
+ -- Alexandru Scvortov <alexandru@rabbitmq.com>  Thu, 07 Apr 2011 16:49:22 +0100
+
+rabbitmq-server (2.4.0-1) lucid; urgency=low
+
+  * New Upstream Release
+
+ -- Alexandru Scvortov <alexandru@rabbitmq.com>  Tue, 22 Mar 2011 17:34:31 +0000
+
 rabbitmq-server (2.3.1-1) lucid; urgency=low
 
   * New Upstream Release
diff --git a/packaging/debs/Debian/debian/control b/packaging/debs/Debian/debian/control
index 02da0cc6..45f5c5c4 100644
--- a/packaging/debs/Debian/debian/control
+++ b/packaging/debs/Debian/debian/control
@@ -7,10 +7,7 @@ Standards-Version: 3.8.0
 
 Package: rabbitmq-server
 Architecture: all
-# erlang-inets is not a strict dependency, but it's needed to allow
-# the installation of plugins that use mochiweb. Ideally it would be a
-# "Recommends" instead, but gdebi does not install those.
-Depends: erlang-base (>= 1:12.b.3) | erlang-base-hipe (>= 1:12.b.3), erlang-ssl | erlang-nox (<< 1:13.b-dfsg1-1), erlang-os-mon | erlang-nox (<< 1:13.b-dfsg1-1), erlang-mnesia | erlang-nox (<< 1:13.b-dfsg1-1), erlang-inets | erlang-nox (<< 1:13.b-dfsg1-1), adduser, logrotate, ${misc:Depends}
+Depends: erlang-nox (>= 1:12.b.3), adduser, logrotate, ${misc:Depends}
 Description: An AMQP server written in Erlang
  RabbitMQ is an implementation of AMQP, the emerging standard for high
  performance enterprise messaging. The RabbitMQ server is a robust and
diff --git a/packaging/debs/Debian/debian/postinst b/packaging/debs/Debian/debian/postinst
index 134f16ee..b11340ef 100644
--- a/packaging/debs/Debian/debian/postinst
+++ b/packaging/debs/Debian/debian/postinst
@@ -35,6 +35,10 @@ chown -R rabbitmq:rabbitmq /var/log/rabbitmq
 
 case "$1" in
     configure)
+        if [ -f /etc/rabbitmq/rabbitmq.conf ] && \
+           [ ! -f /etc/rabbitmq/rabbitmq-env.conf ]; then
+            mv /etc/rabbitmq/rabbitmq.conf /etc/rabbitmq/rabbitmq-env.conf
+        fi
     ;;
 
     abort-upgrade|abort-remove|abort-deconfigure)
diff --git a/packaging/debs/Debian/debian/rules b/packaging/debs/Debian/debian/rules
index 6b6df33b..a785b292 100644
--- a/packaging/debs/Debian/debian/rules
+++ b/packaging/debs/Debian/debian/rules
@@ -14,7 +14,7 @@ DOCDIR=$(DEB_DESTDIR)usr/share/doc/rabbitmq-server/
 install/rabbitmq-server::
 	mkdir -p $(DOCDIR)
 	rm $(RABBIT_LIB)LICENSE* $(RABBIT_LIB)INSTALL*
-	for script in rabbitmqctl rabbitmq-server rabbitmq-multi; do \
+	for script in rabbitmqctl rabbitmq-server; do \
 		install -p -D -m 0755 debian/rabbitmq-script-wrapper $(DEB_DESTDIR)usr/sbin/$$script; \
 	done
 	sed -e 's|@RABBIT_LIB@|/usr/lib/rabbitmq/lib/rabbitmq_server-$(DEB_UPSTREAM_VERSION)|g' <debian/postrm.in >debian/postrm
diff --git a/packaging/generic-unix/Makefile b/packaging/generic-unix/Makefile
index c4e01f4a..b5c342aa 100644
--- a/packaging/generic-unix/Makefile
+++ b/packaging/generic-unix/Makefile
@@ -4,7 +4,7 @@ TARGET_DIR=rabbitmq_server-$(VERSION)
 TARGET_TARBALL=rabbitmq-server-generic-unix-$(VERSION)
 
 dist:
-	tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz
+	tar -zxf ../../dist/$(SOURCE_DIR).tar.gz
 
 	$(MAKE) -C $(SOURCE_DIR) \
 		TARGET_DIR=`pwd`/$(TARGET_DIR) \
diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in
index f8417b83..4a866305 100644
--- a/packaging/macports/Portfile.in
+++ b/packaging/macports/Portfile.in
@@ -5,7 +5,7 @@ PortSystem 1.0
 name		rabbitmq-server
 version		@VERSION@
 categories	net 
-maintainers	paperplanes.de:meyer rabbitmq.com:tonyg openmaintainer
+maintainers	paperplanes.de:meyer openmaintainer
 platforms	darwin
 supported_archs	noarch
 
@@ -23,12 +23,12 @@ distfiles      ${name}-${version}${extract.suffix} \
                ${name}-generic-unix-${version}${extract.suffix}
 
 checksums \
-    ${name}-${version}${extract.suffix} md5 @md5-src@ \
-    ${name}-${version}${extract.suffix} sha1 @sha1-src@ \
-    ${name}-${version}${extract.suffix} rmd160 @rmd160-src@ \
-    ${name}-generic-unix-${version}${extract.suffix} md5 @md5-bin@ \
-    ${name}-generic-unix-${version}${extract.suffix} sha1 @sha1-bin@ \
-    ${name}-generic-unix-${version}${extract.suffix} rmd160 @rmd160-bin@
+    ${name}-${version}${extract.suffix} \
+        sha1 @sha1-src@ \
+        rmd160 @rmd160-src@ \
+    ${name}-generic-unix-${version}${extract.suffix} \
+        sha1 @sha1-bin@ \
+        rmd160 @rmd160-bin@
 
 depends_lib	port:erlang
 depends_build	port:libxslt
@@ -81,31 +81,28 @@ post-destroot {
     xinstall -d -g [existsgroup ${servergroup}] -m 775 ${destroot}${serverhome}
     xinstall -d -g [existsgroup ${servergroup}] -m 775 ${destroot}${mnesiadbdir}
 
-    reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \
+    reinplace -E "s:(/etc/rabbitmq/rabbitmq):${prefix}\\1:g" \
         ${realsbin}/rabbitmq-env
-    foreach var {CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} {
+    foreach var {CONFIG_FILE LOG_BASE MNESIA_BASE} {
         reinplace -E "s:^($var)=/:\\1=${prefix}/:" \
-            ${realsbin}/rabbitmq-multi \
             ${realsbin}/rabbitmq-server \
             ${realsbin}/rabbitmqctl
     }
 
     xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \
-                ${wrappersbin}/rabbitmq-multi
+                ${wrappersbin}/rabbitmq-server
 
     reinplace -E "s:MACPORTS_PREFIX/bin:${prefix}/bin:" \
-                ${wrappersbin}/rabbitmq-multi
+                ${wrappersbin}/rabbitmq-server
     reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \
-                ${wrappersbin}/rabbitmq-multi
+                ${wrappersbin}/rabbitmq-server
     reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \
-                ${wrappersbin}/rabbitmq-multi
-    file copy ${wrappersbin}/rabbitmq-multi ${wrappersbin}/rabbitmq-server
-    file copy ${wrappersbin}/rabbitmq-multi ${wrappersbin}/rabbitmqctl
-
-    file copy ${mansrc}/man1/rabbitmq-multi.1.gz  ${mandest}/man1/
-    file copy ${mansrc}/man1/rabbitmq-server.1.gz ${mandest}/man1/
-    file copy ${mansrc}/man1/rabbitmqctl.1.gz     ${mandest}/man1/
-    file copy ${mansrc}/man5/rabbitmq.conf.5.gz   ${mandest}/man5/
+                ${wrappersbin}/rabbitmq-server
+    file copy ${wrappersbin}/rabbitmq-server ${wrappersbin}/rabbitmqctl
+
+    xinstall -m 644 -W ${mansrc}/man1 rabbitmq-server.1.gz rabbitmqctl.1.gz \
+        ${mandest}/man1/
+    xinstall -m 644 -W ${mansrc}/man5 rabbitmq-env.conf.5.gz ${mandest}/man5/
 }
 
 pre-install {
diff --git a/packaging/macports/make-checksums.sh b/packaging/macports/make-checksums.sh
index 11424dfc..891de6ba 100755
--- a/packaging/macports/make-checksums.sh
+++ b/packaging/macports/make-checksums.sh
@@ -6,7 +6,7 @@ for type in src bin
 do
     tarball_var=tarball_${type}
     tarball=${!tarball_var}
-    for algo in md5 sha1 rmd160
+    for algo in sha1 rmd160
     do
         checksum=$(openssl $algo ${tarball} | awk '{print $NF}')
         echo "s|@$algo-$type@|$checksum|g"
diff --git a/packaging/windows-exe/Makefile b/packaging/windows-exe/Makefile
index 59803f9c..ab50e30b 100644
--- a/packaging/windows-exe/Makefile
+++ b/packaging/windows-exe/Makefile
@@ -2,7 +2,7 @@ VERSION=0.0.0
 ZIP=../windows/rabbitmq-server-windows-$(VERSION)
 
 dist: rabbitmq-$(VERSION).nsi rabbitmq_server-$(VERSION)
-	makensis rabbitmq-$(VERSION).nsi
+	makensis -V2 rabbitmq-$(VERSION).nsi
 
 rabbitmq-$(VERSION).nsi: rabbitmq_nsi.in
 	sed \
@@ -10,7 +10,7 @@ rabbitmq-$(VERSION).nsi: rabbitmq_nsi.in
 	    $< > $@
 
 rabbitmq_server-$(VERSION):
-	unzip $(ZIP)
+	unzip -q $(ZIP)
 
 clean:
 	rm -rf rabbitmq-*.nsi rabbitmq_server-* rabbitmq-server-*.exe
diff --git a/packaging/windows-exe/lib/EnvVarUpdate.nsh b/packaging/windows-exe/lib/EnvVarUpdate.nsh
deleted file mode 100644
index 839d6a02..00000000
--- a/packaging/windows-exe/lib/EnvVarUpdate.nsh
+++ /dev/null
@@ -1,327 +0,0 @@
-/**
- *  EnvVarUpdate.nsh
- *    : Environmental Variables: append, prepend, and remove entries
- *
- *     WARNING: If you use StrFunc.nsh header then include it before this file
- *              with all required definitions. This is to avoid conflicts
- *
- *  Usage:
- *    ${EnvVarUpdate} "ResultVar" "EnvVarName" "Action" "RegLoc" "PathString"
- *
- *  Credits:
- *  Version 1.0 
- *  * Cal Turney (turnec2)
- *  * Amir Szekely (KiCHiK) and e-circ for developing the forerunners of this
- *    function: AddToPath, un.RemoveFromPath, AddToEnvVar, un.RemoveFromEnvVar,
- *    WriteEnvStr, and un.DeleteEnvStr
- *  * Diego Pedroso (deguix) for StrTok
- *  * Kevin English (kenglish_hi) for StrContains
- *  * Hendri Adriaens (Smile2Me), Diego Pedroso (deguix), and Dan Fuhry  
- *    (dandaman32) for StrReplace
- *
- *  Version 1.1 (compatibility with StrFunc.nsh)
- *  * techtonik
- *
- *  http://nsis.sourceforge.net/Environmental_Variables:_append%2C_prepend%2C_and_remove_entries
- *
- */
-
-
-!ifndef ENVVARUPDATE_FUNCTION
-!define ENVVARUPDATE_FUNCTION
-!verbose push
-!verbose 3
-!include "LogicLib.nsh"
-!include "WinMessages.NSH"
-!include "StrFunc.nsh"
-
-; ---- Fix for conflict if StrFunc.nsh is already includes in main file -----------------------
-!macro _IncludeStrFunction StrFuncName
-  !ifndef ${StrFuncName}_INCLUDED
-    ${${StrFuncName}}
-  !endif
-  !ifndef Un${StrFuncName}_INCLUDED
-    ${Un${StrFuncName}}
-  !endif
-  !define un.${StrFuncName} "${Un${StrFuncName}}"
-!macroend
-
-!insertmacro _IncludeStrFunction StrTok
-!insertmacro _IncludeStrFunction StrStr
-!insertmacro _IncludeStrFunction StrRep
-
-; ---------------------------------- Macro Definitions ----------------------------------------
-!macro _EnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString
-  Push "${EnvVarName}"
-  Push "${Action}"
-  Push "${RegLoc}"
-  Push "${PathString}"
-    Call EnvVarUpdate
-  Pop "${ResultVar}"
-!macroend
-!define EnvVarUpdate '!insertmacro "_EnvVarUpdateConstructor"'
- 
-!macro _unEnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString
-  Push "${EnvVarName}"
-  Push "${Action}"
-  Push "${RegLoc}"
-  Push "${PathString}"
-    Call un.EnvVarUpdate
-  Pop "${ResultVar}"
-!macroend
-!define un.EnvVarUpdate '!insertmacro "_unEnvVarUpdateConstructor"'
-; ---------------------------------- Macro Definitions end-------------------------------------
- 
-;----------------------------------- EnvVarUpdate start----------------------------------------
-!define hklm_all_users     'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"'
-!define hkcu_current_user  'HKCU "Environment"'
- 
-!macro EnvVarUpdate UN
- 
-Function ${UN}EnvVarUpdate
- 
-  Push $0
-  Exch 4
-  Exch $1
-  Exch 3
-  Exch $2
-  Exch 2
-  Exch $3
-  Exch
-  Exch $4
-  Push $5
-  Push $6
-  Push $7
-  Push $8
-  Push $9
-  Push $R0
- 
-  /* After this point:
-  -------------------------
-     $0 = ResultVar     (returned)
-     $1 = EnvVarName    (input)
-     $2 = Action        (input)
-     $3 = RegLoc        (input)
-     $4 = PathString    (input)
-     $5 = Orig EnvVar   (read from registry)
-     $6 = Len of $0     (temp)
-     $7 = tempstr1      (temp)
-     $8 = Entry counter (temp)
-     $9 = tempstr2      (temp)
-     $R0 = tempChar     (temp)  */
- 
-  ; Step 1:  Read contents of EnvVarName from RegLoc
-  ;
-  ; Check for empty EnvVarName
-  ${If} $1 == ""
-    SetErrors
-    DetailPrint "ERROR: EnvVarName is blank"
-    Goto EnvVarUpdate_Restore_Vars
-  ${EndIf}
- 
-  ; Check for valid Action
-  ${If}    $2 != "A"
-  ${AndIf} $2 != "P"
-  ${AndIf} $2 != "R"
-    SetErrors
-    DetailPrint "ERROR: Invalid Action - must be A, P, or R"
-    Goto EnvVarUpdate_Restore_Vars
-  ${EndIf}
- 
-  ${If} $3 == HKLM
-    ReadRegStr $5 ${hklm_all_users} $1     ; Get EnvVarName from all users into $5
-  ${ElseIf} $3 == HKCU
-    ReadRegStr $5 ${hkcu_current_user} $1  ; Read EnvVarName from current user into $5
-  ${Else}
-    SetErrors
-    DetailPrint 'ERROR: Action is [$3] but must be "HKLM" or HKCU"'
-    Goto EnvVarUpdate_Restore_Vars
-  ${EndIf}
- 
-  ; Check for empty PathString
-  ${If} $4 == ""
-    SetErrors
-    DetailPrint "ERROR: PathString is blank"
-    Goto EnvVarUpdate_Restore_Vars
-  ${EndIf}
- 
-  ; Make sure we've got some work to do
-  ${If} $5 == ""
-  ${AndIf} $2 == "R"
-    SetErrors
-    DetailPrint "$1 is empty - Nothing to remove"
-    Goto EnvVarUpdate_Restore_Vars
-  ${EndIf}
- 
-  ; Step 2: Scrub EnvVar
-  ;
-  StrCpy $0 $5                             ; Copy the contents to $0
-  ; Remove spaces around semicolons (NOTE: spaces before the 1st entry or
-  ; after the last one are not removed here but instead in Step 3)
-  ${If} $0 != ""                           ; If EnvVar is not empty ...
-    ${Do}
-      ${${UN}StrStr} $7 $0 " ;"
-      ${If} $7 == ""
-        ${ExitDo}
-      ${EndIf}
-      ${${UN}StrRep} $0  $0 " ;" ";"         ; Remove '<space>;'
-    ${Loop}
-    ${Do}
-      ${${UN}StrStr} $7 $0 "; "
-      ${If} $7 == ""
-        ${ExitDo}
-      ${EndIf}
-      ${${UN}StrRep} $0  $0 "; " ";"         ; Remove ';<space>'
-    ${Loop}
-    ${Do}
-      ${${UN}StrStr} $7 $0 ";;" 
-      ${If} $7 == ""
-        ${ExitDo}
-      ${EndIf}
-      ${${UN}StrRep} $0  $0 ";;" ";"
-    ${Loop}
- 
-    ; Remove a leading or trailing semicolon from EnvVar
-    StrCpy  $7  $0 1 0
-    ${If} $7 == ";"
-      StrCpy $0  $0 "" 1                   ; Change ';<EnvVar>' to '<EnvVar>'
-    ${EndIf}
-    StrLen $6 $0
-    IntOp $6 $6 - 1
-    StrCpy $7  $0 1 $6
-    ${If} $7 == ";"
-     StrCpy $0  $0 $6                      ; Change ';<EnvVar>' to '<EnvVar>'
-    ${EndIf}
-    ; DetailPrint "Scrubbed $1: [$0]"      ; Uncomment to debug
-  ${EndIf}
- 
-  /* Step 3. Remove all instances of the target path/string (even if "A" or "P")
-     $6 = bool flag (1 = found and removed PathString)
-     $7 = a string (e.g. path) delimited by semicolon(s)
-     $8 = entry counter starting at 0
-     $9 = copy of $0
-     $R0 = tempChar      */
- 
-  ${If} $5 != ""                           ; If EnvVar is not empty ...
-    StrCpy $9 $0
-    StrCpy $0 ""
-    StrCpy $8 0
-    StrCpy $6 0
- 
-    ${Do}
-      ${${UN}StrTok} $7 $9 ";" $8 "0"      ; $7 = next entry, $8 = entry counter
- 
-      ${If} $7 == ""                       ; If we've run out of entries,
-        ${ExitDo}                          ;    were done
-      ${EndIf}                             ;
- 
-      ; Remove leading and trailing spaces from this entry (critical step for Action=Remove)
-      ${Do}
-        StrCpy $R0  $7 1
-        ${If} $R0 != " "
-          ${ExitDo}
-        ${EndIf}
-        StrCpy $7   $7 "" 1                ;  Remove leading space
-      ${Loop}
-      ${Do}
-        StrCpy $R0  $7 1 -1
-        ${If} $R0 != " "
-          ${ExitDo}
-        ${EndIf}
-        StrCpy $7   $7 -1                  ;  Remove trailing space
-      ${Loop}
-      ${If} $7 == $4                       ; If string matches, remove it by not appending it
-        StrCpy $6 1                        ; Set 'found' flag
-      ${ElseIf} $7 != $4                   ; If string does NOT match
-      ${AndIf}  $0 == ""                   ;    and the 1st string being added to $0,
-        StrCpy $0 $7                       ;    copy it to $0 without a prepended semicolon
-      ${ElseIf} $7 != $4                   ; If string does NOT match
-      ${AndIf}  $0 != ""                   ;    and this is NOT the 1st string to be added to $0,
-        StrCpy $0 $0;$7                    ;    append path to $0 with a prepended semicolon
-      ${EndIf}                             ;
- 
-      IntOp $8 $8 + 1                      ; Bump counter
-    ${Loop}                                ; Check for duplicates until we run out of paths
-  ${EndIf}
- 
-  ; Step 4:  Perform the requested Action
-  ;
-  ${If} $2 != "R"                          ; If Append or Prepend
-    ${If} $6 == 1                          ; And if we found the target
-      DetailPrint "Target is already present in $1. It will be removed and"
-    ${EndIf}
-    ${If} $0 == ""                         ; If EnvVar is (now) empty
-      StrCpy $0 $4                         ;   just copy PathString to EnvVar
-      ${If} $6 == 0                        ; If found flag is either 0
-      ${OrIf} $6 == ""                     ; or blank (if EnvVarName is empty)
-        DetailPrint "$1 was empty and has been updated with the target"
-      ${EndIf}
-    ${ElseIf} $2 == "A"                    ;  If Append (and EnvVar is not empty),
-      StrCpy $0 $0;$4                      ;     append PathString
-      ${If} $6 == 1
-        DetailPrint "appended to $1"
-      ${Else}
-        DetailPrint "Target was appended to $1"
-      ${EndIf}
-    ${Else}                                ;  If Prepend (and EnvVar is not empty),
-      StrCpy $0 $4;$0                      ;     prepend PathString
-      ${If} $6 == 1
-        DetailPrint "prepended to $1"
-      ${Else}
-        DetailPrint "Target was prepended to $1"
-      ${EndIf}
-    ${EndIf}
-  ${Else}                                  ; If Action = Remove
-    ${If} $6 == 1                          ;   and we found the target
-      DetailPrint "Target was found and removed from $1"
-    ${Else}
-      DetailPrint "Target was NOT found in $1 (nothing to remove)"
-    ${EndIf}
-    ${If} $0 == ""
-      DetailPrint "$1 is now empty"
-    ${EndIf}
-  ${EndIf}
- 
-  ; Step 5:  Update the registry at RegLoc with the updated EnvVar and announce the change
-  ;
-  ClearErrors
-  ${If} $3  == HKLM
-    WriteRegExpandStr ${hklm_all_users} $1 $0     ; Write it in all users section
-  ${ElseIf} $3 == HKCU
-    WriteRegExpandStr ${hkcu_current_user} $1 $0  ; Write it to current user section
-  ${EndIf}
- 
-  IfErrors 0 +4
-    MessageBox MB_OK|MB_ICONEXCLAMATION "Could not write updated $1 to $3"
-    DetailPrint "Could not write updated $1 to $3"
-    Goto EnvVarUpdate_Restore_Vars
- 
-  ; "Export" our change
-  SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000
- 
-  EnvVarUpdate_Restore_Vars:
-  ;
-  ; Restore the user's variables and return ResultVar
-  Pop $R0
-  Pop $9
-  Pop $8
-  Pop $7
-  Pop $6
-  Pop $5
-  Pop $4
-  Pop $3
-  Pop $2
-  Pop $1
-  Push $0  ; Push my $0 (ResultVar)
-  Exch
-  Pop $0   ; Restore his $0
- 
-FunctionEnd
- 
-!macroend   ; EnvVarUpdate UN
-!insertmacro EnvVarUpdate ""
-!insertmacro EnvVarUpdate "un."
-;----------------------------------- EnvVarUpdate end----------------------------------------
- 
-!verbose pop
-!endif
diff --git a/packaging/windows-exe/rabbitmq_nsi.in b/packaging/windows-exe/rabbitmq_nsi.in
index 6d79ffd4..27e4e1dc 100644
--- a/packaging/windows-exe/rabbitmq_nsi.in
+++ b/packaging/windows-exe/rabbitmq_nsi.in
@@ -4,7 +4,6 @@
 !include WinMessages.nsh
 !include FileFunc.nsh
 !include WordFunc.nsh
-!include lib\EnvVarUpdate.nsh
 
 !define env_hklm 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"'
 !define uninstall "Software\Microsoft\Windows\CurrentVersion\Uninstall\RabbitMQ"
@@ -77,9 +76,6 @@ Section "RabbitMQ Server (required)" Rabbit
   File /r "rabbitmq_server-%%VERSION%%"
   File "rabbitmq.ico"
 
-  ; Add to PATH
-  ${EnvVarUpdate} $0 "PATH" "A" "HKLM" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin"
-
   ; Write the installation path into the registry
   WriteRegStr HKLM "SOFTWARE\VMware, Inc.\RabbitMQ Server" "Install_Dir" "$INSTDIR"
 
@@ -117,15 +113,18 @@ Section "Start Menu" RabbitStartMenu
   CreateDirectory "$APPDATA\RabbitMQ\db"
 
   CreateDirectory "$SMPROGRAMS\RabbitMQ Server"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Uninstall.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Plugins Directory.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\plugins"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Log Directory.lnk" "$APPDATA\RabbitMQ\log"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Database Directory.lnk" "$APPDATA\RabbitMQ\db"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\(Re)Install Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "install" "$INSTDIR\rabbitmq.ico"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Remove Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "remove" "$INSTDIR\rabbitmq.ico"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Start Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "start" "$INSTDIR\rabbitmq.ico"
-  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Stop Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "stop" "$INSTDIR\rabbitmq.ico"
-
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Uninstall RabbitMQ.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Plugins.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\plugins"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Logs.lnk" "$APPDATA\RabbitMQ\log"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Database Directory.lnk" "$APPDATA\RabbitMQ\db"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - (re)install.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "install" "$INSTDIR\rabbitmq.ico"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - remove.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "remove" "$INSTDIR\rabbitmq.ico"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - start.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "start" "$INSTDIR\rabbitmq.ico"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - stop.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "stop" "$INSTDIR\rabbitmq.ico"
+
+  SetOutPath "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin"
+  CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Command Prompt (sbin dir).lnk" "$WINDIR\system32\cmd.exe" "" "$WINDIR\system32\cmd.exe"
+  SetOutPath $INSTDIR
 SectionEnd
 
 ;--------------------------------
@@ -157,9 +156,6 @@ Section "Uninstall"
   ExecWait '"$0" /C "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" stop'
   ExecWait '"$0" /C "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" remove'
 
-  ; Remove from PATH
-  ${un.EnvVarUpdate} $0 "PATH" "R" "HKLM" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin"
-
   ; Remove files and uninstaller
   RMDir /r "$INSTDIR\rabbitmq_server-%%VERSION%%"
   Delete "$INSTDIR\rabbitmq.ico"
diff --git a/packaging/windows/Makefile b/packaging/windows/Makefile
index abe174e0..a0be8d89 100644
--- a/packaging/windows/Makefile
+++ b/packaging/windows/Makefile
@@ -4,14 +4,13 @@ TARGET_DIR=rabbitmq_server-$(VERSION)
 TARGET_ZIP=rabbitmq-server-windows-$(VERSION)
 
 dist:
-	tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz
+	tar -zxf ../../dist/$(SOURCE_DIR).tar.gz
 	$(MAKE) -C $(SOURCE_DIR)
 
 	mkdir $(SOURCE_DIR)/sbin
 	mv $(SOURCE_DIR)/scripts/rabbitmq-server.bat $(SOURCE_DIR)/sbin
 	mv $(SOURCE_DIR)/scripts/rabbitmq-service.bat $(SOURCE_DIR)/sbin
 	mv $(SOURCE_DIR)/scripts/rabbitmqctl.bat $(SOURCE_DIR)/sbin
-	mv $(SOURCE_DIR)/scripts/rabbitmq-multi.bat $(SOURCE_DIR)/sbin
 	rm -rf $(SOURCE_DIR)/scripts
 	rm -rf $(SOURCE_DIR)/codegen* $(SOURCE_DIR)/Makefile
 	rm -f $(SOURCE_DIR)/README
@@ -25,7 +24,7 @@ dist:
 	elinks -dump -no-references -no-numbering rabbitmq-service.html \
 		> $(TARGET_DIR)/readme-service.txt
 	todos $(TARGET_DIR)/readme-service.txt
-	zip -r $(TARGET_ZIP).zip $(TARGET_DIR)
+	zip -q -r $(TARGET_ZIP).zip $(TARGET_DIR)
 	rm -rf $(TARGET_DIR) rabbitmq-service.html
 
 clean: clean_partial
diff --git a/quickcheck b/quickcheck
new file mode 100755
index 00000000..a36cf3ed
--- /dev/null
+++ b/quickcheck
@@ -0,0 +1,47 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+%%! -sname quickcheck
+-mode(compile).
+
+%% A helper to test quickcheck properties on a running broker
+%% NodeStr is a local broker node name (the local hostname is appended)
+%% ModStr is the module containing quickcheck properties
+%% The number of trials is optional
+%%
+%% Exit status: 0 when proper:module returns no failing properties,
+%% 1 when it returns anything else, 2 when the node or PropEr is
+%% unavailable or the arguments are unusable.
+main([NodeStr, ModStr | TrialsStr]) ->
+    {ok, Hostname} = inet:gethostname(),
+    Node = list_to_atom(NodeStr ++ "@" ++ Hostname),
+    Mod  = list_to_atom(ModStr),
+    Trials = lists:map(fun erlang:list_to_integer/1, TrialsStr),
+    case rpc:call(Node, code, ensure_loaded, [proper]) of
+        {module, proper} ->
+            case rpc:call(Node, proper, module, [Mod] ++ Trials) of
+                [] -> ok;
+                _  -> quit(1)
+            end;
+        {badrpc, Reason} ->
+            io:format("Could not contact node ~p: ~p.~n", [Node, Reason]),
+            quit(2);
+        {error,nofile} ->
+            io:format("Module PropEr was not found on node ~p~n", [Node]),
+            quit(2);
+        {error, Reason} ->
+            io:format("Could not load PropEr on node ~p: ~p~n",
+                      [Node, Reason]),
+            quit(2)
+    end;
+%% Anything other than a node name and a module (e.g. a lone node name)
+%% is a usage error: report it and fail rather than crash or exit 0.
+main(_) ->
+    io:format("This script requires a node name and a module.~n"),
+    quit(2).
+
+quit(Status) ->
+    case os:type() of
+        {unix,  _} -> halt(Status);
+        {win32, _} -> init:stop(Status)
+    end.
+
diff --git a/scripts/rabbitmq-env b/scripts/rabbitmq-env
index df4b24d8..a2ef8d3c 100755
--- a/scripts/rabbitmq-env
+++ b/scripts/rabbitmq-env
@@ -37,4 +37,9 @@ RABBITMQ_HOME="${SCRIPT_DIR}/.."
 NODENAME=rabbit@${HOSTNAME%%.*}
 
 # Load configuration from the rabbitmq.conf file
-[ -f /etc/rabbitmq/rabbitmq.conf ] && . /etc/rabbitmq/rabbitmq.conf
+if [ -f /etc/rabbitmq/rabbitmq.conf ] && \
+   [ ! -f /etc/rabbitmq/rabbitmq-env.conf ] ; then
+    echo -n "WARNING: ignoring /etc/rabbitmq/rabbitmq.conf -- "
+    echo "location has moved to /etc/rabbitmq/rabbitmq-env.conf"
+fi
+[ -f /etc/rabbitmq/rabbitmq-env.conf ] && . /etc/rabbitmq/rabbitmq-env.conf
diff --git a/scripts/rabbitmq-multi b/scripts/rabbitmq-multi
deleted file mode 100755
index ebcf4b63..00000000
--- a/scripts/rabbitmq-multi
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/sh
-##  The contents of this file are subject to the Mozilla Public License
-##  Version 1.1 (the "License"); you may not use this file except in
-##  compliance with the License. You may obtain a copy of the License
-##  at http://www.mozilla.org/MPL/
-##
-##  Software distributed under the License is distributed on an "AS IS"
-##  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
-##  the License for the specific language governing rights and
-##  limitations under the License.
-##
-##  The Original Code is RabbitMQ.
-##
-##  The Initial Developer of the Original Code is VMware, Inc.
-##  Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
-##
-
-SCRIPT_HOME=$(dirname $0)
-PIDS_FILE=/var/lib/rabbitmq/pids
-MULTI_ERL_ARGS=
-MULTI_START_ARGS=
-CONFIG_FILE=/etc/rabbitmq/rabbitmq
-
-. `dirname $0`/rabbitmq-env
-
-DEFAULT_NODE_IP_ADDRESS=0.0.0.0
-DEFAULT_NODE_PORT=5672
-[ "x" = "x$RABBITMQ_NODE_IP_ADDRESS" ] && [ "x" != "x$NODE_IP_ADDRESS" ] && RABBITMQ_NODE_IP_ADDRESS=${NODE_IP_ADDRESS}
-[ "x" = "x$RABBITMQ_NODE_PORT" ] && [ "x" != "x$NODE_PORT" ] && RABBITMQ_NODE_PORT=${NODE_PORT}
-if [ "x" = "x$RABBITMQ_NODE_IP_ADDRESS" ]
-then
-    if [ "x" != "x$RABBITMQ_NODE_PORT" ]
-    then RABBITMQ_NODE_IP_ADDRESS=${DEFAULT_NODE_IP_ADDRESS}
-    fi
-else
-    if [ "x" = "x$RABBITMQ_NODE_PORT" ]
-    then RABBITMQ_NODE_PORT=${DEFAULT_NODE_PORT}
-    fi
-fi
-[ "x" = "x$RABBITMQ_NODENAME" ] && RABBITMQ_NODENAME=${NODENAME}
-[ "x" = "x$RABBITMQ_SCRIPT_HOME" ] && RABBITMQ_SCRIPT_HOME=${SCRIPT_HOME}
-[ "x" = "x$RABBITMQ_PIDS_FILE" ] && RABBITMQ_PIDS_FILE=${PIDS_FILE}
-[ "x" = "x$RABBITMQ_MULTI_ERL_ARGS" ] && RABBITMQ_MULTI_ERL_ARGS=${MULTI_ERL_ARGS}
-[ "x" = "x$RABBITMQ_MULTI_START_ARGS" ] && RABBITMQ_MULTI_START_ARGS=${MULTI_START_ARGS}
-[ "x" = "x$RABBITMQ_CONFIG_FILE" ] && RABBITMQ_CONFIG_FILE=${CONFIG_FILE}
-
-export \
-    RABBITMQ_NODENAME \
-    RABBITMQ_NODE_IP_ADDRESS \
-    RABBITMQ_NODE_PORT \
-    RABBITMQ_SCRIPT_HOME \
-    RABBITMQ_PIDS_FILE \
-    RABBITMQ_CONFIG_FILE
-
-RABBITMQ_CONFIG_ARG=
-[ -f "${RABBITMQ_CONFIG_FILE}.config" ] && RABBITMQ_CONFIG_ARG="-config ${RABBITMQ_CONFIG_FILE}"
-
-# we need to turn off path expansion because some of the vars, notably
-# RABBITMQ_MULTI_ERL_ARGS, may contain terms that look like globs and
-# there is no other way of preventing their expansion.
-set -f
-
-exec erl \
-    -pa "${RABBITMQ_HOME}/ebin" \
-    -noinput \
-    -hidden \
-    ${RABBITMQ_MULTI_ERL_ARGS} \
-    -sname rabbitmq_multi$$ \
-    ${RABBITMQ_CONFIG_ARG} \
-    -s rabbit_multi \
-    ${RABBITMQ_MULTI_START_ARGS} \
-    -extra "$@"
diff --git a/scripts/rabbitmq-multi.bat b/scripts/rabbitmq-multi.bat
deleted file mode 100644
index a2d10f2e..00000000
--- a/scripts/rabbitmq-multi.bat
+++ /dev/null
@@ -1,84 +0,0 @@
-@echo off
-REM  The contents of this file are subject to the Mozilla Public License
-REM  Version 1.1 (the "License"); you may not use this file except in
-REM  compliance with the License. You may obtain a copy of the License
-REM  at http://www.mozilla.org/MPL/
-REM
-REM  Software distributed under the License is distributed on an "AS IS"
-REM  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
-REM  the License for the specific language governing rights and
-REM  limitations under the License.
-REM
-REM  The Original Code is RabbitMQ.
-REM
-REM  The Initial Developer of the Original Code is VMware, Inc.
-REM  Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
-REM
-
-setlocal
-
-rem Preserve values that might contain exclamation marks before
-rem enabling delayed expansion
-set TDP0=%~dp0
-set STAR=%*
-setlocal enabledelayedexpansion
-
-if "!RABBITMQ_BASE!"=="" (
-    set RABBITMQ_BASE=!APPDATA!\RabbitMQ
-)
-
-if "!COMPUTERNAME!"=="" (
-    set COMPUTERNAME=localhost
-)
-
-if "!RABBITMQ_NODENAME!"=="" (
-    set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
-)
-
-if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
-   if not "!RABBITMQ_NODE_PORT!"=="" (
-      set RABBITMQ_NODE_IP_ADDRESS=0.0.0.0
-   )
-) else (
-   if "!RABBITMQ_NODE_PORT!"=="" (
-      set RABBITMQ_NODE_PORT=5672
-   )
-)
-
-set RABBITMQ_PIDS_FILE=!RABBITMQ_BASE!\rabbitmq.pids
-set RABBITMQ_SCRIPT_HOME=!TDP0!
-
-if "!RABBITMQ_CONFIG_FILE!"=="" (
-    set RABBITMQ_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq
-)
-
-if exist "!RABBITMQ_CONFIG_FILE!.config" (
-    set RABBITMQ_CONFIG_ARG=-config "!RABBITMQ_CONFIG_FILE!"
-) else (
-    set RABBITMQ_CONFIG_ARG=
-)
-
-if not exist "!ERLANG_HOME!\bin\erl.exe" (
-    echo.
-    echo ******************************
-    echo ERLANG_HOME not set correctly.
-    echo ******************************
-    echo.
-    echo Please either set ERLANG_HOME to point to your Erlang installation or place the
-    echo RabbitMQ server distribution in the Erlang lib folder.
-    echo.
-    exit /B
-)
-
-"!ERLANG_HOME!\bin\erl.exe" ^
--pa "!TDP0!..\ebin" ^
--noinput -hidden ^
-!RABBITMQ_MULTI_ERL_ARGS! ^
--sname rabbitmq_multi!RANDOM! ^
-!RABBITMQ_CONFIG_ARG! ^
--s rabbit_multi ^
-!RABBITMQ_MULTI_START_ARGS! ^
--extra !STAR!
-
-endlocal
-endlocal
diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server
index 5c390a51..2f80eb96 100755
--- a/scripts/rabbitmq-server
+++ b/scripts/rabbitmq-server
@@ -16,7 +16,6 @@
 ##
 
 SERVER_ERL_ARGS="+K true +A30 +P 1048576 \
--kernel inet_default_listen_options [{nodelay,true}] \
 -kernel inet_default_connect_options [{nodelay,true}]"
 CONFIG_FILE=/etc/rabbitmq/rabbitmq
 LOG_BASE=/var/log/rabbitmq
diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat
index 0cfa5ea8..5e2097db 100644
--- a/scripts/rabbitmq-server.bat
+++ b/scripts/rabbitmq-server.bat
@@ -72,17 +72,14 @@ rem Log management (rotation, filtering based of size...) is left as an exercice
 
 set BACKUP_EXTENSION=.1
 
-set LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log
-set SASL_LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log
-
-set LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log!BACKUP_EXTENSION!
-set SASL_LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log!BACKUP_EXTENSION!
+set LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!.log
+set SASL_LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!-sasl.log
 
 if exist "!LOGS!" (
-    type "!LOGS!" >> "!LOGS_BACKUP!"
+    type "!LOGS!" >> "!LOGS!!BACKUP_EXTENSION!"
 )
 if exist "!SASL_LOGS!" (
-    type "!SASL_LOGS!" >> "!SASL_LOGS_BACKUP!"
+    type "!SASL_LOGS!" >> "!SASL_LOGS!!BACKUP_EXTENSION!"
 )
 
 rem End of log management
@@ -142,13 +139,12 @@ if not "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
 +W w ^
 +A30 ^
 +P 1048576 ^
--kernel inet_default_listen_options "[{nodelay, true}]" ^
 -kernel inet_default_connect_options "[{nodelay, true}]" ^
 !RABBITMQ_LISTEN_ARG! ^
--kernel error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!.log"\"} ^
+-kernel error_logger {file,\""!LOGS:\=/!"\"} ^
 !RABBITMQ_SERVER_ERL_ARGS! ^
 -sasl errlog_type error ^
--sasl sasl_error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!-sasl.log"\"} ^
+-sasl sasl_error_logger {file,\""!SASL_LOGS:\=/!"\"} ^
 -os_mon start_cpu_sup true ^
 -os_mon start_disksup false ^
 -os_mon start_memsup false ^
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat
index 43520b55..b2aa4f58 100644
--- a/scripts/rabbitmq-service.bat
+++ b/scripts/rabbitmq-service.bat
@@ -105,17 +105,14 @@ rem Log management (rotation, filtering based on size...) is left as an exercise
 
 set BACKUP_EXTENSION=.1
 
-set LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log
-set SASL_LOGS=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log
-
-set LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!.log!BACKUP_EXTENSION!
-set SASL_LOGS_BACKUP=!RABBITMQ_BASE!\log\!RABBITMQ_NODENAME!-sasl.log!BACKUP_EXTENSION!
+set LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!.log
+set SASL_LOGS=!RABBITMQ_LOG_BASE!\!RABBITMQ_NODENAME!-sasl.log
 
 if exist "!LOGS!" (
-	type "!LOGS!" >> "!LOGS_BACKUP!"
+    type "!LOGS!" >> "!LOGS!!BACKUP_EXTENSION!"
 )
 if exist "!SASL_LOGS!" (
-	type "!SASL_LOGS!" >> "!SASL_LOGS_BACKUP!"
+    type "!SASL_LOGS!" >> "!SASL_LOGS!!BACKUP_EXTENSION!"
 )
 
 rem End of log management
@@ -207,13 +204,12 @@ set ERLANG_SERVICE_ARGUMENTS= ^
 -s rabbit ^
 +W w ^
 +A30 ^
--kernel inet_default_listen_options "[{nodelay,true}]" ^
 -kernel inet_default_connect_options "[{nodelay,true}]" ^
 !RABBITMQ_LISTEN_ARG! ^
--kernel error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!.log"\"} ^
+-kernel error_logger {file,\""!LOGS:\=/!"\"} ^
 !RABBITMQ_SERVER_ERL_ARGS! ^
 -sasl errlog_type error ^
--sasl sasl_error_logger {file,\""!RABBITMQ_LOG_BASE!/!RABBITMQ_NODENAME!-sasl.log"\"} ^
+-sasl sasl_error_logger {file,\""!SASL_LOGS:\=/!"\"} ^
 -os_mon start_cpu_sup true ^
 -os_mon start_disksup false ^
 -os_mon start_memsup false ^
@@ -231,6 +227,7 @@ set ERLANG_SERVICE_ARGUMENTS=!ERLANG_SERVICE_ARGUMENTS:"=\"!
 -stopaction "rabbit:stop_and_halt()." ^
 -sname !RABBITMQ_NODENAME! ^
 !CONSOLE_FLAG! ^
+-comment "A robust and scalable messaging broker" ^
 -args "!ERLANG_SERVICE_ARGUMENTS!" > NUL
 
 goto END
diff --git a/src/delegate.erl b/src/delegate.erl
index 46bd8245..17046201 100644
--- a/src/delegate.erl
+++ b/src/delegate.erl
@@ -18,7 +18,7 @@
 
 -behaviour(gen_server2).
 
--export([start_link/1, invoke_no_result/2, invoke/2, delegate_count/1]).
+-export([start_link/1, invoke_no_result/2, invoke/2]).
 
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
@@ -36,8 +36,6 @@
         ([pid()], fun ((pid()) -> A)) -> {[{pid(), A}],
                                           [{pid(), term()}]}).
 
--spec(delegate_count/1 :: ([node()]) -> non_neg_integer()).
-
 -endif.
 
 %%----------------------------------------------------------------------------
@@ -111,22 +109,14 @@ group_pids_by_node(Pids) ->
                  node(Pid), fun (List) -> [Pid | List] end, [Pid], Remote)}
       end, {[], orddict:new()}, Pids).
 
-delegate_count([RemoteNode | _]) ->
-    {ok, Count} = case application:get_env(rabbit, delegate_count) of
-                      undefined -> rpc:call(RemoteNode, application, get_env,
-                                            [rabbit, delegate_count]);
-                      Result    -> Result
-                  end,
-    Count.
-
 delegate_name(Hash) ->
     list_to_atom("delegate_" ++ integer_to_list(Hash)).
 
 delegate(RemoteNodes) ->
     case get(delegate) of
-        undefined -> Name =
-                         delegate_name(erlang:phash2(
-                                         self(), delegate_count(RemoteNodes))),
+        undefined -> Name = delegate_name(
+                              erlang:phash2(self(),
+                                            delegate_sup:count(RemoteNodes))),
                      put(delegate, Name),
                      Name;
         Name      -> Name
diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl
index e0ffa7c8..fc693c7d 100644
--- a/src/delegate_sup.erl
+++ b/src/delegate_sup.erl
@@ -18,7 +18,7 @@
 
 -behaviour(supervisor).
 
--export([start_link/0]).
+-export([start_link/1, count/1]).
 
 -export([init/1]).
 
@@ -28,20 +28,32 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}).
+-spec(start_link/1 :: (integer()) -> {'ok', pid()} | {'error', any()}).
+-spec(count/1 :: ([node()]) -> integer()).
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-start_link() ->
-    supervisor:start_link({local, ?SERVER}, ?MODULE, []).
+start_link(Count) -> % Count is handed to init/1 as the sole start argument
+    supervisor:start_link({local, ?SERVER}, ?MODULE, [Count]).
+
+%% Number of delegates on the first node that answers; 1 if none do.
+count([]) -> 1;
+count([Candidate | Rest]) ->
+    try
+        length(supervisor:which_children({?SERVER, Candidate}))
+    catch
+        exit:{{Why, _}, _} when Why =:= nodedown; Why =:= shutdown ->
+            count(Rest);
+        exit:{Why, _} when Why =:= noproc; Why =:= normal;
+                           Why =:= shutdown; Why =:= nodedown -> count(Rest)
+    end.
 
 %%----------------------------------------------------------------------------
 
-init(_Args) ->
-    DCount = delegate:delegate_count([node()]),
+init([Count]) ->
     {ok, {{one_for_one, 10, 10},
           [{Num, {delegate, start_link, [Num]},
             transient, 16#ffffffff, worker, [delegate]} ||
-              Num <- lists:seq(0, DCount - 1)]}}.
+              Num <- lists:seq(0, Count - 1)]}}.
diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl
index b5b07eca..235e14c0 100644
--- a/src/file_handle_cache.erl
+++ b/src/file_handle_cache.erl
@@ -146,7 +146,8 @@
 -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1,
          last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1,
          flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]).
--export([obtain/0, transfer/1, set_limit/1, get_limit/0]).
+-export([obtain/0, transfer/1, set_limit/1, get_limit/0, info_keys/0, info/0,
+         info/1]).
 -export([ulimit/0]).
 
 -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2,
@@ -155,13 +156,6 @@
 -define(SERVER, ?MODULE).
 -define(RESERVED_FOR_OTHERS, 100).
 
-%% Googling around suggests that Windows has a limit somewhere around
-%% 16M, eg
-%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx
-%% however, it turns out that's only available through the win32
-%% API. Via the C Runtime, we have just 512:
-%% http://msdn.microsoft.com/en-us/library/6e3b887c%28VS.80%29.aspx
--define(FILE_HANDLES_LIMIT_WINDOWS, 512).
 -define(FILE_HANDLES_LIMIT_OTHER, 1024).
 -define(FILE_HANDLES_CHECK_INTERVAL, 2000).
 
@@ -241,7 +235,7 @@
         -> val_or_error(ref())).
 -spec(close/1 :: (ref()) -> ok_or_error()).
 -spec(read/2 :: (ref(), non_neg_integer()) ->
-             val_or_error([char()] | binary()) | 'eof').
+                     val_or_error([char()] | binary()) | 'eof').
 -spec(append/2 :: (ref(), iodata()) -> ok_or_error()).
 -spec(sync/1 :: (ref()) ->  ok_or_error()).
 -spec(position/2 :: (ref(), position()) -> val_or_error(offset())).
@@ -251,7 +245,7 @@
 -spec(current_raw_offset/1     :: (ref()) -> val_or_error(offset())).
 -spec(flush/1 :: (ref()) -> ok_or_error()).
 -spec(copy/3 :: (ref(), ref(), non_neg_integer()) ->
-             val_or_error(non_neg_integer())).
+                     val_or_error(non_neg_integer())).
 -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok').
 -spec(delete/1 :: (ref()) -> ok_or_error()).
 -spec(clear/1 :: (ref()) -> ok_or_error()).
@@ -259,11 +253,17 @@
 -spec(transfer/1 :: (pid()) -> 'ok').
 -spec(set_limit/1 :: (non_neg_integer()) -> 'ok').
 -spec(get_limit/0 :: () -> non_neg_integer()).
+-spec(info_keys/0 :: () -> [atom()]).
+-spec(info/0 :: () -> [{atom(), any()}]).
+-spec(info/1 :: ([atom()]) -> [{atom(), any()}]).
 -spec(ulimit/0 :: () -> 'infinity' | 'unknown' | non_neg_integer()).
 
 -endif.
 
 %%----------------------------------------------------------------------------
+-define(INFO_KEYS, [obtain_count, obtain_limit]).
+
+%%----------------------------------------------------------------------------
 %% Public API
 %%----------------------------------------------------------------------------
 
@@ -494,6 +494,11 @@ set_limit(Limit) ->
 get_limit() ->
     gen_server:call(?SERVER, get_limit, infinity).
 
+info_keys() -> ?INFO_KEYS. % the items info/0 asks for
+%% info/1 asks the server for the listed items; info/0 for all ?INFO_KEYS.
+info() -> info(?INFO_KEYS).
+info(Items) -> gen_server:call(?SERVER, {info, Items}, infinity).
+
 %%----------------------------------------------------------------------------
 %% Internal functions
 %%----------------------------------------------------------------------------
@@ -789,6 +794,12 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset,
             {Error, Handle}
     end.
 
+infos(Keys, State) -> [{Key, i(Key, State)} || Key <- Keys].
+%% i/2 reads one info item from the state; an unknown item is thrown back.
+i(obtain_count, #fhc_state{obtain_count = Obtained}) -> Obtained;
+i(obtain_limit, #fhc_state{obtain_limit = Ceiling})  -> Ceiling;
+i(Unknown, _State) -> throw({bad_argument, Unknown}).
+
 %%----------------------------------------------------------------------------
 %% gen_server callbacks
 %%----------------------------------------------------------------------------
@@ -849,35 +860,41 @@ handle_call({open, Pid, Requested, EldestUnusedSince}, From,
         false -> {noreply, run_pending_item(Item, State1)}
     end;
 
-handle_call({obtain, Pid}, From, State = #fhc_state { obtain_limit   = Limit,
-                                                      obtain_count   = Count,
-                                                      obtain_pending = Pending,
-                                                      clients = Clients })
-  when Limit =/= infinity andalso Count >= Limit ->
-    ok = track_client(Pid, Clients),
-    true = ets:update_element(Clients, Pid, {#cstate.blocked, true}),
-    Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From },
-    {noreply, State #fhc_state { obtain_pending = pending_in(Item, Pending) }};
 handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count   = Count,
                                                       obtain_pending = Pending,
                                                       clients = Clients }) ->
-    Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From },
     ok = track_client(Pid, Clients),
-    case needs_reduce(State #fhc_state { obtain_count = Count + 1 }) of
-        true ->
-            true = ets:update_element(Clients, Pid, {#cstate.blocked, true}),
-            {noreply, reduce(State #fhc_state {
-                               obtain_pending = pending_in(Item, Pending) })};
-        false ->
-            {noreply, run_pending_item(Item, State)}
-    end;
+    Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From },
+    Enqueue = fun () ->
+                      true = ets:update_element(Clients, Pid,
+                                                {#cstate.blocked, true}),
+                      State #fhc_state {
+                        obtain_pending = pending_in(Item, Pending) }
+              end,
+    {noreply,
+        case obtain_limit_reached(State) of
+            true  -> Enqueue();
+            false -> case needs_reduce(State #fhc_state {
+                                      obtain_count = Count + 1 }) of
+                         true  -> reduce(Enqueue());
+                         false -> adjust_alarm(
+                                      State, run_pending_item(Item, State))
+                     end
+        end};
+
 handle_call({set_limit, Limit}, _From, State) ->
-    {reply, ok, maybe_reduce(
-                  process_pending(State #fhc_state {
-                                    limit        = Limit,
-                                    obtain_limit = obtain_limit(Limit) }))};
+    {reply, ok, adjust_alarm(
+                  State, maybe_reduce(
+                           process_pending(
+                             State #fhc_state {
+                               limit        = Limit,
+                               obtain_limit = obtain_limit(Limit) })))};
+
 handle_call(get_limit, _From, State = #fhc_state { limit = Limit }) ->
-    {reply, Limit, State}.
+    {reply, Limit, State};
+
+handle_call({info, Items}, _From, State) ->
+    {reply, infos(Items, State), State}.
 
 handle_cast({register_callback, Pid, MFA},
             State = #fhc_state { clients = Clients }) ->
@@ -900,9 +917,9 @@ handle_cast({close, Pid, EldestUnusedSince},
                   _         -> dict:store(Pid, EldestUnusedSince, Elders)
               end,
     ets:update_counter(Clients, Pid, {#cstate.pending_closes, -1, 0, 0}),
-    {noreply, process_pending(
+    {noreply, adjust_alarm(State, process_pending(
                 update_counts(open, Pid, -1,
-                              State #fhc_state { elders = Elders1 }))};
+                              State #fhc_state { elders = Elders1 })))};
 
 handle_cast({transfer, FromPid, ToPid}, State) ->
     ok = track_client(ToPid, State#fhc_state.clients),
@@ -924,13 +941,15 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason},
         ets:lookup(Clients, Pid),
     true = ets:delete(Clients, Pid),
     FilterFun = fun (#pending { pid = Pid1 }) -> Pid1 =/= Pid end,
-    {noreply, process_pending(
-                State #fhc_state {
-                  open_count     = OpenCount - Opened,
-                  open_pending   = filter_pending(FilterFun, OpenPending),
-                  obtain_count   = ObtainCount - Obtained,
-                  obtain_pending = filter_pending(FilterFun, ObtainPending),
-                  elders         = dict:erase(Pid, Elders) })}.
+    {noreply, adjust_alarm(
+                State,
+                process_pending(
+                  State #fhc_state {
+                    open_count     = OpenCount - Opened,
+                    open_pending   = filter_pending(FilterFun, OpenPending),
+                    obtain_count   = ObtainCount - Obtained,
+                    obtain_pending = filter_pending(FilterFun, ObtainPending),
+                    elders         = dict:erase(Pid, Elders) }))}.
 
 terminate(_Reason, State = #fhc_state { clients = Clients }) ->
     ets:delete(Clients),
@@ -951,12 +970,13 @@ queue_fold(Fun, Init, Q) ->
 
 filter_pending(Fun, {Count, Queue}) ->
     {Delta, Queue1} =
-        queue_fold(fun (Item, {DeltaN, QueueN}) ->
-                           case Fun(Item) of
-                               true  -> {DeltaN, queue:in(Item, QueueN)};
-                               false -> {DeltaN - requested(Item), QueueN}
-                           end
-                   end, {0, queue:new()}, Queue),
+        queue_fold(
+          fun (Item = #pending { requested = Requested }, {DeltaN, QueueN}) ->
+                  case Fun(Item) of
+                      true  -> {DeltaN, queue:in(Item, QueueN)};
+                      false -> {DeltaN - Requested, QueueN}
+                  end
+          end, {0, queue:new()}, Queue),
     {Count + Delta, Queue1}.
 
 pending_new() ->
@@ -990,8 +1010,17 @@ obtain_limit(Limit)    -> case ?OBTAIN_LIMIT(Limit) of
                               OLimit                 -> OLimit
                           end.
 
-requested({_Kind, _Pid, Requested, _From}) ->
-    Requested.
+%% True once a finite obtain limit has been met or exceeded.
+obtain_limit_reached(#fhc_state{obtain_limit = infinity}) -> false;
+obtain_limit_reached(#fhc_state{obtain_limit = L, obtain_count = C}) -> C >= L.
+
+adjust_alarm(OldState, NewState) -> % returns NewState; the alarm is a side effect
+    Before = obtain_limit_reached(OldState),
+    case obtain_limit_reached(NewState) of
+        Before -> ok; % limit status unchanged: leave the alarm alone
+        true   -> alarm_handler:set_alarm({file_descriptor_limit, []});
+        false  -> alarm_handler:clear_alarm(file_descriptor_limit)
+    end, NewState.
 
 process_pending(State = #fhc_state { limit = infinity }) ->
     State;
@@ -1094,7 +1123,7 @@ reduce(State = #fhc_state { open_pending   = OpenPending,
     case CStates of
         [] -> ok;
         _  -> case (Sum / ClientCount) -
-                       (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of
+                  (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of
                   AverageAge when AverageAge > 0 ->
                       notify_age(CStates, AverageAge);
                   _ ->
@@ -1118,11 +1147,12 @@ notify_age(CStates, AverageAge) ->
       end, CStates).
 
 notify_age0(Clients, CStates, Required) ->
-    Notifications =
-        [CState || CState <- CStates, CState#cstate.callback =/= undefined],
-    {L1, L2} = lists:split(random:uniform(length(Notifications)),
-                           Notifications),
-    notify(Clients, Required, L2 ++ L1).
+    case [CState || CState <- CStates, CState#cstate.callback =/= undefined] of
+        []            -> ok;
+        Notifications -> S = random:uniform(length(Notifications)),
+                         {L1, L2} = lists:split(S, Notifications),
+                         notify(Clients, Required, L2 ++ L1)
+    end.
 
 notify(_Clients, _Required, []) ->
     ok;
@@ -1147,29 +1177,20 @@ track_client(Pid, Clients) ->
         false -> ok
     end.
 
-%% For all unices, assume ulimit exists. Further googling suggests
-%% that BSDs (incl OS X), solaris and linux all agree that ulimit -n
-%% is file handles
+
+%% To increase the number of file descriptors: on Windows set the
+%% ERL_MAX_PORTS environment variable; on Linux raise `ulimit -n`.
 ulimit() ->
-    case os:type() of
-        {win32, _OsName} ->
-            ?FILE_HANDLES_LIMIT_WINDOWS;
-        {unix, _OsName} ->
-            %% Under Linux, Solaris and FreeBSD, ulimit is a shell
-            %% builtin, not a command. In OS X and AIX it's a command.
-            %% Fortunately, os:cmd invokes the cmd in a shell env, so
-            %% we're safe in all cases.
-            case os:cmd("ulimit -n") of
-                "unlimited" ->
-                    infinity;
-                String = [C|_] when $0 =< C andalso C =< $9 ->
-                    list_to_integer(
-                      lists:takewhile(
-                        fun (D) -> $0 =< D andalso D =< $9 end, String));
-                _ ->
-                    %% probably a variant of
-                    %% "/bin/sh: line 1: ulimit: command not found\n"
-                    unknown
+    case proplists:get_value(max_fds, erlang:system_info(check_io)) of
+        MaxFds when is_integer(MaxFds) andalso MaxFds > 1 ->
+            case os:type() of
+                {win32, _OsName} ->
+                    %% On Windows max_fds is twice the number of open files:
+                    %%   https://github.com/yrashk/erlang/blob/e1282325ed75e52a98d5/erts/emulator/sys/win32/sys.c#L2459-2466
+                    MaxFds div 2;
+                _Any ->
+                    %% For other operating systems trust Erlang.
+                    MaxFds
             end;
         _ ->
             unknown
diff --git a/src/gen_server2.erl b/src/gen_server2.erl
index a637dddd..35258139 100644
--- a/src/gen_server2.erl
+++ b/src/gen_server2.erl
@@ -58,6 +58,20 @@
 %% hibernate the process immediately, as it would if backoff wasn't
 %% being used. Instead it'll wait for the current timeout as described
 %% above.
+%%
+%% 7) The callback module can return, from any of the handle_*
+%% functions, a {become, Module, State} triple or a {become, Module,
+%% State, Timeout} quadruple. This allows the gen_server to
+%% dynamically change the callback module. The State is the new state
+%% which will be passed into any of the callback functions in the new
+%% module. Note there is no form also encompassing a reply, thus if
+%% you wish to reply in handle_call/3 and change the callback module,
+%% you need to use gen_server2:reply/2 to issue the reply manually.
+%%
+%% 8) The callback module can optionally implement
+%% format_message_queue/2 which is the equivalent of format_status/2
+%% but where the second argument is specifically the priority_queue
+%% which contains the prioritised message_queue.
 
 %% All modifications are (C) 2009-2011 VMware, Inc.
 
@@ -444,8 +458,8 @@ unregister_name({global,Name}) ->
     _ = global:unregister_name(Name);
 unregister_name(Pid) when is_pid(Pid) ->
     Pid;
-% Under R12 let's just ignore it, as we have a single term as Name.
-% On R13 it will never get here, as we get tuple with 'local/global' atom.
+%% Under R12 let's just ignore it, as we have a single term as Name.
+%% On R13 it will never get here, as we get tuple with 'local/global' atom.
 unregister_name(_Name) -> ok.
 
 extend_backoff(undefined) ->
@@ -584,41 +598,35 @@ adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO,
     CurrentTO1 = Base + Extra,
     {backoff, CurrentTO1, MinimumTO, DesiredHibPeriod, RandomState1}.
 
-in({'$gen_cast', Msg}, GS2State = #gs2_state { prioritise_cast = PC,
-                                               queue           = Queue }) ->
-    GS2State #gs2_state { queue = priority_queue:in(
-                                    {'$gen_cast', Msg},
-                                    PC(Msg, GS2State), Queue) };
-in({'$gen_call', From, Msg}, GS2State = #gs2_state { prioritise_call = PC,
-                                                     queue           = Queue }) ->
-    GS2State #gs2_state { queue = priority_queue:in(
-                                    {'$gen_call', From, Msg},
-                                    PC(Msg, From, GS2State), Queue) };
-in(Input, GS2State = #gs2_state { prioritise_info = PI, queue = Queue }) ->
-    GS2State #gs2_state { queue = priority_queue:in(
-                                    Input, PI(Input, GS2State), Queue) }.
-
-process_msg(Msg,
-            GS2State = #gs2_state { parent = Parent,
-                                    name   = Name,
-                                    debug  = Debug }) ->
-    case Msg of
-        {system, From, Req} ->
-            sys:handle_system_msg(
-              Req, From, Parent, ?MODULE, Debug,
-              GS2State);
-        %% gen_server puts Hib on the end as the 7th arg, but that
-        %% version of the function seems not to be documented so
-        %% leaving out for now.
-        {'EXIT', Parent, Reason} ->
-            terminate(Reason, Msg, GS2State);
-        _Msg when Debug =:= [] ->
-            handle_msg(Msg, GS2State);
-        _Msg ->
-            Debug1 = sys:handle_debug(Debug, fun print_event/3,
-                                      Name, {in, Msg}),
-            handle_msg(Msg, GS2State #gs2_state { debug = Debug1 })
-    end.
+%% Enqueue Input at the priority computed by the matching prioritise_* fun;
+%% the parent's 'EXIT' and system messages are always queued at infinity.
+in(Input = {'$gen_cast', Msg}, St = #gs2_state { prioritise_cast = Prio }) ->
+    in(Input, Prio(Msg, St), St);
+in(Input = {'$gen_call', From, Msg},
+   St = #gs2_state { prioritise_call = Prio }) ->
+    in(Input, Prio(Msg, From, St), St);
+in(Input = {'EXIT', Parent, _}, St = #gs2_state { parent = Parent }) ->
+    in(Input, infinity, St);
+in(Input = {system, _, _}, St) -> in(Input, infinity, St);
+in(Input, St = #gs2_state { prioritise_info = Prio }) ->
+    in(Input, Prio(Input, St), St).
+
+in(Input, Priority, St = #gs2_state { queue = Q }) ->
+    St #gs2_state { queue = priority_queue:in(Input, Priority, Q) }.
+
+process_msg({system, From, Req},
+            GS2State = #gs2_state { parent = Parent, debug  = Debug }) ->
+    %% gen_server passes Hib as a 7th argument to this call, but that
+    %% arity seems not to be documented, so it is left out for now.
+    sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, GS2State);
+process_msg({'EXIT', Parent, Reason} = Msg,
+            GS2State = #gs2_state { parent = Parent }) ->
+    terminate(Reason, Msg, GS2State);
+process_msg(Msg, GS2State = #gs2_state { debug  = [] }) ->
+    handle_msg(Msg, GS2State);
+process_msg(Msg, GS2State = #gs2_state { name = Name, debug  = Debug }) ->
+    Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, {in, Msg}),
+    handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }).
 
 %%% ---------------------------------------------------
 %%% Send/recive functions
@@ -880,6 +888,22 @@ handle_common_reply(Reply, Msg, GS2State = #gs2_state { name  = Name,
             loop(GS2State #gs2_state { state = NState,
                                        time  = Time1,
                                        debug = Debug1 });
+        {become, Mod, NState} ->
+            Debug1 = common_debug(Debug, fun print_event/3, Name,
+                                  {become, Mod, NState}),
+            loop(find_prioritisers(
+                   GS2State #gs2_state { mod   = Mod,
+                                         state = NState,
+                                         time  = infinity,
+                                         debug = Debug1 }));
+        {become, Mod, NState, Time1} ->
+            Debug1 = common_debug(Debug, fun print_event/3, Name,
+                                  {become, Mod, NState}),
+            loop(find_prioritisers(
+                   GS2State #gs2_state { mod   = Mod,
+                                         state = NState,
+                                         time  = Time1,
+                                         debug = Debug1 }));
         _ ->
             handle_common_termination(Reply, Msg, GS2State)
     end.
@@ -1136,17 +1160,22 @@ format_status(Opt, StatusData) ->
               end,
     Header = lists:concat(["Status for generic server ", NameTag]),
     Log = sys:get_debug(log, Debug, []),
-    Specfic =
-        case erlang:function_exported(Mod, format_status, 2) of
-            true -> case catch Mod:format_status(Opt, [PDict, State]) of
-                        {'EXIT', _} -> [{data, [{"State", State}]}];
-                        Else        -> Else
-                    end;
-            _    -> [{data, [{"State", State}]}]
-        end,
+    Specfic = callback(Mod, format_status, [Opt, [PDict, State]],
+                       fun () -> [{data, [{"State", State}]}] end),
+    Messages = callback(Mod, format_message_queue, [Opt, Queue],
+                        fun () -> priority_queue:to_list(Queue) end),
     [{header, Header},
      {data, [{"Status", SysState},
              {"Parent", Parent},
              {"Logged events", Log},
-             {"Queued messages", priority_queue:to_list(Queue)}]} |
+             {"Queued messages", Messages}]} |
      Specfic].
+
+callback(Mod, FunName, Args, DefaultThunk) ->
+    Outcome = case erlang:function_exported(Mod, FunName, length(Args)) of
+                  true  -> catch apply(Mod, FunName, Args);
+                  false -> {'EXIT', not_exported} % take the default path
+              end,
+    case Outcome of {'EXIT', _} -> DefaultThunk();
+                    Result      -> Result
+    end.
diff --git a/src/gm.erl b/src/gm.erl
new file mode 100644
index 00000000..8b7dc70c
--- /dev/null
+++ b/src/gm.erl
@@ -0,0 +1,1379 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(gm).
+
+%% Guaranteed Multicast
+%% ====================
+%%
+%% This module provides the ability to create named groups of
+%% processes to which members can be dynamically added and removed,
+%% and for messages to be broadcast within the group that are
+%% guaranteed to reach all members of the group during the lifetime of
+%% the message. The lifetime of a message is defined as being, at a
+%% minimum, the time from which the message is first sent to any
+%% member of the group, up until the time at which it is known by the
+%% member who published the message that the message has reached all
+%% group members.
+%%
+%% The guarantee given is that provided a message, once sent, makes it
+%% to members who do not all leave the group, the message will
+%% continue to propagate to all group members.
+%%
+%% Another way of stating the guarantee is that if member P publishes
+%% messages m and m', then for all members P', if P' is a member of
+%% the group prior to the publication of m, and P' receives m', then
+%% P' will receive m.
+%%
+%% Note that only local-ordering is enforced: i.e. if member P sends
+%% message m and then message m', then for-all members P', if P'
+%% receives m and m', then they will receive m' after m. Causality
+%% ordering is _not_ enforced. I.e. if member P receives message m
+%% and as a result publishes message m', there is no guarantee that
+%% other members P' will receive m before m'.
+%%
+%%
+%% API Use
+%% -------
+%%
+%% Mnesia must be started. Use the idempotent create_tables/0 function
+%% to create the tables required.
+%%
+%% start_link/3
+%% Provide the group name, the callback module name, and any arguments
+%% you wish to be passed into the callback module's functions. The
+%% joined/2 function will be called when we have joined the group,
+%% with the arguments passed to start_link and a list of the current
+%% members of the group. See the comments in behaviour_info/1 below
+%% for further details of the callback functions.
+%%
+%% leave/1
+%% Provide the Pid. Removes the Pid from the group. The callback
+%% terminate/2 function will be called.
+%%
+%% broadcast/2
+%% Provide the Pid and a Message. The message will be sent to all
+%% members of the group as per the guarantees given above. This is a
+%% cast and the function call will return immediately. There is no
+%% guarantee that the message will reach any member of the group.
+%%
+%% confirmed_broadcast/2
+%% Provide the Pid and a Message. As per broadcast/2 except that this
+%% is a call, not a cast, and only returns 'ok' once the Message has
+%% reached every member of the group. Do not call
+%% confirmed_broadcast/2 directly from the callback module otherwise
+%% you will deadlock the entire group.
+%%
+%% group_members/1
+%% Provide the Pid. Returns a list of the current group members.
+%%
+%%
+%% Implementation Overview
+%% -----------------------
+%%
+%% One possible means of implementation would be a fan-out from the
+%% sender to every member of the group. This would require that the
+%% group is fully connected, and, in the event that the original
+%% sender of the message disappears from the group before the message
+%% has made it to every member of the group, raises questions as to
+%% who is responsible for sending on the message to new group members.
+%% In particular, the issue is with [ Pid ! Msg || Pid <- Members ] -
+%% if the sender dies part way through, who is responsible for
+%% ensuring that the remaining Members receive the Msg? In the event
+%% that within the group, messages sent are broadcast from a subset of
+%% the members, the fan-out arrangement has the potential to
+%% substantially impact the CPU and network workload of such members,
+%% as such members would have to accommodate the cost of sending each
+%% message to every group member.
+%%
+%% Instead, if the members of the group are arranged in a chain, then
+%% it becomes easier to reason about who within the group has received
+%% each message and who has not. It eases issues of responsibility: in
+%% the event of a group member disappearing, the nearest upstream
+%% member of the chain is responsible for ensuring that messages
+%% continue to propagate down the chain. It also results in equal
+%% distribution of sending and receiving workload, even if all
+%% messages are being sent from just a single group member. This
+%% configuration has the further advantage that it is not necessary
+%% for every group member to know of every other group member, and
+%% even that a group member does not have to be accessible from all
+%% other group members.
+%%
+%% Performance is kept high by permitting pipelining and all
+%% communication between joined group members is asynchronous. In the
+%% chain A -> B -> C -> D, if A sends a message to the group, it will
+%% not directly contact C or D. However, it must know that D receives
+%% the message (in addition to B and C) before it can consider the
+%% message fully sent. A simplistic implementation would require that
+%% D replies to C, C replies to B and B then replies to A. This would
+%% result in a propagation delay of twice the length of the chain. It
+%% would also require, in the event of the failure of C, that D knows
+%% to directly contact B and issue the necessary replies. Instead, the
+%% chain forms a ring: D sends the message on to A: D does not
+%% distinguish A as the sender, merely as the next member (downstream)
+%% within the chain (which has now become a ring). When A receives
+%% from D messages that A sent, it knows that all members have
+%% received the message. However, the message is not dead yet: if C
+%% died as B was sending to C, then B would need to detect the death
+%% of C and forward the message on to D instead: thus every node has
+%% to remember every message published until it is told that it can
+%% forget about the message. This is essential not just for dealing
+%% with failure of members, but also for the addition of new members.
+%%
+%% Thus once A receives the message back again, it then sends to B an
+%% acknowledgement for the message, indicating that B can now forget
+%% about the message. B does so, and forwards the ack to C. C forgets
+%% the message, and forwards the ack to D, which forgets the message
+%% and finally forwards the ack back to A. At this point, A takes no
+%% further action: the message and its acknowledgement have made it to
+%% every member of the group. The message is now dead, and any new
+%% member joining the group at this point will not receive the
+%% message.
+%%
+%% We therefore have two roles:
+%%
+%% 1. The sender, who upon receiving their own messages back, must
+%% then send out acknowledgements, and upon receiving their own
+%% acknowledgements back perform no further action.
+%%
+%% 2. The other group members who upon receiving messages and
+%% acknowledgements must update their own internal state accordingly
+%% (the sending member must also do this in order to be able to
+%% accommodate failures), and forward messages on to their downstream
+%% neighbours.
+%%
+%%
+%% Implementation: It gets trickier
+%% --------------------------------
+%%
+%% Chain A -> B -> C -> D
+%%
+%% A publishes a message which B receives. A now dies. B and D will
+%% detect the death of A, and will link up, thus the chain is now B ->
+%% C -> D. B forwards A's message on to C, who forwards it to D, who
+%% forwards it to B. Thus B is now responsible for A's messages - both
+%% publications and acknowledgements that were in flight at the point
+%% at which A died. Even worse is that this is transitive: after B
+%% forwards A's message to C, B dies as well. Now C is not only
+%% responsible for B's in-flight messages, but is also responsible for
+%% A's in-flight messages.
+%%
+%% Lemma 1: A member can only determine which dead members they have
+%% inherited responsibility for if there is a total ordering on the
+%% conflicting additions and subtractions of members from the group.
+%%
+%% Consider the simultaneous death of B and addition of B' that
+%% transitions a chain from A -> B -> C to A -> B' -> C. Either B' or
+%% C is responsible for in-flight messages from B. It is easy to
+%% ensure that at least one of them thinks they have inherited B, but
+%% if we do not ensure that exactly one of them inherits B, then we
+%% could have B' converting publishes to acks, which then will crash C
+%% as C does not believe it has issued acks for those messages.
+%%
+%% More complex scenarios are easy to concoct: A -> B -> C -> D -> E
+%% becoming A -> C' -> E. Who has inherited which of B, C and D?
+%%
+%% However, for non-conflicting membership changes, only a partial
+%% ordering is required. For example, A -> B -> C becoming A -> A' ->
+%% B. The addition of A', between A and B can have no conflicts with
+%% the death of C: it is clear that A has inherited C's messages.
+%%
+%% For ease of implementation, we adopt the simple solution, of
+%% imposing a total order on all membership changes.
+%%
+%% On the death of a member, it is ensured the dead member's
+%% neighbours become aware of the death, and the upstream neighbour
+%% now sends to its new downstream neighbour its state, including the
+%% messages pending acknowledgement. The downstream neighbour can then
+%% use this to calculate which publishes and acknowledgements it has
+%% missed out on, due to the death of its old upstream. Thus the
+%% downstream can catch up, and continues the propagation of messages
+%% through the group.
+%%
+%% Lemma 2: When a member is joining, it must synchronously
+%% communicate with its upstream member in order to receive its
+%% starting state atomically with its addition to the group.
+%%
+%% New members must start with the same state as their nearest
+%% upstream neighbour. This ensures that they are not surprised by
+%% acknowledgements they are sent, and that should their downstream
+%% neighbour die, they are able to send the correct state to their new
+%% downstream neighbour to ensure it can catch up. Thus in the
+%% transition A -> B -> C becomes A -> A' -> B -> C becomes A -> A' ->
+%% C, A' must start with the state of A, so that it can send C the
+%% correct state when B dies, allowing C to detect any missed
+%% messages.
+%%
+%% If A' starts by adding itself to the group membership, A could then
+%% die, without A' having received the necessary state from A. This
+%% would leave A' responsible for in-flight messages from A, but
+%% having the least knowledge of all, of those messages. Thus A' must
+%% start by synchronously calling A, which then immediately sends A'
+%% back its state. A then adds A' to the group. If A dies at this
+%% point then A' will be able to see this (as A' will fail to appear
+%% in the group membership), and thus A' will ignore the state it
+%% receives from A, and will simply repeat the process, trying to now
+%% join downstream from some other member. This ensures that should
+%% the upstream die as soon as the new member has been joined, the new
+%% member is guaranteed to receive the correct state, allowing it to
+%% correctly process messages inherited due to the death of its
+%% upstream neighbour.
+%%
+%% The canonical definition of the group membership is held by a
+%% distributed database. Whilst this allows the total ordering of
+%% changes to be achieved, it is nevertheless undesirable to have to
+%% query this database for the current view, upon receiving each
+%% message. Instead, we wish for members to be able to cache a view of
+%% the group membership, which then requires a cache invalidation
+%% mechanism. Each member maintains its own view of the group
+%% membership. Thus when the group's membership changes, members may
+%% need to become aware of such changes in order to be able to
+%% accurately process messages they receive. Because of the
+%% requirement of a total ordering of conflicting membership changes,
+%% it is not possible to use the guaranteed broadcast mechanism to
+%% communicate these changes: to achieve the necessary ordering, it
+%% would be necessary for such messages to be published by exactly one
+%% member, which can not be guaranteed given that such a member could
+%% die.
+%%
+%% The total ordering we enforce on membership changes gives rise to a
+%% view version number: every change to the membership creates a
+%% different view, and the total ordering permits a simple
+%% monotonically increasing view version number.
+%%
+%% Lemma 3: If a message is sent from a member that holds view version
+%% N, it can be correctly processed by any member receiving the
+%% message with a view version >= N.
+%%
+%% Initially, let us suppose that each view contains the ordering of
+%% every member that was ever part of the group. Dead members are
+%% marked as such. Thus we have a ring of members, some of which are
+%% dead, and are thus inherited by the nearest alive downstream
+%% member.
+%%
+%% In the chain A -> B -> C, all three members initially have view
+%% version 1, which reflects reality. B publishes a message, which is
+%% forwarded by C to A. B now dies, which A notices very quickly. Thus A
+%% updates the view, creating version 2. It now forwards B's
+%% publication, sending that message to its new downstream neighbour,
+%% C. This happens before C is aware of the death of B. C must become
+%% aware of the view change before it interprets the message it
+%% has received, otherwise it will fail to learn of the death of B, and
+%% thus will not realise it has inherited B's messages (and will
+%% likely crash).
+%%
+%% Thus very simply, we have that each subsequent view contains more
+%% information than the preceding view.
+%%
+%% However, to avoid the views growing indefinitely, we need to be
+%% able to delete members which have died _and_ for which no messages
+%% are in-flight. This requires that upon inheriting a dead member, we
+%% know the last publication sent by the dead member (this is easy: we
+%% inherit a member because we are the nearest downstream member which
+%% implies that we know at least as much as everyone else about the
+%% publications of the dead member), and we know the earliest message
+%% for which the acknowledgement is still in flight.
+%%
+%% In the chain A -> B -> C, when B dies, A will send to C its state
+%% (as C is the new downstream from A), allowing C to calculate which
+%% messages it has missed out on (described above). At this point, C
+%% also inherits B's messages. If that state from A also includes the
+%% last message published by B for which an acknowledgement has been
+%% seen, then C knows exactly which further acknowledgements it must
+%% receive (also including issuing acknowledgements for publications
+%% still in-flight that it receives), after which it is known there
+%% are no more messages in flight for B, thus all evidence that B was
+%% ever part of the group can be safely removed from the canonical
+%% group membership.
+%%
+%% Thus, for every message that a member sends, it includes with that
+%% message its view version. When a member receives a message it will
+%% update its view from the canonical copy, should its view be older
+%% than the view version included in the message it has received.
+%%
+%% The state held by each member therefore includes the messages from
+%% each publisher pending acknowledgement, the last publication seen
+%% from that publisher, and the last acknowledgement from that
+%% publisher. In the case of the member's own publications or
+%% inherited members, this last acknowledgement seen state indicates
+%% the last acknowledgement retired, rather than sent.
+%%
+%%
+%% Proof sketch
+%% ------------
+%%
+%% We need to prove that with the provided operational semantics, we
+%% can never reach a state that is not well formed from a well-formed
+%% starting state.
+%%
+%% Operational semantics (small step): straight-forward message
+%% sending, process monitoring, state updates.
+%%
+%% Well formed state: dead members inherited by exactly one non-dead
+%% member; for every entry in anyone's pending-acks, either (the
+%% publication of the message is in-flight downstream from the member
+%% and upstream from the publisher) or (the acknowledgement of the
+%% message is in-flight downstream from the publisher and upstream
+%% from the member).
+%%
+%% Proof by induction on the applicable operational semantics.
+%%
+%%
+%% Related work
+%% ------------
+%%
+%% The ring configuration and double traversal of messages around the
+%% ring is similar (though developed independently) to the LCR
+%% protocol by [Levy 2008]. However, LCR differs in several
+%% ways. Firstly, by using vector clocks, it enforces a total order of
+%% message delivery, which is unnecessary for our purposes. More
+%% significantly, it is built on top of a "group communication system"
+%% which performs the group management functions, taking
+%% responsibility away from the protocol as to how to cope with safely
+%% adding and removing members. When membership changes do occur, the
+%% protocol stipulates that every member must perform communication
+%% with every other member of the group, to ensure all outstanding
+%% deliveries complete, before the entire group transitions to the new
+%% view. This, in total, requires two sets of all-to-all synchronous
+%% communications.
+%%
+%% This is not only rather inefficient, but also does not explain what
+%% happens upon the failure of a member during this process. It does
+%% though entirely avoid the need for inheritance of responsibility of
+%% dead members that our protocol incorporates.
+%%
+%% In [Marandi et al 2010], a Paxos-based protocol is described. This
+%% work explicitly focuses on the efficiency of communication. LCR
+%% (and our protocol too) are more efficient, but at the cost of
+%% higher latency. The Ring-Paxos protocol is itself built on top of
+%% IP-multicast, which rules it out for many applications where
+%% point-to-point communication is all that can be required. They also
+%% have an excellent related work section which I really ought to
+%% read...
+%%
+%%
+%% [Levy 2008] The Complexity of Reliable Distributed Storage, 2008.
+%% [Marandi et al 2010] Ring Paxos: A High-Throughput Atomic Broadcast
+%% Protocol
+
+
+-behaviour(gen_server2).
+
+-export([create_tables/0, start_link/3, leave/1, broadcast/2,
+         confirmed_broadcast/2, group_members/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3, prioritise_cast/2, prioritise_info/2]).
+
+-export([behaviour_info/1]).
+
+-export([table_definitions/0, flush/1]).
+
+-define(GROUP_TABLE, gm_group).
+-define(HIBERNATE_AFTER_MIN, 1000).
+-define(DESIRED_HIBERNATE, 10000).
+-define(BROADCAST_TIMER, 25).
+-define(SETS, ordsets).
+-define(DICT, orddict).
+
+%% Per-process state of a single group member.
+-record(state,
+        { self,             %% our own pid
+          left,             %% {Member, MonitorRef} we accept messages from;
+                            %% {Self, undefined} until we have a neighbour
+          right,            %% {Member, MonitorRef} we send messages to;
+                            %% {Self, undefined} until we have a neighbour
+          group_name,       %% name of the group we belong to
+          module,           %% callback module implementing the gm behaviour
+          view,             %% undefined | {Version, ?DICT of Id -> #view_member}
+          pub_count,        %% number used for our next publication, from 0
+          members_state,    %% undefined until joined and caught up,
+                            %% then a ?DICT of Id -> #member
+          callback_args,    %% Args passed as first argument to callbacks
+          confirms,         %% queue of {PubCount, From} for confirmed
+                            %% broadcasts still awaiting a reply
+          broadcast_buffer, %% [{PubCount, Msg}] not yet sent, newest first
+          broadcast_timer   %% undefined | timer ref of the pending flush
+        }).
+
+%% Row stored in the ?GROUP_TABLE mnesia table. members is a list in
+%% which dead members appear as {dead, Member}.
+-record(gm_group, { name, version, members }).
+
+%% One alive member within a view: its id, the set (?SETS) of dead
+%% members it has inherited (aliases), and its left and right
+%% neighbours.
+-record(view_member, { id, aliases, left, right }).
+
+%% What we know about one publisher: the queue of its publications
+%% still pending acknowledgement, and the last publication and last
+%% acknowledgement seen from it.
+-record(member, { pending_ack, last_pub, last_ack }).
+
+-define(TABLE, {?GROUP_TABLE, [{record_name, gm_group},
+                               {attributes, record_info(fields, gm_group)}]}).
+-define(TABLE_MATCH, {match, #gm_group { _ = '_' }}).
+
+-define(TAG, '$gm').
+
+-ifdef(use_specs).
+
+-export_type([group_name/0]).
+
+-type(group_name() :: any()).
+
+%% create_tables/0 hands back whatever mnesia:create_table/2 returned
+%% on failure, so the error shape is part of its return type.
+-spec(create_tables/0 :: () -> 'ok' | {'aborted', any()}).
+-spec(start_link/3 :: (group_name(), atom(), any()) ->
+                           {'ok', pid()} | {'error', any()}).
+-spec(leave/1 :: (pid()) -> 'ok').
+-spec(broadcast/2 :: (pid(), any()) -> 'ok').
+%% Both calls below are answered with 'not_joined' while the member
+%% has not yet obtained its members_state.
+-spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok' | 'not_joined').
+-spec(group_members/1 :: (pid()) -> [pid()] | 'not_joined').
+
+-endif.
+
+%% Describes the callbacks a module must export to be used as the
+%% callback module of a gm member.
+behaviour_info(callbacks) ->
+    [
+     %% The joined, members_changed and handle_msg callbacks can all
+     %% return any of the following terms:
+     %%
+     %% 'ok' - the callback function returns normally
+     %%
+     %% {'stop', Reason} - the callback indicates the member should
+     %% stop with reason Reason and should leave the group.
+     %%
+     %% {'become', Module, Args} - the callback indicates that the
+     %% callback module should be changed to Module and that the
+     %% callback functions should now be passed the arguments
+     %% Args. This allows the callback module to be dynamically
+     %% changed.
+
+     %% joined(Args, Members)
+     %% Called when we've successfully joined the group. Supplied with
+     %% Args provided in start_link, plus current group members.
+     {joined, 2},
+
+     %% members_changed(Args, Births, Deaths)
+     %% Supplied with Args provided in start_link, the list of new
+     %% members and the list of members previously known to us that
+     %% have since died. Note that if a member joins and dies very
+     %% quickly, it's possible that we will never see that member
+     %% appear in either births or deaths. However we are guaranteed
+     %% that (1) we will see a member joining either in the births
+     %% here, or in the members passed to joined/2 before receiving
+     %% any messages from it; and (2) we will not see members die that
+     %% we have not seen born (or supplied in the members to
+     %% joined/2).
+     {members_changed, 3},
+
+     %% handle_msg(Args, From, Msg)
+     %% Supplied with Args provided in start_link, the sender, and the
+     %% message. This does get called for messages injected by this
+     %% member, however, in such cases, there is no special
+     %% significance of this invocation: it does not indicate that the
+     %% message has made it to any other members, let alone all other
+     %% members.
+     {handle_msg, 3},
+
+     %% terminate(Args, Reason)
+     %% Called on gm member termination as per rules in gen_server,
+     %% with the Args provided in start_link plus the termination
+     %% Reason.
+     {terminate, 2}
+    ];
+behaviour_info(_Other) ->
+    undefined.
+
+%% Creates the mnesia table(s) gm needs. A table that already exists
+%% is treated as success. Any other result from mnesia is returned
+%% unchanged and the remaining tables are not attempted.
+create_tables() ->
+    create_tables([?TABLE]).
+
+create_tables([]) ->
+    ok;
+create_tables([{Table, Attributes} | Tables]) ->
+    case mnesia:create_table(Table, Attributes) of
+        {atomic, ok}                       -> create_tables(Tables);
+        %% Match on the table we just tried to create rather than on
+        %% a hard-coded name, so this stays correct for every entry
+        %% of the list and not only for gm_group.
+        {aborted, {already_exists, Table}} -> create_tables(Tables);
+        Err                                -> Err
+    end.
+
+%% Returns the gm table definition with the ?TABLE_MATCH term
+%% prepended to the table's attribute list.
+table_definitions() ->
+    {TableName, TableAttrs} = ?TABLE,
+    Definition = [?TABLE_MATCH | TableAttrs],
+    [{TableName, Definition}].
+
+%% Starts a group member process for GroupName. Module is the
+%% callback module and Args is passed to each of its callbacks.
+start_link(GroupName, Module, Args) ->
+    gen_server2:start_link(?MODULE, [GroupName, Module, Args], []).
+
+%% Asynchronously asks the member to stop with reason 'normal'.
+leave(Server) ->
+    gen_server2:cast(Server, leave).
+
+%% Asynchronously publishes Msg to the group. The message is silently
+%% dropped if the member has not yet obtained its members state.
+broadcast(Server, Msg) ->
+    gen_server2:cast(Server, {broadcast, Msg}).
+
+%% Synchronously publishes Msg to the group, waiting without timeout
+%% for the reply. Replies 'not_joined' if the member has not yet
+%% obtained its members state.
+confirmed_broadcast(Server, Msg) ->
+    gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity).
+
+%% Returns the alive members of the member's current view, or
+%% 'not_joined' if the member has not yet obtained its members state.
+group_members(Server) ->
+    gen_server2:call(Server, group_members, infinity).
+
+%% Asks the member to send out its buffered broadcasts now. This is
+%% also what the broadcast timer invokes when it fires.
+flush(Server) ->
+    gen_server2:cast(Server, flush).
+
+
+%% gen_server2 callback. Seeds the random number generator, queues a
+%% 'join' cast to ourselves and starts out as a lone member: both
+%% neighbours point at ourselves, and neither a view nor a members
+%% state is known yet.
+init([GroupName, Module, Args]) ->
+    Self = self(),
+    {A1, A2, A3} = now(),
+    random:seed(A1, A2, A3),
+    gen_server2:cast(Self, join),
+    NoNeighbour = {Self, undefined},
+    InitialState = #state { self             = Self,
+                            left             = NoNeighbour,
+                            right            = NoNeighbour,
+                            group_name       = GroupName,
+                            module           = Module,
+                            view             = undefined,
+                            pub_count        = 0,
+                            members_state    = undefined,
+                            callback_args    = Args,
+                            confirms         = queue:new(),
+                            broadcast_buffer = [],
+                            broadcast_timer  = undefined },
+    Backoff = {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
+               ?DESIRED_HIBERNATE},
+    {ok, InitialState, hibernate, Backoff}.
+
+
+%% gen_server2 callback for synchronous requests. Clause order
+%% matters: the "not joined yet" and "alone in the group" cases are
+%% matched before the general ones.
+
+%% confirmed_broadcast before we have a members state: refuse.
+handle_call({confirmed_broadcast, _Msg}, _From,
+            State = #state { members_state = undefined }) ->
+    reply(not_joined, State);
+
+%% confirmed_broadcast while we have no right-hand neighbour: there
+%% is nobody to send to, so only our own callback sees the message
+%% and 'ok' is passed on as the reply.
+handle_call({confirmed_broadcast, Msg}, _From,
+            State = #state { self          = Self,
+                             right         = {Self, undefined},
+                             module        = Module,
+                             callback_args = Args }) ->
+    handle_callback_result({Module:handle_msg(Args, Self, Msg), ok, State});
+
+%% confirmed_broadcast in the general case: From is recorded so that
+%% the caller can be answered later.
+handle_call({confirmed_broadcast, Msg}, From, State) ->
+    internal_broadcast(Msg, From, State);
+
+handle_call(group_members, _From,
+            State = #state { members_state = undefined }) ->
+    reply(not_joined, State);
+
+handle_call(group_members, _From, State = #state { view = View }) ->
+    reply(alive_view_members(View), State);
+
+%% A prospective member asks to be added downstream of us, but we
+%% have no members state of our own to hand over yet.
+handle_call({add_on_right, _NewMember}, _From,
+            State = #state { members_state = undefined }) ->
+    reply(not_ready, State);
+
+%% Add NewMember to the group as our right-hand neighbour. The fun
+%% passed to record_new_member_in_group sends NewMember a catchup
+%% message carrying our members state, built against the view of the
+%% updated group. NOTE(review): exactly when that fun is run relative
+%% to the group being written is decided by
+%% record_new_member_in_group, which is not visible here.
+handle_call({add_on_right, NewMember}, _From,
+            State = #state { self          = Self,
+                             group_name    = GroupName,
+                             view          = View,
+                             members_state = MembersState,
+                             module        = Module,
+                             callback_args = Args }) ->
+    Group = record_new_member_in_group(
+              GroupName, Self, NewMember,
+              fun (Group1) ->
+                      View1 = group_to_view(Group1),
+                      ok = send_right(NewMember, View1,
+                                      {catchup, Self, prepare_members_state(
+                                                        MembersState)})
+              end),
+    View2 = group_to_view(Group),
+    State1 = check_neighbours(State #state { view = View2 }),
+    Result = callback_view_changed(Args, Module, View, View2),
+    handle_callback_result({Result, {ok, Group}, State1}).
+
+
+%% gen_server2 callback for asynchronous requests.
+
+%% A message from another member, tagged with the view version the
+%% sender held. If our view is older we first re-read the group,
+%% rebuild the view, tell the callback module about the change and
+%% re-check our neighbours. Only then is Msg itself dispatched.
+handle_cast({?TAG, ReqVer, Msg},
+            State = #state { view          = View,
+                             group_name    = GroupName,
+                             module        = Module,
+                             callback_args = Args }) ->
+    {Result, State1} =
+        case needs_view_update(ReqVer, View) of
+            true ->
+                View1 = group_to_view(read_group(GroupName)),
+                {callback_view_changed(Args, Module, View, View1),
+                 check_neighbours(State #state { view = View1 })};
+            false ->
+                {ok, State}
+        end,
+    handle_callback_result(
+      if_callback_success(
+        Result, fun handle_msg_true/3, fun handle_msg_false/3, Msg, State1));
+
+%% broadcast before we have a members state: silently dropped.
+handle_cast({broadcast, _Msg}, State = #state { members_state = undefined }) ->
+    noreply(State);
+
+%% broadcast while we have no right-hand neighbour: only our own
+%% callback sees the message.
+handle_cast({broadcast, Msg},
+            State = #state { self          = Self,
+                             right         = {Self, undefined},
+                             module        = Module,
+                             callback_args = Args }) ->
+    handle_callback_result({Module:handle_msg(Args, Self, Msg), State});
+
+%% broadcast in the general case: nobody is waiting for a reply.
+handle_cast({broadcast, Msg}, State) ->
+    internal_broadcast(Msg, none, State);
+
+%% Sent to ourselves from init/1. After joining the group, if we are
+%% the only alive member we start with a blank members state,
+%% otherwise it stays undefined for now. The callback module is then
+%% told which members are known.
+handle_cast(join, State = #state { self          = Self,
+                                   group_name    = GroupName,
+                                   members_state = undefined,
+                                   module        = Module,
+                                   callback_args = Args }) ->
+    View = join_group(Self, GroupName),
+    MembersState =
+        case alive_view_members(View) of
+            [Self] -> blank_member_state();
+            _      -> undefined
+        end,
+    State1 = check_neighbours(State #state { view          = View,
+                                             members_state = MembersState }),
+    handle_callback_result(
+      {Module:joined(Args, all_known_members(View)), State1});
+
+handle_cast(leave, State) ->
+    {stop, normal, State};
+
+%% Forget the timer reference and send out whatever is buffered.
+handle_cast(flush, State) ->
+    noreply(
+      flush_broadcast_buffer(State #state { broadcast_timer = undefined })).
+
+
+%% gen_server2 callback for other messages. Only 'DOWN' messages are
+%% handled. A 'DOWN' whose monitor reference matches neither
+%% neighbour is ignored. Otherwise the dead neighbour is recorded in
+%% the group, the view is rebuilt from the result, and our neighbours
+%% are re-checked.
+handle_info({'DOWN', MRef, process, _Pid, _Reason},
+            State = #state { self          = Self,
+                             left          = Left,
+                             right         = Right,
+                             group_name    = GroupName,
+                             view          = View,
+                             module        = Module,
+                             callback_args = Args,
+                             confirms      = Confirms }) ->
+    %% MRef is already bound, so these patterns select whichever
+    %% neighbour this monitor reference belongs to.
+    Member = case {Left, Right} of
+                 {{Member1, MRef}, _} -> Member1;
+                 {_, {Member1, MRef}} -> Member1;
+                 _                    -> undefined
+             end,
+    case Member of
+        undefined ->
+            noreply(State);
+        _ ->
+            View1 =
+                group_to_view(record_dead_member_in_group(Member, GroupName)),
+            State1 = State #state { view = View1 },
+            {Result, State2} =
+                case alive_view_members(View1) of
+                    [Self] ->
+                        %% we are the only member left alive: start
+                        %% over with a blank members state and purge
+                        %% the outstanding confirms.
+                        maybe_erase_aliases(
+                          State1 #state {
+                            members_state = blank_member_state(),
+                            confirms      = purge_confirms(Confirms) });
+                    _ ->
+                        %% here we won't be pointing out any deaths:
+                        %% the concern is that there may be births
+                        %% which we'd otherwise miss.
+                        {callback_view_changed(Args, Module, View, View1),
+                         State1}
+                end,
+            handle_callback_result({Result, check_neighbours(State2)})
+    end.
+
+
+%% gen_server2 callback. Sends out anything still sitting in the
+%% broadcast buffer, then hands the termination reason to the
+%% callback module.
+terminate(Why, FinalState = #state { callback_args = CallbackArgs,
+                                     module        = CallbackMod }) ->
+    flush_broadcast_buffer(FinalState),
+    CallbackMod:terminate(CallbackArgs, Why).
+
+
+%% gen_server2 callback. The state is kept as it is across code
+%% upgrades.
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% gen_server2 callback. 'flush' casts get priority 1, every other
+%% cast gets priority 0.
+prioritise_cast(Cast, _State) ->
+    case Cast of
+        flush -> 1;
+        _     -> 0
+    end.
+
+%% gen_server2 callback. Process 'DOWN' notifications get priority 1,
+%% every other info message gets priority 0.
+prioritise_info(Info, _State) ->
+    case Info of
+        {'DOWN', _MRef, process, _Pid, _Reason} -> 1;
+        _                                       -> 0
+    end.
+
+
+%% Processes the payload of a message received from another member.
+%% Returns {Result, State}. catchup and activity messages are only
+%% acted upon when they come from our current left-hand neighbour.
+handle_msg(check_neighbours, State) ->
+    %% no-op - it's already been done by the calling handle_cast
+    {ok, State};
+
+%% catchup from our left while we have no members state yet: pass the
+%% received state on to our right (now naming ourselves as the
+%% sender) and adopt it as our own.
+handle_msg({catchup, Left, MembersStateLeft},
+           State = #state { self          = Self,
+                            left          = {Left, _MRefL},
+                            right         = {Right, _MRefR},
+                            view          = View,
+                            members_state = undefined }) ->
+    ok = send_right(Right, View, {catchup, Self, MembersStateLeft}),
+    MembersStateLeft1 = build_members_state(MembersStateLeft),
+    {ok, State #state { members_state = MembersStateLeft1 }};
+
+%% catchup from our left while we already have a members state. For
+%% every member known to either side, our pending-ack queue is
+%% compared with the left's to work out the publications (and, for
+%% members we are not an alias of, the acknowledgements) to process.
+%% The result is then handled as if it were an ordinary activity
+%% message from Left.
+%% NOTE(review): the meaning of the three parts returned by
+%% find_prefix_common_suffix is taken from the variable names used
+%% here; the helper itself is not visible in this chunk.
+handle_msg({catchup, Left, MembersStateLeft},
+           State = #state { self = Self,
+                            left = {Left, _MRefL},
+                            view = View,
+                            members_state = MembersState })
+  when MembersState =/= undefined ->
+    MembersStateLeft1 = build_members_state(MembersStateLeft),
+    AllMembers = lists:usort(?DICT:fetch_keys(MembersState) ++
+                                 ?DICT:fetch_keys(MembersStateLeft1)),
+    {MembersState1, Activity} =
+        lists:foldl(
+          fun (Id, MembersStateActivity) ->
+                  #member { pending_ack = PALeft, last_ack = LA } =
+                      find_member_or_blank(Id, MembersStateLeft1),
+                  with_member_acc(
+                    fun (#member { pending_ack = PA } = Member, Activity1) ->
+                            case is_member_alias(Id, Self, View) of
+                                true ->
+                                    %% Id is us, or a dead member we
+                                    %% have inherited: take last_ack
+                                    %% from the left and emit
+                                    %% publications only.
+                                    {_AcksInFlight, Pubs, _PA1} =
+                                        find_prefix_common_suffix(PALeft, PA),
+                                    {Member #member { last_ack = LA },
+                                     activity_cons(Id, pubs_from_queue(Pubs),
+                                                   [], Activity1)};
+                                false ->
+                                    {Acks, _Common, Pubs} =
+                                        find_prefix_common_suffix(PA, PALeft),
+                                    {Member,
+                                     activity_cons(Id, pubs_from_queue(Pubs),
+                                                   acks_from_queue(Acks),
+                                                   Activity1)}
+                            end
+                    end, Id, MembersStateActivity)
+          end, {MembersState, activity_nil()}, AllMembers),
+    handle_msg({activity, Left, activity_finalise(Activity)},
+               State #state { members_state = MembersState1 });
+
+%% catchup from anybody other than our left: ignored.
+handle_msg({catchup, _NotLeft, _MembersState}, State) ->
+    {ok, State};
+
+%% activity from our left: a list of {Id, Pubs, Acks}, one entry per
+%% publisher. Each entry updates what we hold for that publisher and
+%% contributes to the activity we send on to our right.
+handle_msg({activity, Left, Activity},
+           State = #state { self          = Self,
+                            left          = {Left, _MRefL},
+                            view          = View,
+                            members_state = MembersState,
+                            confirms      = Confirms })
+  when MembersState =/= undefined ->
+    {MembersState1, {Confirms1, Activity1}} =
+        lists:foldl(
+          fun ({Id, Pubs, Acks}, MembersStateConfirmsActivity) ->
+                  with_member_acc(
+                    fun (Member = #member { pending_ack = PA,
+                                            last_pub    = LP,
+                                            last_ack    = LA },
+                         {Confirms2, Activity2}) ->
+                            case is_member_alias(Id, Self, View) of
+                                true ->
+                                    %% Publications of ours (or of a
+                                    %% member we inherited) have come
+                                    %% back to us: they leave our
+                                    %% pending queue and we send acks
+                                    %% for them instead of forwarding
+                                    %% the publications.
+                                    {ToAck, PA1} =
+                                        find_common(queue_from_pubs(Pubs), PA,
+                                                    queue:new()),
+                                    LA1 = last_ack(Acks, LA),
+                                    AckNums = acks_from_queue(ToAck),
+                                    Confirms3 = maybe_confirm(
+                                                  Self, Id, Confirms2, AckNums),
+                                    {Member #member { pending_ack = PA1,
+                                                      last_ack    = LA1 },
+                                     {Confirms3,
+                                      activity_cons(
+                                        Id, [], AckNums, Activity2)}};
+                                false ->
+                                    %% Somebody else's traffic:
+                                    %% record the publications, apply
+                                    %% the acks, and forward both
+                                    %% unchanged.
+                                    PA1 = apply_acks(Acks, join_pubs(PA, Pubs)),
+                                    LA1 = last_ack(Acks, LA),
+                                    LP1 = last_pub(Pubs, LP),
+                                    {Member #member { pending_ack = PA1,
+                                                      last_pub    = LP1,
+                                                      last_ack    = LA1 },
+                                     {Confirms2,
+                                      activity_cons(Id, Pubs, Acks, Activity2)}}
+                            end
+                    end, Id, MembersStateConfirmsActivity)
+          end, {MembersState, {Confirms, activity_nil()}}, Activity),
+    State1 = State #state { members_state = MembersState1,
+                            confirms      = Confirms1 },
+    Activity3 = activity_finalise(Activity1),
+    {Result, State2} = maybe_erase_aliases(State1),
+    ok = maybe_send_activity(Activity3, State2),
+    if_callback_success(
+      Result, fun activity_true/3, fun activity_false/3, Activity3, State2);
+
+%% activity from anybody other than our left, or arriving before we
+%% have a members state: ignored.
+handle_msg({activity, _NotLeft, _Activity}, State) ->
+    {ok, State}.
+
+
+%% Builds a hibernating 'noreply' result, after making sure the
+%% broadcast timer agrees with the contents of the broadcast buffer.
+noreply(State) ->
+    State1 = ensure_broadcast_timer(State),
+    {noreply, State1, hibernate}.
+
+%% Builds a hibernating 'reply' result carrying Reply, after making
+%% sure the broadcast timer agrees with the contents of the broadcast
+%% buffer.
+reply(Reply, State) ->
+    State1 = ensure_broadcast_timer(State),
+    {reply, Reply, State1, hibernate}.
+
+%% Keeps the flush timer in step with the broadcast buffer: no timer
+%% while the buffer is empty, exactly one timer while it is not.
+ensure_broadcast_timer(State = #state { broadcast_buffer = Buffer,
+                                        broadcast_timer  = Timer }) ->
+    case {Buffer, Timer} of
+        {[], undefined} ->
+            %% nothing buffered, nothing scheduled
+            State;
+        {[], _} ->
+            %% nothing left to flush: drop the pending timer
+            timer:cancel(Timer),
+            State #state { broadcast_timer = undefined };
+        {_, undefined} ->
+            %% something buffered and no flush scheduled yet
+            {ok, TRef} = timer:apply_after(?BROADCAST_TIMER, ?MODULE, flush,
+                                           [self()]),
+            State #state { broadcast_timer = TRef };
+        {_, _} ->
+            %% a flush is already scheduled
+            State
+    end.
+
+%% Publishes Msg on behalf of this member. The message is first given
+%% to our own callback module, then buffered under the next
+%% publication number. From is either 'none' (plain broadcast) or the
+%% caller of a confirmed broadcast; in the latter case the caller is
+%% remembered in the confirms queue and the buffer is flushed
+%% straight away rather than waiting for the timer.
+internal_broadcast(Msg, From, State = #state { self             = Self,
+                                               module           = Module,
+                                               callback_args    = Args,
+                                               pub_count        = PubCount,
+                                               confirms         = Confirms,
+                                               broadcast_buffer = Buffer }) ->
+    Result = Module:handle_msg(Args, Self, Msg),
+    Confirms1 = case From of
+                    none -> Confirms;
+                    _    -> queue:in({PubCount, From}, Confirms)
+                end,
+    Buffered = State #state { pub_count        = PubCount + 1,
+                              confirms         = Confirms1,
+                              broadcast_buffer = [{PubCount, Msg} | Buffer] },
+    State1 = case From of
+                 none -> Buffered;
+                 _    -> flush_broadcast_buffer(Buffered)
+             end,
+    handle_callback_result({Result, State1}).
+
+%% Sends every buffered publication onwards as a single activity,
+%% oldest first, and appends them to our own pending-ack queue. Does
+%% nothing when the buffer is empty.
+flush_broadcast_buffer(State = #state { self             = Self,
+                                        members_state    = MembersState,
+                                        broadcast_buffer = Buffer }) ->
+    case Buffer of
+        [] ->
+            State;
+        _ ->
+            %% the buffer is built newest-first
+            Pubs = lists:reverse(Buffer),
+            Outgoing = activity_finalise(
+                         activity_cons(Self, Pubs, [], activity_nil())),
+            ok = maybe_send_activity(Outgoing, State),
+            AppendPubs =
+                fun (Member = #member { pending_ack = PA }) ->
+                        Member #member {
+                          pending_ack = queue:join(PA, queue:from_list(Pubs)) }
+                end,
+            State #state {
+              members_state    = with_member(AppendPubs, Self, MembersState),
+              broadcast_buffer = [] }
+    end.
+
+
+%% ---------------------------------------------------------------------------
+%% View construction and inspection
+%% ---------------------------------------------------------------------------
+
+%% True when the version required by an incoming message is newer
+%% than the version of the view we currently hold.
+needs_view_update(ReqVer, {OurVer, _Members}) ->
+    ReqVer > OurVer.
+
+%% Returns the version number of a {Ver, View} view.
+view_version({Ver, _View}) ->
+    Ver.
+
+%% A group member entry is dead when it is wrapped as {dead, Member};
+%% anything else counts as alive.
+is_member_alive(Entry) ->
+    case Entry of
+        {dead, _} -> false;
+        _         -> true
+    end.
+
+%% True when Member is Self, or is in the alias set recorded for Self
+%% in View. The view is only consulted when Member is not Self.
+is_member_alias(Member, Self, View) ->
+    Member =:= Self orelse
+        ?SETS:is_element(
+           Member, (fetch_view_member(Self, View)) #view_member.aliases).
+
+dead_member_id({dead, Member}) -> Member. %% unwraps a {dead, Id} marker
+
+store_view_member(VMember = #view_member { id = Id }, {Ver, View}) ->
+    {Ver, ?DICT:store(Id, VMember, View)}. %% stored under its own id
+
+with_view_member(Fun, View, Id) -> %% replaces member Id with Fun(member Id)
+    store_view_member(Fun(fetch_view_member(Id, View)), View).
+
+%% The #view_member{} held under Id in the view's dictionary.
+fetch_view_member(Id, {_Ver, View}) -> ?DICT:fetch(Id, View).
+
+%% Lookup that yields {ok, VMember} or error rather than assuming presence.
+find_view_member(Id, {_Ver, View}) -> ?DICT:find(Id, View).
+
+%% A view at version Ver that has no members in it yet.
+blank_view(Ver) -> {Ver, ?DICT:new()}.
+
+%% The ids that have a #view_member{} entry, i.e. the keys of the view.
+alive_view_members({_Ver, View}) -> ?DICT:fetch_keys(View).
+
+%% Every id in the view together with all of the ids recorded as its aliases.
+all_known_members({_Ver, View}) ->
+    Collect = fun (Id, #view_member { aliases = Aliases }, Known) ->
+                      ?SETS:to_list(Aliases) ++ [Id | Known] end,
+    ?DICT:fold(Collect, [], View).
+
+group_to_view(#gm_group { members = Members, version = Ver }) ->
+    %% Alive is repeated so that each member gets a left and a right neighbour
+    Alive = [_|_] = [M || M <- Members, is_member_alive(M)], %% ASSERTION
+    add_aliases(link_view(Alive ++ Alive ++ Alive, blank_view(Ver)), Members).
+
+%% Records each middle element of the list with its two neighbours, moving
+%% along one element at a time until it reaches an id it has already stored.
+link_view([Left, Middle, Right | Rest], View) ->
+    case find_view_member(Middle, View) of
+        {ok, _AlreadyLinked} -> View;
+        error ->
+            VMember = #view_member { id      = Middle,
+                                     aliases = ?SETS:new(),
+                                     left    = Left,
+                                     right   = Right },
+            link_view([Middle, Right | Rest],
+                      store_view_member(VMember, View))
+    end;
+link_view(_TooShort, View) -> View.
+
+add_aliases(View, Members) -> %% dead ids become aliases of the next alive id
+    Members1 = ensure_alive_suffix(Members), %% list now ends with an alive id
+    {EmptyDeadSet, View1} =
+        lists:foldl(
+          fun (Member, {DeadAcc, ViewAcc}) ->
+                  case is_member_alive(Member) of
+                      true ->
+                          {?SETS:new(),
+                           with_view_member(
+                             fun (VMember =
+                                      #view_member { aliases = Aliases }) ->
+                                     VMember #view_member {
+                                       aliases = ?SETS:union(Aliases, DeadAcc) }
+                             end, ViewAcc, Member)};
+                      false ->
+                          {?SETS:add_element(dead_member_id(Member), DeadAcc),
+                           ViewAcc}
+                  end
+          end, {?SETS:new(), View}, Members1),
+    0 = ?SETS:size(EmptyDeadSet), %% ASSERTION - no dead ids left unattached
+    View1.
+
+ensure_alive_suffix(Members) -> %% rotates trailing dead members to the front
+    queue:to_list(ensure_alive_suffix1(queue:from_list(Members))).
+
+ensure_alive_suffix1(MembersQ) -> %% loops until the rearmost member is alive
+    {{value, Member}, MembersQ1} = queue:out_r(MembersQ),
+    case is_member_alive(Member) of
+        true  -> MembersQ;
+        false -> ensure_alive_suffix1(queue:in_r(Member, MembersQ1))
+    end.
+
+
+%% ---------------------------------------------------------------------------
+%% View modification
+%% ---------------------------------------------------------------------------
+
+join_group(Self, GroupName) -> %% begins from the stored group record
+    join_group(Self, GroupName, read_group(GroupName)).
+
+join_group(Self, GroupName, {error, not_found}) -> %% no group recorded yet
+    join_group(Self, GroupName, prune_or_create_group(Self, GroupName));
+join_group(Self, _GroupName, #gm_group { members = [Self] } = Group) ->
+    group_to_view(Group); %% we are the only member
+join_group(Self, GroupName, #gm_group { members = Members } = Group) ->
+    case lists:member(Self, Members) of
+        true ->
+            group_to_view(Group); %% already recorded as a member
+        false ->
+            case lists:filter(fun is_member_alive/1, Members) of
+                [] -> %% everyone is dead: try to take the group over
+                    join_group(Self, GroupName,
+                               prune_or_create_group(Self, GroupName));
+                Alive -> %% ask a randomly chosen alive member to add us
+                    Left = lists:nth(random:uniform(length(Alive)), Alive),
+                    Handler =
+                        fun () -> %% Left has gone: record it dead, then retry
+                                join_group(
+                                  Self, GroupName,
+                                  record_dead_member_in_group(Left, GroupName))
+                        end,
+                    try
+                        case gen_server2:call(
+                               Left, {add_on_right, Self}, infinity) of
+                            {ok, Group1} -> group_to_view(Group1);
+                            not_ready    -> join_group(Self, GroupName)
+                        end
+                    catch %% exits indicating that Left is no longer running
+                        exit:{R, _}
+                          when R =:= noproc; R =:= normal; R =:= shutdown ->
+                            Handler();
+                        exit:{{R, _}, _}
+                          when R =:= nodedown; R =:= shutdown ->
+                            Handler()
+                    end
+            end
+    end.
+
+read_group(GroupName) -> %% dirty read, i.e. outside any transaction
+    case mnesia:dirty_read(?GROUP_TABLE, GroupName) of
+        []      -> {error, not_found};
+        [Group] -> Group
+    end.
+
+prune_or_create_group(Self, GroupName) -> %% reset the group if all are dead
+    {atomic, Group} =
+        mnesia:sync_transaction(
+          fun () -> GroupNew = #gm_group { name    = GroupName,
+                                           members = [Self],
+                                           version = 0 },
+                    case mnesia:read({?GROUP_TABLE, GroupName}) of
+                        [] -> %% no such group: create it with just us in it
+                            mnesia:write(GroupNew),
+                            GroupNew;
+                        [Group1 = #gm_group { members = Members }] ->
+                            case lists:any(fun is_member_alive/1, Members) of
+                                true  -> Group1; %% someone is alive: keep it
+                                false -> mnesia:write(GroupNew),
+                                         GroupNew
+                            end
+                    end
+          end),
+    Group.
+
+record_dead_member_in_group(Member, GroupName) -> %% Member -> {dead, Member}
+    {atomic, Group} =
+        mnesia:sync_transaction(
+          fun () -> [Group1 = #gm_group { members = Members, version = Ver }] =
+                        mnesia:read({?GROUP_TABLE, GroupName}),
+                    case lists:splitwith(
+                           fun (Member1) -> Member1 =/= Member end, Members) of
+                        {_Members1, []} -> %% not found - already recorded dead
+                            Group1;
+                        {Members1, [Member | Members2]} ->
+                            Members3 = Members1 ++ [{dead, Member} | Members2],
+                            Group2 = Group1 #gm_group { members = Members3,
+                                                        version = Ver + 1 },
+                            mnesia:write(Group2), %% position is kept
+                            Group2
+                    end
+          end),
+    Group.
+
+record_new_member_in_group(GroupName, Left, NewMember, Fun) ->
+    {atomic, Group} =
+        mnesia:sync_transaction(
+          fun () -> %% inserts NewMember immediately after Left
+                  [#gm_group { members = Members, version = Ver } = Group1] =
+                      mnesia:read({?GROUP_TABLE, GroupName}),
+                  {Prefix, [Left | Suffix]} =
+                      lists:splitwith(fun (M) -> M =/= Left end, Members),
+                  Members1 = Prefix ++ [Left, NewMember | Suffix],
+                  Group2 = Group1 #gm_group { members = Members1,
+                                              version = Ver + 1 },
+                  ok = Fun(Group2), %% runs in the transaction, before the write
+                  mnesia:write(Group2),
+                  Group2
+          end),
+    Group.
+
+erase_members_in_group(Members, GroupName) -> %% removes {dead, Id} entries
+    DeadMembers = [{dead, Id} || Id <- Members],
+    {atomic, Group} =
+        mnesia:sync_transaction(
+          fun () ->
+                  [Group1 = #gm_group { members = [_|_] = Members1,
+                                        version = Ver }] =
+                      mnesia:read({?GROUP_TABLE, GroupName}),
+                  case Members1 -- DeadMembers of
+                      Members1 -> Group1; %% nothing removed: version unchanged
+                      Members2 -> Group2 =
+                                      Group1 #gm_group { members = Members2,
+                                                         version = Ver + 1 },
+                                  mnesia:write(Group2),
+                                  Group2
+                  end
+          end),
+    Group.
+
+maybe_erase_aliases(State = #state { self          = Self,
+                                     group_name    = GroupName,
+                                     view          = View,
+                                     members_state = MembersState,
+                                     module        = Module,
+                                     callback_args = Args }) ->
+    #view_member { aliases = Aliases } = fetch_view_member(Self, View),
+    {Erasable, MembersState1}
+        = ?SETS:fold( %% collect the aliases that can be forgotten
+             fun (Id, {ErasableAcc, MembersStateAcc} = Acc) ->
+                     #member { last_pub = LP, last_ack = LA } =
+                         find_member_or_blank(Id, MembersState),
+                     case can_erase_view_member(Self, Id, LA, LP) of
+                         true  -> {[Id | ErasableAcc],
+                                   erase_member(Id, MembersStateAcc)};
+                         false -> Acc
+                     end
+             end, {[], MembersState}, Aliases),
+    State1 = State #state { members_state = MembersState1 },
+    case Erasable of
+        [] -> {ok, State1}; %% nothing erased, so the view is untouched
+        _  -> View1 = group_to_view(
+                        erase_members_in_group(Erasable, GroupName)),
+              {callback_view_changed(Args, Module, View, View1),
+               State1 #state { view = View1 }}
+    end.
+
+%% An alias may be erased once its last ack equals its last pub; never Self.
+can_erase_view_member(Self, Id, LA, LP) ->
+    Id =/= Self andalso LA =:= LP.
+
+
+%% ---------------------------------------------------------------------------
+%% View monitoring and maintenance
+%% ---------------------------------------------------------------------------
+
+ensure_neighbour(_Ver, Self, {Self, undefined}, Self) -> %% still alone
+    {Self, undefined};
+ensure_neighbour(Ver, Self, {Self, undefined}, RealNeighbour) -> %% gained one
+    ok = gen_server2:cast(RealNeighbour, {?TAG, Ver, check_neighbours}),
+    {RealNeighbour, maybe_monitor(RealNeighbour, Self)};
+ensure_neighbour(_Ver, _Self, {RealNeighbour, MRef}, RealNeighbour) -> %% same
+    {RealNeighbour, MRef};
+ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) -> %% changed
+    true = erlang:demonitor(MRef),
+    Msg = {?TAG, Ver, check_neighbours},
+    ok = gen_server2:cast(RealNeighbour, Msg), %% tell the old neighbour
+    ok = case Neighbour of
+             Self -> ok; %% no need to cast to ourselves
+             _    -> gen_server2:cast(Neighbour, Msg)
+         end,
+    {Neighbour, maybe_monitor(Neighbour, Self)}.
+
+%% Monitors Other unless it is ourselves, in which case there is no monitor
+%% reference and the atom 'undefined' stands in for one.
+maybe_monitor(Self, Self)   -> undefined;
+maybe_monitor(Other, _Self) -> erlang:monitor(process, Other).
+
+check_neighbours(State = #state { self             = Self,
+                                  left             = Left,
+                                  right            = Right,
+                                  view             = View,
+                                  broadcast_buffer = Buffer }) ->
+    #view_member { left = VLeft, right = VRight }
+        = fetch_view_member(Self, View),
+    Ver = view_version(View), %% realign both neighbours with the view
+    Left1 = ensure_neighbour(Ver, Self, Left, VLeft),
+    Right1 = ensure_neighbour(Ver, Self, Right, VRight),
+    Buffer1 = case Right1 of
+                  {Self, undefined} -> []; %% alone: drop the unsent buffer
+                  _                 -> Buffer
+              end,
+    State1 = State #state { left = Left1, right = Right1,
+                            broadcast_buffer = Buffer1 },
+    ok = maybe_send_catchup(Right, State1), %% Right is the previous neighbour
+    State1.
+
+maybe_send_catchup(Right, #state { right = Right }) ->
+    ok; %% right-hand neighbour is unchanged
+maybe_send_catchup(_Right, #state { self  = Self,
+                                    right = {Self, undefined} }) ->
+    ok; %% we are our own right-hand neighbour
+maybe_send_catchup(_Right, #state { members_state = undefined }) ->
+    ok; %% no members state to pass on
+maybe_send_catchup(_Right, #state { self          = Self,
+                                    right         = {Right, _MRef},
+                                    view          = View,
+                                    members_state = MembersState }) ->
+    send_right(Right, View, %% give the new neighbour our whole members state
+               {catchup, Self, prepare_members_state(MembersState)}).
+
+
+%% ---------------------------------------------------------------------------
+%% Catch_up delta detection
+%% ---------------------------------------------------------------------------
+
+find_prefix_common_suffix(A, B) -> %% splits into A-only, shared and B-only
+    {Prefix, A1} = find_prefix(A, B, queue:new()),
+    {Common, Suffix} = find_common(A1, B, queue:new()),
+    {Prefix, Common, Suffix}.
+
+%% Returns the elements of A that occur before the first element of B,
+%% plus the remainder of A.
+find_prefix(A, B, Prefix) ->
+    case {queue:out(A), queue:out(B)} of
+        {{{value, Val}, _A1}, {{value, Val}, _B1}} ->
+            {Prefix, A}; %% heads agree: the prefix is complete
+        {{empty, A1}, {{value, _A}, _B1}} ->
+            {Prefix, A1}; %% A ran out before reaching B's first element
+        {{{value, {NumA, _MsgA} = Val}, A1},
+         {{value, {NumB, _MsgB}}, _B1}} when NumA < NumB ->
+            find_prefix(A1, B, queue:in(Val, Prefix));
+        {_, {empty, _B1}} ->
+            {A, Prefix} %% B empty: all of A is prefix; Prefix will be empty
+    end.
+
+%% A should be a prefix of B. Returns the commonality plus the
+%% remainder of B.
+find_common(A, B, Common) ->
+    case {queue:out(A), queue:out(B)} of
+        {{{value, Val}, A1}, {{value, Val}, B1}} ->
+            find_common(A1, B1, queue:in(Val, Common));
+        {{empty, _A}, _} ->
+            {Common, B} %% A is used up; what is left of B is the remainder
+    end.
+
+
+%% ---------------------------------------------------------------------------
+%% Members helpers
+%% ---------------------------------------------------------------------------
+
+with_member(Fun, Id, MembersState) -> %% updates member Id (blank if unknown)
+    Member = Fun(find_member_or_blank(Id, MembersState)),
+    store_member(Id, Member, MembersState).
+
+with_member_acc(Fun, Id, {MembersState, Acc}) -> %% with_member/3 plus an Acc
+    {MemberState, Acc1} = Fun(find_member_or_blank(Id, MembersState), Acc),
+    {store_member(Id, MemberState, MembersState), Acc1}.
+
+find_member_or_blank(Id, MembersState) ->
+    case ?DICT:find(Id, MembersState) of
+        {ok, Result} -> Result;
+        error        -> blank_member() %% unknown ids start from a blank record
+    end.
+
+%% Discards whatever is held about member Id.
+erase_member(Id, MembersState) -> ?DICT:erase(Id, MembersState).
+
+blank_member() -> %% nothing awaiting ack; last pub and last ack both at -1
+    #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }.
+
+%% The members state before anything is known about any member.
+blank_member_state() -> ?DICT:new().
+
+store_member(Id, MemberState, MembersState) -> %% keyed by member id
+    ?DICT:store(Id, MemberState, MembersState).
+
+%% Flattens the members state to a list, the form sent in a catchup message.
+prepare_members_state(MembersState) -> ?DICT:to_list(MembersState).
+
+%% Rebuilds the members state dictionary from its list form.
+build_members_state(MembersStateList) -> ?DICT:from_list(MembersStateList).
+
+
+%% ---------------------------------------------------------------------------
+%% Activity assembly
+%% ---------------------------------------------------------------------------
+
+%% The empty activity: a queue to which {Sender, Pubs, Acks} entries are added.
+activity_nil() -> queue:new().
+
+%% Appends {Sender, Pubs, Acks} to the activity, skipping entries that carry
+%% neither publications nor acks.
+activity_cons(_Id, [], [], Q)        -> Q;
+activity_cons(Sender, Pubs, Acks, Q) -> queue:in({Sender, Pubs, Acks}, Q).
+
+%% Turns the accumulated activity queue into a list, oldest entry first.
+activity_finalise(Activity) -> queue:to_list(Activity).
+
+maybe_send_activity([], _State) ->
+    ok; %% nothing to report, so nothing is sent
+maybe_send_activity(Activity, #state { self  = Self,
+                                       right = {Right, _MRefR},
+                                       view  = View }) ->
+    send_right(Right, View, {activity, Self, Activity}). %% tagged with our id
+
+send_right(Right, View, Msg) -> %% every message carries our view's version
+    ok = gen_server2:cast(Right, {?TAG, view_version(View), Msg}).
+
+%% Hands each publication in Activity to Module:handle_msg/3, in order. Once
+%% a result other than 'ok' comes back, the rest are skipped and it is returned.
+callback(Args, Module, Activity) ->
+    Deliver = fun (Id) -> fun ({_PubNum, Pub}, ok) ->
+                                  Module:handle_msg(Args, Id, Pub);
+                              (_Pub, NotOk) -> NotOk
+                          end end,
+    lists:foldl(fun ({Id, Pubs, _Acks}, ok) ->
+                        lists:foldl(Deliver(Id), ok, Pubs);
+                    (_Entry, NotOk) -> NotOk
+                end, ok, Activity).
+
+%% Tells Module which ids appeared and disappeared between two views; the
+%% callback is skipped, and ok returned, when the membership is unchanged.
+callback_view_changed(Args, Module, OldView, NewView) ->
+    Before = all_known_members(OldView),
+    After  = all_known_members(NewView),
+    case {After -- Before, Before -- After} of
+        {[], []}         -> ok;
+        {Births, Deaths} -> Module:members_changed(Args, Births, Deaths)
+    end.
+
+handle_callback_result({Result, State}) -> %% no reply is owed
+    if_callback_success(
+      Result, fun no_reply_true/3, fun no_reply_false/3, undefined, State);
+handle_callback_result({Result, Reply, State}) -> %% Reply is owed to a caller
+    if_callback_success(
+      Result, fun reply_true/3, fun reply_false/3, Reply, State).
+%% Continuations used when there is no reply to send to a caller.
+no_reply_true (_Result,        _Undefined, State) -> noreply(State).
+no_reply_false({stop, Reason}, _Undefined, State) -> {stop, Reason, State}.
+%% Continuations used when Reply is owed to a caller.
+reply_true (_Result,        Reply, State) -> reply(Reply, State).
+reply_false({stop, Reason}, Reply, State) -> {stop, Reason, Reply, State}.
+%% Continuations that go on to handle Msg only if the callback succeeded.
+handle_msg_true (_Result, Msg, State) -> handle_msg(Msg, State).
+handle_msg_false(Result, _Msg, State) -> {Result, State}.
+
+activity_true(_Result, Activity, State = #state { module        = Module,
+                                                  callback_args = Args }) ->
+    {callback(Args, Module, Activity), State}. %% deliver the pubs to Module
+%% Failure counterpart of activity_true/3: hands Result back, delivers nothing.
+activity_false(Result, _Activity, State) -> {Result, State}.
+
+if_callback_success(ok, True, _False, Arg, State) ->
+    True(ok, Arg, State); %% plain success
+if_callback_success( %% become: success, with the callback swapped in State
+  {become, Module, Args} = Result, True, _False, Arg, State) ->
+    True(Result, Arg, State #state { module        = Module,
+                                     callback_args = Args });
+if_callback_success({stop, _Reason} = Result, _True, False, Arg, State) ->
+    False(Result, Arg, State). %% stop is the only failure result
+
+maybe_confirm(_Self, _Id, Confirms, []) ->
+    Confirms; %% no publication numbers left to look at
+maybe_confirm(Self, Self, Confirms, [PubNum | PubNums]) ->
+    case queue:out(Confirms) of
+        {empty, _Confirms} ->
+            Confirms; %% no caller is waiting on a confirm
+        {{value, {PubNum, From}}, Confirms1} ->
+            gen_server2:reply(From, ok), %% release the waiting caller
+            maybe_confirm(Self, Self, Confirms1, PubNums);
+        {{value, {PubNum1, _From}}, _Confirms} when PubNum1 > PubNum ->
+            maybe_confirm(Self, Self, Confirms, PubNums) %% no waiter here
+    end;
+maybe_confirm(_Self, _Id, Confirms, _PubNums) ->
+    Confirms. %% Id is not Self: these are not our publications
+
+purge_confirms(Confirms) -> %% reply ok to all waiters, then start afresh
+    [gen_server2:reply(From, ok) || {_PubNum, From} <- queue:to_list(Confirms)],
+    queue:new().
+
+
+%% ---------------------------------------------------------------------------
+%% Msg transformation
+%% ---------------------------------------------------------------------------
+
+%% The publication numbers of a queue of {PubNum, Msg} pairs, in queue order.
+acks_from_queue(Q) -> [PubNum || {PubNum, _Msg} <- queue:to_list(Q)].
+
+%% The queued publications as a plain list, in queue order.
+pubs_from_queue(Q) -> queue:to_list(Q).
+
+%% A queue holding the given publications, first list element at the front.
+queue_from_pubs(Pubs) -> queue:from_list(Pubs).
+
+%% Drops one publication from the front of the Pubs queue per ack in the
+%% list; only the number of acks is used, not their values.
+apply_acks([], Pubs)   -> Pubs;
+apply_acks(Acks, Pubs) ->
+    element(2, queue:split(length(Acks), Pubs)).
+
+join_pubs(Q, [])   -> Q; %% nothing to append
+join_pubs(Q, Pubs) -> queue:join(Q, queue_from_pubs(Pubs)). %% Pubs go last
+
+%% New last-ack number: the list's final element, which must exceed LA.
+last_ack([], LA)   -> LA;
+last_ack(Acks, LA) ->
+    Newest = lists:last(Acks),
+    true = Newest > LA, %% ASSERTION
+    Newest.
+
+%% New last-pub number: taken from the final {PubNum, Msg}; must exceed LP.
+last_pub([], LP)   -> LP;
+last_pub(Pubs, LP) ->
+    {Newest, _Msg} = lists:last(Pubs),
+    true = Newest > LP, %% ASSERTION
+    Newest.
diff --git a/src/gm_soak_test.erl b/src/gm_soak_test.erl
new file mode 100644
index 00000000..5e5a3a5a
--- /dev/null
+++ b/src/gm_soak_test.erl
@@ -0,0 +1,131 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(gm_soak_test).
+
+-export([test/0]).
+-export([joined/2, members_changed/3, handle_msg/3, terminate/2]).
+
+-behaviour(gm).
+
+-include("gm_specs.hrl").
+
+%% ---------------------------------------------------------------------------
+%% Soak test
+%% ---------------------------------------------------------------------------
+
+%% The test keeps its state in the process dictionary under 'state'.
+get_state() -> get(state).
+
+%% Replaces the stored state with Fun(State).
+with_state(Fun) -> put(state, Fun(get_state())).
+
+inc() -> %% counts one message; every 100000th prints the rate and resets
+    case 1 + get(count) of
+        100000 -> Now = now(),
+                  Start = put(ts, Now), %% put/2 hands back the old timestamp
+                  Diff = timer:now_diff(Now, Start),
+                  Rate = 100000 / (Diff / 1000000), %% Diff is in microseconds
+                  io:format("~p seeing ~p msgs/sec~n", [self(), Rate]),
+                  put(count, 0);
+        N      -> put(count, N)
+    end.
+
+joined([], Members) -> %% members present at join time start out as 'empty'
+    io:format("Joined ~p (~p members)~n", [self(), length(Members)]),
+    put(state, dict:from_list([{Member, empty} || Member <- Members])),
+    put(count, 0),
+    put(ts, now()),
+    ok.
+
+members_changed([], Births, Deaths) -> %% track births and deaths per member
+    with_state(
+      fun (State) ->
+              State1 =
+                  lists:foldl(
+                    fun (Born, StateN) ->
+                            false = dict:is_key(Born, StateN), %% must be new
+                            dict:store(Born, empty, StateN)
+                    end, State, Births),
+              lists:foldl(
+                fun (Died, StateN) ->
+                        true = dict:is_key(Died, StateN), %% must be known
+                        dict:store(Died, died, StateN)
+                end, State1, Deaths)
+      end),
+    ok.
+
+handle_msg([], From, {test_msg, Num}) -> %% checks per-sender ordering
+    inc(),
+    with_state(
+      fun (State) ->
+              ok = case dict:find(From, State) of
+                       {ok, died} ->
+                           exit({{from, From},
+                                 {received_posthumous_delivery, Num}});
+                       {ok, empty} -> ok; %% first message from this sender
+                       {ok, Num}   -> ok; %% the number expected next
+                       {ok, Num1} when Num < Num1 ->
+                           exit({{from, From},
+                                 {duplicate_delivery_of, Num},
+                                 {expecting, Num1}});
+                       {ok, Num1} ->
+                           exit({{from, From},
+                                 {received_early, Num},
+                                 {expecting, Num1}});
+                       error ->
+                           exit({{from, From},
+                                 {received_premature_delivery, Num}})
+                   end,
+              dict:store(From, Num + 1, State) %% what we expect next time
+      end),
+    ok.
+
+terminate([], Reason) -> %% only logs the departure
+    io:format("Left ~p (~p)~n", [self(), Reason]),
+    ok.
+
+spawn_member() -> %% one member's life: join, broadcast for a while, leave
+    spawn_link(
+      fun () ->
+              {MegaSecs, Secs, MicroSecs} = now(),
+              random:seed(MegaSecs, Secs, MicroSecs),
+              %% start up delay of no more than 10 seconds
+              timer:sleep(random:uniform(10000)),
+              {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []),
+              Start = random:uniform(10000),
+              send_loop(Pid, Start, Start + random:uniform(10000)),
+              gm:leave(Pid),
+              spawn_more() %% then 0 to 3 new members take its place
+      end).
+
+spawn_more() -> %% random:uniform(4) is 1..4, so this spawns 0 to 3 members
+    [spawn_member() || _ <- lists:seq(1, 4 - random:uniform(4))].
+
+send_loop(_Pid, Target, Target) ->
+    ok; %% Count has reached Target
+send_loop(Pid, Count, Target) when Target > Count ->
+    case random:uniform(3) of
+        3 -> gm:confirmed_broadcast(Pid, {test_msg, Count}); %% one in three
+        _ -> gm:broadcast(Pid, {test_msg, Count})
+    end,
+    timer:sleep(random:uniform(5) - 1), %% sleep up to 4 ms
+    send_loop(Pid, Count + 1, Target).
+
+test() -> %% creates the gm tables and starts two initial members
+    ok = gm:create_tables(),
+    spawn_member(),
+    spawn_member().
diff --git a/src/gm_speed_test.erl b/src/gm_speed_test.erl
new file mode 100644
index 00000000..defb0f29
--- /dev/null
+++ b/src/gm_speed_test.erl
@@ -0,0 +1,82 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(gm_speed_test).
+
+-export([test/3]).
+-export([joined/2, members_changed/3, handle_msg/3, terminate/2]).
+-export([wile_e_coyote/2]).
+
+-behaviour(gm).
+
+-include("gm_specs.hrl").
+
+%% callbacks
+
+joined(Owner, _Members) -> %% Owner is the pid given to gm:start_link/3
+    Owner ! joined,
+    ok.
+
+%% Membership changes are ignored by the speed test.
+members_changed(_Owner, _Births, _Deaths) -> ok.
+
+handle_msg(Owner, _From, ping) -> %% forwards each ping for read/1 to count
+    Owner ! ping,
+    ok.
+
+terminate(Owner, _Reason) -> %% lets wile_e_coyote/2 know we have gone
+    Owner ! terminated,
+    ok.
+
+%% other
+
+wile_e_coyote(Time, WriteUnit) -> %% one benchmark participant
+    {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()),
+    receive joined -> ok end,
+    timer:sleep(1000), %% wait for all to join
+    timer:send_after(Time, stop), %% 'stop' ends loop/4 after Time ms
+    Start = now(),
+    {Sent, Received} = loop(Pid, WriteUnit, 0, 0),
+    End = now(),
+    ok = gm:leave(Pid),
+    receive terminated -> ok end,
+    Elapsed = timer:now_diff(End, Start) / 1000000, %% microseconds to seconds
+    io:format("Sending rate:   ~p msgs/sec~nReceiving rate: ~p msgs/sec~n~n",
+              [Sent/Elapsed, Received/Elapsed]),
+    ok.
+
+loop(Pid, WriteUnit, Sent, Received) -> %% alternates reading and writing
+    case read(Received) of
+        {stop, Received1} -> {Sent, Received1};
+        {ok,   Received1} -> ok = write(Pid, WriteUnit),
+                             loop(Pid, WriteUnit, Sent + WriteUnit, Received1)
+    end.
+
+read(Count) -> %% counts pings until the mailbox has been quiet for 5ms
+    receive
+        ping -> read(Count + 1);
+        stop -> {stop, Count}
+    after 5 ->
+            {ok, Count}
+    end.
+
+write(_Pid, 0) -> ok; %% all N pings have been broadcast
+write(Pid,  N) -> ok = gm:broadcast(Pid, ping),
+                  write(Pid, N - 1).
+
+test(Time, WriteUnit, Nodes) -> %% starts one wile_e_coyote/2 on each node
+    ok = gm:create_tables(),
+    [spawn(Node, ?MODULE, wile_e_coyote, [Time, WriteUnit]) || Node <- Nodes].
diff --git a/src/gm_tests.erl b/src/gm_tests.erl
new file mode 100644
index 00000000..ca0ffd64
--- /dev/null
+++ b/src/gm_tests.erl
@@ -0,0 +1,182 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(gm_tests).
+
+-export([test_join_leave/0,
+         test_broadcast/0,
+         test_confirmed_broadcast/0,
+         test_member_death/0,
+         test_receive_in_order/0,
+         all_tests/0]).
+-export([joined/2, members_changed/3, handle_msg/3, terminate/2]).
+
+-behaviour(gm).
+
+-include("gm_specs.hrl").
+
+-define(RECEIVE_OR_THROW(Body, Bool, Error),
+        receive Body ->
+                true = Bool, %% a matching message failing Bool is a badmatch
+                passed
+        after 1000 -> %% milliseconds
+                throw(Error)
+        end).
+
+joined(Pid, Members) -> %% Pid is the test process passed to gm:start_link/3
+    Pid ! {joined, self(), Members},
+    ok.
+
+members_changed(Pid, Births, Deaths) -> %% relayed for receive_birth/death
+    Pid ! {members_changed, self(), Births, Deaths},
+    ok.
+
+handle_msg(Pid, From, Msg) -> %% relayed with both receiver and sender
+    Pid ! {msg, self(), From, Msg},
+    ok.
+
+terminate(Pid, Reason) -> %% relayed for receive_termination/3
+    Pid ! {termination, self(), Reason},
+    ok.
+
+%% ---------------------------------------------------------------------------
+%% Functional tests
+%% ---------------------------------------------------------------------------
+
+%% Runs every functional test in turn; each must return 'passed'.
+all_tests() ->
+    Tests = [fun test_join_leave/0, fun test_broadcast/0,
+             fun test_confirmed_broadcast/0, fun test_member_death/0,
+             fun test_receive_in_order/0],
+    lists:foreach(fun (Test) -> passed = Test() end, Tests),
+    passed.
+
+%% Joining and leaving is exactly what with_two_members/1 itself exercises.
+test_join_leave() -> with_two_members(fun (_Pid, _Pid2) -> passed end).
+
+%% A plain broadcast must reach both members.
+test_broadcast() -> test_broadcast(fun gm:broadcast/2).
+
+%% A confirmed broadcast must reach both members.
+test_confirmed_broadcast() -> test_broadcast(fun gm:confirmed_broadcast/2).
+
+test_member_death() -> %% a killed third member is reported dead to the rest
+    with_two_members(
+      fun (Pid, Pid2) ->
+              {ok, Pid3} = gm:start_link(?MODULE, ?MODULE, self()),
+              passed = receive_joined(Pid3, [Pid, Pid2, Pid3],
+                                      timeout_joining_gm_group_3),
+              passed = receive_birth(Pid, Pid3, timeout_waiting_for_birth_3_1),
+              passed = receive_birth(Pid2, Pid3, timeout_waiting_for_birth_3_2),
+
+              unlink(Pid3), %% so the kill does not propagate to this process
+              exit(Pid3, kill),
+
+              %% Have to do some broadcasts to ensure that all members
+              %% find out about the death.
+              passed = (test_broadcast_fun(fun gm:confirmed_broadcast/2))(
+                         Pid, Pid2),
+
+              passed = receive_death(Pid, Pid3, timeout_waiting_for_death_3_1),
+              passed = receive_death(Pid2, Pid3, timeout_waiting_for_death_3_2),
+
+              passed
+      end).
+
+test_receive_in_order() -> %% each member sees each sender's numbers in order
+    with_two_members(
+      fun (Pid, Pid2) ->
+              Numbers = lists:seq(1,1000),
+              [begin ok = gm:broadcast(Pid, N), ok = gm:broadcast(Pid2, N) end
+               || N <- Numbers],
+              passed = receive_numbers(
+                         Pid, Pid, {timeout_for_msgs, Pid, Pid}, Numbers),
+              passed = receive_numbers(
+                         Pid, Pid2, {timeout_for_msgs, Pid, Pid2}, Numbers),
+              passed = receive_numbers(
+                         Pid2, Pid, {timeout_for_msgs, Pid2, Pid}, Numbers),
+              passed = receive_numbers(
+                         Pid2, Pid2, {timeout_for_msgs, Pid2, Pid2}, Numbers),
+              passed
+      end).
+
+%% Runs the broadcast check, with Fun doing the sending, on a fresh pair.
+test_broadcast(Fun) -> with_two_members(test_broadcast_fun(Fun)).
+
+test_broadcast_fun(Fun) -> %% check that both members get Pid's message
+    fun (Pid, Pid2) ->
+            ok = Fun(Pid, magic_message),
+            passed = receive_or_throw({msg, Pid, Pid, magic_message},
+                                      timeout_waiting_for_msg),
+            passed = receive_or_throw({msg, Pid2, Pid, magic_message},
+                                      timeout_waiting_for_msg)
+    end.
+
+with_two_members(Fun) -> %% set up two members, run Fun, tear down
+    ok = gm:create_tables(),
+
+    {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()),
+    passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1),
+
+    {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self()),
+    passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2),
+    passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2),
+
+    passed = Fun(Pid, Pid2),
+
+    ok = gm:leave(Pid),
+    passed = receive_death(Pid2, Pid, timeout_waiting_for_death_1),
+    passed =
+        receive_termination(Pid, normal, timeout_waiting_for_termination_1),
+
+    ok = gm:leave(Pid2),
+    passed =
+        receive_termination(Pid2, normal, timeout_waiting_for_termination_2),
+
+    receive X -> throw({unexpected_message, X}) %% mailbox must be empty
+    after 0 -> passed
+    end.
+
+%% Waits up to a second for a message equal to Pattern, else throws Error.
+receive_or_throw(Pattern, Error) -> ?RECEIVE_OR_THROW(Pattern, true, Error).
+
+receive_birth(From, Born, Error) -> %% From reports [Born] born, nobody dead
+    ?RECEIVE_OR_THROW({members_changed, From, Birth, Death},
+                      ([Born] == Birth) andalso ([] == Death),
+                      Error).
+
+receive_death(From, Died, Error) -> %% From reports [Died] dead, nobody born
+    ?RECEIVE_OR_THROW({members_changed, From, Birth, Death},
+                      ([] == Birth) andalso ([Died] == Death),
+                      Error).
+
+receive_joined(From, Members, Error) -> %% member order is ignored
+    ?RECEIVE_OR_THROW({joined, From, Members1},
+                      lists:usort(Members) == lists:usort(Members1),
+                      Error).
+
+receive_termination(From, Reason, Error) -> %% From terminated with Reason
+    ?RECEIVE_OR_THROW({termination, From, Reason1},
+                      Reason == Reason1,
+                      Error).
+
+receive_numbers(_Pid, _Sender, _Error, []) ->
+    passed; %% every expected number has been received
+receive_numbers(Pid, Sender, Error, [N | Numbers]) ->
+    ?RECEIVE_OR_THROW({msg, Pid, Sender, M},
+                      M == N, %% the next such message must carry N
+                      Error),
+    receive_numbers(Pid, Sender, Error, Numbers).
diff --git a/src/pg_local.erl b/src/pg_local.erl
index fd515747..c9c3a3a7 100644
--- a/src/pg_local.erl
+++ b/src/pg_local.erl
@@ -83,7 +83,7 @@ get_members(Name) ->
 
 sync() ->
     ensure_started(),
-    gen_server:call(?MODULE, sync).
+    gen_server:call(?MODULE, sync, infinity).
 
 %%%
 %%% Callback functions from gen_server
diff --git a/src/priority_queue.erl b/src/priority_queue.erl
index 4a94b24b..4fc8b469 100644
--- a/src/priority_queue.erl
+++ b/src/priority_queue.erl
@@ -47,7 +47,10 @@
 
 -ifdef(use_specs).
 
--type(priority() :: integer()).
+-export_type([q/0]).
+
+-type(q() :: pqueue()).
+-type(priority() :: integer() | 'infinity').
 -type(squeue() :: {queue, [any()], [any()]}).
 -type(pqueue() ::  squeue() | {pqueue, [{priority(), squeue()}]}).
 
@@ -71,8 +74,9 @@ new() ->
 is_queue({queue, R, F}) when is_list(R), is_list(F) ->
     true;
 is_queue({pqueue, Queues}) when is_list(Queues) ->
-    lists:all(fun ({P, Q}) -> is_integer(P) andalso is_queue(Q) end,
-              Queues);
+    ValidPriority = fun (P) -> P =:= infinity orelse is_integer(P) end,
+    lists:all(fun ({P, Q}) -> ValidPriority(P) andalso is_queue(Q) end,
+              Queues);
 is_queue(_) ->
     false.
 
@@ -89,7 +93,8 @@ len({pqueue, Queues}) ->
 to_list({queue, In, Out}) when is_list(In), is_list(Out) ->
     [{0, V} || V <- Out ++ lists:reverse(In, [])];
 to_list({pqueue, Queues}) ->
-    [{-P, V} || {P, Q} <- Queues, {0, V} <- to_list(Q)].
+    [{maybe_negate_priority(P), V} || {P, Q} <- Queues,
+                                      {0, V} <- to_list(Q)].
 
 in(Item, Q) ->
     in(Item, 0, Q).
@@ -103,12 +108,20 @@ in(X, Priority, _Q = {queue, [], []}) ->
 in(X, Priority, Q = {queue, _, _}) ->
     in(X, Priority, {pqueue, [{0, Q}]});
 in(X, Priority, {pqueue, Queues}) ->
-    P = -Priority,
+    P = maybe_negate_priority(Priority), %% integers negated, 'infinity' kept as is
     {pqueue, case lists:keysearch(P, 1, Queues) of
                  {value, {_, Q}} ->
                      lists:keyreplace(P, 1, Queues, {P, in(X, Q)});
+                 false when P == infinity -> %% new 'infinity' bucket goes to the head
+                     [{P, {queue, [X], []}} | Queues];
                  false ->
-                     lists:keysort(1, [{P, {queue, [X], []}} | Queues])
+                     case Queues of
+                         [{infinity, InfQueue} | Queues1] -> %% keep 'infinity' first
+                             [{infinity, InfQueue} |
+                              lists:keysort(1, [{P, {queue, [X], []}} | Queues1])];
+                         _ -> %% no 'infinity' bucket: sort the whole list
+                             lists:keysort(1, [{P, {queue, [X], []}} | Queues])
+                     end
              end}.
 
 out({queue, [], []} = Q) ->
@@ -141,7 +154,8 @@ join({queue, [], []}, B) ->
 join({queue, AIn, AOut}, {queue, BIn, BOut}) ->
     {queue, BIn, AOut ++ lists:reverse(AIn, BOut)};
 join(A = {queue, _, _}, {pqueue, BPQ}) ->
-    {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, BPQ),
+    {Pre, Post} =
+        lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, BPQ),
     Post1 = case Post of
                 []                        -> [ {0, A} ];
                 [ {0, ZeroQueue} | Rest ] -> [ {0, join(A, ZeroQueue)} | Rest ];
@@ -149,7 +163,8 @@ join(A = {queue, _, _}, {pqueue, BPQ}) ->
             end,
     {pqueue, Pre ++ Post1};
 join({pqueue, APQ}, B = {queue, _, _}) ->
-    {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, APQ),
+    {Pre, Post} =
+        lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, APQ),
     Post1 = case Post of
                 []                        -> [ {0, B} ];
                 [ {0, ZeroQueue} | Rest ] -> [ {0, join(ZeroQueue, B)} | Rest ];
@@ -165,7 +180,11 @@ merge(APQ, [], Acc) ->
     lists:reverse(Acc, APQ);
 merge([{P, A}|As], [{P, B}|Bs], Acc) ->
     merge(As, Bs, [ {P, join(A, B)} | Acc ]);
-merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB ->
+%% 'infinity' is an atom and so compares greater than every integer in
+%% Erlang term order; PA < PB alone would therefore emit an integer
+%% priority ahead of PB == infinity. Exclude that case explicitly.
+merge([{PA, A}|As], Bs = [{PB, _}|_], Acc)
+  when PA == infinity orelse (PB =/= infinity andalso PA < PB) ->
     merge(As, Bs, [ {PA, A} | Acc ]);
 merge(As = [{_, _}|_], [{PB, B}|Bs], Acc) ->
     merge(As, Bs, [ {PB, B} | Acc ]).
@@ -174,3 +189,6 @@ r2f([])      -> {queue, [], []};
 r2f([_] = R) -> {queue, [], R};
 r2f([X,Y])   -> {queue, [X], [Y]};
 r2f([X,Y|R]) -> {queue, [X,Y], lists:reverse(R, [])}.
+
+maybe_negate_priority(Priority) when Priority =:= infinity -> Priority;
+maybe_negate_priority(Priority) -> -Priority. %% in/3 keysorts on this negated key
diff --git a/src/rabbit.erl b/src/rabbit.erl
index c6661d39..e067607d 100644
--- a/src/rabbit.erl
+++ b/src/rabbit.erl
@@ -18,37 +18,43 @@
 
 -behaviour(application).
 
--export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0,
+-export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, environment/0,
          rotate_logs/1]).
 
 -export([start/2, stop/1]).
 
--export([log_location/1]).
+-export([log_location/1]). %% for testing
 
 %%---------------------------------------------------------------------------
 %% Boot steps.
--export([maybe_insert_default_data/0]).
+-export([maybe_insert_default_data/0, boot_delegate/0, recover/0]).
+
+-rabbit_boot_step({pre_boot, [{description, "rabbit boot start"}]}).
 
 -rabbit_boot_step({codec_correctness_check,
                    [{description, "codec correctness check"},
                     {mfa,         {rabbit_binary_generator,
                                    check_empty_content_body_frame_size,
                                    []}},
+                    {requires,    pre_boot},
                     {enables,     external_infrastructure}]}).
 
 -rabbit_boot_step({database,
                    [{mfa,         {rabbit_mnesia, init, []}},
+                    {requires,    file_handle_cache},
                     {enables,     external_infrastructure}]}).
 
 -rabbit_boot_step({file_handle_cache,
                    [{description, "file handle cache server"},
                     {mfa,         {rabbit_sup, start_restartable_child,
                                    [file_handle_cache]}},
+                    {requires,    pre_boot},
                     {enables,     worker_pool}]}).
 
 -rabbit_boot_step({worker_pool,
                    [{description, "worker pool"},
                     {mfa,         {rabbit_sup, start_child, [worker_pool_sup]}},
+                    {requires,    pre_boot},
                     {enables,     external_infrastructure}]}).
 
 -rabbit_boot_step({external_infrastructure,
@@ -101,8 +107,7 @@
 
 -rabbit_boot_step({delegate_sup,
                    [{description, "cluster delegate"},
-                    {mfa,         {rabbit_sup, start_child,
-                                   [delegate_sup]}},
+                    {mfa,         {rabbit, boot_delegate, []}},
                     {requires,    kernel_ready},
                     {enables,     core_initialized}]}).
 
@@ -123,16 +128,22 @@
                     {requires,    core_initialized},
                     {enables,     routing_ready}]}).
 
--rabbit_boot_step({exchange_recovery,
-                   [{description, "exchange recovery"},
-                    {mfa,         {rabbit_exchange, recover, []}},
+-rabbit_boot_step({recovery,
+                   [{description, "exchange, queue and binding recovery"},
+                    {mfa,         {rabbit, recover, []}},
                     {requires,    empty_db_check},
                     {enables,     routing_ready}]}).
 
--rabbit_boot_step({queue_sup_queue_recovery,
-                   [{description, "queue supervisor and queue recovery"},
-                    {mfa,         {rabbit_amqqueue, start, []}},
-                    {requires,    empty_db_check},
+-rabbit_boot_step({mirror_queue_slave_sup,
+                   [{description, "mirror queue slave sup"},
+                    {mfa,         {rabbit_mirror_queue_slave_sup, start, []}},
+                    {requires,    recovery},
+                    {enables,     routing_ready}]}).
+
+-rabbit_boot_step({mirrored_queues,
+                   [{description, "adding mirrors to queues"},
+                    {mfa,         {rabbit_mirror_queue_misc, on_node_up, []}},
+                    {requires,    mirror_queue_slave_sup},
                     {enables,     routing_ready}]}).
 
 -rabbit_boot_step({routing_ready,
@@ -153,6 +164,11 @@
                    [{mfa,         {rabbit_networking, boot, []}},
                     {requires,    log_relay}]}).
 
+-rabbit_boot_step({notify_cluster,
+                   [{description, "notify cluster nodes"},
+                    {mfa,         {rabbit_node_monitor, notify_cluster, []}},
+                    {requires,    networking}]}).
+
 %%---------------------------------------------------------------------------
 
 -include("rabbit_framing.hrl").
@@ -174,29 +190,37 @@
 -spec(stop_and_halt/0 :: () -> 'ok').
 -spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())).
 -spec(status/0 ::
-        () -> [{running_applications, [{atom(), string(), string()}]} |
-               {nodes, [{rabbit_mnesia:node_type(), [node()]}]} |
-               {running_nodes, [node()]}]).
+        () -> [{pid, integer()} |
+               {running_applications, [{atom(), string(), string()}]} |
+               {os, {atom(), atom()}} |
+               {erlang_version, string()} |
+               {memory, any()}]).
+-spec(environment/0 :: () -> [{atom(), term()}]). %% {Key, Value} pairs
 -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()).
 
+-spec(maybe_insert_default_data/0 :: () -> 'ok').
+-spec(boot_delegate/0 :: () -> 'ok').
+-spec(recover/0 :: () -> 'ok').
+
 -endif.
 
 %%----------------------------------------------------------------------------
 
 prepare() ->
-    ok = ensure_working_log_handlers().
+    ok = ensure_working_log_handlers(),
+    ok = rabbit_upgrade:maybe_upgrade_mnesia().
 
 start() ->
     try
         ok = prepare(),
-        ok = rabbit_misc:start_applications(?APPS)
+        ok = rabbit_misc:start_applications(application_load_order())
     after
         %%give the error loggers some time to catch up
         timer:sleep(100)
     end.
 
 stop() ->
-    ok = rabbit_misc:stop_applications(?APPS).
+    ok = rabbit_misc:stop_applications(application_load_order()).
 
 stop_and_halt() ->
     try
@@ -207,8 +231,16 @@ stop_and_halt() ->
     ok.
 
 status() ->
-    [{running_applications, application:which_applications()}] ++
-        rabbit_mnesia:status().
+    [{pid, list_to_integer(os:getpid())},
+     {running_applications, application:which_applications()},
+     {os, os:type()},
+     {erlang_version, erlang:system_info(system_version)},
+     {memory, erlang:memory()}].
+
+environment() -> %% rabbit's app env sorted by key, with default_pass left out
+    Visible = [Param || {Key, _} = Param <- application:get_all_env(rabbit),
+                        Key =/= default_pass],
+    lists:keysort(1, Visible).
 
 rotate_logs(BinarySuffix) ->
     Suffix = binary_to_list(BinarySuffix),
@@ -224,18 +256,20 @@ rotate_logs(BinarySuffix) ->
 start(normal, []) ->
     case erts_version_check() of
         ok ->
+            ok = rabbit_mnesia:delete_previously_running_nodes(),
             {ok, SupPid} = rabbit_sup:start_link(),
+            true = register(rabbit, self()),
 
             print_banner(),
             [ok = run_boot_step(Step) || Step <- boot_steps()],
             io:format("~nbroker running~n"),
-
             {ok, SupPid};
         Error ->
             Error
     end.
 
 stop(_State) ->
+    ok = rabbit_mnesia:record_running_nodes(),
     terminated_ok = error_logger:delete_report_handler(rabbit_error_logger),
     ok = rabbit_alarm:stop(),
     ok = case rabbit_mnesia:is_clustered() of
@@ -245,20 +279,51 @@ stop(_State) ->
     ok.
 
 %%---------------------------------------------------------------------------
+%% application life cycle
+
+application_load_order() ->
+    ok = load_applications(), %% load ?APPS and, transitively, their dependencies
+    {ok, G} = rabbit_misc:build_acyclic_graph(
+                fun (App, _Deps) -> [{App, App}] end,
+                fun (App,  Deps) -> [{Dep, App} || Dep <- Deps] end, %% Dep -> App
+                [{App, app_dependencies(App)} ||
+                    {App, _Desc, _Vsn} <- application:loaded_applications()]),
+    true = digraph:del_vertices( %% drop every vertex not reachable from ?APPS
+             G, digraph:vertices(G) -- digraph_utils:reachable(?APPS, G)),
+    Result = digraph_utils:topsort(G), %% topological order of what remains
+    true = digraph:delete(G), %% digraphs are not garbage collected
+    Result.
+
+load_applications() -> %% seed the worklist with ?APPS; nothing visited yet
+    load_applications(queue:from_list(?APPS), sets:new()).
+
+load_applications(Worklist, Loaded) -> %% FIFO worklist walk over dependencies
+    case queue:out(Worklist) of
+        {empty, _WorkList} ->
+            ok;
+        {{value, App}, Worklist1} ->
+            case sets:is_element(App, Loaded) of
+                true  -> load_applications(Worklist1, Loaded); %% already visited
+                false -> case application:load(App) of
+                             ok                             -> ok;
+                             {error, {already_loaded, App}} -> ok;
+                             Error                          -> throw(Error)
+                         end,
+                         load_applications( %% enqueue App's own dependencies
+                           queue:join(Worklist1,
+                                      queue:from_list(app_dependencies(App))),
+                           sets:add_element(App, Loaded))
+            end
+    end.
 
-erts_version_check() ->
-    FoundVer = erlang:system_info(version),
-    case rabbit_misc:version_compare(?ERTS_MINIMUM, FoundVer, lte) of
-        true  -> ok;
-        false -> {error, {erlang_version_too_old,
-                          {found, FoundVer}, {required, ?ERTS_MINIMUM}}}
+app_dependencies(App) -> %% the 'applications' key of App's spec, or [] if unset
+    case application:get_key(App, applications) of
+        {ok, Deps} -> Deps;
+        undefined  -> []
     end.
 
-boot_error(Format, Args) ->
-    io:format("BOOT ERROR: " ++ Format, Args),
-    error_logger:error_msg(Format, Args),
-    timer:sleep(1000),
-    exit({?MODULE, failure_during_boot}).
+%%---------------------------------------------------------------------------
+%% boot step logic
 
 run_boot_step({StepName, Attributes}) ->
     Description = case lists:keysearch(description, 1, Attributes) of
@@ -333,83 +398,46 @@ sort_boot_steps(UnsortedSteps) ->
                end])
     end.
 
-%%---------------------------------------------------------------------------
+boot_error(Format, Args) -> %% report a boot failure, then exit; never returns
+    io:format("BOOT ERROR: " ++ Format, Args),
+    error_logger:error_msg(Format, Args),
+    timer:sleep(1000), %% NOTE(review): presumably lets the logger flush - confirm
+    exit({?MODULE, failure_during_boot}).
 
-log_location(Type) ->
-    case application:get_env(Type, case Type of
-                                       kernel -> error_logger;
-                                       sasl   -> sasl_error_logger
-                                   end) of
-        {ok, {file, File}} -> File;
-        {ok, false}        -> undefined;
-        {ok, tty}          -> tty;
-        {ok, silent}       -> undefined;
-        {ok, Bad}          -> throw({error, {cannot_log_to_file, Bad}});
-        _                  -> undefined
-    end.
+%%---------------------------------------------------------------------------
+%% boot step functions
 
-app_location() ->
-    {ok, Application} = application:get_application(),
-    filename:absname(code:where_is_file(atom_to_list(Application) ++ ".app")).
+boot_delegate() -> %% boot step: start delegate_sup under rabbit_sup
+    {ok, Count} = application:get_env(rabbit, delegate_count), %% badmatch if unset
+    rabbit_sup:start_child(delegate_sup, [Count]).
 
-home_dir() ->
-    case init:get_argument(home) of
-        {ok, [[Home]]} -> Home;
-        Other          -> Other
-    end.
+recover() -> %% boot step: results of both recoveries feed binding recovery
+    rabbit_binding:recover(rabbit_exchange:recover(), rabbit_amqqueue:start()).
 
-config_files() ->
-    case init:get_argument(config) of
-        {ok, Files} -> [filename:absname(
-                          filename:rootname(File, ".config") ++ ".config") ||
-                           File <- Files];
-        error       -> []
+maybe_insert_default_data() -> %% boot step: seed defaults only into an empty db
+    case rabbit_mnesia:is_db_empty() of
+        false -> ok;
+        true  -> insert_default_data()
     end.
 
-%---------------------------------------------------------------------------
+insert_default_data() -> %% create default vhost, user, tags and permissions
+    {ok, DefaultUser} = application:get_env(default_user), %% badmatch if unset
+    {ok, DefaultPass} = application:get_env(default_pass),
+    {ok, DefaultTags} = application:get_env(default_user_tags),
+    {ok, DefaultVHost} = application:get_env(default_vhost),
+    {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} =
+        application:get_env(default_permissions), %% must be exactly three entries
+    ok = rabbit_vhost:add(DefaultVHost),
+    ok = rabbit_auth_backend_internal:add_user(DefaultUser, DefaultPass),
+    ok = rabbit_auth_backend_internal:set_tags(DefaultUser, DefaultTags),
+    ok = rabbit_auth_backend_internal:set_permissions(DefaultUser, DefaultVHost,
+                                                      DefaultConfigurePerm,
+                                                      DefaultWritePerm,
+                                                      DefaultReadPerm),
+    ok.
 
-print_banner() ->
-    {ok, Product} = application:get_key(id),
-    {ok, Version} = application:get_key(vsn),
-    ProductLen = string:len(Product),
-    io:format("~n"
-              "+---+   +---+~n"
-              "|   |   |   |~n"
-              "|   |   |   |~n"
-              "|   |   |   |~n"
-              "|   +---+   +-------+~n"
-              "|                   |~n"
-              "| ~s  +---+   |~n"
-              "|           |   |   |~n"
-              "| ~s  +---+   |~n"
-              "|                   |~n"
-              "+-------------------+~n"
-              "~s~n~s~n~s~n~n",
-              [Product, string:right([$v|Version], ProductLen),
-               ?PROTOCOL_VERSION,
-               ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]),
-    Settings = [{"node",           node()},
-                {"app descriptor", app_location()},
-                {"home dir",       home_dir()},
-                {"config file(s)", config_files()},
-                {"cookie hash",    rabbit_misc:cookie_hash()},
-                {"log",            log_location(kernel)},
-                {"sasl log",       log_location(sasl)},
-                {"database dir",   rabbit_mnesia:dir()},
-                {"erlang version", erlang:system_info(version)}],
-    DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]),
-    Format = fun (K, V) ->
-                     io:format("~-" ++ integer_to_list(DescrLen) ++ "s: ~s~n",
-                               [K, V])
-             end,
-    lists:foreach(fun ({"config file(s)" = K, []}) ->
-                          Format(K, "(none)");
-                      ({"config file(s)" = K, [V0 | Vs]}) ->
-                          Format(K, V0), [Format("", V) || V <- Vs];
-                      ({K, V}) ->
-                          Format(K, V)
-                  end, Settings),
-    io:nl().
+%%---------------------------------------------------------------------------
+%% logging
 
 ensure_working_log_handlers() ->
     Handlers = gen_event:which_handlers(error_logger),
@@ -448,31 +476,19 @@ ensure_working_log_handler(OldFHandler, NewFHandler, TTYHandler,
                      end
     end.
 
-maybe_insert_default_data() ->
-    case rabbit_mnesia:is_db_empty() of
-        true -> insert_default_data();
-        false -> ok
+log_location(Type) ->
+    EnvKey = case Type of
+                 kernel -> error_logger;
+                 sasl   -> sasl_error_logger
+             end,
+    case application:get_env(Type, EnvKey) of
+        {ok, {file, File}} -> File;
+        {ok, tty}          -> tty;
+        {ok, Off} when Off =:= false; Off =:= silent -> undefined;
+        {ok, Bad}          -> throw({error, {cannot_log_to_file, Bad}});
+        _                  -> undefined
     end.
 
-insert_default_data() ->
-    {ok, DefaultUser} = application:get_env(default_user),
-    {ok, DefaultPass} = application:get_env(default_pass),
-    {ok, DefaultAdmin} = application:get_env(default_user_is_admin),
-    {ok, DefaultVHost} = application:get_env(default_vhost),
-    {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} =
-        application:get_env(default_permissions),
-    ok = rabbit_vhost:add(DefaultVHost),
-    ok = rabbit_auth_backend_internal:add_user(DefaultUser, DefaultPass),
-    case DefaultAdmin of
-        true -> rabbit_auth_backend_internal:set_admin(DefaultUser);
-        _    -> ok
-    end,
-    ok = rabbit_auth_backend_internal:set_permissions(DefaultUser, DefaultVHost,
-                                                      DefaultConfigurePerm,
-                                                      DefaultWritePerm,
-                                                      DefaultReadPerm),
-    ok.
-
 rotate_logs(File, Suffix, Handler) ->
     rotate_logs(File, Suffix, Handler, Handler).
 
@@ -495,3 +511,75 @@ log_rotation_result(ok, {error, SaslLogError}) ->
     {error, {cannot_rotate_sasl_logs, SaslLogError}};
 log_rotation_result(ok, ok) ->
     ok.
+
+%%---------------------------------------------------------------------------
+%% misc
+
+erts_version_check() -> %% compares the running ERTS against ?ERTS_MINIMUM
+    Running = erlang:system_info(version),
+    case rabbit_misc:version_compare(?ERTS_MINIMUM, Running, lte) of
+        false -> {error, {erlang_version_too_old,
+                          {found, Running}, {required, ?ERTS_MINIMUM}}};
+        true  -> ok
+    end.
+
+print_banner() -> %% print the ASCII-art logo, then a table of node settings
+    {ok, Product} = application:get_key(id),
+    {ok, Version} = application:get_key(vsn),
+    ProductLen = string:len(Product),
+    io:format("~n"
+              "+---+   +---+~n"
+              "|   |   |   |~n"
+              "|   |   |   |~n"
+              "|   |   |   |~n"
+              "|   +---+   +-------+~n"
+              "|                   |~n"
+              "| ~s  +---+   |~n"
+              "|           |   |   |~n"
+              "| ~s  +---+   |~n"
+              "|                   |~n"
+              "+-------------------+~n"
+              "~s~n~s~n~s~n~n",
+              [Product, string:right([$v|Version], ProductLen), %% pad to name width
+               ?PROTOCOL_VERSION,
+               ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]),
+    Settings = [{"node",           node()},
+                {"app descriptor", app_location()},
+                {"home dir",       home_dir()},
+                {"config file(s)", config_files()},
+                {"cookie hash",    rabbit_misc:cookie_hash()},
+                {"log",            log_location(kernel)},
+                {"sasl log",       log_location(sasl)},
+                {"database dir",   rabbit_mnesia:dir()},
+                {"erlang version", erlang:system_info(version)}],
+    DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]), %% label column
+    Format = fun (K, V) -> %% one left-aligned "label: value" row
+                     io:format("~-" ++ integer_to_list(DescrLen) ++ "s: ~s~n",
+                               [K, V])
+             end,
+    lists:foreach(fun ({"config file(s)" = K, []}) ->
+                          Format(K, "(none)");
+                      ({"config file(s)" = K, [V0 | Vs]}) -> %% one row per file
+                          Format(K, V0), [Format("", V) || V <- Vs];
+                      ({K, V}) ->
+                          Format(K, V)
+                  end, Settings),
+    io:nl().
+
+app_location() -> %% absolute path of the calling application's .app file
+    {ok, Application} = application:get_application(),
+    filename:absname(code:where_is_file(atom_to_list(Application) ++ ".app")).
+
+home_dir() -> %% value of the emulator's -home flag
+    case init:get_argument(home) of
+        {ok, [[Home]]} -> Home;
+        Other          -> Other %% NOTE(review): not a string; banner prints via ~s
+    end.
+
+config_files() -> %% absolute paths of all -config files, ".config" suffix forced
+    Cfg = fun (F) -> filename:rootname(F, ".config") ++ ".config" end,
+    case init:get_argument(config) of
+        {ok, Files} ->
+            [filename:absname(Cfg(File)) || File <- Files];
+        error -> []
+    end.
diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl
index b0b57af4..c0ae18c0 100644
--- a/src/rabbit_access_control.erl
+++ b/src/rabbit_access_control.erl
@@ -18,21 +18,17 @@
 
 -include("rabbit.hrl").
 
--export([user_pass_login/2, check_user_pass_login/2, check_user_login/2,
-         check_vhost_access/2, check_resource_access/3, list_vhosts/2]).
+-export([check_user_pass_login/2, check_user_login/2,
+         check_vhost_access/2, check_resource_access/3]).
 
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
--export_type([permission_atom/0, vhost_permission_atom/0]).
+-export_type([permission_atom/0]).
 
 -type(permission_atom() :: 'configure' | 'read' | 'write').
--type(vhost_permission_atom() :: 'read' | 'write').
 
--spec(user_pass_login/2 ::
-        (rabbit_types:username(), rabbit_types:password())
-        -> rabbit_types:user() | rabbit_types:channel_exit()).
 -spec(check_user_pass_login/2 ::
         (rabbit_types:username(), rabbit_types:password())
         -> {'ok', rabbit_types:user()} | {'refused', string(), [any()]}).
@@ -42,23 +38,11 @@
 -spec(check_resource_access/3 ::
         (rabbit_types:user(), rabbit_types:r(atom()), permission_atom())
         -> 'ok' | rabbit_types:channel_exit()).
--spec(list_vhosts/2 :: (rabbit_types:user(), vhost_permission_atom())
-                       -> [rabbit_types:vhost()]).
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-user_pass_login(User, Pass) ->
-    ?LOGDEBUG("Login with user ~p pass ~p~n", [User, Pass]),
-    case check_user_pass_login(User, Pass) of
-        {refused, Msg, Args} ->
-            rabbit_misc:protocol_error(
-              access_refused, "login refused: ~s", [io_lib:format(Msg, Args)]);
-        {ok, U} ->
-            U
-    end.
-
 check_user_pass_login(Username, Password) ->
     check_user_login(Username, [{password, Password}]).
 
@@ -83,7 +67,7 @@ check_vhost_access(User = #user{ username     = Username,
     check_access(
       fun() ->
               rabbit_vhost:exists(VHostPath) andalso
-                  Module:check_vhost_access(User, VHostPath, write)
+                  Module:check_vhost_access(User, VHostPath)
       end,
       "~s failed checking vhost access to ~s for ~s: ~p~n",
       [Module, VHostPath, Username],
@@ -117,21 +101,3 @@ check_access(Fun, ErrStr, ErrArgs, RefStr, RefArgs) ->
         false ->
             rabbit_misc:protocol_error(access_refused, RefStr, RefArgs)
     end.
-
-%% Permission = write -> log in
-%% Permission = read  -> learn of the existence of (only relevant for
-%%                       management plugin)
-list_vhosts(User = #user{username = Username, auth_backend = Module},
-            Permission) ->
-    lists:filter(
-      fun(VHost) ->
-              case Module:check_vhost_access(User, VHost, Permission) of
-                  {error, _} = E ->
-                      rabbit_log:warning("~w failed checking vhost access "
-                                         "to ~s for ~s: ~p~n",
-                                         [Module, VHost, Username, E]),
-                      false;
-                  Else ->
-                      Else
-              end
-      end, rabbit_vhost:list()).
diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl
index 37e40981..d38ecb91 100644
--- a/src/rabbit_alarm.erl
+++ b/src/rabbit_alarm.erl
@@ -18,12 +18,14 @@
 
 -behaviour(gen_event).
 
--export([start/0, stop/0, register/2]).
+-export([start/0, stop/0, register/2, on_node_up/1, on_node_down/1]).
 
 -export([init/1, handle_call/2, handle_event/2, handle_info/2,
          terminate/2, code_change/3]).
 
--record(alarms, {alertees, vm_memory_high_watermark = false}).
+-export([remote_conserve_memory/2]). %% Internal use only
+
+-record(alarms, {alertees, alarmed_nodes}).
 
 %%----------------------------------------------------------------------------
 
@@ -33,6 +35,8 @@
 -spec(start/0 :: () -> 'ok').
 -spec(stop/0 :: () -> 'ok').
 -spec(register/2 :: (pid(), mfa_tuple()) -> boolean()).
+-spec(on_node_up/1 :: (node()) -> 'ok').
+-spec(on_node_down/1 :: (node()) -> 'ok').
 
 -endif.
 
@@ -56,39 +60,57 @@ register(Pid, HighMemMFA) ->
                    {register, Pid, HighMemMFA},
                    infinity).
 
+on_node_up(Node) -> gen_event:notify(alarm_handler, {node_up, Node}). %% async
+
+on_node_down(Node) -> gen_event:notify(alarm_handler, {node_down, Node}). %% async
+
+%% Sets (true) or clears (false) this node's memory alarm on Pid's node. We
+%% can't use alarm_handler:{set,clear}_alarm as they can't notify a remote node.
+remote_conserve_memory(Pid, true) ->
+    gen_event:notify({alarm_handler, node(Pid)},
+                     {set_alarm, {{vm_memory_high_watermark, node()}, []}});
+remote_conserve_memory(Pid, false) ->
+    gen_event:notify({alarm_handler, node(Pid)},
+                     {clear_alarm, {vm_memory_high_watermark, node()}}).
+
 %%----------------------------------------------------------------------------
 
 init([]) ->
-    {ok, #alarms{alertees = dict:new()}}.
+    {ok, #alarms{alertees      = dict:new(),
+                 alarmed_nodes = sets:new()}}.
 
-handle_call({register, Pid, {M, F, A} = HighMemMFA},
-            State = #alarms{alertees = Alertess}) ->
-    _MRef = erlang:monitor(process, Pid),
-    ok = case State#alarms.vm_memory_high_watermark of
-             true  -> apply(M, F, A ++ [Pid, true]);
-             false -> ok
-         end,
-    NewAlertees = dict:store(Pid, HighMemMFA, Alertess),
-    {ok, State#alarms.vm_memory_high_watermark,
-     State#alarms{alertees = NewAlertees}};
+handle_call({register, Pid, HighMemMFA}, State) ->
+    {ok, 0 < sets:size(State#alarms.alarmed_nodes),
+     internal_register(Pid, HighMemMFA, State)};
 
 handle_call(_Request, State) ->
     {ok, not_understood, State}.
 
-handle_event({set_alarm, {vm_memory_high_watermark, []}}, State) ->
-    ok = alert(true, State#alarms.alertees),
-    {ok, State#alarms{vm_memory_high_watermark = true}};
+handle_event({set_alarm, {{vm_memory_high_watermark, Node}, []}}, State) ->
+    {ok, maybe_alert(fun sets:add_element/2, Node, State)};
 
-handle_event({clear_alarm, vm_memory_high_watermark}, State) ->
-    ok = alert(false, State#alarms.alertees),
-    {ok, State#alarms{vm_memory_high_watermark = false}};
+handle_event({clear_alarm, {vm_memory_high_watermark, Node}}, State) ->
+    {ok, maybe_alert(fun sets:del_element/2, Node, State)};
+
+handle_event({node_up, Node}, State) ->
+    %% Must do this via notify and not call to avoid possible deadlock.
+    ok = gen_event:notify(
+           {alarm_handler, Node},
+           {register, self(), {?MODULE, remote_conserve_memory, []}}),
+    {ok, State};
+
+handle_event({node_down, Node}, State) ->
+    {ok, maybe_alert(fun sets:del_element/2, Node, State)};
+
+handle_event({register, Pid, HighMemMFA}, State) ->
+    {ok, internal_register(Pid, HighMemMFA, State)};
 
 handle_event(_Event, State) ->
     {ok, State}.
 
 handle_info({'DOWN', _MRef, process, Pid, _Reason},
-            State = #alarms{alertees = Alertess}) ->
-    {ok, State#alarms{alertees = dict:erase(Pid, Alertess)}};
+            State = #alarms{alertees = Alertees}) ->
+    {ok, State#alarms{alertees = dict:erase(Pid, Alertees)}};
 
 handle_info(_Info, State) ->
     {ok, State}.
@@ -100,10 +122,45 @@ code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
 
 %%----------------------------------------------------------------------------
-alert(_Alert, undefined) ->
-    ok;
-alert(Alert, Alertees) ->
-    dict:fold(fun (Pid, {M, F, A}, Acc) ->
-                      ok = erlang:apply(M, F, A ++ [Pid, Alert]),
-                      Acc
+
+maybe_alert(SetFun, Node, State = #alarms{alarmed_nodes = Before,
+                                          alertees      = Alertees}) ->
+    %% Apply the set operation, then compare sizes to spot a transition.
+    After = SetFun(Node, Before),
+    Sizes = {sets:size(Before), sets:size(After)},
+    %% If our own alarm state has changed, inform the remotes.
+    case {Node =:= node(), Sizes} of
+        {true, {Old, New}} when Old < New -> ok = alert_remote(true,  Alertees);
+        {true, {Old, New}} when Old > New -> ok = alert_remote(false, Alertees);
+        {_,    {_,   _  }}                -> ok
+    end,
+    %% If the overall alarm state has changed, inform the locals.
+    case Sizes of
+        {0, 1} -> ok = alert_local(true,  Alertees);
+        {1, 0} -> ok = alert_local(false, Alertees);
+        {_, _} -> ok
+    end,
+    State#alarms{alarmed_nodes = After}.
+
+alert_local(Alert, Alertees)  -> alert(Alert, Alertees, fun erlang:'=:='/2).
+%% alert_local/2 reaches alertees on this node; alert_remote/2 all the others.
+alert_remote(Alert, Alertees) -> alert(Alert, Alertees, fun erlang:'=/='/2).
+%% NodeComparator(node(), node(Pid)) decides whether Pid's MFA is applied.
+alert(Alert, Alertees, NodeComparator) ->
+    Node = node(),
+    dict:fold(fun (Pid, {M, F, A}, ok) -> % 'ok' acc: callbacks must return ok
+                      case NodeComparator(Node, node(Pid)) of
+                          true  -> apply(M, F, A ++ [Pid, Alert]);
+                          false -> ok
+                      end
               end, ok, Alertees).
+
+internal_register(Pid, HighMemMFA = {Mod, Fun, Args},
+                  State = #alarms{alertees = Alertees}) ->
+    _MRef = erlang:monitor(process, Pid), % 'DOWN' is handled in handle_info/2
+    LocalAlarm = sets:is_element(node(), State#alarms.alarmed_nodes),
+    %% A late registrant is told at once if this node is already in alarm.
+    if LocalAlarm -> ok = apply(Mod, Fun, Args ++ [Pid, true]);
+       true       -> ok
+    end,
+    State#alarms{alertees = dict:store(Pid, HighMemMFA, Alertees)}.
diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index dc3f249a..6024db65 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -17,21 +17,23 @@
 -module(rabbit_amqqueue).
 
 -export([start/0, stop/0, declare/5, delete_immediately/1, delete/3, purge/1]).
--export([internal_declare/2, internal_delete/1,
-         maybe_run_queue_via_backing_queue/2,
-         maybe_run_queue_via_backing_queue_async/2,
-         sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2,
-         set_maximum_since_use/2, maybe_expire/1, drop_expired/1]).
 -export([pseudo_queue/2]).
 -export([lookup/1, with/2, with_or_die/2, assert_equivalence/5,
          check_exclusive_access/2, with_exclusive_access_or_die/3,
-         stat/1, deliver/2, requeue/3, ack/4, reject/4]).
+         stat/1, deliver/2, requeue/3, ack/3, reject/4]).
 -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]).
--export([consumers/1, consumers_all/1]).
+-export([consumers/1, consumers_all/1, consumer_info_keys/0]).
 -export([basic_get/3, basic_consume/7, basic_cancel/4]).
 -export([notify_sent/2, unblock/2, flush_all/2]).
--export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]).
+-export([notify_down_all/2, limit_all/3]).
 -export([on_node_down/1]).
+-export([store_queue/1]).
+
+
+%% internal
+-export([internal_declare/2, internal_delete/1, run_backing_queue/3,
+         sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2,
+         set_maximum_since_use/2, maybe_expire/1, drop_expired/1]).
 
 -include("rabbit.hrl").
 -include_lib("stdlib/include/qlc.hrl").
@@ -51,11 +53,11 @@
 -type(qmsg() :: {name(), pid(), msg_id(), boolean(), rabbit_types:message()}).
 -type(msg_id() :: non_neg_integer()).
 -type(ok_or_errors() ::
-      'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}).
+        'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}).
 
 -type(queue_or_not_found() :: rabbit_types:amqqueue() | 'not_found').
 
--spec(start/0 :: () -> 'ok').
+-spec(start/0 :: () -> [name()]).
 -spec(stop/0 :: () -> 'ok').
 -spec(declare/5 ::
         (name(), boolean(), boolean(),
@@ -90,6 +92,7 @@
 -spec(consumers/1 ::
         (rabbit_types:amqqueue())
         -> [{pid(), rabbit_types:ctag(), boolean()}]).
+-spec(consumer_info_keys/0 :: () -> rabbit_types:info_keys()).
 -spec(consumers_all/1 ::
         (rabbit_types:vhost())
         -> [{name(), pid(), rabbit_types:ctag(), boolean()}]).
@@ -98,32 +101,28 @@
         -> {'ok', non_neg_integer(), non_neg_integer()}).
 -spec(delete_immediately/1 :: (rabbit_types:amqqueue()) -> 'ok').
 -spec(delete/3 ::
-      (rabbit_types:amqqueue(), 'false', 'false')
+        (rabbit_types:amqqueue(), 'false', 'false')
         -> qlen();
-      (rabbit_types:amqqueue(), 'true' , 'false')
+        (rabbit_types:amqqueue(), 'true' , 'false')
         -> qlen() | rabbit_types:error('in_use');
-      (rabbit_types:amqqueue(), 'false', 'true' )
+        (rabbit_types:amqqueue(), 'false', 'true' )
         -> qlen() | rabbit_types:error('not_empty');
-      (rabbit_types:amqqueue(), 'true' , 'true' )
+        (rabbit_types:amqqueue(), 'true' , 'true' )
         -> qlen() |
            rabbit_types:error('in_use') |
            rabbit_types:error('not_empty')).
 -spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()).
 -spec(deliver/2 :: (pid(), rabbit_types:delivery()) -> boolean()).
 -spec(requeue/3 :: (pid(), [msg_id()],  pid()) -> 'ok').
--spec(ack/4 ::
-        (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid())
-        -> 'ok').
+-spec(ack/3 :: (pid(), [msg_id()], pid()) -> 'ok').
 -spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok').
--spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()).
--spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok').
 -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()).
 -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()).
 -spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) ->
-             {'ok', non_neg_integer(), qmsg()} | 'empty').
+                          {'ok', non_neg_integer(), qmsg()} | 'empty').
 -spec(basic_consume/7 ::
-      (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined',
-       rabbit_types:ctag(), boolean(), any())
+        (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined',
+         rabbit_types:ctag(), boolean(), any())
         -> rabbit_types:ok_or_error('exclusive_consume_unavailable')).
 -spec(basic_cancel/4 ::
         (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok').
@@ -136,12 +135,11 @@
 -spec(internal_delete/1 ::
         (name()) -> rabbit_types:ok_or_error('not_found') |
                     rabbit_types:connection_exit() |
-                    fun ((boolean()) -> rabbit_types:ok_or_error('not_found') |
-                                        rabbit_types:connection_exit())).
--spec(maybe_run_queue_via_backing_queue/2 ::
-        (pid(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok').
--spec(maybe_run_queue_via_backing_queue_async/2 ::
-        (pid(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok').
+                    fun (() -> rabbit_types:ok_or_error('not_found') |
+                               rabbit_types:connection_exit())).
+-spec(run_backing_queue/3 ::
+        (pid(), atom(),
+         (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> 'ok').
 -spec(sync_timeout/1 :: (pid()) -> 'ok').
 -spec(update_ram_duration/1 :: (pid()) -> 'ok').
 -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok').
@@ -154,6 +152,9 @@
 
 %%----------------------------------------------------------------------------
 
+-define(CONSUMER_INFO_KEYS,
+        [queue_name, channel_pid, consumer_tag, ack_required]).
+
 start() ->
     DurableQueues = find_durable_queues(),
     {ok, BQ} = application:get_env(rabbit, backing_queue_module),
@@ -163,8 +164,7 @@ start() ->
                {rabbit_amqqueue_sup,
                 {rabbit_amqqueue_sup, start_link, []},
                 transient, infinity, supervisor, [rabbit_amqqueue_sup]}),
-    _RealDurableQueues = recover_durable_queues(DurableQueues),
-    ok.
+    recover_durable_queues(DurableQueues).
 
 stop() ->
     ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup),
@@ -183,19 +183,22 @@ find_durable_queues() ->
       end).
 
 recover_durable_queues(DurableQueues) ->
-    Qs = [start_queue_process(Q) || Q <- DurableQueues],
-    [Q || Q <- Qs,
-          gen_server2:call(Q#amqqueue.pid, {init, true}, infinity) == Q].
+    Qs = [start_queue_process(node(), Q) || Q <- DurableQueues], % local node
+    [QName || Q = #amqqueue{name = QName, pid = Pid} <- Qs, % names only
+              gen_server2:call(Pid, {init, true}, infinity) == {new, Q}].
 
 declare(QueueName, Durable, AutoDelete, Args, Owner) ->
     ok = check_declare_arguments(QueueName, Args),
-    Q = start_queue_process(#amqqueue{name = QueueName,
-                                      durable = Durable,
-                                      auto_delete = AutoDelete,
-                                      arguments = Args,
-                                      exclusive_owner = Owner,
-                                      pid = none}),
-    case gen_server2:call(Q#amqqueue.pid, {init, false}) of
+    {Node, MNodes} = determine_queue_nodes(Args),
+    Q = start_queue_process(Node, #amqqueue{name            = QueueName,
+                                            durable         = Durable,
+                                            auto_delete     = AutoDelete,
+                                            arguments       = Args,
+                                            exclusive_owner = Owner,
+                                            pid             = none,
+                                            slave_pids      = [],
+                                            mirror_nodes    = MNodes}),
+    case gen_server2:call(Q#amqqueue.pid, {init, false}, infinity) of
         not_found -> rabbit_misc:not_found(QueueName);
         Q1        -> Q1
     end.
@@ -211,15 +214,15 @@ internal_declare(Q = #amqqueue{name = QueueName}, false) ->
                       case mnesia:read({rabbit_durable_queue, QueueName}) of
                           []  -> ok = store_queue(Q),
                                  B = add_default_binding(Q),
-                                 fun (Tx) -> B(Tx), Q end;
-                          [_] -> %% Q exists on stopped node
-                                 rabbit_misc:const(not_found)
+                                 fun () -> B(), Q end;
+                          %% Q exists on stopped node
+                          [_] -> rabbit_misc:const(not_found)
                       end;
                   [ExistingQ = #amqqueue{pid = QPid}] ->
-                      case is_process_alive(QPid) of
+                      case rabbit_misc:is_process_alive(QPid) of
                           true  -> rabbit_misc:const(ExistingQ);
                           false -> TailFun = internal_delete(QueueName),
-                                   fun (Tx) -> TailFun(Tx), ExistingQ end
+                                   fun () -> TailFun(), ExistingQ end
                       end
               end
       end).
@@ -232,8 +235,24 @@ store_queue(Q = #amqqueue{durable = false}) ->
     ok = mnesia:write(rabbit_queue, Q, write),
     ok.
 
-start_queue_process(Q) ->
-    {ok, Pid} = rabbit_amqqueue_sup:start_child([Q]),
+determine_queue_nodes(Args) -> % -> {Node, MirrorNodes}, as used by declare/5
+    Policy = rabbit_misc:table_lookup(Args, <<"x-ha-policy">>),
+    PolicyParams = rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>),
+    case {Policy, PolicyParams} of
+        {{_Type, <<"nodes">>}, {array, Nodes}} ->
+            %% NOTE(review): list_to_atom on declare args; confirm trusted.
+            case [list_to_atom(binary_to_list(Node)) ||
+                     {longstr, Node} <- Nodes] of % non-longstr entries dropped
+                [Node]         -> {Node,   undefined}; % one node: no mirrors
+                [First | Rest] -> {First,  Rest} % [] has no clause; see below
+            end; % check_ha_policy_argument/2 rejects an empty list beforehand
+        {{_Type, <<"all">>}, _} ->
+            {node(), all};
+        _ -> % no policy, or "nodes" without an array of params
+            {node(), undefined}
+    end.
+
+start_queue_process(Node, Q) ->
+    {ok, Pid} = rabbit_amqqueue_sup:start_child(Node, [Q]),
     Q#amqqueue{pid = Pid}.
 
 add_default_binding(#amqqueue{name = QueueName}) ->
@@ -249,8 +268,13 @@ lookup(Name) ->
 
 with(Name, F, E) ->
     case lookup(Name) of
-        {ok, Q} -> rabbit_misc:with_exit_handler(E, fun () -> F(Q) end);
-        {error, not_found} -> E()
+        {ok, Q = #amqqueue{slave_pids = []}} -> % queue has no slaves
+            rabbit_misc:with_exit_handler(E, fun () -> F(Q) end);
+        {ok, Q} -> % queue has slaves: swap E for a retrying handler
+            E1 = fun () -> timer:sleep(25), with(Name, F, E) end, % 25ms, redo
+            rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); % NOTE(review): no retry limit
+        {error, not_found} ->
+            E()
     end.
 
 with(Name, F) ->
@@ -286,41 +310,58 @@ with_exclusive_access_or_die(Name, ReaderPid, F) ->
                 fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end).
 
 assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args},
-                       RequiredArgs) ->
-    rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName,
-                                        [<<"x-expires">>]).
+                        RequiredArgs) ->
+    rabbit_misc:assert_args_equivalence(
+      Args, RequiredArgs, QueueName,
+      [<<"x-expires">>, <<"x-message-ttl">>, <<"x-ha-policy">>]).
 
 check_declare_arguments(QueueName, Args) ->
-    [case Fun(rabbit_misc:table_lookup(Args, Key)) of
+    [case Fun(rabbit_misc:table_lookup(Args, Key), Args) of
          ok             -> ok;
          {error, Error} -> rabbit_misc:protocol_error(
                              precondition_failed,
                              "invalid arg '~s' for ~s: ~w",
                              [Key, rabbit_misc:rs(QueueName), Error])
      end || {Key, Fun} <-
-                [{<<"x-expires">>,     fun check_expires_argument/1},
-                 {<<"x-message-ttl">>, fun check_message_ttl_argument/1}]],
+                [{<<"x-expires">>,     fun check_integer_argument/2},
+                 {<<"x-message-ttl">>, fun check_integer_argument/2},
+                 {<<"x-ha-policy">>,   fun check_ha_policy_argument/2}]],
     ok.
 
-check_expires_argument(Val) ->
-    check_integer_argument(Val,
-                           expires_not_of_acceptable_type,
-                           expires_zero_or_less).
-
-check_message_ttl_argument(Val) ->
-    check_integer_argument(Val,
-                           ttl_not_of_acceptable_type,
-                           ttl_zero_or_less).
-
-check_integer_argument(undefined, _, _) ->
+check_integer_argument(undefined, _Args) -> % absent; _Args only fits Fun/2
     ok;
-check_integer_argument({Type, Val}, InvalidTypeError, _) when Val > 0 ->
+check_integer_argument({Type, Val}, _Args) when Val > 0 -> % non-numbers sort above 0 too
     case lists:member(Type, ?INTEGER_ARG_TYPES) of
         true  -> ok;
-        false -> {error, {InvalidTypeError, Type, Val}}
+        false -> {error, {unacceptable_type, Type}}
+    end;
+check_integer_argument({_Type, Val}, _Args) -> % reached only when Val =< 0
+    {error, {value_zero_or_less, Val}}.
+
+check_ha_policy_argument(undefined, _Args) -> % no x-ha-policy supplied
+    ok;
+check_ha_policy_argument({longstr, <<"all">>}, _Args) ->
+    ok;
+check_ha_policy_argument({longstr, <<"nodes">>}, Args) -> % needs a node list
+    case rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>) of
+        undefined ->
+            {error, {require, 'x-ha-policy-params'}};
+        {array, []} ->
+            {error, {require_non_empty_list_of_nodes_for_ha}};
+        {array, Ary} ->
+            case lists:all(fun ({longstr, _Node}) -> true; % all must be longstr
+                               (_               ) -> false
+                           end, Ary) of
+                true  -> ok;
+                false -> {error, {require_node_list_as_longstrs_for_ha, Ary}}
+            end;
+        {Type, _} -> % params present but not an array
+            {error, {ha_nodes_policy_params_not_array_of_longstr, Type}}
     end;
-check_integer_argument({_Type, _Val}, _, ZeroOrLessError) ->
-    {error, ZeroOrLessError}.
+check_ha_policy_argument({longstr, Policy}, _Args) -> % any other policy name
+    {error, {invalid_ha_policy, Policy}};
+check_ha_policy_argument({Type, _}, _Args) -> % policy value is not a longstr
+    {error, {unacceptable_type, Type}}.
 
 list(VHostPath) ->
     mnesia:dirty_match_object(
@@ -332,10 +373,10 @@ info_keys() -> rabbit_amqqueue_process:info_keys().
 map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)).
 
 info(#amqqueue{ pid = QPid }) ->
-    delegate_call(QPid, info, infinity).
+    delegate_call(QPid, info).
 
 info(#amqqueue{ pid = QPid }, Items) ->
-    case delegate_call(QPid, {info, Items}, infinity) of
+    case delegate_call(QPid, {info, Items}) of
         {ok, Res}      -> Res;
         {error, Error} -> throw(Error)
     end.
@@ -345,24 +386,30 @@ info_all(VHostPath) -> map(VHostPath, fun (Q) -> info(Q) end).
 info_all(VHostPath, Items) -> map(VHostPath, fun (Q) -> info(Q, Items) end).
 
 consumers(#amqqueue{ pid = QPid }) ->
-    delegate_call(QPid, consumers, infinity).
+    delegate_call(QPid, consumers).
+
+consumer_info_keys() -> ?CONSUMER_INFO_KEYS.
 
 consumers_all(VHostPath) ->
+    ConsumerInfoKeys=consumer_info_keys(), % keys paired with each value below
     lists:append(
       map(VHostPath,
-          fun (Q) -> [{Q#amqqueue.name, ChPid, ConsumerTag, AckRequired} ||
+          fun (Q) -> % NOTE(review): the -spec still promises 4-tuples
+              [lists:zip(ConsumerInfoKeys, % -> [{Key, Value}] per consumer
+                         [Q#amqqueue.name, ChPid, ConsumerTag, AckRequired]) ||
                          {ChPid, ConsumerTag, AckRequired} <- consumers(Q)]
           end)).
 
-stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity).
+stat(#amqqueue{pid = QPid}) ->
+    delegate_call(QPid, stat).
 
 delete_immediately(#amqqueue{ pid = QPid }) ->
     gen_server2:cast(QPid, delete_immediately).
 
 delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) ->
-    delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity).
+    delegate_call(QPid, {delete, IfUnused, IfEmpty}).
 
-purge(#amqqueue{ pid = QPid }) -> delegate_call(QPid, purge, infinity).
+purge(#amqqueue{ pid = QPid }) -> delegate_call(QPid, purge).
 
 deliver(QPid, Delivery = #delivery{immediate = true}) ->
     gen_server2:call(QPid, {deliver_immediately, Delivery}, infinity);
@@ -374,23 +421,14 @@ deliver(QPid, Delivery) ->
     true.
 
 requeue(QPid, MsgIds, ChPid) ->
-    delegate_call(QPid, {requeue, MsgIds, ChPid}, infinity).
+    delegate_call(QPid, {requeue, MsgIds, ChPid}).
 
-ack(QPid, Txn, MsgIds, ChPid) ->
-    delegate_cast(QPid, {ack, Txn, MsgIds, ChPid}).
+ack(QPid, MsgIds, ChPid) ->
+    delegate_cast(QPid, {ack, MsgIds, ChPid}).
 
 reject(QPid, MsgIds, Requeue, ChPid) ->
     delegate_cast(QPid, {reject, MsgIds, Requeue, ChPid}).
 
-commit_all(QPids, Txn, ChPid) ->
-    safe_delegate_call_ok(
-      fun (QPid) -> gen_server2:call(QPid, {commit, Txn, ChPid}, infinity) end,
-      QPids).
-
-rollback_all(QPids, Txn, ChPid) ->
-    delegate:invoke_no_result(
-      QPids, fun (QPid) -> gen_server2:cast(QPid, {rollback, Txn, ChPid}) end).
-
 notify_down_all(QPids, ChPid) ->
     safe_delegate_call_ok(
       fun (QPid) -> gen_server2:call(QPid, {notify_down, ChPid}, infinity) end,
@@ -403,20 +441,18 @@ limit_all(QPids, ChPid, LimiterPid) ->
              end).
 
 basic_get(#amqqueue{pid = QPid}, ChPid, NoAck) ->
-    delegate_call(QPid, {basic_get, ChPid, NoAck}, infinity).
+    delegate_call(QPid, {basic_get, ChPid, NoAck}).
 
 basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, LimiterPid,
               ConsumerTag, ExclusiveConsume, OkMsg) ->
     delegate_call(QPid, {basic_consume, NoAck, ChPid,
-                         LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg},
-                  infinity).
+                         LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg}).
 
 basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) ->
-    ok = delegate_call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg},
-                       infinity).
+    ok = delegate_call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg}).
 
 notify_sent(QPid, ChPid) ->
-    delegate_cast(QPid, {notify_sent, ChPid}).
+    gen_server2:cast(QPid, {notify_sent, ChPid}).
 
 unblock(QPid, ChPid) ->
     delegate_cast(QPid, {unblock, ChPid}).
@@ -438,17 +474,12 @@ internal_delete(QueueName) ->
               case mnesia:wread({rabbit_queue, QueueName}) of
                   []  -> rabbit_misc:const({error, not_found});
                   [_] -> Deletions = internal_delete1(QueueName),
-                         fun (Tx) -> ok = rabbit_binding:process_deletions(
-                                            Deletions, Tx)
-                         end
+                         rabbit_binding:process_deletions(Deletions)
               end
       end).
 
-maybe_run_queue_via_backing_queue(QPid, Fun) ->
-    gen_server2:call(QPid, {maybe_run_queue_via_backing_queue, Fun}, infinity).
-
-maybe_run_queue_via_backing_queue_async(QPid, Fun) ->
-    gen_server2:cast(QPid, {maybe_run_queue_via_backing_queue, Fun}).
+run_backing_queue(QPid, Mod, Fun) ->
+    gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}).
 
 sync_timeout(QPid) ->
     gen_server2:cast(QPid, sync_timeout).
@@ -469,18 +500,15 @@ drop_expired(QPid) ->
     gen_server2:cast(QPid, drop_expired).
 
 on_node_down(Node) ->
-    rabbit_misc:execute_mnesia_transaction(
-      fun () -> qlc:e(qlc:q([delete_queue(QueueName) ||
-                                #amqqueue{name = QueueName, pid = Pid}
-                                    <- mnesia:table(rabbit_queue),
-                                node(Pid) == Node]))
-      end,
-      fun (Deletions, Tx) ->
-              rabbit_binding:process_deletions(
-                lists:foldl(fun rabbit_binding:combine_deletions/2,
-                            rabbit_binding:new_deletions(),
-                            Deletions),
-                Tx)
+    rabbit_misc:execute_mnesia_tx_with_tail(
+      fun () -> Dels = qlc:e(qlc:q([delete_queue(QueueName) ||
+                                       #amqqueue{name = QueueName, pid = Pid,
+                                                 slave_pids = []} % no slaves
+                                           <- mnesia:table(rabbit_queue),
+                                       node(Pid) == Node])), % pid was on Node
+                rabbit_binding:process_deletions( % merged into one deletion set
+                  lists:foldl(fun rabbit_binding:combine_deletions/2,
+                              rabbit_binding:new_deletions(), Dels))
       end).
 
 delete_queue(QueueName) ->
@@ -488,11 +516,13 @@ delete_queue(QueueName) ->
     rabbit_binding:remove_transient_for_destination(QueueName).
 
 pseudo_queue(QueueName, Pid) ->
-    #amqqueue{name = QueueName,
-              durable = false,
-              auto_delete = false,
-              arguments = [],
-              pid = Pid}.
+    #amqqueue{name         = QueueName,
+              durable      = false,
+              auto_delete  = false,
+              arguments    = [],
+              pid          = Pid,
+              slave_pids   = [],
+              mirror_nodes = undefined}.
 
 safe_delegate_call_ok(F, Pids) ->
     case delegate:invoke(Pids, fun (Pid) ->
@@ -504,8 +534,8 @@ safe_delegate_call_ok(F, Pids) ->
         {_, Bad} -> {error, Bad}
     end.
 
-delegate_call(Pid, Msg, Timeout) ->
-    delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, Timeout) end).
+delegate_call(Pid, Msg) ->
+    delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, infinity) end).
 
 delegate_cast(Pid, Msg) ->
     delegate:invoke_no_result(Pid, fun (P) -> gen_server2:cast(P, Msg) end).
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 2999aab2..e787fa84 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -21,7 +21,7 @@
 -behaviour(gen_server2).
 
 -define(UNSENT_MESSAGE_LIMIT,          100).
--define(SYNC_INTERVAL,                 5). %% milliseconds
+-define(SYNC_INTERVAL,                 25). %% milliseconds
 -define(RAM_DURATION_UPDATE_INTERVAL,  5000).
 
 -define(BASE_MESSAGE_PROPERTIES,
@@ -31,9 +31,11 @@
 
 -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
          handle_info/2, handle_pre_hibernate/1, prioritise_call/3,
-         prioritise_cast/2, prioritise_info/2]).
+         prioritise_cast/2, prioritise_info/2, format_message_queue/2]).
 
-% Queue's state
+-export([init_with_backing_queue_state/7]).
+
+%% Queue's state
 -record(q, {q,
             exclusive_consumer,
             has_had_consumers,
@@ -46,7 +48,7 @@
             rate_timer_ref,
             expiry_timer_ref,
             stats_timer,
-            guid_to_channel,
+            msg_id_to_channel,
             ttl,
             ttl_timer_ref
            }).
@@ -60,7 +62,6 @@
              monitor_ref,
              acktags,
              is_limit_active,
-             txn,
              unsent_message_count}).
 
 -define(STATISTICS_KEYS,
@@ -72,7 +73,8 @@
          messages,
          consumers,
          memory,
-         backing_queue_status
+         backing_queue_status,
+         slave_pids
         ]).
 
 -define(CREATION_EVENT_KEYS,
@@ -81,7 +83,8 @@
          durable,
          auto_delete,
          arguments,
-         owner_pid
+         owner_pid,
+         mirror_nodes
         ]).
 
 -define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]).
@@ -97,12 +100,11 @@ info_keys() -> ?INFO_KEYS.
 init(Q) ->
     ?LOGDEBUG("Queue starting - ~p~n", [Q]),
     process_flag(trap_exit, true),
-    {ok, BQ} = application:get_env(backing_queue_module),
 
     {ok, #q{q                   = Q#amqqueue{pid = self()},
             exclusive_consumer  = none,
             has_had_consumers   = false,
-            backing_queue       = BQ,
+            backing_queue       = backing_queue_module(Q),
             backing_queue_state = undefined,
             active_consumers    = queue:new(),
             blocked_consumers   = queue:new(),
@@ -112,17 +114,47 @@ init(Q) ->
             expiry_timer_ref    = undefined,
             ttl                 = undefined,
             stats_timer         = rabbit_event:init_stats_timer(),
-            guid_to_channel     = dict:new()}, hibernate,
+            msg_id_to_channel   = dict:new()}, hibernate,
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
 
-terminate(shutdown,      State = #q{backing_queue = BQ}) ->
-    terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State);
-terminate({shutdown, _}, State = #q{backing_queue = BQ}) ->
-    terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State);
-terminate(_Reason,       State = #q{backing_queue = BQ}) ->
+init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS,
+                              RateTRef, AckTags, Deliveries, MTC) -> % -> #q{}
+    ?LOGDEBUG("Queue starting - ~p~n", [Q]),
+    case Owner of % watch the exclusive owner, if there is one
+        none -> ok;
+        _    -> erlang:monitor(process, Owner)
+    end,
+    State = requeue_and_run( % AckTags are handed to requeue_and_run/2
+              AckTags,
+              process_args(
+                #q{q                   = Q, % as given; init/1 sets pid = self()
+                   exclusive_consumer  = none,
+                   has_had_consumers   = false,
+                   backing_queue       = BQ, % supplied by the caller
+                   backing_queue_state = BQS, % live; init/1 starts undefined
+                   active_consumers    = queue:new(),
+                   blocked_consumers   = queue:new(),
+                   expires             = undefined,
+                   sync_timer_ref      = undefined,
+                   rate_timer_ref      = RateTRef, % caller-supplied timer ref
+                   expiry_timer_ref    = undefined,
+                   ttl                 = undefined,
+                   stats_timer         = rabbit_event:init_stats_timer(),
+                   msg_id_to_channel   = MTC})), % init/1 uses dict:new() here
+    lists:foldl( % then push each Delivery through deliver_or_enqueue/2
+      fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end,
+      State, Deliveries).
+
+terminate(shutdown = R,      State = #q{backing_queue = BQ}) ->
+    terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State);
+terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) ->
+    terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State);
+terminate(Reason,            State = #q{backing_queue = BQ}) ->
     %% FIXME: How do we cancel active subscriptions?
     terminate_shutdown(fun (BQS) ->
-                               BQS1 = BQ:delete_and_terminate(BQS),
+                               rabbit_event:notify(
+                                 queue_deleted, [{pid, self()}]),
+                               BQS1 = BQ:delete_and_terminate(Reason, BQS),
                                %% don't care if the internal delete
                                %% doesn't return 'ok'.
                                rabbit_amqqueue:internal_delete(qname(State)),
@@ -135,8 +167,7 @@ code_change(_OldVsn, State, _Extra) ->
 %%----------------------------------------------------------------------------
 
 declare(Recover, From,
-        State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable},
-                   backing_queue = BQ, backing_queue_state = undefined,
+        State = #q{q = Q, backing_queue = BQ, backing_queue_state = undefined,
                    stats_timer = StatsTimer}) ->
     case rabbit_amqqueue:internal_declare(Q, Recover) of
         not_found -> {stop, normal, not_found, State};
@@ -147,7 +178,7 @@ declare(Recover, From,
                      ok = rabbit_memory_monitor:register(
                             self(), {rabbit_amqqueue,
                                      set_ram_duration_target, [self()]}),
-                     BQS = BQ:init(QName, IsDurable, Recover),
+                     BQS = bq_init(BQ, Q, Recover),
                      State1 = process_args(State#q{backing_queue_state = BQS}),
                      rabbit_event:notify(queue_created,
                                          infos(?CREATION_EVENT_KEYS, State1)),
@@ -157,6 +188,13 @@ declare(Recover, From,
         Q1        -> {stop, normal, {existing, Q1}, State}
     end.
 
+bq_init(BQ, Q, Recover) ->
+    %% self() is taken outside the fun so the callback targets this process.
+    QPid = self(),
+    AsyncCallback = fun (Mod, Fun) ->
+                        rabbit_amqqueue:run_backing_queue(QPid, Mod, Fun) end,
+    BQ:init(Q, Recover, AsyncCallback).
+
 process_args(State = #q{q = #amqqueue{arguments = Arguments}}) ->
     lists:foldl(fun({Arg, Fun}, State1) ->
                         case rabbit_misc:table_lookup(Arguments, Arg) of
@@ -171,23 +209,14 @@ init_expires(Expires, State) -> ensure_expiry_timer(State#q{expires = Expires}).
 init_ttl(TTL, State) -> drop_expired_messages(State#q{ttl = TTL}).
 
 terminate_shutdown(Fun, State) ->
-    State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
+    State1 = #q{backing_queue_state = BQS} =
         stop_sync_timer(stop_rate_timer(State)),
     case BQS of
-        undefined -> State;
+        undefined -> State1;
         _         -> ok = rabbit_memory_monitor:deregister(self()),
-                     BQS1 = lists:foldl(
-                              fun (#cr{txn = none}, BQSN) ->
-                                      BQSN;
-                                  (#cr{txn = Txn}, BQSN) ->
-                                      {_AckTags, BQSN1} =
-                                          BQ:tx_rollback(Txn, BQSN),
-                                      BQSN1
-                              end, BQS, all_ch_record()),
                      [emit_consumer_deleted(Ch, CTag)
                       || {Ch, CTag, _} <- consumers(State1)],
-                     rabbit_event:notify(queue_deleted, [{pid, self()}]),
-                     State1#q{backing_queue_state = Fun(BQS1)}
+                     State1#q{backing_queue_state = Fun(BQS)}
     end.
 
 reply(Reply, NewState) ->
@@ -200,13 +229,23 @@ noreply(NewState) ->
     {NewState1, Timeout} = next_state(NewState),
     {noreply, NewState1, Timeout}.
 
-next_state(State) ->
-    State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
-        ensure_rate_timer(State),
-    State2 = ensure_stats_timer(State1),
-    case BQ:needs_idle_timeout(BQS) of
-        true  -> {ensure_sync_timer(State2), 0};
-        false -> {stop_sync_timer(State2), hibernate}
+%% Drain confirms from the backing queue, ensure the rate and stats
+%% timers, then pick the timeout (hibernate or 0) from needs_timeout/1.
+next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
+    {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
+    State1 = confirm_messages(MsgIds, State#q{backing_queue_state = BQS1}),
+    State2 = ensure_stats_timer(ensure_rate_timer(State1)),
+    case BQ:needs_timeout(BQS1) of
+        false -> {stop_sync_timer(State2),   hibernate};
+        idle  -> {stop_sync_timer(State2),   0        };
+        timed -> {ensure_sync_timer(State2), 0        }
+    end.
+
+%% rabbit_mirror_queue_master if x-ha-policy is set, else the app env module.
+backing_queue_module(#amqqueue{arguments = Args}) ->
+    case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) =:= undefined of
+        false -> rabbit_mirror_queue_master;
+        true  -> {ok, BQM} = application:get_env(backing_queue_module), BQM
     end.
 
 ensure_sync_timer(State = #q{sync_timer_ref = undefined}) ->
@@ -278,17 +317,15 @@ lookup_ch(ChPid) ->
 ch_record(ChPid) ->
     Key = {ch, ChPid},
     case get(Key) of
-        undefined ->
-            MonitorRef = erlang:monitor(process, ChPid),
-            C = #cr{consumer_count = 0,
-                    ch_pid = ChPid,
-                    monitor_ref = MonitorRef,
-                    acktags = sets:new(),
-                    is_limit_active = false,
-                    txn = none,
-                    unsent_message_count = 0},
-            put(Key, C),
-            C;
+        undefined -> MonitorRef = erlang:monitor(process, ChPid),
+                     C = #cr{consumer_count       = 0,
+                             ch_pid               = ChPid,
+                             monitor_ref          = MonitorRef,
+                             acktags              = sets:new(),
+                             is_limit_active      = false,
+                             unsent_message_count = 0},
+                     put(Key, C),
+                     C;
         C = #cr{} -> C
     end.
 
@@ -297,13 +334,12 @@ store_ch_record(C = #cr{ch_pid = ChPid}) ->
 
 maybe_store_ch_record(C = #cr{consumer_count       = ConsumerCount,
                               acktags              = ChAckTags,
-                              txn                  = Txn,
                               unsent_message_count = UnsentMessageCount}) ->
-    case {sets:size(ChAckTags), ConsumerCount, UnsentMessageCount, Txn} of
-        {0, 0, 0, none} -> ok = erase_ch_record(C),
-                           false;
-        _               -> store_ch_record(C),
-                           true
+    case {sets:size(ChAckTags), ConsumerCount, UnsentMessageCount} of
+        {0, 0, 0} -> ok = erase_ch_record(C),
+                     false;
+        _         -> store_ch_record(C),
+                     true
     end.
 
 erase_ch_record(#cr{ch_pid      = ChPid,
@@ -314,18 +350,16 @@ erase_ch_record(#cr{ch_pid      = ChPid,
     erase({ch, ChPid}),
     ok.
 
-all_ch_record() ->
-    [C || {{ch, _}, C} <- get()].
+all_ch_record() -> [C || {{ch, _}, C} <- get()].
 
 is_ch_blocked(#cr{unsent_message_count = Count, is_limit_active = Limited}) ->
     Limited orelse Count >= ?UNSENT_MESSAGE_LIMIT.
 
 ch_record_state_transition(OldCR, NewCR) ->
-    BlockedOld = is_ch_blocked(OldCR),
-    BlockedNew = is_ch_blocked(NewCR),
-    if BlockedOld andalso not(BlockedNew) -> unblock;
-       BlockedNew andalso not(BlockedOld) -> block;
-       true                               -> ok
+    case {is_ch_blocked(OldCR), is_ch_blocked(NewCR)} of
+        {true, false} -> unblock;
+        {false, true} -> block;
+        {_, _}        -> ok
     end.
 
 deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc,
@@ -360,13 +394,12 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc,
                         case ch_record_state_transition(C, NewC) of
                             ok    -> {queue:in(QEntry, ActiveConsumersTail),
                                       BlockedConsumers};
-                            block ->
-                                {ActiveConsumers1, BlockedConsumers1} =
-                                    move_consumers(ChPid,
-                                                   ActiveConsumersTail,
-                                                   BlockedConsumers),
-                                {ActiveConsumers1,
-                                 queue:in(QEntry, BlockedConsumers1)}
+                            block -> {ActiveConsumers1, BlockedConsumers1} =
+                                         move_consumers(ChPid,
+                                                        ActiveConsumersTail,
+                                                        BlockedConsumers),
+                                     {ActiveConsumers1,
+                                      queue:in(QEntry, BlockedConsumers1)}
                         end,
                     State2 = State1#q{
                                active_consumers = NewActiveConsumers,
@@ -391,56 +424,63 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc,
             {FunAcc, State}
     end.
 
-deliver_from_queue_pred(IsEmpty, _State) ->
-    not IsEmpty.
+deliver_from_queue_pred(IsEmpty, _State) -> not IsEmpty.
 
 deliver_from_queue_deliver(AckRequired, false, State) ->
     {{Message, IsDelivered, AckTag, Remaining}, State1} =
         fetch(AckRequired, State),
     {{Message, IsDelivered, AckTag}, 0 == Remaining, State1}.
 
-confirm_messages(Guids, State = #q{guid_to_channel = GTC}) ->
-    {CMs, GTC1} =
-        lists:foldl(
-          fun(Guid, {CMs, GTC0}) ->
-                  case dict:find(Guid, GTC0) of
-                      {ok, {ChPid, MsgSeqNo}} ->
-                          {[{ChPid, MsgSeqNo} | CMs], dict:erase(Guid, GTC0)};
-                      _ ->
-                          {CMs, GTC0}
-                  end
-          end, {[], GTC}, Guids),
-    case lists:usort(CMs) of
-        [{Ch, MsgSeqNo} | CMs1] ->
-            [rabbit_channel:confirm(ChPid, MsgSeqNos) ||
-                {ChPid, MsgSeqNos} <- group_confirms_by_channel(
-                                        CMs1, [{Ch, [MsgSeqNo]}])];
-        [] ->
-            ok
-    end,
-    State#q{guid_to_channel = GTC1}.
-
-group_confirms_by_channel([], Acc) ->
-    Acc;
-group_confirms_by_channel([{Ch, Msg1} | CMs], [{Ch, Msgs} | Acc]) ->
-    group_confirms_by_channel(CMs, [{Ch, [Msg1 | Msgs]} | Acc]);
-group_confirms_by_channel([{Ch, Msg1} | CMs], Acc) ->
-    group_confirms_by_channel(CMs, [{Ch, [Msg1]} | Acc]).
-
-record_confirm_message(#delivery{msg_seq_no = undefined}, State) ->
-    {no_confirm, State};
-record_confirm_message(#delivery{sender     = ChPid,
+%% Confirm MsgIds to the channels recorded for them and forget those ids;
+%% ids with no recorded channel are skipped.
+confirm_messages([], State) -> State;
+confirm_messages(MsgIds, State = #q{msg_id_to_channel = MTC}) ->
+    Collect = fun (MsgId, {ByChannel, Pending}) ->
+                      case dict:find(MsgId, Pending) of
+                          {ok, {ChPid, MsgSeqNo}} ->
+                              {gb_trees_cons(ChPid, MsgSeqNo, ByChannel),
+                               dict:erase(MsgId, Pending)};
+                          _ -> {ByChannel, Pending}
+                      end
+              end,
+    {CMs, MTC1} = lists:foldl(Collect, {gb_trees:empty(), MTC}, MsgIds),
+    gb_trees_foreach(fun rabbit_channel:confirm/2, CMs),
+    State#q{msg_id_to_channel = MTC1}.
+
+%% Apply Fun(Key, Val) to every entry of a gb_tree, smallest key first;
+%% also accepts a gb_trees:next/1 result while recursing. Returns ok.
+gb_trees_foreach(_, none) -> ok;
+gb_trees_foreach(Fun, {Key, Val, It}) ->
+    Fun(Key, Val), gb_trees_foreach(Fun, gb_trees:next(It));
+gb_trees_foreach(Fun, Tree) ->
+    gb_trees_foreach(Fun, gb_trees:next(gb_trees:iterator(Tree))).
+
+%% Prepend Value to the list stored under Key, starting a new list when
+%% Key is not yet in Tree.
+gb_trees_cons(Key, Value, Tree) ->
+    Old = case gb_trees:lookup(Key, Tree) of {value, Vs} -> Vs; none -> [] end,
+    gb_trees:enter(Key, [Value | Old], Tree).
+
+should_confirm_message(#delivery{msg_seq_no = undefined}, _State) ->
+    never;
+should_confirm_message(#delivery{sender     = ChPid,
                                  msg_seq_no = MsgSeqNo,
                                  message    = #basic_message {
                                    is_persistent = true,
-                                   guid          = Guid}},
-                       State =
-                           #q{guid_to_channel = GTC,
-                              q               = #amqqueue{durable = true}}) ->
-    {confirm,
-     State#q{guid_to_channel = dict:store(Guid, {ChPid, MsgSeqNo}, GTC)}};
-record_confirm_message(_Delivery, State) ->
-    {no_confirm, State}.
+                                   id            = MsgId}},
+                       #q{q = #amqqueue{durable = true}}) ->
+    {eventually, ChPid, MsgSeqNo, MsgId};
+should_confirm_message(_Delivery, _State) ->
+    immediately.
+
+%% True only for the {eventually, _, _, _} form of a confirm decision.
+needs_confirming(C) -> case C of {eventually, _, _, _} -> true; _ -> false end.
+
+%% Remember the channel and seq no to confirm once MsgId is confirmed.
+maybe_record_confirm_message({eventually, ChPid, MsgSeqNo, MsgId},
+                             State = #q{msg_id_to_channel = Pending}) ->
+    State#q{msg_id_to_channel = dict:store(MsgId, {ChPid, MsgSeqNo}, Pending)};
+maybe_record_confirm_message(_Confirm, State) -> State.
 
 run_message_queue(State) ->
     Funs = {fun deliver_from_queue_pred/2,
@@ -451,67 +491,71 @@ run_message_queue(State) ->
     {_IsEmpty1, State2} = deliver_msgs_to_consumers(Funs, IsEmpty, State1),
     State2.
 
-attempt_delivery(#delivery{txn        = none,
-                           sender     = ChPid,
-                           message    = Message,
-                           msg_seq_no = MsgSeqNo},
-                 {NeedsConfirming, State = #q{backing_queue = BQ}}) ->
-    %% must confirm immediately if it has a MsgSeqNo and not NeedsConfirming
-    case {NeedsConfirming, MsgSeqNo} of
-        {_, undefined}  -> ok;
-        {no_confirm, _} -> rabbit_channel:confirm(ChPid, [MsgSeqNo]);
-        {confirm, _}    -> ok
+attempt_delivery(Delivery = #delivery{sender     = ChPid,
+                                      message    = Message,
+                                      msg_seq_no = MsgSeqNo},
+                 State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
+    Confirm = should_confirm_message(Delivery, State),
+    case Confirm of
+        immediately -> rabbit_channel:confirm(ChPid, [MsgSeqNo]);
+        _           -> ok
     end,
-    PredFun = fun (IsEmpty, _State) -> not IsEmpty end,
-    DeliverFun =
-        fun (AckRequired, false, State1 = #q{backing_queue_state = BQS}) ->
-                %% we don't need an expiry here because messages are
-                %% not being enqueued, so we use an empty
-                %% message_properties.
-                {AckTag, BQS1} =
-                    BQ:publish_delivered(
-                      AckRequired, Message,
-                      (?BASE_MESSAGE_PROPERTIES)#message_properties{
-                        needs_confirming = (NeedsConfirming =:= confirm)},
-                      BQS),
-                {{Message, false, AckTag}, true,
-                 State1#q{backing_queue_state = BQS1}}
-        end,
-    {Delivered, State1} =
-        deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State),
-    {Delivered, NeedsConfirming, State1};
-attempt_delivery(#delivery{txn = Txn,
-                           sender  = ChPid,
-                           message = Message},
-                 {NeedsConfirming,
-                  State = #q{backing_queue = BQ,
-                            backing_queue_state = BQS}}) ->
-    store_ch_record((ch_record(ChPid))#cr{txn = Txn}),
-    {true,
-     NeedsConfirming,
-     State#q{backing_queue_state =
-                 BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, BQS)}}.
-
-deliver_or_enqueue(Delivery, State) ->
-    case attempt_delivery(Delivery, record_confirm_message(Delivery, State)) of
-        {true, _, State1} ->
-            {true, State1};
-        {false, NeedsConfirming, State1 = #q{backing_queue = BQ,
-                                             backing_queue_state = BQS}} ->
-            #delivery{message = Message} = Delivery,
-            BQS1 = BQ:publish(Message,
-                              (message_properties(State)) #message_properties{
-                                needs_confirming =
-                                    (NeedsConfirming =:= confirm)},
-                              BQS),
-            {false, ensure_ttl_timer(State1#q{backing_queue_state = BQS1})}
+    case BQ:is_duplicate(Message, BQS) of
+        {false, BQS1} ->
+            PredFun = fun (IsEmpty, _State) -> not IsEmpty end,
+            DeliverFun =
+                fun (AckRequired, false,
+                     State1 = #q{backing_queue_state = BQS2}) ->
+                        %% we don't need an expiry here because
+                        %% messages are not being enqueued, so we use
+                        %% an empty message_properties.
+                        {AckTag, BQS3} =
+                            BQ:publish_delivered(
+                              AckRequired, Message,
+                              (?BASE_MESSAGE_PROPERTIES)#message_properties{
+                                needs_confirming = needs_confirming(Confirm)},
+                              ChPid, BQS2),
+                        {{Message, false, AckTag}, true,
+                         State1#q{backing_queue_state = BQS3}}
+                end,
+            {Delivered, State2} =
+                deliver_msgs_to_consumers({ PredFun, DeliverFun }, false,
+                                          State#q{backing_queue_state = BQS1}),
+            {Delivered, Confirm, State2};
+        {Duplicate, BQS1} ->
+            %% if the message has previously been seen by the BQ then
+            %% it must have been seen under the same circumstances as
+            %% now: i.e. if it is now a deliver_immediately then it
+            %% must have been before.
+            Delivered = case Duplicate of
+                            published -> true;
+                            discarded -> false
+                        end,
+            {Delivered, Confirm, State#q{backing_queue_state = BQS1}}
+    end.
+
+%% Attempt delivery first; when nothing was delivered, publish the message
+%% to the backing queue and call ensure_ttl_timer/1. Returns the new state.
+deliver_or_enqueue(Delivery = #delivery{message = Message,
+                                        sender  = ChPid}, State) ->
+    {Delivered, Confirm, State1} = attempt_delivery(Delivery, State),
+    State2 = maybe_record_confirm_message(Confirm, State1),
+    #q{backing_queue = BQ, backing_queue_state = BQS} = State2,
+    case Delivered of
+        true  -> State2;
+        false -> Props = (message_properties(State))#message_properties{
+                           needs_confirming = needs_confirming(Confirm)},
+                 BQS1 = BQ:publish(Message, Props, ChPid, BQS),
+                 ensure_ttl_timer(State2#q{backing_queue_state = BQS1})
     end.
 
 requeue_and_run(AckTags, State = #q{backing_queue = BQ, ttl=TTL}) ->
-    maybe_run_queue_via_backing_queue(
-      fun (BQS) ->
-              {[], BQ:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS)}
-      end, State).
+    run_backing_queue(
+      BQ, fun (M, BQS) ->
+                  {_MsgIds, BQS1} =
+                      M:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS),
+                  BQS1
+          end, State).
 
 fetch(AckRequired, State = #q{backing_queue_state = BQS,
                               backing_queue       = BQ}) ->
@@ -567,7 +611,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) ->
     case lookup_ch(DownPid) of
         not_found ->
             {ok, State};
-        C = #cr{ch_pid = ChPid, txn = Txn, acktags = ChAckTags} ->
+        C = #cr{ch_pid = ChPid, acktags = ChAckTags} ->
             ok = erase_ch_record(C),
             State1 = State#q{
                        exclusive_consumer = case Holder of
@@ -580,13 +624,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) ->
                                              ChPid, State#q.blocked_consumers)},
             case should_auto_delete(State1) of
                 true  -> {stop, State1};
-                false -> State2 = case Txn of
-                                      none -> State1;
-                                      _    -> rollback_transaction(Txn, C,
-                                                                   State1)
-                                  end,
-                         {ok, requeue_and_run(sets:to_list(ChAckTags),
-                                              ensure_expiry_timer(State2))}
+                false -> {ok, requeue_and_run(sets:to_list(ChAckTags),
+                                              ensure_expiry_timer(State1))}
             end
     end.
 
@@ -613,37 +652,22 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg).
 
 qname(#q{q = #amqqueue{name = QName}}) -> QName.
 
-backing_queue_idle_timeout(State = #q{backing_queue = BQ}) ->
-    maybe_run_queue_via_backing_queue(
-      fun (BQS) -> {[], BQ:idle_timeout(BQS)} end, State).
-
-maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) ->
-    {Guids, BQS1} = Fun(BQS),
-    run_message_queue(
-      confirm_messages(Guids, State#q{backing_queue_state = BQS1})).
-
-commit_transaction(Txn, From, C = #cr{acktags = ChAckTags},
-                   State = #q{backing_queue       = BQ,
-                              backing_queue_state = BQS,
-                              ttl                 = TTL}) ->
-    {AckTags, BQS1} = BQ:tx_commit(
-                        Txn, fun () -> gen_server2:reply(From, ok) end,
-                        reset_msg_expiry_fun(TTL), BQS),
-    ChAckTags1 = subtract_acks(ChAckTags, AckTags),
-    maybe_store_ch_record(C#cr{acktags = ChAckTags1, txn = none}),
-    State#q{backing_queue_state = BQS1}.
-
-rollback_transaction(Txn, C, State = #q{backing_queue = BQ,
-                                        backing_queue_state = BQS}) ->
-    {_AckTags, BQS1} = BQ:tx_rollback(Txn, BQS),
-    %% Iff we removed acktags from the channel record on ack+txn then
-    %% we would add them back in here.
-    maybe_store_ch_record(C#cr{txn = none}),
-    State#q{backing_queue_state = BQS1}.
+backing_queue_timeout(State = #q{backing_queue = BQ}) ->
+    run_backing_queue(BQ, fun (Mod, BQS) -> Mod:timeout(BQS) end, State).
+
+run_backing_queue(Mod, Fun, State = #q{backing_queue = BQ}) ->
+    BQS1 = BQ:invoke(Mod, Fun, State#q.backing_queue_state),
+    run_message_queue(State#q{backing_queue_state = BQS1}).
 
 subtract_acks(A, B) when is_list(B) ->
     lists:foldl(fun sets:del_element/2, A, B).
 
+%% Pass the delivery's message to BQ:discard/3 and keep the resulting state.
+discard_delivery(#delivery{sender = ChPid, message = Message},
+                 State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
+    BQS1 = BQ:discard(Message, ChPid, BQS),
+    State#q{backing_queue_state = BQS1}.
+
 reset_msg_expiry_fun(TTL) ->
     fun(MsgProps) ->
             MsgProps#message_properties{expiry = calculate_msg_expiry(TTL)}
@@ -653,17 +677,16 @@ message_properties(#q{ttl=TTL}) ->
     #message_properties{expiry = calculate_msg_expiry(TTL)}.
 
 calculate_msg_expiry(undefined) -> undefined;
-calculate_msg_expiry(TTL)       -> now_millis() + (TTL * 1000).
+calculate_msg_expiry(TTL)       -> now_micros() + (TTL * 1000).
 
 drop_expired_messages(State = #q{ttl = undefined}) ->
     State;
 drop_expired_messages(State = #q{backing_queue_state = BQS,
                                  backing_queue = BQ}) ->
-    Now = now_millis(),
+    Now = now_micros(),
     BQS1 = BQ:dropwhile(
-             fun (#message_properties{expiry = Expiry}) ->
-                     Now > Expiry
-             end, BQS),
+             fun (#message_properties{expiry = Expiry}) -> Now > Expiry end,
+             BQS),
     ensure_ttl_timer(State#q{backing_queue_state = BQS1}).
 
 ensure_ttl_timer(State = #q{backing_queue       = BQ,
@@ -679,7 +702,7 @@ ensure_ttl_timer(State = #q{backing_queue       = BQ,
 ensure_ttl_timer(State) ->
     State.
 
-now_millis() -> timer:now_diff(now(), {0,0,0}).
+now_micros() -> timer:now_diff(now(), {0,0,0}).
 
 infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].
 
@@ -715,16 +738,22 @@ i(memory, _) ->
     M;
 i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) ->
     BQ:status(BQS);
+i(slave_pids, #q{q = #amqqueue{name = Name}}) ->
+    {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(Name),
+    SPids;
+i(mirror_nodes, #q{q = #amqqueue{name = Name}}) ->
+    {ok, #amqqueue{mirror_nodes = MNodes}} = rabbit_amqqueue:lookup(Name),
+    MNodes;
 i(Item, _) ->
     throw({bad_argument, Item}).
 
 consumers(#q{active_consumers = ActiveConsumers,
              blocked_consumers = BlockedConsumers}) ->
     rabbit_misc:queue_fold(
-            fun ({ChPid, #consumer{tag = ConsumerTag,
-                                   ack_required = AckRequired}}, Acc) ->
-                    [{ChPid, ConsumerTag, AckRequired} | Acc]
-            end, [], queue:join(ActiveConsumers, BlockedConsumers)).
+      fun ({ChPid, #consumer{tag = ConsumerTag,
+                             ack_required = AckRequired}}, Acc) ->
+              [{ChPid, ConsumerTag, AckRequired} | Acc]
+      end, [], queue:join(ActiveConsumers, BlockedConsumers)).
 
 emit_stats(State) ->
     emit_stats(State, []).
@@ -746,29 +775,32 @@ emit_consumer_deleted(ChPid, ConsumerTag) ->
                          {channel,      ChPid},
                          {queue,        self()}]).
 
-%---------------------------------------------------------------------------
+%%----------------------------------------------------------------------------
 
 prioritise_call(Msg, _From, _State) ->
     case Msg of
-        info                                      -> 9;
-        {info, _Items}                            -> 9;
-        consumers                                 -> 9;
-        {maybe_run_queue_via_backing_queue, _Fun} -> 6;
-        _                                         -> 0
+        info                            -> 9;
+        {info, _Items}                  -> 9;
+        consumers                       -> 9;
+        _                               -> 0
     end.
 
 prioritise_cast(Msg, _State) ->
     case Msg of
-        delete_immediately                        -> 8;
-        {set_ram_duration_target, _Duration}      -> 8;
-        {set_maximum_since_use, _Age}             -> 8;
-        {ack, _Txn, _MsgIds, _ChPid}              -> 7;
-        {reject, _MsgIds, _Requeue, _ChPid}       -> 7;
-        {notify_sent, _ChPid}                     -> 7;
-        {unblock, _ChPid}                         -> 7;
-        {maybe_run_queue_via_backing_queue, _Fun} -> 6;
-        sync_timeout                              -> 6;
-        _                                         -> 0
+        update_ram_duration                  -> 8;
+        delete_immediately                   -> 8;
+        {set_ram_duration_target, _Duration} -> 8;
+        {set_maximum_since_use, _Age}        -> 8;
+        maybe_expire                         -> 8;
+        drop_expired                         -> 8;
+        emit_stats                           -> 7;
+        {ack, _AckTags, _ChPid}              -> 7;
+        {reject, _AckTags, _Requeue, _ChPid} -> 7;
+        {notify_sent, _ChPid}                -> 7;
+        {unblock, _ChPid}                    -> 7;
+        {run_backing_queue, _Mod, _Fun}      -> 6;
+        sync_timeout                         -> 6;
+        _                                    -> 0
     end.
 
 prioritise_info({'DOWN', _MonitorRef, process, DownPid, _Reason},
@@ -788,20 +820,20 @@ handle_call({init, Recover}, From,
 
 handle_call({init, Recover}, From,
             State = #q{q = #amqqueue{exclusive_owner = Owner}}) ->
-    case rpc:call(node(Owner), erlang, is_process_alive, [Owner]) of
-        true -> erlang:monitor(process, Owner),
-                declare(Recover, From, State);
-        _    -> #q{q = #amqqueue{name = QName, durable = IsDurable},
-                   backing_queue = BQ, backing_queue_state = undefined} = State,
-                gen_server2:reply(From, not_found),
-                case Recover of
-                    true -> ok;
-                    _    -> rabbit_log:warning(
-                              "Queue ~p exclusive owner went away~n", [QName])
-                end,
-                BQS = BQ:init(QName, IsDurable, Recover),
-                %% Rely on terminate to delete the queue.
-                {stop, normal, State#q{backing_queue_state = BQS}}
+    case rabbit_misc:is_process_alive(Owner) of
+        true  -> erlang:monitor(process, Owner),
+                 declare(Recover, From, State);
+        false -> #q{backing_queue = BQ, backing_queue_state = undefined,
+                    q = #amqqueue{name = QName} = Q} = State,
+                 gen_server2:reply(From, not_found),
+                 case Recover of
+                     true -> ok;
+                     _    -> rabbit_log:warning(
+                               "Queue ~p exclusive owner went away~n", [QName])
+                 end,
+                 BQS = bq_init(BQ, Q, Recover),
+                 %% Rely on terminate to delete the queue.
+                 {stop, normal, State#q{backing_queue_state = BQS}}
     end;
 
 handle_call(info, _From, State) ->
@@ -816,8 +848,7 @@ handle_call({info, Items}, _From, State) ->
 handle_call(consumers, _From, State) ->
     reply(consumers(State), State);
 
-handle_call({deliver_immediately, Delivery},
-            _From, State) ->
+handle_call({deliver_immediately, Delivery}, _From, State) ->
     %% Synchronous, "immediate" delivery mode
     %%
     %% FIXME: Is this correct semantics?
@@ -831,22 +862,16 @@ handle_call({deliver_immediately, Delivery},
     %% just all ready-to-consume queues get the message, with unready
     %% queues discarding the message?
     %%
-    {Delivered, _NeedsConfirming, State1} =
-        attempt_delivery(Delivery, record_confirm_message(Delivery, State)),
-    reply(Delivered, State1);
+    {Delivered, Confirm, State1} = attempt_delivery(Delivery, State),
+    reply(Delivered, case Delivered of
+                         true  -> maybe_record_confirm_message(Confirm, State1);
+                         false -> discard_delivery(Delivery, State1)
+                     end);
 
 handle_call({deliver, Delivery}, From, State) ->
     %% Synchronous, "mandatory" delivery mode. Reply asap.
     gen_server2:reply(From, true),
-    {_Delivered, NewState} = deliver_or_enqueue(Delivery, State),
-    noreply(NewState);
-
-handle_call({commit, Txn, ChPid}, From, State) ->
-    case lookup_ch(ChPid) of
-        not_found -> reply(ok, State);
-        C         -> noreply(run_message_queue(
-                               commit_transaction(Txn, From, C, State)))
-    end;
+    noreply(deliver_or_enqueue(Delivery, State));
 
 handle_call({notify_down, ChPid}, _From, State) ->
     %% we want to do this synchronously, so that auto_deleted queues
@@ -908,15 +933,13 @@ handle_call({basic_consume, NoAck, ChPid, LimiterPid,
                 case is_ch_blocked(C) of
                     true  -> State1#q{
                                blocked_consumers =
-                               add_consumer(
-                                 ChPid, Consumer,
-                                 State1#q.blocked_consumers)};
+                                   add_consumer(ChPid, Consumer,
+                                                State1#q.blocked_consumers)};
                     false -> run_message_queue(
                                State1#q{
                                  active_consumers =
-                                 add_consumer(
-                                   ChPid, Consumer,
-                                   State1#q.active_consumers)})
+                                     add_consumer(ChPid, Consumer,
+                                                  State1#q.active_consumers)})
                 end,
             emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume,
                                   not NoAck),
@@ -989,41 +1012,28 @@ handle_call({requeue, AckTags, ChPid}, From, State) ->
             ChAckTags1 = subtract_acks(ChAckTags, AckTags),
             maybe_store_ch_record(C#cr{acktags = ChAckTags1}),
             noreply(requeue_and_run(AckTags, State))
-    end;
-
-handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) ->
-    reply(ok, maybe_run_queue_via_backing_queue(Fun, State)).
-
+    end.
 
-handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) ->
-    noreply(maybe_run_queue_via_backing_queue(Fun, State));
+handle_cast({run_backing_queue, Mod, Fun}, State) ->
+    noreply(run_backing_queue(Mod, Fun, State));
 
 handle_cast(sync_timeout, State) ->
-    noreply(backing_queue_idle_timeout(State#q{sync_timer_ref = undefined}));
+    noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined}));
 
 handle_cast({deliver, Delivery}, State) ->
     %% Asynchronous, non-"mandatory", non-"immediate" deliver mode.
-    {_Delivered, NewState} = deliver_or_enqueue(Delivery, State),
-    noreply(NewState);
+    noreply(deliver_or_enqueue(Delivery, State));
 
-handle_cast({ack, Txn, AckTags, ChPid},
+handle_cast({ack, AckTags, ChPid},
             State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
     case lookup_ch(ChPid) of
         not_found ->
             noreply(State);
         C = #cr{acktags = ChAckTags} ->
-            {C1, State1} =
-                case Txn of
-                    none -> ChAckTags1 = subtract_acks(ChAckTags, AckTags),
-                            NewC = C#cr{acktags = ChAckTags1},
-                            BQS1 = BQ:ack(AckTags, BQS),
-                            {NewC, State#q{backing_queue_state = BQS1}};
-                    _    -> BQS1 = BQ:tx_ack(Txn, AckTags, BQS),
-                            {C#cr{txn = Txn},
-                             State#q{backing_queue_state = BQS1}}
-                end,
-            maybe_store_ch_record(C1),
-            noreply(State1)
+            maybe_store_ch_record(C#cr{acktags = subtract_acks(
+                                                   ChAckTags, AckTags)}),
+            {_Guids, BQS1} = BQ:ack(AckTags, BQS),
+            noreply(State#q{backing_queue_state = BQS1})
     end;
 
 handle_cast({reject, AckTags, Requeue, ChPid},
@@ -1037,17 +1047,11 @@ handle_cast({reject, AckTags, Requeue, ChPid},
             maybe_store_ch_record(C#cr{acktags = ChAckTags1}),
             noreply(case Requeue of
                         true  -> requeue_and_run(AckTags, State);
-                        false -> BQS1 = BQ:ack(AckTags, BQS),
+                        false -> {_Guids, BQS1} = BQ:ack(AckTags, BQS),
                                  State#q{backing_queue_state = BQS1}
                     end)
     end;
 
-handle_cast({rollback, Txn, ChPid}, State) ->
-    noreply(case lookup_ch(ChPid) of
-                not_found -> State;
-                C         -> rollback_transaction(Txn, C, State)
-            end);
-
 handle_cast(delete_immediately, State) ->
     {stop, normal, State};
 
@@ -1134,7 +1138,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) ->
     end;
 
 handle_info(timeout, State) ->
-    noreply(backing_queue_idle_timeout(State));
+    noreply(backing_queue_timeout(State));
 
 handle_info({'EXIT', _Pid, Reason}, State) ->
     {stop, Reason, State};
@@ -1160,3 +1164,5 @@ handle_pre_hibernate(State = #q{backing_queue = BQ,
     State1 = State#q{stats_timer = rabbit_event:stop_stats_timer(StatsTimer),
                      backing_queue_state = BQS3},
     {hibernate, stop_rate_timer(State1)}.
+
+format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl
index 1344956e..2c28adce 100644
--- a/src/rabbit_amqqueue_sup.erl
+++ b/src/rabbit_amqqueue_sup.erl
@@ -18,7 +18,7 @@
 
 -behaviour(supervisor2).
 
--export([start_link/0, start_child/1]).
+-export([start_link/0, start_child/2]).
 
 -export([init/1]).
 
@@ -29,8 +29,8 @@
 start_link() ->
     supervisor2:start_link({local, ?SERVER}, ?MODULE, []).
 
-start_child(Args) ->
-    supervisor2:start_child(?SERVER, Args).
+start_child(Node, Args) ->
+    supervisor2:start_child({?SERVER, Node}, Args).
 
 init([]) ->
     {ok, {{simple_one_for_one_terminate, 10, 10},
diff --git a/src/rabbit_auth_backend.erl b/src/rabbit_auth_backend.erl
index 09820c5b..ade158bb 100644
--- a/src/rabbit_auth_backend.erl
+++ b/src/rabbit_auth_backend.erl
@@ -36,17 +36,13 @@ behaviour_info(callbacks) ->
      %%     Client failed authentication. Log and die.
      {check_user_login, 2},
 
-     %% Given #user, vhost path and permission, can a user access a vhost?
-     %% Permission is read  - learn of the existence of (only relevant for
-     %%                       management plugin)
-     %%            or write - log in
-     %%
+     %% Given #user and vhost, can a user log in to a vhost?
      %% Possible responses:
      %% true
      %% false
      %% {error, Error}
      %%     Something went wrong. Log and die.
-     {check_vhost_access, 3},
+     {check_vhost_access, 2},
 
      %% Given #user, resource and permission, can a user access a resource?
      %%
diff --git a/src/rabbit_auth_backend_internal.erl b/src/rabbit_auth_backend_internal.erl
index a564480b..6a018bd1 100644
--- a/src/rabbit_auth_backend_internal.erl
+++ b/src/rabbit_auth_backend_internal.erl
@@ -20,15 +20,17 @@
 -behaviour(rabbit_auth_backend).
 
 -export([description/0]).
--export([check_user_login/2, check_vhost_access/3, check_resource_access/3]).
+-export([check_user_login/2, check_vhost_access/2, check_resource_access/3]).
 
--export([add_user/2, delete_user/1, change_password/2, set_admin/1,
-         clear_admin/1, list_users/0, lookup_user/1, clear_password/1]).
+-export([add_user/2, delete_user/1, change_password/2, set_tags/2,
+         list_users/0, user_info_keys/0, lookup_user/1, clear_password/1]).
 -export([make_salt/0, check_password/2, change_password_hash/2,
          hash_password/1]).
 -export([set_permissions/5, clear_permissions/2,
          list_permissions/0, list_vhost_permissions/1, list_user_permissions/1,
-         list_user_vhost_permissions/2]).
+         list_user_vhost_permissions/2, perms_info_keys/0,
+         vhost_perms_info_keys/0, user_perms_info_keys/0,
+         user_vhost_perms_info_keys/0]).
 
 -include("rabbit_auth_backend_spec.hrl").
 
@@ -48,33 +50,35 @@
                                  rabbit_types:password_hash()) -> 'ok').
 -spec(hash_password/1 :: (rabbit_types:password())
                          -> rabbit_types:password_hash()).
--spec(set_admin/1 :: (rabbit_types:username()) -> 'ok').
--spec(clear_admin/1 :: (rabbit_types:username()) -> 'ok').
--spec(list_users/0 :: () -> [{rabbit_types:username(), boolean()}]).
+-spec(set_tags/2 :: (rabbit_types:username(), [atom()]) -> 'ok').
+-spec(list_users/0 :: () -> rabbit_types:infos()).
+-spec(user_info_keys/0 :: () -> rabbit_types:info_keys()).
 -spec(lookup_user/1 :: (rabbit_types:username())
-        -> rabbit_types:ok(rabbit_types:internal_user())
-               | rabbit_types:error('not_found')).
+                       -> rabbit_types:ok(rabbit_types:internal_user())
+                              | rabbit_types:error('not_found')).
 -spec(set_permissions/5 ::(rabbit_types:username(), rabbit_types:vhost(),
                            regexp(), regexp(), regexp()) -> 'ok').
 -spec(clear_permissions/2 :: (rabbit_types:username(), rabbit_types:vhost())
                              -> 'ok').
--spec(list_permissions/0 ::
-        () -> [{rabbit_types:username(), rabbit_types:vhost(),
-                regexp(), regexp(), regexp()}]).
+-spec(list_permissions/0 :: () -> rabbit_types:infos()).
 -spec(list_vhost_permissions/1 ::
-        (rabbit_types:vhost()) -> [{rabbit_types:username(),
-                                    regexp(), regexp(), regexp()}]).
+        (rabbit_types:vhost()) -> rabbit_types:infos()).
 -spec(list_user_permissions/1 ::
-        (rabbit_types:username()) -> [{rabbit_types:vhost(),
-                                       regexp(), regexp(), regexp()}]).
+        (rabbit_types:username()) -> rabbit_types:infos()).
 -spec(list_user_vhost_permissions/2 ::
         (rabbit_types:username(), rabbit_types:vhost())
-        -> [{regexp(), regexp(), regexp()}]).
-
+        -> rabbit_types:infos()).
+-spec(perms_info_keys/0 :: () -> rabbit_types:info_keys()).
+-spec(vhost_perms_info_keys/0 :: () -> rabbit_types:info_keys()).
+-spec(user_perms_info_keys/0 :: () -> rabbit_types:info_keys()).
+-spec(user_vhost_perms_info_keys/0 :: () -> rabbit_types:info_keys()).
 -endif.
 
 %%----------------------------------------------------------------------------
 
+-define(PERMS_INFO_KEYS, [configure, write, read]).
+-define(USER_INFO_KEYS, [user, tags]).
+
 %% Implementation of rabbit_auth_backend
 
 description() ->
@@ -85,20 +89,19 @@ check_user_login(Username, []) ->
     internal_check_user_login(Username, fun(_) -> true end);
 check_user_login(Username, [{password, Password}]) ->
     internal_check_user_login(
-      Username,
-      fun(#internal_user{password_hash = Hash}) ->
-              check_password(Password, Hash)
-      end);
+      Username, fun(#internal_user{password_hash = Hash}) ->
+                        check_password(Password, Hash)
+                end);
 check_user_login(Username, AuthProps) ->
     exit({unknown_auth_props, Username, AuthProps}).
 
 internal_check_user_login(Username, Fun) ->
     Refused = {refused, "user '~s' - invalid credentials", [Username]},
     case lookup_user(Username) of
-        {ok, User = #internal_user{is_admin = IsAdmin}} ->
+        {ok, User = #internal_user{tags = Tags}} ->
             case Fun(User) of
                 true -> {ok, #user{username     = Username,
-                                   is_admin     = IsAdmin,
+                                   tags         = Tags,
                                    auth_backend = ?MODULE,
                                    impl         = User}};
                 _    -> Refused
@@ -107,16 +110,13 @@ internal_check_user_login(Username, Fun) ->
             Refused
     end.
 
-check_vhost_access(#user{is_admin = true},    _VHostPath, read) ->
-    true;
-
-check_vhost_access(#user{username = Username}, VHostPath, _) ->
+check_vhost_access(#user{username = Username}, VHost) ->
     %% TODO: use dirty ops instead
     rabbit_misc:execute_mnesia_transaction(
       fun () ->
               case mnesia:read({rabbit_user_permission,
                                 #user_vhost{username     = Username,
-                                            virtual_host = VHostPath}}) of
+                                            virtual_host = VHost}}) of
                   []   -> false;
                   [_R] -> true
               end
@@ -131,12 +131,11 @@ check_resource_access(#user{username = Username},
         [] ->
             false;
         [#user_permission{permission = P}] ->
-            PermRegexp =
-                case element(permission_index(Permission), P) of
-                    %% <<"^$">> breaks Emacs' erlang mode
-                    <<"">> -> <<$^, $$>>;
-                    RE     -> RE
-                end,
+            PermRegexp = case element(permission_index(Permission), P) of
+                             %% <<"^$">> breaks Emacs' erlang mode
+                             <<"">> -> <<$^, $$>>;
+                             RE     -> RE
+                         end,
             case re:run(Name, PermRegexp, [{capture, none}]) of
                 match    -> true;
                 nomatch  -> false
@@ -160,7 +159,7 @@ add_user(Username, Password) ->
                                  #internal_user{username = Username,
                                                 password_hash =
                                                     hash_password(Password),
-                                                is_admin = false},
+                                                tags = []},
                                  write);
                       _ ->
                           mnesia:abort({user_already_exists, Username})
@@ -221,18 +220,12 @@ salted_md5(Salt, Cleartext) ->
     Salted = <<Salt/binary, Cleartext/binary>>,
     erlang:md5(Salted).
 
-set_admin(Username) ->
-    set_admin(Username, true).
-
-clear_admin(Username) ->
-    set_admin(Username, false).
-
-set_admin(Username, IsAdmin) ->
+set_tags(Username, Tags) ->
     R = update_user(Username, fun(User) ->
-                                      User#internal_user{is_admin = IsAdmin}
+                                      User#internal_user{tags = Tags}
                               end),
-    rabbit_log:info("Set user admin flag for user ~p to ~p~n",
-                    [Username, IsAdmin]),
+    rabbit_log:info("Set user tags for user ~p to ~p~n",
+                    [Username, Tags]),
     R.
 
 update_user(Username, Fun) ->
@@ -245,10 +238,12 @@ update_user(Username, Fun) ->
         end)).
 
 list_users() ->
-    [{Username, IsAdmin} ||
-        #internal_user{username = Username, is_admin = IsAdmin} <-
+    [[{user, Username}, {tags, Tags}] ||
+        #internal_user{username = Username, tags = Tags} <-
             mnesia:dirty_match_object(rabbit_user, #internal_user{_ = '_'})].
 
+user_info_keys() -> ?USER_INFO_KEYS.
+
 lookup_user(Username) ->
     rabbit_misc:dirty_read({rabbit_user, Username}).
 
@@ -287,32 +282,38 @@ clear_permissions(Username, VHostPath) ->
                                                 virtual_host = VHostPath}})
         end)).
 
+perms_info_keys()            -> [user, vhost | ?PERMS_INFO_KEYS].
+vhost_perms_info_keys()      -> [user | ?PERMS_INFO_KEYS].
+user_perms_info_keys()       -> [vhost | ?PERMS_INFO_KEYS].
+user_vhost_perms_info_keys() -> ?PERMS_INFO_KEYS.
+
 list_permissions() ->
-    [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} ||
-        {Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} <-
-            list_permissions(match_user_vhost('_', '_'))].
+    list_permissions(perms_info_keys(), match_user_vhost('_', '_')).
 
 list_vhost_permissions(VHostPath) ->
-    [{Username, ConfigurePerm, WritePerm, ReadPerm} ||
-        {Username, _, ConfigurePerm, WritePerm, ReadPerm} <-
-            list_permissions(rabbit_vhost:with(
-                               VHostPath, match_user_vhost('_', VHostPath)))].
+    list_permissions(
+      vhost_perms_info_keys(),
+      rabbit_vhost:with(VHostPath, match_user_vhost('_', VHostPath))).
 
 list_user_permissions(Username) ->
-    [{VHostPath, ConfigurePerm, WritePerm, ReadPerm} ||
-        {_, VHostPath, ConfigurePerm, WritePerm, ReadPerm} <-
-            list_permissions(rabbit_misc:with_user(
-                               Username, match_user_vhost(Username, '_')))].
+    list_permissions(
+      user_perms_info_keys(),
+      rabbit_misc:with_user(Username, match_user_vhost(Username, '_'))).
 
 list_user_vhost_permissions(Username, VHostPath) ->
-    [{ConfigurePerm, WritePerm, ReadPerm} ||
-        {_, _, ConfigurePerm, WritePerm, ReadPerm} <-
-            list_permissions(rabbit_misc:with_user_and_vhost(
-                               Username, VHostPath,
-                               match_user_vhost(Username, VHostPath)))].
-
-list_permissions(QueryThunk) ->
-    [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} ||
+    list_permissions(
+      user_vhost_perms_info_keys(),
+      rabbit_misc:with_user_and_vhost(
+        Username, VHostPath, match_user_vhost(Username, VHostPath))).
+
+filter_props(Keys, Props) -> [T || T = {K, _} <- Props, lists:member(K, Keys)].
+
+list_permissions(Keys, QueryThunk) ->
+    [filter_props(Keys, [{user,      Username},
+                         {vhost,     VHostPath},
+                         {configure, ConfigurePerm},
+                         {write,     WritePerm},
+                         {read,      ReadPerm}]) ||
         #user_permission{user_vhost = #user_vhost{username     = Username,
                                                   virtual_host = VHostPath},
                          permission = #permission{ configure = ConfigurePerm,
diff --git a/src/rabbit_auth_mechanism.erl b/src/rabbit_auth_mechanism.erl
index 1d14f9f0..897199ee 100644
--- a/src/rabbit_auth_mechanism.erl
+++ b/src/rabbit_auth_mechanism.erl
@@ -23,6 +23,10 @@ behaviour_info(callbacks) ->
      %% A description.
      {description, 0},
 
+     %% If this mechanism is enabled, should it be offered for a given socket?
+     %% (primarily so EXTERNAL can be SSL-only)
+     {should_offer, 1},
+
      %% Called before authentication starts. Should create a state
      %% object to be passed through all the stages of authentication.
      {init, 1},
diff --git a/src/rabbit_auth_mechanism_amqplain.erl b/src/rabbit_auth_mechanism_amqplain.erl
index 5e422eee..b8682a46 100644
--- a/src/rabbit_auth_mechanism_amqplain.erl
+++ b/src/rabbit_auth_mechanism_amqplain.erl
@@ -19,7 +19,7 @@
 
 -behaviour(rabbit_auth_mechanism).
 
--export([description/0, init/1, handle_response/2]).
+-export([description/0, should_offer/1, init/1, handle_response/2]).
 
 -include("rabbit_auth_mechanism_spec.hrl").
 
@@ -38,6 +38,9 @@ description() ->
     [{name, <<"AMQPLAIN">>},
      {description, <<"QPid AMQPLAIN mechanism">>}].
 
+should_offer(_Sock) ->
+    true.
+
 init(_Sock) ->
     [].
 
@@ -51,5 +54,5 @@ handle_response(Response, _State) ->
         _ ->
             {protocol_error,
              "AMQPLAIN auth info ~w is missing LOGIN or PASSWORD field",
-              [LoginTable]}
+             [LoginTable]}
     end.
diff --git a/src/rabbit_auth_mechanism_cr_demo.erl b/src/rabbit_auth_mechanism_cr_demo.erl
index 7fd20f8b..acbb6e48 100644
--- a/src/rabbit_auth_mechanism_cr_demo.erl
+++ b/src/rabbit_auth_mechanism_cr_demo.erl
@@ -19,7 +19,7 @@
 
 -behaviour(rabbit_auth_mechanism).
 
--export([description/0, init/1, handle_response/2]).
+-export([description/0, should_offer/1, init/1, handle_response/2]).
 
 -include("rabbit_auth_mechanism_spec.hrl").
 
@@ -43,6 +43,9 @@ description() ->
      {description, <<"RabbitMQ Demo challenge-response authentication "
                      "mechanism">>}].
 
+should_offer(_Sock) ->
+    true.
+
 init(_Sock) ->
     #state{}.
 
@@ -50,10 +53,8 @@ handle_response(Response, State = #state{username = undefined}) ->
     {challenge, <<"Please tell me your password">>,
      State#state{username = Response}};
 
-handle_response(Response, #state{username = Username}) ->
-    case Response of
-        <<"My password is ", Password/binary>> ->
-            rabbit_access_control:check_user_pass_login(Username, Password);
-        _ ->
-            {protocol_error, "Invalid response '~s'", [Response]}
-    end.
+handle_response(<<"My password is ", Password/binary>>,
+                #state{username = Username}) ->
+    rabbit_access_control:check_user_pass_login(Username, Password);
+handle_response(Response, _State) ->
+    {protocol_error, "Invalid response '~s'", [Response]}.
diff --git a/src/rabbit_auth_mechanism_plain.erl b/src/rabbit_auth_mechanism_plain.erl
index 1ca07018..2448acb6 100644
--- a/src/rabbit_auth_mechanism_plain.erl
+++ b/src/rabbit_auth_mechanism_plain.erl
@@ -19,7 +19,7 @@
 
 -behaviour(rabbit_auth_mechanism).
 
--export([description/0, init/1, handle_response/2]).
+-export([description/0, should_offer/1, init/1, handle_response/2]).
 
 -include("rabbit_auth_mechanism_spec.hrl").
 
@@ -41,6 +41,9 @@ description() ->
     [{name, <<"PLAIN">>},
      {description, <<"SASL PLAIN authentication mechanism">>}].
 
+should_offer(_Sock) ->
+    true.
+
 init(_Sock) ->
     [].
 
@@ -62,15 +65,12 @@ extract_user_pass(Response) ->
     end.
 
 extract_elem(<<0:8, Rest/binary>>) ->
-    Count = next_null_pos(Rest),
+    Count = next_null_pos(Rest, 0),
     <<Elem:Count/binary, Rest1/binary>> = Rest,
     {ok, Elem, Rest1};
 extract_elem(_) ->
     error.
 
-next_null_pos(Bin) ->
-    next_null_pos(Bin, 0).
-
 next_null_pos(<<>>, Count)                  -> Count;
 next_null_pos(<<0:8, _Rest/binary>>, Count) -> Count;
 next_null_pos(<<_:8, Rest/binary>>,  Count) -> next_null_pos(Rest, Count + 1).
diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl
index 6a21e10f..77278416 100644
--- a/src/rabbit_backing_queue.erl
+++ b/src/rabbit_backing_queue.erl
@@ -33,26 +33,66 @@ behaviour_info(callbacks) ->
      {stop, 0},
 
      %% Initialise the backing queue and its state.
+     %%
+     %% Takes
+     %% 1. the amqqueue record
+     %% 2. a boolean indicating whether the queue is an existing queue
+     %%    that should be recovered
+     %% 3. an asynchronous callback which accepts a function of type
+     %%    backing-queue-state to backing-queue-state. This callback
+     %%    function can be safely invoked from any process, which
+     %%    makes it useful for passing messages back into the backing
+     %%    queue, especially as the backing queue does not have
+     %%    control of its own mailbox.
      {init, 3},
 
      %% Called on queue shutdown when queue isn't being deleted.
-     {terminate, 1},
+     {terminate, 2},
 
      %% Called when the queue is terminating and needs to delete all
      %% its content.
-     {delete_and_terminate, 1},
+     {delete_and_terminate, 2},
 
      %% Remove all messages in the queue, but not messages which have
      %% been fetched and are pending acks.
      {purge, 1},
 
      %% Publish a message.
-     {publish, 3},
+     {publish, 4},
 
      %% Called for messages which have already been passed straight
      %% out to a client. The queue will be empty for these calls
      %% (i.e. saves the round trip through the backing queue).
-     {publish_delivered, 4},
+     {publish_delivered, 5},
+
+     %% Return ids of messages which have been confirmed since
+     %% the last invocation of this function (or initialisation).
+     %%
+     %% Message ids should only appear in the result of
+     %% drain_confirmed under the following circumstances:
+     %%
+     %% 1. The message appears in a call to publish_delivered/5 and
+     %%    the first argument (ack_required) is false; or
+     %% 2. The message is fetched from the queue with fetch/2 and the
+     %%    first argument (ack_required) is false; or
+     %% 3. The message is acked (ack/2 is called for the message); or
+     %% 4. The message is fully fsync'd to disk in such a way that the
+     %%    recovery of the message is guaranteed in the event of a
+     %%    crash of this rabbit node (excluding hardware failure).
+     %%
+     %% In addition to the above conditions, a message id may only
+     %% appear in the result of drain_confirmed if
+     %% #message_properties.needs_confirming = true when the msg was
+     %% published (through whichever means) to the backing queue.
+     %%
+     %% It is legal for the same message id to appear in the results
+     %% of multiple calls to drain_confirmed, which means that the
+     %% backing queue is not required to keep track of which messages
+     %% it has already confirmed. The confirm will be issued to the
+     %% publisher the first time the message id appears in the result
+     %% of drain_confirmed. All subsequent appearances of that message
+     %% id will be ignored.
+     {drain_confirmed, 1},
 
      %% Drop messages from the head of the queue while the supplied
      %% predicate returns true.
@@ -62,24 +102,9 @@ behaviour_info(callbacks) ->
      {fetch, 2},
 
      %% Acktags supplied are for messages which can now be forgotten
-     %% about. Must return 1 guid per Ack, in the same order as Acks.
+     %% about. Must return 1 msg_id per Ack, in the same order as Acks.
      {ack, 2},
 
-     %% A publish, but in the context of a transaction.
-     {tx_publish, 4},
-
-     %% Acks, but in the context of a transaction.
-     {tx_ack, 3},
-
-     %% Undo anything which has been done in the context of the
-     %% specified transaction.
-     {tx_rollback, 2},
-
-     %% Commit a transaction. The Fun passed in must be called once
-     %% the messages have really been commited. This CPS permits the
-     %% possibility of commit coalescing.
-     {tx_commit, 4},
-
      %% Reinsert messages into the queue which have already been
      %% delivered and were pending acknowledgement.
      {requeue, 3},
@@ -107,22 +132,40 @@ behaviour_info(callbacks) ->
      %% queue.
      {ram_duration, 1},
 
-     %% Should 'idle_timeout' be called as soon as the queue process
+     %% Should 'timeout' be called as soon as the queue process
      %% can manage (either on an empty mailbox, or when a timer
      %% fires)?
-     {needs_idle_timeout, 1},
+     {needs_timeout, 1},
 
-     %% Called (eventually) after needs_idle_timeout returns
-     %% 'true'. Note this may be called more than once for each 'true'
-     %% returned from needs_idle_timeout.
-     {idle_timeout, 1},
+     %% Called (eventually) after needs_timeout returns 'idle' or
+     %% 'timed'.  Note this may be called more than once for each
+     %% 'idle' or 'timed' returned from needs_timeout.
+     {timeout, 1},
 
      %% Called immediately before the queue hibernates.
      {handle_pre_hibernate, 1},
 
      %% Exists for debugging purposes, to be able to expose state via
      %% rabbitmqctl list_queues backing_queue_status
-     {status, 1}
+     {status, 1},
+
+     %% Passed a function to be invoked with the relevant backing
+     %% queue's state. Useful for when the backing queue or other
+     %% components need to pass functions into the backing queue.
+     {invoke, 3},
+
+     %% Called prior to a publish or publish_delivered call. Allows
+     %% the BQ to signal that it's already seen this message (and in
+     %% what capacity - i.e. was it published previously or discarded
+     %% previously) and thus the message should be dropped.
+     {is_duplicate, 2},
+
+     %% Called to inform the BQ about messages which have reached the
+     %% queue, but are not going to be further passed to BQ for some
+     %% reason. Note that this may be invoked for messages for
+     %% which BQ:is_duplicate/2 has already returned {'published' |
+     %% 'discarded', BQS}.
+     {discard, 3}
     ];
 behaviour_info(_Other) ->
     undefined.
diff --git a/src/rabbit_backing_queue_qc.erl b/src/rabbit_backing_queue_qc.erl
new file mode 100644
index 00000000..d358a041
--- /dev/null
+++ b/src/rabbit_backing_queue_qc.erl
@@ -0,0 +1,392 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2011-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_backing_queue_qc).
+-ifdef(use_proper_qc).
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+-include_lib("proper/include/proper.hrl").
+
+-behaviour(proper_statem).
+
+-define(BQMOD, rabbit_variable_queue).
+-define(QUEUE_MAXLEN, 10000).
+-define(TIMEOUT_LIMIT, 100).
+
+-define(RECORD_INDEX(Key, Record),
+    proplists:get_value(Key, lists:zip(
+       record_info(fields, Record), lists:seq(2, record_info(size, Record))))).
+
+-export([initial_state/0, command/1, precondition/2, postcondition/3,
+         next_state/3]).
+
+-export([prop_backing_queue_test/0, publish_multiple/4, timeout/2]).
+
+-record(state, {bqstate,    %% backing queue state threaded through ?BQMOD calls
+                len,        %% int: number of messages in the model queue
+                messages,   %% queue of {msg_props, basic_msg}
+                acks,       %% orddict of acktag => [{msg_props, basic_msg}]
+                confirms}). %% gb_set of msgid published with needs_confirming
+
+%% Initialise model: zero length, no messages, no pending acks or confirms,
+%% around the backing queue state returned by qc_variable_queue_init/1.
+initial_state() ->
+    #state{bqstate  = qc_variable_queue_init(qc_test_queue()),
+           len      = 0,
+           messages = queue:new(),
+           acks     = orddict:new(),
+           confirms = gb_sets:new()}.
+
+%% Property: for every command sequence generated from initial_state/0,
+%% backing_queue_test/1 must hold (i.e. run_commands/2 yields ok).
+prop_backing_queue_test() ->
+    ?FORALL(Cmds, commands(?MODULE, initial_state()),
+        backing_queue_test(Cmds)).
+
+backing_queue_test(Cmds) ->
+    {ok, FileSizeLimit} =
+        application:get_env(rabbit, msg_store_file_size_limit),
+    application:set_env(rabbit, msg_store_file_size_limit, 512,
+                        infinity),
+    {ok, MaxJournal} =
+        application:get_env(rabbit, queue_index_max_journal_entries),
+    application:set_env(rabbit, queue_index_max_journal_entries, 128,
+                        infinity),
+    %% Both limits are now lowered (512 / 128); run the commands under them.
+    {_H, #state{bqstate = BQ}, Res} = run_commands(?MODULE, Cmds),
+    %% Put back the limits that were in force before the run.
+    application:set_env(rabbit, msg_store_file_size_limit,
+                        FileSizeLimit, infinity),
+    application:set_env(rabbit, queue_index_max_journal_entries,
+                        MaxJournal, infinity),
+    %% Dispose of the final queue state; Res is printed if the property fails.
+    ?BQMOD:delete_and_terminate(shutdown, BQ),
+    ?WHENFAIL(
+        io:format("Result: ~p~n", [Res]),
+        aggregate(command_names(Cmds), Res =:= ok)).
+
+%% Commands
+
+%% Command frequencies are tuned so that queues are normally reasonably
+%% short, but they may sometimes exceed ?QUEUE_MAXLEN. Publish-multiple
+%% and purging cause extreme queue lengths, so these have lower probabilities.
+%% Fetches are sufficiently frequent so that commands that need acktags
+%% get decent coverage.
+
+command(S) ->
+    frequency([{10, qc_publish(S)},
+               {1,  qc_publish_delivered(S)},
+               {1,  qc_publish_multiple(S)},  %% very slow
+               {15, qc_fetch(S)},             %% needed for ack and requeue
+               {15, qc_ack(S)},
+               {15, qc_requeue(S)},
+               {3,  qc_set_ram_duration_target(S)},
+               {1,  qc_ram_duration(S)},
+               {1,  qc_drain_confirmed(S)},
+               {1,  qc_dropwhile(S)},
+               {1,  qc_is_empty(S)},
+               {1,  qc_timeout(S)},
+               {1,  qc_purge(S)}]).
+
+qc_publish(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, publish,
+      [qc_message(),
+       #message_properties{needs_confirming = frequency([{1,  true},
+                                                         {20, false}]),
+                           expiry = choose(0, 10)},
+       self(), BQ]}.
+
+qc_publish_multiple(#state{bqstate = BQ}) ->
+    {call, ?MODULE, publish_multiple,
+      [qc_message(), #message_properties{}, BQ,
+       resize(?QUEUE_MAXLEN, pos_integer())]}.
+
+qc_publish_delivered(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, publish_delivered,
+      [boolean(), qc_message(), #message_properties{}, self(), BQ]}.
+
+qc_fetch(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, fetch, [boolean(), BQ]}.
+
+qc_ack(#state{bqstate = BQ, acks = Acks}) ->
+    {call, ?BQMOD, ack, [rand_choice(orddict:fetch_keys(Acks)), BQ]}.
+
+qc_requeue(#state{bqstate = BQ, acks = Acks}) ->
+    {call, ?BQMOD, requeue,
+      [rand_choice(orddict:fetch_keys(Acks)), fun(MsgOpts) -> MsgOpts end, BQ]}.
+
+qc_set_ram_duration_target(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, set_ram_duration_target,
+      [oneof([0, 1, 2, resize(1000, pos_integer()), infinity]), BQ]}.
+
+qc_ram_duration(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, ram_duration, [BQ]}.
+
+qc_drain_confirmed(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, drain_confirmed, [BQ]}.
+
+qc_dropwhile(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, dropwhile, [fun dropfun/1, BQ]}.
+
+qc_is_empty(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, is_empty, [BQ]}.
+
+qc_timeout(#state{bqstate = BQ}) ->
+    {call, ?MODULE, timeout, [BQ, ?TIMEOUT_LIMIT]}.
+
+qc_purge(#state{bqstate = BQ}) ->
+    {call, ?BQMOD, purge, [BQ]}.
+
+%% Preconditions: ack/requeue need pending acktags, publish_delivered an
+%% empty model queue, and publish_multiple a model length below ?QUEUE_MAXLEN.
+precondition(#state{acks = Acks}, {call, ?BQMOD, Fun, _Arg})
+    when Fun =:= ack; Fun =:= requeue ->
+    orddict:size(Acks) > 0;
+precondition(#state{messages = Messages},
+             {call, ?BQMOD, publish_delivered, _Arg}) ->
+    queue:is_empty(Messages);
+precondition(_S, {call, ?BQMOD, _Fun, _Arg}) ->
+    true;
+precondition(_S, {call, ?MODULE, timeout, _Arg}) ->
+    true;
+precondition(#state{len = Len}, {call, ?MODULE, publish_multiple, _Arg}) ->
+    Len < ?QUEUE_MAXLEN.
+
+%% Model updates
+
+next_state(S, BQ, {call, ?BQMOD, publish, [Msg, MsgProps, _Pid, _BQ]}) ->
+    #state{len = Len, messages = Messages, confirms = Confirms} = S,
+    MsgId = {call, erlang, element, [?RECORD_INDEX(id, basic_message), Msg]},
+    NeedsConfirm =
+        {call, erlang, element,
+            [?RECORD_INDEX(needs_confirming, message_properties), MsgProps]},
+    S#state{bqstate  = BQ,
+            len      = Len + 1,
+            messages = queue:in({MsgProps, Msg}, Messages),
+            confirms = case eval(NeedsConfirm) of
+                           true -> gb_sets:add(MsgId, Confirms);
+                           _    -> Confirms
+                       end};
+
+next_state(S, BQ, {call, _, publish_multiple, [Msg, MsgProps, _BQ, Count]}) ->
+    #state{len = Len, messages = Messages} = S,
+    Messages1 = repeat(Messages, fun(Msgs) ->
+                                    queue:in({MsgProps, Msg}, Msgs)
+                                 end, Count),
+    S#state{bqstate  = BQ,
+            len      = Len + Count,
+            messages = Messages1};
+
+next_state(S, Res,
+           {call, ?BQMOD, publish_delivered,
+               [AckReq, Msg, MsgProps, _Pid, _BQ]}) ->
+    #state{confirms = Confirms, acks = Acks} = S,
+    AckTag = {call, erlang, element, [1, Res]},
+    BQ1    = {call, erlang, element, [2, Res]},
+    MsgId  = {call, erlang, element, [?RECORD_INDEX(id, basic_message), Msg]},
+    NeedsConfirm =
+        {call, erlang, element,
+            [?RECORD_INDEX(needs_confirming, message_properties), MsgProps]},
+    S#state{bqstate  = BQ1,
+            confirms = case eval(NeedsConfirm) of
+                           true -> gb_sets:add(MsgId, Confirms);
+                           _    -> Confirms
+                       end,
+            acks = case AckReq of
+                       true  -> orddict:append(AckTag, {MsgProps, Msg}, Acks);
+                       false -> Acks
+                   end
+           };
+
+next_state(S, Res, {call, ?BQMOD, fetch, [AckReq, _BQ]}) ->
+    #state{len = Len, messages = Messages, acks = Acks} = S,
+    ResultInfo = {call, erlang, element, [1, Res]},
+    BQ1        = {call, erlang, element, [2, Res]},
+    AckTag     = {call, erlang, element, [3, ResultInfo]},
+    S1         = S#state{bqstate = BQ1},
+    case queue:out(Messages) of
+        {empty, _M2}       ->
+            S1;
+        {{value, MsgProp_Msg}, M2} ->
+            S2 = S1#state{len = Len - 1, messages = M2},
+            case AckReq of
+                true  ->
+                    S2#state{acks = orddict:append(AckTag, MsgProp_Msg, Acks)};
+                false ->
+                    S2
+           end
+    end;
+
+next_state(S, Res, {call, ?BQMOD, ack, [AcksArg, _BQ]}) ->
+    #state{acks = AcksState} = S,
+    BQ1 = {call, erlang, element, [2, Res]},
+    S#state{bqstate = BQ1,
+            acks    = lists:foldl(fun orddict:erase/2, AcksState, AcksArg)};
+
+next_state(S, Res, {call, ?BQMOD, requeue, [AcksArg, _F, _V]}) ->
+    #state{len = Len, messages = Messages, acks = AcksState} = S,
+    BQ1 = {call, erlang, element, [2, Res]},
+    RequeueMsgs = lists:append([orddict:fetch(Key, AcksState) ||
+                                Key <- AcksArg]),
+    S#state{bqstate  = BQ1,
+            len      = Len + length(RequeueMsgs),
+            messages = queue:join(Messages, queue:from_list(RequeueMsgs)),
+            acks     = lists:foldl(fun orddict:erase/2, AcksState, AcksArg)};
+
+next_state(S, BQ, {call, ?BQMOD, set_ram_duration_target, _Args}) ->
+    S#state{bqstate = BQ};
+
+next_state(S, Res, {call, ?BQMOD, ram_duration, _Args}) ->
+    BQ1 = {call, erlang, element, [2, Res]},
+    S#state{bqstate = BQ1};
+
+next_state(S, Res, {call, ?BQMOD, drain_confirmed, _Args}) ->
+    BQ1 = {call, erlang, element, [2, Res]},
+    S#state{bqstate = BQ1};
+
+next_state(S, BQ1, {call, ?BQMOD, dropwhile, _Args}) ->
+    #state{messages = Messages} = S,
+    Messages1 = drop_messages(Messages),
+    S#state{bqstate = BQ1, len = queue:len(Messages1), messages = Messages1};
+
+next_state(S, _Res, {call, ?BQMOD, is_empty, _Args}) ->
+    S;
+
+next_state(S, BQ, {call, ?MODULE, timeout, _Args}) ->
+    S#state{bqstate = BQ};
+
+next_state(S, Res, {call, ?BQMOD, purge, _Args}) ->
+    BQ1 = {call, erlang, element, [2, Res]},
+    S#state{bqstate = BQ1, len = 0, messages = queue:new()}.
+
+%% Postconditions
+
+%% fetch: an empty reply needs an empty model, otherwise the head must match
+postcondition(S, {call, ?BQMOD, fetch, _Args}, Res) ->
+    #state{messages = Queue, len = Len, acks = Acks, confirms = Confirms} = S,
+    case Res of
+        {empty, _BQ} -> Len =:= 0;
+        {{Fetched, _IsDelivered, AckTag, Remaining}, _BQ} ->
+            {_MsgProps, Expected} = queue:head(Queue),
+            Fetched =:= Expected
+                andalso not (orddict:is_key(AckTag, Acks) orelse
+                             gb_sets:is_element(AckTag, Confirms))
+                andalso Remaining =:= Len - 1
+    end;
+%% publish_delivered: the returned ack tag must be unknown to the model
+postcondition(S, {call, ?BQMOD, publish_delivered, _Args}, {AckTag, _BQ}) ->
+    #state{acks = Acks, confirms = Confirms} = S,
+    not (orddict:is_key(AckTag, Acks) orelse
+         gb_sets:is_element(AckTag, Confirms));
+%% purge: the reported count must equal the model length
+postcondition(#state{len = Len}, {call, ?BQMOD, purge, _Args}, Res) ->
+    {Purged, _BQ} = Res,
+    Purged =:= Len;
+%% is_empty: the answer must agree with the model length being zero
+postcondition(#state{len = Len},
+              {call, ?BQMOD, is_empty, _Args}, Res) ->
+    Res =:= (Len =:= 0);
+%% drain_confirmed: everything reported must be in the model's confirms set
+postcondition(S, {call, ?BQMOD, drain_confirmed, _Args}, Res) ->
+    #state{confirms = Confirms} = S,
+    {Reported, _BQ} = Res,
+    IsAwaited =
+        fun (MsgId) -> gb_sets:is_element(MsgId, Confirms) end,
+    lists:all(IsAwaited, Reported);
+%% any other call: the queue's own length must equal the model length
+postcondition(#state{bqstate = BQ, len = Len}, {call, _M, _F, _A}, _Res) ->
+    Len =:= ?BQMOD:len(BQ).
+
+%% Helpers
+%% Apply Fun Times times to Result; Times < 0 (or non-integer) is rejected.
+repeat(Result, _Fun, 0) ->
+    Result;
+repeat(Result, Fun, Times) when is_integer(Times), Times > 0 ->
+    repeat(Fun(Result), Fun, Times - 1).
+%% Publish the same message Count times, threading the queue state through.
+publish_multiple(Msg, MsgProps, BQ, Count) ->
+    PublishOnce =
+        fun (BQN) -> ?BQMOD:publish(Msg, MsgProps, self(), BQN) end,
+    repeat(BQ, PublishOnce, Count).
+%% Run ?BQMOD:timeout/1 while the queue asks for it, at most AtMost times.
+timeout(BQ, 0) ->
+    BQ;
+timeout(BQ, AtMost) ->
+    NeedsTimeout = ?BQMOD:needs_timeout(BQ),
+    if NeedsTimeout =:= false -> BQ;
+       true                   -> timeout(?BQMOD:timeout(BQ), AtMost - 1)
+    end.
+%% Binary payload generator; binary() is resized to the square of the size.
+qc_message_payload() ->
+    ?SIZED(Size, resize(Size * Size, binary())).
+%% Routing key generator: a 10-byte binary, excluded from shrinking.
+qc_routing_key() ->
+    noshrink(binary(10)).
+%% Delivery mode generator: either 1 or 2.
+qc_delivery_mode() ->
+    oneof([1, 2]).
+%% Symbolic message whose delivery mode comes from qc_delivery_mode/0.
+qc_message() ->
+    qc_message(qc_delivery_mode()).
+%% Symbolic rabbit_basic:message/4 call using the given delivery mode.
+qc_message(DeliveryMode) ->
+    {call, rabbit_basic, message, [
+        qc_default_exchange(),
+        qc_routing_key(),
+        #'P_basic'{delivery_mode = DeliveryMode},
+        qc_message_payload()]}.
+%% Symbolic rabbit_misc:r/3 call for the exchange whose name is <<>>.
+qc_default_exchange() ->
+    {call, rabbit_misc, r, [<<>>, exchange, <<>>]}.
+%% Symbolic ?BQMOD:init/3 call for Q; 3rd arg is a generated 2-ary fun -> ok.
+qc_variable_queue_init(Q) ->
+    {call, ?BQMOD, init,
+        [Q, false, function(2, ok)]}.
+%% Symbolic rabbit_misc:r/3 call: a queue in <<"/">> with a 16-byte name.
+qc_test_q() ->
+    {call, rabbit_misc, r, [<<"/">>, queue, noshrink(binary(16))]}.
+%% #amqqueue{} for the test queue with a generated durable flag.
+qc_test_queue() ->
+    qc_test_queue(boolean()).
+%% Non-auto-delete #amqqueue{} without arguments; pid is the calling process.
+qc_test_queue(Durable) ->
+    #amqqueue{name        = qc_test_q(),
+              durable     = Durable,
+              auto_delete = false,
+              arguments   = [],
+              pid         = self()}.
+%% [] for an empty list, else a one-element list with a randomly picked member.
+rand_choice([])   -> [];
+rand_choice(List) -> [lists:nth(random:uniform(length(List)), List)].
+%% true when the expiry field of the message properties is not 0.
+dropfun(Props) ->
+    ExpiryPos  = ?RECORD_INDEX(expiry, message_properties),
+    ExpiryCall = {call, erlang, element, [ExpiryPos, Props]},
+    0 =/= eval(ExpiryCall).
+%% Drop messages from the head of the model queue while dropfun/1 says so.
+drop_messages(Messages) ->
+    case queue:out(Messages) of
+        {{value, Head}, Rest} ->
+            HeadProps = {call, erlang, element, [1, Head]},
+            case dropfun(HeadProps) of
+                false -> Messages;
+                true  -> drop_messages(Rest)
+            end;
+        {empty, _} ->
+            Messages
+    end.
+
+-endif.
diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl
index c5bd9575..9cc406e7 100644
--- a/src/rabbit_basic.erl
+++ b/src/rabbit_basic.erl
@@ -18,10 +18,9 @@
 -include("rabbit.hrl").
 -include("rabbit_framing.hrl").
 
--export([publish/1, message/4, properties/1, delivery/5]).
--export([publish/4, publish/7]).
+-export([publish/1, message/3, message/4, properties/1, delivery/4]).
+-export([publish/4, publish/6]).
 -export([build_content/2, from_content/1]).
--export([is_message_persistent/1]).
 
 %%----------------------------------------------------------------------------
 
@@ -33,32 +32,33 @@
         ({ok, rabbit_router:routing_result(), [pid()]}
          | rabbit_types:error('not_found'))).
 
+-type(exchange_input() :: (rabbit_types:exchange() | rabbit_exchange:name())).
+-type(body_input() :: (binary() | [binary()])).
+
 -spec(publish/1 ::
         (rabbit_types:delivery()) -> publish_result()).
--spec(delivery/5 ::
-        (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()),
-         rabbit_types:message(), undefined | integer()) ->
+-spec(delivery/4 ::
+        (boolean(), boolean(), rabbit_types:message(), undefined | integer()) ->
                          rabbit_types:delivery()).
 -spec(message/4 ::
         (rabbit_exchange:name(), rabbit_router:routing_key(),
-         properties_input(), binary()) ->
-                        (rabbit_types:message() | rabbit_types:error(any()))).
+         properties_input(), binary()) -> rabbit_types:message()).
+-spec(message/3 ::
+        (rabbit_exchange:name(), rabbit_router:routing_key(),
+         rabbit_types:decoded_content()) ->
+                        rabbit_types:ok_or_error2(rabbit_types:message(), any())).
 -spec(properties/1 ::
         (properties_input()) -> rabbit_framing:amqp_property_record()).
 -spec(publish/4 ::
-        (rabbit_exchange:name(), rabbit_router:routing_key(),
-         properties_input(), binary()) -> publish_result()).
--spec(publish/7 ::
-        (rabbit_exchange:name(), rabbit_router:routing_key(),
-         boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()),
-         properties_input(), binary()) -> publish_result()).
--spec(build_content/2 :: (rabbit_framing:amqp_property_record(), binary()) ->
-                              rabbit_types:content()).
+        (exchange_input(), rabbit_router:routing_key(), properties_input(),
+         body_input()) -> publish_result()).
+-spec(publish/6 ::
+        (exchange_input(), rabbit_router:routing_key(), boolean(), boolean(),
+         properties_input(), body_input()) -> publish_result()).
+-spec(build_content/2 :: (rabbit_framing:amqp_property_record(),
+                          binary() | [binary()]) -> rabbit_types:content()).
 -spec(from_content/1 :: (rabbit_types:content()) ->
                              {rabbit_framing:amqp_property_record(), binary()}).
--spec(is_message_persistent/1 :: (rabbit_types:decoded_content()) ->
-                                      (boolean() |
-                                       {'invalid', non_neg_integer()})).
 
 -endif.
 
@@ -67,18 +67,18 @@
 publish(Delivery = #delivery{
           message = #basic_message{exchange_name = ExchangeName}}) ->
     case rabbit_exchange:lookup(ExchangeName) of
-        {ok, X} ->
-            {RoutingRes, DeliveredQPids} = rabbit_exchange:publish(X, Delivery),
-            {ok, RoutingRes, DeliveredQPids};
-        Other ->
-            Other
+        {ok, X} -> publish(X, Delivery);
+        Other   -> Other
     end.
 
-delivery(Mandatory, Immediate, Txn, Message, MsgSeqNo) ->
-    #delivery{mandatory = Mandatory, immediate = Immediate, txn = Txn,
-              sender = self(), message = Message, msg_seq_no = MsgSeqNo}.
+delivery(Mandatory, Immediate, Message, MsgSeqNo) ->
+    #delivery{mandatory = Mandatory, immediate = Immediate, sender = self(),
+              message = Message, msg_seq_no = MsgSeqNo}.
 
-build_content(Properties, BodyBin) ->
+build_content(Properties, BodyBin) when is_binary(BodyBin) ->
+    build_content(Properties, [BodyBin]);
+
+build_content(Properties, PFR) ->
     %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1
     {ClassId, _MethodId} =
         rabbit_framing_amqp_0_9_1:method_id('basic.publish'),
@@ -86,7 +86,7 @@ build_content(Properties, BodyBin) ->
              properties = Properties,
              properties_bin = none,
              protocol = none,
-             payload_fragments_rev = [BodyBin]}.
+             payload_fragments_rev = PFR}.
 
 from_content(Content) ->
     #content{class_id = ClassId,
@@ -98,20 +98,41 @@ from_content(Content) ->
         rabbit_framing_amqp_0_9_1:method_id('basic.publish'),
     {Props, list_to_binary(lists:reverse(FragmentsRev))}.
 
-message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) ->
-    Properties = properties(RawProperties),
-    Content = build_content(Properties, BodyBin),
-    case is_message_persistent(Content) of
-        {invalid, Other} ->
-            {error, {invalid_delivery_mode, Other}};
-        IsPersistent when is_boolean(IsPersistent) ->
-            #basic_message{exchange_name  = ExchangeName,
-                           routing_key    = RoutingKeyBin,
-                           content        = Content,
-                           guid           = rabbit_guid:guid(),
-                           is_persistent  = IsPersistent}
+%% Drops the Key header; this breaks the spec rule forbidding modification
+strip_header(Content = #content{properties = #'P_basic'{headers = undefined}},
+             _Key) ->
+    Content;
+strip_header(Content = #content{
+               properties = Props = #'P_basic'{headers = Headers}}, Key) ->
+    case lists:keymember(Key, 1, Headers) of
+        false -> Content;
+        true  -> Stripped = lists:keydelete(Key, 1, Headers),
+                 %% only the first entry carrying this key is removed
+                 rabbit_binary_generator:clear_encoded_content(
+                   Content#content{
+                     properties = Props#'P_basic'{headers = Stripped}})
     end.
 
+message(ExchangeName, RoutingKey,
+        #content{properties = Props} = DecodedContent) ->
+    try %% is_message_persistent/1 and header_routes/1 may throw {error, _}
+        {ok, #basic_message{
+           exchange_name = ExchangeName,
+           content       = strip_header(DecodedContent, ?DELETED_HEADER),
+           id            = rabbit_guid:guid(),
+           is_persistent = is_message_persistent(DecodedContent),
+           routing_keys  = [RoutingKey |
+                            header_routes(Props#'P_basic'.headers)]}}
+    catch
+        {error, _Reason} = Error -> Error %% a thrown error becomes the result
+    end.
+%% Build a message from raw properties and a body. An error from message/3
+%% is not returned: the {ok, Msg} match fails instead.
+message(ExchangeName, RoutingKey, RawProperties, Body) ->
+    Content = build_content(properties(RawProperties), Body),
+    {ok, Msg} = message(ExchangeName, RoutingKey, Content),
+    Msg.
+
 properties(P = #'P_basic'{}) ->
     P;
 properties(P) when is_list(P) ->
@@ -133,18 +154,25 @@ indexof([_ | Rest], Element, N)        -> indexof(Rest, Element, N + 1).
 
 %% Convenience function, for avoiding round-trips in calls across the
 %% erlang distributed network.
-publish(ExchangeName, RoutingKeyBin, Properties, BodyBin) ->
-    publish(ExchangeName, RoutingKeyBin, false, false, none, Properties,
-            BodyBin).
+publish(Exchange, RoutingKeyBin, Properties, Body) ->
+    publish(Exchange, RoutingKeyBin, false, false, Properties, Body).
 
 %% Convenience function, for avoiding round-trips in calls across the
 %% erlang distributed network.
-publish(ExchangeName, RoutingKeyBin, Mandatory, Immediate, Txn, Properties,
-        BodyBin) ->
-    publish(delivery(Mandatory, Immediate, Txn,
-                     message(ExchangeName, RoutingKeyBin,
-                             properties(Properties), BodyBin),
-                     undefined)).
+publish(X = #exchange{name = XName}, RKey, Mandatory, Immediate, Props, Body) ->
+    Msg = message(XName, RKey, properties(Props), Body),
+    %% the delivery's msg_seq_no is left undefined
+    publish(X, delivery(Mandatory, Immediate, Msg, undefined));
+publish(XName, RKey, Mandatory, Immediate, Props, Body) ->
+    case rabbit_exchange:lookup(XName) of
+        {ok, X} -> publish(X, RKey, Mandatory, Immediate, Props, Body);
+        Err     -> Err
+    end.
+%% Route Delivery via exchange X and pass the result to rabbit_router:deliver/2.
+publish(X, Delivery) ->
+    Routed = rabbit_exchange:route(X, Delivery),
+    {RoutingRes, DeliveredQPids} = rabbit_router:deliver(Routed, Delivery),
+    {ok, RoutingRes, DeliveredQPids}.
 
 is_message_persistent(#content{properties = #'P_basic'{
                                  delivery_mode = Mode}}) ->
@@ -152,5 +180,18 @@ is_message_persistent(#content{properties = #'P_basic'{
         1         -> false;
         2         -> true;
         undefined -> false;
-        Other     -> {invalid, Other}
+        Other     -> throw({error, {delivery_mode_unknown, Other}})
     end.
+
+%% Extract CC routes from headers (longstr entries of each routing header)
+header_routes(undefined)    -> [];
+header_routes(HeadersTable) ->
+    lists:flatmap(
+      fun (Header) ->
+              case rabbit_misc:table_lookup(HeadersTable, Header) of
+                  undefined       -> [];
+                  {array, Routes} -> [R || {longstr, R} <- Routes];
+                  {Type, _Val}    -> throw({error, {unacceptable_type_in_header,
+                                                    Type, binary_to_list(Header)}})
+              end
+      end, ?ROUTING_HEADERS).
diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl
index d67c7f58..68511a32 100644
--- a/src/rabbit_binary_generator.erl
+++ b/src/rabbit_binary_generator.erl
@@ -18,12 +18,13 @@
 -include("rabbit_framing.hrl").
 -include("rabbit.hrl").
 
-% EMPTY_CONTENT_BODY_FRAME_SIZE, 8 = 1 + 2 + 4 + 1
-%  - 1 byte of frame type
-%  - 2 bytes of channel number
-%  - 4 bytes of frame payload length
-%  - 1 byte of payload trailer FRAME_END byte
-% See definition of check_empty_content_body_frame_size/0, an assertion called at startup.
+%% EMPTY_CONTENT_BODY_FRAME_SIZE, 8 = 1 + 2 + 4 + 1
+%%  - 1 byte of frame type
+%%  - 2 bytes of channel number
+%%  - 4 bytes of frame payload length
+%%  - 1 byte of payload trailer FRAME_END byte
+%% See definition of check_empty_content_body_frame_size/0,
+%% an assertion called at startup.
 -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8).
 
 -export([build_simple_method_frame/3,
@@ -61,8 +62,7 @@
 -spec(map_exception/3 :: (rabbit_channel:channel_number(),
                           rabbit_types:amqp_error() | any(),
                           rabbit_types:protocol()) ->
-                              {boolean(),
-                               rabbit_channel:channel_number(),
+                              {rabbit_channel:channel_number(),
                                rabbit_framing:amqp_method_record()}).
 
 -endif.
@@ -301,24 +301,21 @@ clear_encoded_content(Content = #content{}) ->
 map_exception(Channel, Reason, Protocol) ->
     {SuggestedClose, ReplyCode, ReplyText, FailedMethod} =
         lookup_amqp_exception(Reason, Protocol),
-    ShouldClose = SuggestedClose orelse (Channel == 0),
     {ClassId, MethodId} = case FailedMethod of
                               {_, _} -> FailedMethod;
                               none   -> {0, 0};
                               _      -> Protocol:method_id(FailedMethod)
                           end,
-    {CloseChannel, CloseMethod} =
-        case ShouldClose of
-            true  -> {0, #'connection.close'{reply_code = ReplyCode,
-                                             reply_text = ReplyText,
-                                             class_id = ClassId,
-                                             method_id = MethodId}};
-            false -> {Channel, #'channel.close'{reply_code = ReplyCode,
-                                                reply_text = ReplyText,
-                                                class_id = ClassId,
-                                                method_id = MethodId}}
-        end,
-    {ShouldClose, CloseChannel, CloseMethod}.
+    case SuggestedClose orelse (Channel == 0) of
+        true  -> {0, #'connection.close'{reply_code = ReplyCode,
+                                         reply_text = ReplyText,
+                                         class_id   = ClassId,
+                                         method_id  = MethodId}};
+        false -> {Channel, #'channel.close'{reply_code = ReplyCode,
+                                            reply_text = ReplyText,
+                                            class_id   = ClassId,
+                                            method_id  = MethodId}}
+    end.
 
 lookup_amqp_exception(#amqp_error{name        = Name,
                                   explanation = Expl,
diff --git a/src/rabbit_binding.erl b/src/rabbit_binding.erl
index 96a22dca..205d5bba 100644
--- a/src/rabbit_binding.erl
+++ b/src/rabbit_binding.erl
@@ -17,11 +17,11 @@
 -module(rabbit_binding).
 -include("rabbit.hrl").
 
--export([recover/0, exists/1, add/1, remove/1, add/2, remove/2, list/1]).
+-export([recover/2, exists/1, add/1, add/2, remove/1, remove/2, list/1]).
 -export([list_for_source/1, list_for_destination/1,
          list_for_source_and_destination/2]).
 -export([new_deletions/0, combine_deletions/2, add_deletion/3,
-         process_deletions/2]).
+         process_deletions/1]).
 -export([info_keys/0, info/1, info/2, info_all/1, info_all/2]).
 %% these must all be run inside a mnesia tx
 -export([has_for_source/1, remove_for_source/1,
@@ -38,24 +38,24 @@
 -type(bind_errors() :: rabbit_types:error('source_not_found' |
                                           'destination_not_found' |
                                           'source_and_destination_not_found')).
--type(bind_res() :: 'ok' | bind_errors()).
+-type(bind_ok_or_error() :: 'ok' | bind_errors() |
+                            rabbit_types:error('binding_not_found')).
+-type(bind_res() :: bind_ok_or_error() | rabbit_misc:const(bind_ok_or_error())).
 -type(inner_fun() ::
         fun((rabbit_types:exchange(),
              rabbit_types:exchange() | rabbit_types:amqqueue()) ->
                    rabbit_types:ok_or_error(rabbit_types:amqp_error()))).
 -type(bindings() :: [rabbit_types:binding()]).
--type(add_res() :: bind_res() | rabbit_misc:const(bind_res())).
--type(bind_or_error() :: bind_res() | rabbit_types:error('binding_not_found')).
--type(remove_res() :: bind_or_error() | rabbit_misc:const(bind_or_error())).
 
 -opaque(deletions() :: dict()).
 
--spec(recover/0 :: () -> [rabbit_types:binding()]).
+-spec(recover/2 :: ([rabbit_exchange:name()], [rabbit_amqqueue:name()]) ->
+                        'ok').
 -spec(exists/1 :: (rabbit_types:binding()) -> boolean() | bind_errors()).
--spec(add/1 :: (rabbit_types:binding()) -> add_res()).
--spec(remove/1 :: (rabbit_types:binding()) -> remove_res()).
--spec(add/2 :: (rabbit_types:binding(), inner_fun()) -> add_res()).
--spec(remove/2 :: (rabbit_types:binding(), inner_fun()) -> remove_res()).
+-spec(add/1    :: (rabbit_types:binding())              -> bind_res()).
+-spec(add/2    :: (rabbit_types:binding(), inner_fun()) -> bind_res()).
+-spec(remove/1 :: (rabbit_types:binding())              -> bind_res()).
+-spec(remove/2 :: (rabbit_types:binding(), inner_fun()) -> bind_res()).
 -spec(list/1 :: (rabbit_types:vhost()) -> bindings()).
 -spec(list_for_source/1 ::
         (rabbit_types:binding_source()) -> bindings()).
@@ -70,14 +70,14 @@
                      rabbit_types:infos()).
 -spec(info_all/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]).
 -spec(info_all/2 ::(rabbit_types:vhost(), rabbit_types:info_keys())
-                    -> [rabbit_types:infos()]).
+                   -> [rabbit_types:infos()]).
 -spec(has_for_source/1 :: (rabbit_types:binding_source()) -> boolean()).
 -spec(remove_for_source/1 :: (rabbit_types:binding_source()) -> bindings()).
 -spec(remove_for_destination/1 ::
         (rabbit_types:binding_destination()) -> deletions()).
 -spec(remove_transient_for_destination/1 ::
         (rabbit_types:binding_destination()) -> deletions()).
--spec(process_deletions/2 :: (deletions(), boolean()) -> 'ok').
+-spec(process_deletions/1 :: (deletions()) -> rabbit_misc:thunk('ok')).
 -spec(combine_deletions/2 :: (deletions(), deletions()) -> deletions()).
 -spec(add_deletion/3 :: (rabbit_exchange:name(),
                          {'undefined' | rabbit_types:exchange(),
@@ -93,14 +93,42 @@
                     destination_name, destination_kind,
                     routing_key, arguments]).
 
-recover() ->
-    rabbit_misc:table_fold(
-      fun (Route = #route{binding = B}, Acc) ->
-              {_, ReverseRoute} = route_with_reverse(Route),
-              ok = mnesia:write(rabbit_route, Route, write),
-              ok = mnesia:write(rabbit_reverse_route, ReverseRoute, write),
-              [B | Acc]
-      end, [], rabbit_durable_route).
+recover(XNames, QNames) ->
+    rabbit_misc:table_filter(
+      fun (Route) ->
+              [] =:= mnesia:read({rabbit_semi_durable_route, Route})
+      end,
+      fun (_Route, false) -> ok;
+          (Route,  true)  -> ok = mnesia:write(rabbit_semi_durable_route,
+                                               Route, write)
+      end, rabbit_durable_route),
+    Exchanges = sets:from_list(XNames),
+    Queues    = sets:from_list(QNames),
+    %% choose the recovery set matching the kind of a route's destination
+    SetFor = fun (#resource{kind = queue})    -> Queues;
+                 (#resource{kind = exchange}) -> Exchanges
+             end,
+    [recover_semi_durable_route(Route, SetFor(Dst)) ||
+        Route = #route{binding = #binding{destination = Dst}} <-
+            rabbit_misc:dirty_read_all(rabbit_semi_durable_route)],
+    ok.
+%% Recover R if it is still in rabbit_semi_durable_route and Dst is in ToRecover
+recover_semi_durable_route(R = #route{binding = B}, ToRecover) ->
+    #binding{source = Src, destination = Dst} = B,
+    {ok, X} = rabbit_exchange:lookup(Src),
+    rabbit_misc:execute_mnesia_transaction(
+      fun () ->
+              Rs = mnesia:match_object(rabbit_semi_durable_route, R, read),
+              case Rs =/= [] andalso sets:is_element(Dst, ToRecover) of
+                  false -> no_recover;
+                  true  -> ok = sync_transient_route(R, fun mnesia:write/3),
+                           rabbit_exchange:serial(X)
+              end
+      end,
+      fun (no_recover, _)     -> ok;
+          (_Serial,    true)  -> x_callback(transaction, X, add_binding, B);
+          (Serial,     false) -> x_callback(Serial,      X, add_binding, B)
+      end).
 
 exists(Binding) ->
     binding_action(
@@ -110,8 +138,6 @@ exists(Binding) ->
 
 add(Binding) -> add(Binding, fun (_Src, _Dst) -> ok end).
 
-remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end).
-
 add(Binding, InnerFun) ->
     binding_action(
       Binding,
@@ -120,51 +146,52 @@ add(Binding, InnerFun) ->
               %% in general, we want to fail on that in preference to
               %% anything else
               case InnerFun(Src, Dst) of
-                  ok ->
-                      case mnesia:read({rabbit_route, B}) of
-                          []  -> ok = sync_binding(B, all_durable([Src, Dst]),
-                                                   fun mnesia:write/3),
-                                 fun (Tx) ->
-                                         ok = rabbit_exchange:callback(
-                                                Src, add_binding, [Tx, Src, B]),
-                                         rabbit_event:notify_if(
-                                           not Tx, binding_created, info(B))
-                                 end;
-                          [_] -> fun rabbit_misc:const_ok/1
-                      end;
-                  {error, _} = Err ->
-                      rabbit_misc:const(Err)
+                  ok               -> case mnesia:read({rabbit_route, B}) of
+                                          []  -> add(Src, Dst, B);
+                                          [_] -> fun rabbit_misc:const_ok/0
+                                      end;
+                  {error, _} = Err -> rabbit_misc:const(Err)
               end
       end).
 
+add(Src, Dst, B) ->
+    [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]],
+    case (not (SrcDurable andalso DstDurable) orelse
+          mnesia:read({rabbit_durable_route, B}) =:= []) of
+        true  -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable,
+                                 fun mnesia:write/3),
+                 ok = rabbit_exchange:callback(
+                        Src, add_binding, [transaction, Src, B]),
+                 Serial = rabbit_exchange:serial(Src),
+                 fun () ->
+                     ok = rabbit_exchange:callback(
+                            Src, add_binding, [Serial, Src, B]),
+                     ok = rabbit_event:notify(binding_created, info(B))
+                 end;
+        false -> rabbit_misc:const({error, binding_not_found})
+    end.
+
+remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end).
+
 remove(Binding, InnerFun) ->
     binding_action(
       Binding,
       fun (Src, Dst, B) ->
-              Result =
-                  case mnesia:match_object(rabbit_route, #route{binding = B},
-                                           write) of
-                      [] ->
-                          {error, binding_not_found};
-                      [_] ->
-                          case InnerFun(Src, Dst) of
-                              ok ->
-                                  ok = sync_binding(B, all_durable([Src, Dst]),
-                                                    fun mnesia:delete_object/3),
-                                  {ok, maybe_auto_delete(B#binding.source,
-                                                         [B], new_deletions())};
-                              {error, _} = E ->
-                                  E
-                          end
-                  end,
-              case Result of
-                  {error, _} = Err ->
-                      rabbit_misc:const(Err);
-                  {ok, Deletions} ->
-                      fun (Tx) -> ok = process_deletions(Deletions, Tx) end
+              case mnesia:read(rabbit_route, B, write) of
+                  []  -> rabbit_misc:const({error, binding_not_found});
+                  [_] -> case InnerFun(Src, Dst) of
+                             ok               -> remove(Src, Dst, B);
+                             {error, _} = Err -> rabbit_misc:const(Err)
+                         end
               end
       end).
 
+remove(Src, Dst, B) ->
+    ok = sync_route(#route{binding = B}, durable(Src), durable(Dst),
+                    fun mnesia:delete_object/3),
+    Deletions = maybe_auto_delete(B#binding.source, [B], new_deletions()),
+    process_deletions(Deletions).
+
 list(VHostPath) ->
     VHostResource = rabbit_misc:r(VHostPath, '_'),
     Route = #route{binding = #binding{source      = VHostResource,
@@ -175,22 +202,33 @@ list(VHostPath) ->
                                                            Route)].
 
 list_for_source(SrcName) ->
-    Route = #route{binding = #binding{source = SrcName, _ = '_'}},
-    [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route,
-                                                           Route)].
+    mnesia:async_dirty(
+      fun() ->
+              Route = #route{binding = #binding{source = SrcName, _ = '_'}},
+              [B || #route{binding = B}
+                        <- mnesia:match_object(rabbit_route, Route, read)]
+      end).
 
 list_for_destination(DstName) ->
-    Route = #route{binding = #binding{destination = DstName, _ = '_'}},
-    [reverse_binding(B) || #reverse_route{reverse_binding = B} <-
-                               mnesia:dirty_match_object(rabbit_reverse_route,
-                                                         reverse_route(Route))].
+    mnesia:async_dirty(
+      fun() ->
+              Route = #route{binding = #binding{destination = DstName,
+                                                _ = '_'}},
+              [reverse_binding(B) ||
+                  #reverse_route{reverse_binding = B} <-
+                      mnesia:match_object(rabbit_reverse_route,
+                                          reverse_route(Route), read)]
+      end).
 
 list_for_source_and_destination(SrcName, DstName) ->
-    Route = #route{binding = #binding{source      = SrcName,
-                                      destination = DstName,
-                                      _           = '_'}},
-    [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route,
-                                                           Route)].
+    mnesia:async_dirty(
+      fun() ->
+              Route = #route{binding = #binding{source      = SrcName,
+                                                destination = DstName,
+                                                _           = '_'}},
+              [B || #route{binding = B} <- mnesia:match_object(rabbit_route,
+                                                               Route, read)]
+      end).
 
 info_keys() -> ?INFO_KEYS.
 
@@ -222,32 +260,31 @@ has_for_source(SrcName) ->
     %% we need to check for durable routes here too in case a bunch of
     %% routes to durable queues have been removed temporarily as a
     %% result of a node failure
-    contains(rabbit_route, Match) orelse contains(rabbit_durable_route, Match).
+    contains(rabbit_route, Match) orelse
+        contains(rabbit_semi_durable_route, Match).
 
 remove_for_source(SrcName) ->
+    Match = #route{binding = #binding{source = SrcName, _ = '_'}},
+    Routes = lists:usort(
+               mnesia:match_object(rabbit_route, Match, write) ++
+                   mnesia:match_object(rabbit_durable_route, Match, write)),
     [begin
-         ok = mnesia:delete_object(rabbit_reverse_route,
-                                   reverse_route(Route), write),
-         ok = delete_forward_routes(Route),
+         sync_route(Route, fun mnesia:delete_object/3),
          Route#route.binding
-     end || Route <- mnesia:match_object(
-                       rabbit_route,
-                       #route{binding = #binding{source = SrcName,
-                                                 _      = '_'}},
-                       write)].
+     end || Route <- Routes].
 
-remove_for_destination(DstName) ->
-    remove_for_destination(DstName, fun delete_forward_routes/1).
+remove_for_destination(Dst) ->
+    remove_for_destination(
+      Dst, fun (R) -> sync_route(R, fun mnesia:delete_object/3) end).
 
-remove_transient_for_destination(DstName) ->
-    remove_for_destination(DstName, fun delete_transient_forward_routes/1).
+remove_transient_for_destination(Dst) ->
+    remove_for_destination(
+      Dst, fun (R) -> sync_transient_route(R, fun mnesia:delete_object/3) end).
 
 %%----------------------------------------------------------------------------
 
-all_durable(Resources) ->
-    lists:all(fun (#exchange{durable = D}) -> D;
-                  (#amqqueue{durable = D}) -> D
-              end, Resources).
+durable(#exchange{durable = D}) -> D;
+durable(#amqqueue{durable = D}) -> D.
 
 binding_action(Binding = #binding{source      = SrcName,
                                   destination = DstName,
@@ -259,31 +296,36 @@ binding_action(Binding = #binding{source      = SrcName,
               Fun(Src, Dst, Binding#binding{args = SortedArgs})
       end).
 
-sync_binding(Binding, Durable, Fun) ->
-    ok = case Durable of
-             true  -> Fun(rabbit_durable_route,
-                          #route{binding = Binding}, write);
-             false -> ok
-         end,
-    {Route, ReverseRoute} = route_with_reverse(Binding),
+sync_route(R, Fun) -> sync_route(R, true, true, Fun).
+
+sync_route(Route, true, true, Fun) ->
+    ok = Fun(rabbit_durable_route, Route, write),
+    sync_route(Route, false, true, Fun);
+
+sync_route(Route, false, true, Fun) ->
+    ok = Fun(rabbit_semi_durable_route, Route, write),
+    sync_route(Route, false, false, Fun);
+
+sync_route(Route, _SrcDurable, false, Fun) ->
+    sync_transient_route(Route, Fun).
+
+sync_transient_route(Route, Fun) ->
     ok = Fun(rabbit_route, Route, write),
-    ok = Fun(rabbit_reverse_route, ReverseRoute, write),
-    ok.
+    ok = Fun(rabbit_reverse_route, reverse_route(Route), write).
 
 call_with_source_and_destination(SrcName, DstName, Fun) ->
     SrcTable = table_for_resource(SrcName),
     DstTable = table_for_resource(DstName),
-    ErrFun = fun (Err) -> rabbit_misc:const(Err) end,
+    ErrFun = fun (Err) -> rabbit_misc:const({error, Err}) end,
     rabbit_misc:execute_mnesia_tx_with_tail(
       fun () ->
               case {mnesia:read({SrcTable, SrcName}),
                     mnesia:read({DstTable, DstName})} of
                   {[Src], [Dst]} -> Fun(Src, Dst);
-                  {[],    [_]  } -> ErrFun({error, source_not_found});
-                  {[_],   []   } -> ErrFun({error, destination_not_found});
-                  {[],    []   } -> ErrFun({error,
-                                            source_and_destination_not_found})
-              end
+                  {[],    [_]  } -> ErrFun(source_not_found);
+                  {[_],   []   } -> ErrFun(destination_not_found);
+                  {[],    []   } -> ErrFun(source_and_destination_not_found)
+               end
       end).
 
 table_for_resource(#resource{kind = exchange}) -> rabbit_exchange;
@@ -296,22 +338,15 @@ continue('$end_of_table')    -> false;
 continue({[_|_], _})         -> true;
 continue({[], Continuation}) -> continue(mnesia:select(Continuation)).
 
-remove_for_destination(DstName, FwdDeleteFun) ->
-    Bindings =
-        [begin
-             Route = reverse_route(ReverseRoute),
-             ok = FwdDeleteFun(Route),
-             ok = mnesia:delete_object(rabbit_reverse_route,
-                                       ReverseRoute, write),
-             Route#route.binding
-         end || ReverseRoute
-                    <- mnesia:match_object(
-                         rabbit_reverse_route,
-                         reverse_route(#route{
-                                          binding = #binding{
-                                            destination = DstName,
-                                            _           = '_'}}),
-                         write)],
+remove_for_destination(DstName, DeleteFun) ->
+    Match = reverse_route(
+              #route{binding = #binding{destination = DstName, _ = '_'}}),
+    ReverseRoutes = mnesia:match_object(rabbit_reverse_route, Match, write),
+    Bindings = [begin
+                    Route = reverse_route(ReverseRoute),
+                    ok = DeleteFun(Route),
+                    Route#route.binding
+                end || ReverseRoute <- ReverseRoutes],
     group_bindings_fold(fun maybe_auto_delete/3, new_deletions(),
                         lists:keysort(#binding.source, Bindings)).
 
@@ -331,30 +366,18 @@ group_bindings_fold(Fun, SrcName, Acc, Removed, Bindings) ->
     group_bindings_fold(Fun, Fun(SrcName, Bindings, Acc), Removed).
 
 maybe_auto_delete(XName, Bindings, Deletions) ->
-    case mnesia:read({rabbit_exchange, XName}) of
-        [] ->
-            add_deletion(XName, {undefined, not_deleted, Bindings}, Deletions);
-        [X] ->
-            add_deletion(XName, {X, not_deleted, Bindings},
-                         case rabbit_exchange:maybe_auto_delete(X) of
-                             not_deleted           -> Deletions;
-                             {deleted, Deletions1} -> combine_deletions(
-                                                        Deletions, Deletions1)
-                         end)
-    end.
-
-delete_forward_routes(Route) ->
-    ok = mnesia:delete_object(rabbit_route, Route, write),
-    ok = mnesia:delete_object(rabbit_durable_route, Route, write).
-
-delete_transient_forward_routes(Route) ->
-    ok = mnesia:delete_object(rabbit_route, Route, write).
-
-route_with_reverse(#route{binding = Binding}) ->
-    route_with_reverse(Binding);
-route_with_reverse(Binding = #binding{}) ->
-    Route = #route{binding = Binding},
-    {Route, reverse_route(Route)}.
+    {Entry, Deletions1} =
+        case mnesia:read({rabbit_exchange, XName}) of
+            []  -> {{undefined, not_deleted, Bindings}, Deletions};
+            [X] -> case rabbit_exchange:maybe_auto_delete(X) of
+                       not_deleted ->
+                           {{X, not_deleted, Bindings}, Deletions};
+                       {deleted, Deletions2} ->
+                           {{X, deleted, Bindings},
+                            combine_deletions(Deletions, Deletions2)}
+                   end
+        end,
+    add_deletion(XName, Entry, Deletions1).
 
 reverse_route(#route{binding = Binding}) ->
     #reverse_route{reverse_binding = reverse_binding(Binding)};
@@ -404,19 +427,29 @@ merge_entry({X1, Deleted1, Bindings1}, {X2, Deleted2, Bindings2}) ->
      anything_but(not_deleted, Deleted1, Deleted2),
      [Bindings1 | Bindings2]}.
 
-process_deletions(Deletions, Tx) ->
-    dict:fold(
-      fun (_XName, {X, Deleted, Bindings}, ok) ->
-              FlatBindings = lists:flatten(Bindings),
-              [rabbit_event:notify_if(not Tx, binding_deleted, info(B)) ||
-                  B <- FlatBindings],
-              case Deleted of
-                  not_deleted ->
-                      rabbit_exchange:callback(X, remove_bindings,
-                                               [Tx, X, FlatBindings]);
-                  deleted ->
-                      rabbit_event:notify_if(not Tx, exchange_deleted,
-                                             [{name, X#exchange.name}]),
-                      rabbit_exchange:callback(X, delete, [Tx, X, FlatBindings])
-              end
-      end, ok, Deletions).
+process_deletions(Deletions) ->
+    AugmentedDeletions =
+        dict:map(fun (_XName, {X, deleted, Bindings}) ->
+                         Bs = lists:flatten(Bindings),
+                         x_callback(transaction, X, delete, Bs),
+                         {X, deleted, Bs, none};
+                     (_XName, {X, not_deleted, Bindings}) ->
+                         Bs = lists:flatten(Bindings),
+                         x_callback(transaction, X, remove_bindings, Bs),
+                         {X, not_deleted, Bs, rabbit_exchange:serial(X)}
+                 end, Deletions),
+    fun() ->
+            dict:fold(fun (XName, {X, deleted, Bs, Serial}, ok) ->
+                              ok = rabbit_event:notify(
+                                     exchange_deleted, [{name, XName}]),
+                              del_notify(Bs),
+                              x_callback(Serial, X, delete, Bs);
+                          (_XName, {X, not_deleted, Bs, Serial}, ok) ->
+                              del_notify(Bs),
+                              x_callback(Serial, X, remove_bindings, Bs)
+                      end, ok, AugmentedDeletions)
+    end.
+
+del_notify(Bs) -> [rabbit_event:notify(binding_deleted, info(B)) || B <- Bs].
+
+x_callback(Arg, X, F, Bs) -> ok = rabbit_exchange:callback(X, F, [Arg, X, Bs]).
diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl
index eb80e437..6fbbc93e 100644
--- a/src/rabbit_channel.erl
+++ b/src/rabbit_channel.erl
@@ -20,21 +20,22 @@
 
 -behaviour(gen_server2).
 
--export([start_link/7, do/2, do/3, flush/1, shutdown/1]).
+-export([start_link/10, do/2, do/3, flush/1, shutdown/1]).
 -export([send_command/2, deliver/4, flushed/2, confirm/2]).
 -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]).
--export([emit_stats/1]).
+-export([refresh_config_all/0, emit_stats/1, ready_for_close/1]).
 
 -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
          handle_info/2, handle_pre_hibernate/1, prioritise_call/3,
-         prioritise_cast/2]).
+         prioritise_cast/2, format_message_queue/2]).
 
--record(ch, {state, channel, reader_pid, writer_pid, limiter_pid,
-             start_limiter_fun, transaction_id, tx_participants, next_tag,
-             uncommitted_ack_q, unacked_message_q,
+-record(ch, {state, protocol, channel, reader_pid, writer_pid, conn_pid,
+             limiter_pid, start_limiter_fun, tx_status, next_tag,
+             unacked_message_q, uncommitted_message_q, uncommitted_ack_q,
              user, virtual_host, most_recently_declared_queue,
-             consumer_mapping, blocking, queue_collector_pid, stats_timer,
-             confirm_enabled, publish_seqno, unconfirmed, confirmed}).
+             consumer_mapping, blocking, consumer_monitors, queue_collector_pid,
+             stats_timer, confirm_enabled, publish_seqno, unconfirmed_mq,
+             unconfirmed_qm, confirmed, capabilities, trace_state}).
 
 -define(MAX_PERMISSION_CACHE_SIZE, 12).
 
@@ -45,6 +46,7 @@
          consumer_count,
          messages_unacknowledged,
          messages_unconfirmed,
+         messages_uncommitted,
          acks_uncommitted,
          prefetch_count,
          client_flow_blocked]).
@@ -66,10 +68,10 @@
 
 -type(channel_number() :: non_neg_integer()).
 
--spec(start_link/7 ::
-      (channel_number(), pid(), pid(), rabbit_types:user(),
-       rabbit_types:vhost(), pid(),
-       fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) ->
+-spec(start_link/10 ::
+        (channel_number(), pid(), pid(), pid(), rabbit_types:protocol(),
+         rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(),
+         pid(), fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) ->
                            rabbit_types:ok_pid_or_error()).
 -spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok').
 -spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(),
@@ -88,16 +90,19 @@
 -spec(info/2 :: (pid(), rabbit_types:info_keys()) -> rabbit_types:infos()).
 -spec(info_all/0 :: () -> [rabbit_types:infos()]).
 -spec(info_all/1 :: (rabbit_types:info_keys()) -> [rabbit_types:infos()]).
+-spec(refresh_config_all/0 :: () -> 'ok').
 -spec(emit_stats/1 :: (pid()) -> 'ok').
+-spec(ready_for_close/1 :: (pid()) -> 'ok').
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-start_link(Channel, ReaderPid, WriterPid, User, VHost, CollectorPid,
-           StartLimiterFun) ->
-    gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid, User,
-                                     VHost, CollectorPid, StartLimiterFun], []).
+start_link(Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost,
+           Capabilities, CollectorPid, StartLimiterFun) ->
+    gen_server2:start_link(
+      ?MODULE, [Channel, ReaderPid, WriterPid, ConnPid, Protocol, User,
+                VHost, Capabilities, CollectorPid, StartLimiterFun], []).
 
 do(Pid, Method) ->
     do(Pid, Method, none).
@@ -106,7 +111,7 @@ do(Pid, Method, Content) ->
     gen_server2:cast(Pid, {method, Method, Content}).
 
 flush(Pid) ->
-    gen_server2:call(Pid, flush).
+    gen_server2:call(Pid, flush, infinity).
 
 shutdown(Pid) ->
     gen_server2:cast(Pid, terminate).
@@ -143,38 +148,52 @@ info_all() ->
 info_all(Items) ->
     rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()).
 
+refresh_config_all() ->
+    rabbit_misc:upmap(
+      fun (C) -> gen_server2:call(C, refresh_config) end, list()),
+    ok.
+
 emit_stats(Pid) ->
     gen_server2:cast(Pid, emit_stats).
 
+ready_for_close(Pid) ->
+    gen_server2:cast(Pid, ready_for_close).
+
 %%---------------------------------------------------------------------------
 
-init([Channel, ReaderPid, WriterPid, User, VHost, CollectorPid,
-      StartLimiterFun]) ->
+init([Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost,
+      Capabilities, CollectorPid, StartLimiterFun]) ->
     process_flag(trap_exit, true),
     ok = pg_local:join(rabbit_channels, self()),
     StatsTimer = rabbit_event:init_stats_timer(),
     State = #ch{state                   = starting,
+                protocol                = Protocol,
                 channel                 = Channel,
                 reader_pid              = ReaderPid,
                 writer_pid              = WriterPid,
+                conn_pid                = ConnPid,
                 limiter_pid             = undefined,
                 start_limiter_fun       = StartLimiterFun,
-                transaction_id          = none,
-                tx_participants         = sets:new(),
+                tx_status               = none,
                 next_tag                = 1,
-                uncommitted_ack_q       = queue:new(),
                 unacked_message_q       = queue:new(),
+                uncommitted_message_q   = queue:new(),
+                uncommitted_ack_q       = queue:new(),
                 user                    = User,
                 virtual_host            = VHost,
                 most_recently_declared_queue = <<>>,
                 consumer_mapping        = dict:new(),
                 blocking                = dict:new(),
+                consumer_monitors       = dict:new(),
                 queue_collector_pid     = CollectorPid,
                 stats_timer             = StatsTimer,
                 confirm_enabled         = false,
                 publish_seqno           = 1,
-                unconfirmed             = gb_trees:empty(),
-                confirmed               = []},
+                unconfirmed_mq          = gb_trees:empty(),
+                unconfirmed_qm          = gb_trees:empty(),
+                confirmed               = [],
+                capabilities            = Capabilities,
+                trace_state             = rabbit_trace:init(VHost)},
     rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State)),
     rabbit_event:if_enabled(StatsTimer,
                             fun() -> internal_emit_stats(State) end),
@@ -207,6 +226,9 @@ handle_call({info, Items}, _From, State) ->
     catch Error -> reply({error, Error}, State)
     end;
 
+handle_call(refresh_config, _From, State = #ch{virtual_host = VHost}) ->
+    reply(ok, State#ch{trace_state = rabbit_trace:init(VHost)});
+
 handle_call(_Request, _From, State) ->
     noreply(State).
 
@@ -218,14 +240,11 @@ handle_cast({method, Method, Content}, State) ->
         {noreply, NewState} ->
             noreply(NewState);
         stop ->
-            {stop, normal, State#ch{state = terminating}}
+            {stop, normal, State}
     catch
         exit:Reason = #amqp_error{} ->
             MethodName = rabbit_misc:method_record_type(Method),
-            {stop, normal, terminating(Reason#amqp_error{method = MethodName},
-                                       State)};
-        exit:normal ->
-            {stop, normal, State};
+            send_exception(Reason#amqp_error{method = MethodName}, State);
         _:Reason ->
             {stop, {Reason, erlang:get_stacktrace()}, State}
     end;
@@ -233,9 +252,19 @@ handle_cast({method, Method, Content}, State) ->
 handle_cast({flushed, QPid}, State) ->
     {noreply, queue_blocked(QPid, State), hibernate};
 
+handle_cast(ready_for_close, State = #ch{state      = closing,
+                                         writer_pid = WriterPid}) ->
+    ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}),
+    {stop, normal, State};
+
 handle_cast(terminate, State) ->
     {stop, normal, State};
 
+handle_cast({command, #'basic.consume_ok'{consumer_tag = ConsumerTag} = Msg},
+            State = #ch{writer_pid = WriterPid}) ->
+    ok = rabbit_writer:send_command(WriterPid, Msg),
+    noreply(monitor_consumer(ConsumerTag, State));
+
 handle_cast({command, Msg}, State = #ch{writer_pid = WriterPid}) ->
     ok = rabbit_writer:send_command(WriterPid, Msg),
     noreply(State);
@@ -243,10 +272,11 @@ handle_cast({command, Msg}, State = #ch{writer_pid = WriterPid}) ->
 handle_cast({deliver, ConsumerTag, AckRequired,
              Msg = {_QName, QPid, _MsgId, Redelivered,
                     #basic_message{exchange_name = ExchangeName,
-                                   routing_key = RoutingKey,
+                                   routing_keys = [RoutingKey | _CcRoutes],
                                    content = Content}}},
-            State = #ch{writer_pid = WriterPid,
-                        next_tag = DeliveryTag}) ->
+            State = #ch{writer_pid  = WriterPid,
+                        next_tag    = DeliveryTag,
+                        trace_state = TraceState}) ->
     State1 = lock_message(AckRequired,
                           ack_record(DeliveryTag, ConsumerTag, Msg),
                           State),
@@ -257,12 +287,12 @@ handle_cast({deliver, ConsumerTag, AckRequired,
                          exchange = ExchangeName#resource.name,
                          routing_key = RoutingKey},
     rabbit_writer:send_command_and_notify(WriterPid, QPid, self(), M, Content),
-
-    maybe_incr_stats([{QPid, 1}],
-                     case AckRequired of
-                         true  -> deliver;
-                         false -> deliver_no_ack
-                     end, State),
+    maybe_incr_stats([{QPid, 1}], case AckRequired of
+                                      true  -> deliver;
+                                      false -> deliver_no_ack
+                                  end, State),
+    maybe_incr_redeliver_stats(Redelivered, QPid, State),
+    rabbit_trace:tap_trace_out(Msg, TraceState),
     noreply(State1#ch{next_tag = DeliveryTag + 1});
 
 handle_cast({confirm, MsgSeqNos, From}, State) ->
@@ -278,20 +308,18 @@ handle_info(emit_stats, State = #ch{stats_timer = StatsTimer}) ->
             State#ch{
               stats_timer = rabbit_event:reset_stats_timer(StatsTimer)});
 
-handle_info({'DOWN', _MRef, process, QPid, Reason},
-            State = #ch{unconfirmed = UC}) ->
-    %% TODO: this does a complete scan and partial rebuild of the
-    %% tree, which is quite efficient. To do better we'd need to
-    %% maintain a secondary mapping, from QPids to MsgSeqNos.
-    {MXs, UC1} = remove_queue_unconfirmed(
-                   gb_trees:next(gb_trees:iterator(UC)), QPid,
-                   {[], UC}, State),
-    erase_queue_stats(QPid),
-    State1 = case Reason of
-                 normal -> record_confirms(MXs, State#ch{unconfirmed = UC1});
-                 _      -> send_nacks(MXs, State#ch{unconfirmed = UC1})
-             end,
-    noreply(queue_blocked(QPid, State1)).
+handle_info({'DOWN', MRef, process, QPid, Reason},
+            State = #ch{consumer_monitors = ConsumerMonitors}) ->
+    noreply(
+      case dict:find(MRef, ConsumerMonitors) of
+          error ->
+              handle_publishing_queue_down(QPid, Reason, State);
+          {ok, ConsumerTag} ->
+              handle_consuming_queue_down(MRef, ConsumerTag, State)
+      end);
+
+handle_info({'EXIT', _Pid, Reason}, State) ->
+    {stop, Reason, State}.
 
 handle_pre_hibernate(State = #ch{stats_timer = StatsTimer}) ->
     ok = clear_permission_cache(),
@@ -303,22 +331,22 @@ handle_pre_hibernate(State = #ch{stats_timer = StatsTimer}) ->
     StatsTimer1 = rabbit_event:stop_stats_timer(StatsTimer),
     {hibernate, State#ch{stats_timer = StatsTimer1}}.
 
-terminate(_Reason, State = #ch{state = terminating}) ->
-    terminate(State);
-
 terminate(Reason, State) ->
-    Res = rollback_and_notify(State),
+    {Res, _State1} = notify_queues(State),
     case Reason of
         normal            -> ok = Res;
         shutdown          -> ok = Res;
         {shutdown, _Term} -> ok = Res;
         _                 -> ok
     end,
-    terminate(State).
+    pg_local:leave(rabbit_channels, self()),
+    rabbit_event:notify(channel_closed, [{pid, self()}]).
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
 
+format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
+
 %%---------------------------------------------------------------------------
 
 reply(Reply, NewState) -> reply(Reply, [], NewState).
@@ -351,10 +379,23 @@ return_ok(State, false, Msg)  -> {reply, Msg, State}.
 ok_msg(true, _Msg) -> undefined;
 ok_msg(false, Msg) -> Msg.
 
-terminating(Reason, State = #ch{channel = Channel, reader_pid = Reader}) ->
-    ok = rollback_and_notify(State),
-    Reader ! {channel_exit, Channel, Reason},
-    State#ch{state = terminating}.
+send_exception(Reason, State = #ch{protocol   = Protocol,
+                                   channel    = Channel,
+                                   writer_pid = WriterPid,
+                                   reader_pid = ReaderPid,
+                                   conn_pid   = ConnPid}) ->
+    {CloseChannel, CloseMethod} =
+        rabbit_binary_generator:map_exception(Channel, Reason, Protocol),
+    rabbit_log:error("connection ~p, channel ~p - error:~n~p~n",
+                     [ConnPid, Channel, Reason]),
+    %% something bad's happened: notify_queues may not be 'ok'
+    {_Result, State1} = notify_queues(State),
+    case CloseChannel of
+        Channel -> ok = rabbit_writer:send_command(WriterPid, CloseMethod),
+                   {noreply, State1};
+        _       -> ReaderPid ! {channel_exit, Channel, Reason},
+                   {stop, normal, State1}
+    end.
 
 return_queue_declare_ok(#resource{name = ActualName},
                         NoWait, MessageCount, ConsumerCount, State) ->
@@ -476,13 +517,6 @@ queue_blocked(QPid, State = #ch{blocking = Blocking}) ->
                       State#ch{blocking = Blocking1}
     end.
 
-remove_queue_unconfirmed(none, _QPid, Acc, _State) ->
-    Acc;
-remove_queue_unconfirmed({MsgSeqNo, XQ, Next}, QPid, Acc, State) ->
-    remove_queue_unconfirmed(gb_trees:next(Next), QPid,
-                             remove_qmsg(MsgSeqNo, QPid, XQ, Acc, State),
-                             State).
-
 record_confirm(undefined, _, State) ->
     State;
 record_confirm(MsgSeqNo, XName, State) ->
@@ -495,25 +529,42 @@ record_confirms(MXs, State = #ch{confirmed = C}) ->
 
 confirm([], _QPid, State) ->
     State;
-confirm(MsgSeqNos, QPid, State = #ch{unconfirmed = UC}) ->
-    {MXs, UC1} =
+confirm(MsgSeqNos, QPid, State) ->
+    {MXs, State1} = process_confirms(MsgSeqNos, QPid, false, State),
+    record_confirms(MXs, State1).
+
+process_confirms(MsgSeqNos, QPid, Nack, State = #ch{unconfirmed_mq = UMQ,
+                                                    unconfirmed_qm = UQM}) ->
+    {MXs, UMQ1, UQM1} =
         lists:foldl(
-          fun(MsgSeqNo, {_DMs, UC0} = Acc) ->
-                  case gb_trees:lookup(MsgSeqNo, UC0) of
-                      none        -> Acc;
-                      {value, XQ} -> remove_qmsg(MsgSeqNo, QPid, XQ, Acc, State)
+          fun(MsgSeqNo, {_MXs, UMQ0, _UQM} = Acc) ->
+                  case gb_trees:lookup(MsgSeqNo, UMQ0) of
+                      {value, XQ} -> remove_unconfirmed(MsgSeqNo, QPid, XQ,
+                                                        Acc, Nack);
+                      none        -> Acc
                   end
-          end, {[], UC}, MsgSeqNos),
-    record_confirms(MXs, State#ch{unconfirmed = UC1}).
-
-remove_qmsg(MsgSeqNo, QPid, {XName, Qs}, {MXs, UC}, State) ->
-    Qs1 = sets:del_element(QPid, Qs),
-    %% these confirms will be emitted even when a queue dies, but that
-    %% should be fine, since the queue stats get erased immediately
-    maybe_incr_stats([{{QPid, XName}, 1}], confirm, State),
-    case sets:size(Qs1) of
-        0 -> {[{MsgSeqNo, XName} | MXs], gb_trees:delete(MsgSeqNo, UC)};
-        _ -> {MXs, gb_trees:update(MsgSeqNo, {XName, Qs1}, UC)}
+          end, {[], UMQ, UQM}, MsgSeqNos),
+    {MXs, State#ch{unconfirmed_mq = UMQ1, unconfirmed_qm = UQM1}}.
+
+remove_unconfirmed(MsgSeqNo, QPid, {XName, Qs}, {MXs, UMQ, UQM}, Nack) ->
+    UQM1 = case gb_trees:lookup(QPid, UQM) of
+               {value, MsgSeqNos} ->
+                   MsgSeqNos1 = gb_sets:delete(MsgSeqNo, MsgSeqNos),
+                   case gb_sets:is_empty(MsgSeqNos1) of
+                       true  -> gb_trees:delete(QPid, UQM);
+                       false -> gb_trees:update(QPid, MsgSeqNos1, UQM)
+                   end;
+               none ->
+                   UQM
+           end,
+    Qs1 = gb_sets:del_element(QPid, Qs),
+    %% If QPid somehow died initiating a nack, clear the message from
+    %% internal data structures.  Also, clean up empty entries.
+    case (Nack orelse gb_sets:is_empty(Qs1)) of
+        true  ->
+            {[{MsgSeqNo, XName} | MXs], gb_trees:delete(MsgSeqNo, UMQ), UQM1};
+        false ->
+            {MXs, gb_trees:update(MsgSeqNo, {XName, Qs1}, UMQ), UQM1}
     end.
 
 handle_method(#'channel.open'{}, _, State = #ch{state = starting}) ->
@@ -526,11 +577,29 @@ handle_method(#'channel.open'{}, _, _State) ->
 handle_method(_Method, _, #ch{state = starting}) ->
     rabbit_misc:protocol_error(channel_error, "expected 'channel.open'", []);
 
-handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) ->
-    ok = rollback_and_notify(State),
-    ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}),
+handle_method(#'channel.close_ok'{}, _, #ch{state = closing}) ->
     stop;
 
+handle_method(#'channel.close'{}, _, State = #ch{state = closing}) ->
+    {reply, #'channel.close_ok'{}, State};
+
+handle_method(_Method, _, State = #ch{state = closing}) ->
+    {noreply, State};
+
+handle_method(#'channel.close'{}, _, State = #ch{reader_pid = ReaderPid}) ->
+    {ok, State1} = notify_queues(State),
+    ReaderPid ! {channel_closing, self()},
+    {noreply, State1};
+
+%% Even though the spec prohibits the client from sending commands
+%% while waiting for the reply to a synchronous command, we generally
+%% do allow this...except in the case of a pending tx.commit, where
+%% it could wreak havoc.
+handle_method(_Method, _, #ch{tx_status = TxStatus})
+  when TxStatus =/= none andalso TxStatus =/= in_progress ->
+    rabbit_misc:protocol_error(
+      channel_error, "unexpected command while processing 'tx.commit'", []);
+
 handle_method(#'access.request'{},_, State) ->
     {reply, #'access.request_ok'{ticket = 1}, State};
 
@@ -539,8 +608,9 @@ handle_method(#'basic.publish'{exchange    = ExchangeNameBin,
                                mandatory   = Mandatory,
                                immediate   = Immediate},
               Content, State = #ch{virtual_host    = VHostPath,
-                                   transaction_id  = TxnKey,
-                                   confirm_enabled = ConfirmEnabled}) ->
+                                   tx_status       = TxStatus,
+                                   confirm_enabled = ConfirmEnabled,
+                                   trace_state     = TraceState}) ->
     ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
     check_write_permitted(ExchangeName, State),
     Exchange = rabbit_exchange:lookup_or_die(ExchangeName),
@@ -549,32 +619,29 @@ handle_method(#'basic.publish'{exchange    = ExchangeNameBin,
     %% certain to want to look at delivery-mode and priority.
     DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content),
     check_user_id_header(DecodedContent#content.properties, State),
-    IsPersistent = is_message_persistent(DecodedContent),
     {MsgSeqNo, State1} =
-        case ConfirmEnabled of
-            false -> {undefined, State};
-            true  -> SeqNo = State#ch.publish_seqno,
-                     {SeqNo, State#ch{publish_seqno = SeqNo + 1}}
+        case {TxStatus, ConfirmEnabled} of
+            {none, false} -> {undefined, State};
+            {_, _}        -> SeqNo = State#ch.publish_seqno,
+                             {SeqNo, State#ch{publish_seqno = SeqNo + 1}}
         end,
-    Message = #basic_message{exchange_name = ExchangeName,
-                             routing_key   = RoutingKey,
-                             content       = DecodedContent,
-                             guid          = rabbit_guid:guid(),
-                             is_persistent = IsPersistent},
-    {RoutingRes, DeliveredQPids} =
-        rabbit_exchange:publish(
-          Exchange,
-          rabbit_basic:delivery(Mandatory, Immediate, TxnKey, Message,
-                                MsgSeqNo)),
-    State2 = process_routing_result(RoutingRes, DeliveredQPids, ExchangeName,
-                                    MsgSeqNo, Message, State1),
-    maybe_incr_stats([{ExchangeName, 1} |
-                      [{{QPid, ExchangeName}, 1} ||
-                          QPid <- DeliveredQPids]], publish, State2),
-    {noreply, case TxnKey of
-                  none -> State2;
-                  _    -> add_tx_participants(DeliveredQPids, State2)
-              end};
+    case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of
+        {ok, Message} ->
+            rabbit_trace:tap_trace_in(Message, TraceState),
+            Delivery = rabbit_basic:delivery(Mandatory, Immediate, Message,
+                                             MsgSeqNo),
+            QNames = rabbit_exchange:route(Exchange, Delivery),
+            {noreply,
+             case TxStatus of
+                 none        -> deliver_to_queues({Delivery, QNames}, State1);
+                 in_progress -> TMQ = State1#ch.uncommitted_message_q,
+                                NewTMQ = queue:in({Delivery, QNames}, TMQ),
+                                State1#ch{uncommitted_message_q = NewTMQ}
+             end};
+        {error, Reason} ->
+            rabbit_misc:protocol_error(precondition_failed,
+                                       "invalid message: ~p", [Reason])
+    end;
 
 handle_method(#'basic.nack'{delivery_tag = DeliveryTag,
                             multiple     = Multiple,
@@ -584,46 +651,42 @@ handle_method(#'basic.nack'{delivery_tag = DeliveryTag,
 
 handle_method(#'basic.ack'{delivery_tag = DeliveryTag,
                            multiple = Multiple},
-              _, State = #ch{transaction_id = TxnKey,
-                             unacked_message_q = UAMQ}) ->
+              _, State = #ch{unacked_message_q = UAMQ,
+                             tx_status = TxStatus}) ->
     {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple),
-    QIncs = ack(TxnKey, Acked),
-    Participants = [QPid || {QPid, _} <- QIncs],
-    maybe_incr_stats(QIncs, ack, State),
-    {noreply, case TxnKey of
-                  none -> ok = notify_limiter(State#ch.limiter_pid, Acked),
-                          State#ch{unacked_message_q = Remaining};
-                  _    -> NewUAQ = queue:join(State#ch.uncommitted_ack_q,
-                                              Acked),
-                          add_tx_participants(
-                            Participants,
-                            State#ch{unacked_message_q = Remaining,
-                                     uncommitted_ack_q = NewUAQ})
-              end};
+    State1 = State#ch{unacked_message_q = Remaining},
+    {noreply,
+     case TxStatus of
+         none        -> ack(Acked, State1);
+         in_progress -> NewTAQ = queue:join(State1#ch.uncommitted_ack_q, Acked),
+                        State1#ch{uncommitted_ack_q = NewTAQ}
+     end};
 
 handle_method(#'basic.get'{queue = QueueNameBin,
                            no_ack = NoAck},
-              _, State = #ch{writer_pid = WriterPid,
-                             reader_pid = ReaderPid,
-                             next_tag   = DeliveryTag}) ->
+              _, State = #ch{writer_pid  = WriterPid,
+                             conn_pid    = ConnPid,
+                             next_tag    = DeliveryTag,
+                             trace_state = TraceState}) ->
     QueueName = expand_queue_name_shortcut(QueueNameBin, State),
     check_read_permitted(QueueName, State),
     case rabbit_amqqueue:with_exclusive_access_or_die(
-           QueueName, ReaderPid,
+           QueueName, ConnPid,
            fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of
         {ok, MessageCount,
          Msg = {_QName, QPid, _MsgId, Redelivered,
                 #basic_message{exchange_name = ExchangeName,
-                               routing_key = RoutingKey,
+                               routing_keys = [RoutingKey | _CcRoutes],
                                content = Content}}} ->
             State1 = lock_message(not(NoAck),
                                   ack_record(DeliveryTag, none, Msg),
                                   State),
-            maybe_incr_stats([{QPid, 1}],
-                             case NoAck of
-                                 true  -> get_no_ack;
-                                 false -> get
-                             end, State),
+            maybe_incr_stats([{QPid, 1}], case NoAck of
+                                              true  -> get_no_ack;
+                                              false -> get
+                                          end, State),
+            maybe_incr_redeliver_stats(Redelivered, QPid, State),
+            rabbit_trace:tap_trace_out(Msg, TraceState),
             ok = rabbit_writer:send_command(
                    WriterPid,
                    #'basic.get_ok'{delivery_tag = DeliveryTag,
@@ -643,9 +706,9 @@ handle_method(#'basic.consume'{queue        = QueueNameBin,
                                no_ack       = NoAck,
                                exclusive    = ExclusiveConsume,
                                nowait       = NoWait},
-              _, State = #ch{reader_pid       = ReaderPid,
-                             limiter_pid      = LimiterPid,
-                             consumer_mapping = ConsumerMapping }) ->
+              _, State = #ch{conn_pid          = ConnPid,
+                             limiter_pid       = LimiterPid,
+                             consumer_mapping  = ConsumerMapping}) ->
     case dict:find(ConsumerTag, ConsumerMapping) of
         error ->
             QueueName = expand_queue_name_shortcut(QueueNameBin, State),
@@ -660,20 +723,26 @@ handle_method(#'basic.consume'{queue        = QueueNameBin,
             %% behalf. This is for symmetry with basic.cancel - see
             %% the comment in that method for why.
             case rabbit_amqqueue:with_exclusive_access_or_die(
-                   QueueName, ReaderPid,
+                   QueueName, ConnPid,
                    fun (Q) ->
-                           rabbit_amqqueue:basic_consume(
-                             Q, NoAck, self(), LimiterPid,
-                             ActualConsumerTag, ExclusiveConsume,
-                             ok_msg(NoWait, #'basic.consume_ok'{
-                                      consumer_tag = ActualConsumerTag}))
+                           {rabbit_amqqueue:basic_consume(
+                              Q, NoAck, self(), LimiterPid,
+                              ActualConsumerTag, ExclusiveConsume,
+                              ok_msg(NoWait, #'basic.consume_ok'{
+                                       consumer_tag = ActualConsumerTag})),
+                            Q}
                    end) of
-                ok ->
-                    {noreply, State#ch{consumer_mapping =
-                                       dict:store(ActualConsumerTag,
-                                                  QueueName,
-                                                  ConsumerMapping)}};
-                {error, exclusive_consume_unavailable} ->
+                {ok, Q} ->
+                    State1 = State#ch{consumer_mapping =
+                                          dict:store(ActualConsumerTag,
+                                                     {Q, undefined},
+                                                     ConsumerMapping)},
+                    {noreply,
+                     case NoWait of
+                         true  -> monitor_consumer(ActualConsumerTag, State1);
+                         false -> State1
+                     end};
+                {{error, exclusive_consume_unavailable}, _Q} ->
                     rabbit_misc:protocol_error(
                       access_refused, "~s in exclusive use",
                       [rabbit_misc:rs(QueueName)])
@@ -686,26 +755,31 @@ handle_method(#'basic.consume'{queue        = QueueNameBin,
 
 handle_method(#'basic.cancel'{consumer_tag = ConsumerTag,
                               nowait = NoWait},
-              _, State = #ch{consumer_mapping = ConsumerMapping }) ->
+              _, State = #ch{consumer_mapping = ConsumerMapping,
+                             consumer_monitors = ConsumerMonitors}) ->
     OkMsg = #'basic.cancel_ok'{consumer_tag = ConsumerTag},
     case dict:find(ConsumerTag, ConsumerMapping) of
         error ->
             %% Spec requires we ignore this situation.
             return_ok(State, NoWait, OkMsg);
-        {ok, QueueName} ->
-            NewState = State#ch{consumer_mapping =
-                                dict:erase(ConsumerTag,
-                                           ConsumerMapping)},
-            case rabbit_amqqueue:with(
-                   QueueName,
-                   fun (Q) ->
-                           %% In order to ensure that no more messages
-                           %% are sent to the consumer after the
-                           %% cancel_ok has been sent, we get the
-                           %% queue process to send the cancel_ok on
-                           %% our behalf. If we were sending the
-                           %% cancel_ok ourselves it might overtake a
-                           %% message sent previously by the queue.
+        {ok, {Q, MRef}} ->
+            ConsumerMonitors1 =
+                case MRef of
+                    undefined -> ConsumerMonitors;
+                    _         -> true = erlang:demonitor(MRef),
+                                 dict:erase(MRef, ConsumerMonitors)
+                end,
+            NewState = State#ch{consumer_mapping  = dict:erase(ConsumerTag,
+                                                               ConsumerMapping),
+                                consumer_monitors = ConsumerMonitors1},
+            %% In order to ensure that no more messages are sent to
+            %% the consumer after the cancel_ok has been sent, we get
+            %% the queue process to send the cancel_ok on our
+            %% behalf. If we were sending the cancel_ok ourselves it
+            %% might overtake a message sent previously by the queue.
+            case rabbit_misc:with_exit_handler(
+                   fun () -> {error, not_found} end,
+                   fun () ->
                            rabbit_amqqueue:basic_cancel(
                              Q, self(), ConsumerTag,
                              ok_msg(NoWait, #'basic.cancel_ok'{
@@ -816,7 +890,6 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin,
                                   nowait = NoWait},
               _, State = #ch{virtual_host = VHostPath}) ->
     ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
-    check_configure_permitted(ExchangeName, State),
     check_not_default_exchange(ExchangeName),
     _ = rabbit_exchange:lookup_or_die(ExchangeName),
     return_ok(State, NoWait, #'exchange.declare_ok'{});
@@ -864,10 +937,10 @@ handle_method(#'queue.declare'{queue       = QueueNameBin,
                                nowait      = NoWait,
                                arguments   = Args} = Declare,
               _, State = #ch{virtual_host        = VHostPath,
-                             reader_pid          = ReaderPid,
+                             conn_pid            = ConnPid,
                              queue_collector_pid = CollectorPid}) ->
     Owner = case ExclusiveDeclare of
-                true  -> ReaderPid;
+                true  -> ConnPid;
                 false -> none
             end,
     ActualNameBin = case QueueNameBin of
@@ -910,13 +983,12 @@ handle_method(#'queue.declare'{queue   = QueueNameBin,
                                passive = true,
                                nowait  = NoWait},
               _, State = #ch{virtual_host = VHostPath,
-                             reader_pid   = ReaderPid}) ->
+                             conn_pid     = ConnPid}) ->
     QueueName = rabbit_misc:r(VHostPath, queue, QueueNameBin),
-    check_configure_permitted(QueueName, State),
     {{ok, MessageCount, ConsumerCount}, #amqqueue{} = Q} =
         rabbit_amqqueue:with_or_die(
           QueueName, fun (Q) -> {rabbit_amqqueue:stat(Q), Q} end),
-    ok = rabbit_amqqueue:check_exclusive_access(Q, ReaderPid),
+    ok = rabbit_amqqueue:check_exclusive_access(Q, ConnPid),
     return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount,
                             State);
 
@@ -924,11 +996,11 @@ handle_method(#'queue.delete'{queue = QueueNameBin,
                               if_unused = IfUnused,
                               if_empty = IfEmpty,
                               nowait = NoWait},
-              _, State = #ch{reader_pid = ReaderPid}) ->
+              _, State = #ch{conn_pid = ConnPid}) ->
     QueueName = expand_queue_name_shortcut(QueueNameBin, State),
     check_configure_permitted(QueueName, State),
     case rabbit_amqqueue:with_exclusive_access_or_die(
-           QueueName, ReaderPid,
+           QueueName, ConnPid,
            fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of
         {error, in_use} ->
             rabbit_misc:protocol_error(
@@ -960,42 +1032,42 @@ handle_method(#'queue.unbind'{queue = QueueNameBin,
 
 handle_method(#'queue.purge'{queue = QueueNameBin,
                              nowait = NoWait},
-              _, State = #ch{reader_pid = ReaderPid}) ->
+              _, State = #ch{conn_pid = ConnPid}) ->
     QueueName = expand_queue_name_shortcut(QueueNameBin, State),
     check_read_permitted(QueueName, State),
     {ok, PurgedMessageCount} = rabbit_amqqueue:with_exclusive_access_or_die(
-                                 QueueName, ReaderPid,
+                                 QueueName, ConnPid,
                                  fun (Q) -> rabbit_amqqueue:purge(Q) end),
     return_ok(State, NoWait,
               #'queue.purge_ok'{message_count = PurgedMessageCount});
 
-
 handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) ->
     rabbit_misc:protocol_error(
       precondition_failed, "cannot switch from confirm to tx mode", []);
 
-handle_method(#'tx.select'{}, _, State = #ch{transaction_id = none}) ->
-    {reply, #'tx.select_ok'{}, new_tx(State)};
-
 handle_method(#'tx.select'{}, _, State) ->
-    {reply, #'tx.select_ok'{}, State};
+    {reply, #'tx.select_ok'{}, State#ch{tx_status = in_progress}};
 
-handle_method(#'tx.commit'{}, _, #ch{transaction_id = none}) ->
+handle_method(#'tx.commit'{}, _, #ch{tx_status = none}) ->
     rabbit_misc:protocol_error(
       precondition_failed, "channel is not transactional", []);
 
-handle_method(#'tx.commit'{}, _, State) ->
-    {reply, #'tx.commit_ok'{}, internal_commit(State)};
+handle_method(#'tx.commit'{}, _, State = #ch{uncommitted_message_q = TMQ,
+                                             uncommitted_ack_q     = TAQ}) ->
+    State1 = new_tx(ack(TAQ, rabbit_misc:queue_fold(fun deliver_to_queues/2,
+                                                    State, TMQ))),
+    {noreply, maybe_complete_tx(State1#ch{tx_status = committing})};
 
-handle_method(#'tx.rollback'{}, _, #ch{transaction_id = none}) ->
+handle_method(#'tx.rollback'{}, _, #ch{tx_status = none}) ->
     rabbit_misc:protocol_error(
       precondition_failed, "channel is not transactional", []);
 
-handle_method(#'tx.rollback'{}, _, State) ->
-    {reply, #'tx.rollback_ok'{}, internal_rollback(State)};
+handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q     = UAMQ,
+                                               uncommitted_ack_q     = TAQ}) ->
+    {reply, #'tx.rollback_ok'{}, new_tx(State#ch{unacked_message_q =
+                                                     queue:join(TAQ, UAMQ)})};
 
-handle_method(#'confirm.select'{}, _, #ch{transaction_id = TxId})
-  when TxId =/= none ->
+handle_method(#'confirm.select'{}, _, #ch{tx_status = in_progress}) ->
     rabbit_misc:protocol_error(
       precondition_failed, "cannot switch from tx to confirm mode", []);
 
@@ -1035,10 +1107,63 @@ handle_method(_MethodRecord, _Content, _State) ->
 
 %%----------------------------------------------------------------------------
 
+monitor_consumer(ConsumerTag, State = #ch{consumer_mapping = ConsumerMapping,
+                                          consumer_monitors = ConsumerMonitors,
+                                          capabilities = Capabilities}) ->
+    case rabbit_misc:table_lookup(
+           Capabilities, <<"consumer_cancel_notify">>) of
+        {bool, true} ->
+            {#amqqueue{pid = QPid} = Q, undefined} =
+                dict:fetch(ConsumerTag, ConsumerMapping),
+            MRef = erlang:monitor(process, QPid),
+            State#ch{consumer_mapping =
+                         dict:store(ConsumerTag, {Q, MRef}, ConsumerMapping),
+                     consumer_monitors =
+                         dict:store(MRef, ConsumerTag, ConsumerMonitors)};
+        _ ->
+            State
+    end.
+
+handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed_qm = UQM}) ->
+    MsgSeqNos = case gb_trees:lookup(QPid, UQM) of
+                    {value, MsgSet} -> gb_sets:to_list(MsgSet);
+                    none            -> []
+                end,
+    %% We remove the MsgSeqNos from UQM before calling
+    %% process_confirms to prevent each MsgSeqNo being removed from
+    %% the set one by one, which would be inefficient
+    State1 = State#ch{unconfirmed_qm = gb_trees:delete_any(QPid, UQM)},
+    {Nack, SendFun} =
+        case Reason of
+            Reason when Reason =:= noproc; Reason =:= noconnection;
+                        Reason =:= normal; Reason =:= shutdown ->
+                {false, fun record_confirms/2};
+            {shutdown, _} ->
+                {false, fun record_confirms/2};
+            _ ->
+                {true,  fun send_nacks/2}
+        end,
+    {MXs, State2} = process_confirms(MsgSeqNos, QPid, Nack, State1),
+    erase_queue_stats(QPid),
+    State3 = SendFun(MXs, State2),
+    queue_blocked(QPid, State3).
+
+handle_consuming_queue_down(MRef, ConsumerTag,
+                            State = #ch{consumer_mapping  = ConsumerMapping,
+                                        consumer_monitors = ConsumerMonitors,
+                                        writer_pid        = WriterPid}) ->
+    ConsumerMapping1 = dict:erase(ConsumerTag, ConsumerMapping),
+    ConsumerMonitors1 = dict:erase(MRef, ConsumerMonitors),
+    Cancel = #'basic.cancel'{consumer_tag = ConsumerTag,
+                             nowait       = true},
+    ok = rabbit_writer:send_command(WriterPid, Cancel),
+    State#ch{consumer_mapping = ConsumerMapping1,
+             consumer_monitors = ConsumerMonitors1}.
+
 binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin,
                RoutingKey, Arguments, ReturnMethod, NoWait,
                State = #ch{virtual_host = VHostPath,
-                           reader_pid   = ReaderPid}) ->
+                           conn_pid     = ConnPid }) ->
     %% FIXME: connection exception (!) on failure??
     %% (see rule named "failure" in spec-XML)
     %% FIXME: don't allow binding to internal exchanges -
@@ -1054,7 +1179,7 @@ binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin,
                       key         = ActualRoutingKey,
                       args        = Arguments},
              fun (_X, Q = #amqqueue{}) ->
-                     try rabbit_amqqueue:check_exclusive_access(Q, ReaderPid)
+                     try rabbit_amqqueue:check_exclusive_access(Q, ConnPid)
                      catch exit:Reason -> {error, Reason}
                      end;
                  (_X, #exchange{}) ->
@@ -1079,11 +1204,10 @@ binding_action(Fun, ExchangeNameBin, DestinationType, DestinationNameBin,
     end.
 
 basic_return(#basic_message{exchange_name = ExchangeName,
-                            routing_key   = RoutingKey,
+                            routing_keys  = [RoutingKey | _CcRoutes],
                             content       = Content},
-             WriterPid, Reason) ->
-    {_Close, ReplyCode, ReplyText} =
-        rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason),
+             #ch{protocol = Protocol, writer_pid = WriterPid}, Reason) ->
+    {_Close, ReplyCode, ReplyText} = Protocol:lookup_amqp_exception(Reason),
     ok = rabbit_writer:send_command(
            WriterPid,
            #'basic.return'{reply_code  = ReplyCode,
@@ -1128,52 +1252,24 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) ->
               precondition_failed, "unknown delivery tag ~w", [DeliveryTag])
     end.
 
-add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) ->
-    State#ch{tx_participants = sets:union(Participants,
-                                          sets:from_list(MoreP))}.
-
-ack(TxnKey, UAQ) ->
-    fold_per_queue(
-      fun (QPid, MsgIds, L) ->
-              ok = rabbit_amqqueue:ack(QPid, TxnKey, MsgIds, self()),
-              [{QPid, length(MsgIds)} | L]
-      end, [], UAQ).
-
-make_tx_id() -> rabbit_guid:guid().
-
-new_tx(State) ->
-    State#ch{transaction_id    = make_tx_id(),
-             tx_participants   = sets:new(),
-             uncommitted_ack_q = queue:new()}.
-
-internal_commit(State = #ch{transaction_id = TxnKey,
-                            tx_participants = Participants}) ->
-    case rabbit_amqqueue:commit_all(sets:to_list(Participants),
-                                    TxnKey, self()) of
-        ok              -> ok = notify_limiter(State#ch.limiter_pid,
-                                               State#ch.uncommitted_ack_q),
-                           new_tx(State);
-        {error, Errors} -> rabbit_misc:protocol_error(
-                             internal_error, "commit failed: ~w", [Errors])
-    end.
+ack(Acked, State) ->
+    QIncs = fold_per_queue(
+              fun (QPid, MsgIds, L) ->
+                      ok = rabbit_amqqueue:ack(QPid, MsgIds, self()),
+                      [{QPid, length(MsgIds)} | L]
+              end, [], Acked),
+    maybe_incr_stats(QIncs, ack, State),
+    ok = notify_limiter(State#ch.limiter_pid, Acked),
+    State.
+
+new_tx(State) -> State#ch{uncommitted_message_q = queue:new(),
+                          uncommitted_ack_q     = queue:new()}.
 
-internal_rollback(State = #ch{transaction_id = TxnKey,
-                              tx_participants = Participants,
-                              uncommitted_ack_q = UAQ,
-                              unacked_message_q = UAMQ}) ->
-    ?LOGDEBUG("rollback ~p~n  - ~p acks uncommitted, ~p messages unacked~n",
-              [self(),
-               queue:len(UAQ),
-               queue:len(UAMQ)]),
-    ok = rabbit_amqqueue:rollback_all(sets:to_list(Participants),
-                                      TxnKey, self()),
-    NewUAMQ = queue:join(UAQ, UAMQ),
-    new_tx(State#ch{unacked_message_q = NewUAMQ}).
-
-rollback_and_notify(State = #ch{transaction_id = none}) ->
-    notify_queues(State);
-rollback_and_notify(State) ->
-    notify_queues(internal_rollback(State)).
+notify_queues(State = #ch{state = closing}) ->
+    {ok, State};
+notify_queues(State = #ch{consumer_mapping = Consumers}) ->
+    {rabbit_amqqueue:notify_down_all(consumer_queues(Consumers), self()),
+     State#ch{state = closing}}.
 
 fold_per_queue(F, Acc0, UAQ) ->
     D = rabbit_misc:queue_fold(
@@ -1192,9 +1288,6 @@ start_limiter(State = #ch{unacked_message_q = UAMQ, start_limiter_fun = SLF}) ->
     ok = limit_queues(LPid, State),
     LPid.
 
-notify_queues(#ch{consumer_mapping = Consumers}) ->
-    rabbit_amqqueue:notify_down_all(consumer_queues(Consumers), self()).
-
 unlimit_queues(State) ->
     ok = limit_queues(undefined, State),
     undefined.
@@ -1203,16 +1296,9 @@ limit_queues(LPid, #ch{consumer_mapping = Consumers}) ->
     rabbit_amqqueue:limit_all(consumer_queues(Consumers), self(), LPid).
 
 consumer_queues(Consumers) ->
-    [QPid || QueueName <-
-                 sets:to_list(
-                   dict:fold(fun (_ConsumerTag, QueueName, S) ->
-                                     sets:add_element(QueueName, S)
-                             end, sets:new(), Consumers)),
-             case rabbit_amqqueue:lookup(QueueName) of
-                 {ok, Q} -> QPid = Q#amqqueue.pid, true;
-                 %% queue has been deleted in the meantime
-                 {error, not_found} -> QPid = none, false
-             end].
+    lists:usort([QPid ||
+                    {_Key, {#amqqueue{pid = QPid}, _MRef}}
+                        <- dict:to_list(Consumers)]).
 
 %% tell the limiter about the number of acks that have been received
 %% for messages delivered to subscribed consumers, but not acks for
@@ -1228,32 +1314,47 @@ notify_limiter(LimiterPid, Acked) ->
         Count -> rabbit_limiter:ack(LimiterPid, Count)
     end.
 
-is_message_persistent(Content) ->
-    case rabbit_basic:is_message_persistent(Content) of
-        {invalid, Other} ->
-            rabbit_log:warning("Unknown delivery mode ~p - "
-                               "treating as 1, non-persistent~n",
-                               [Other]),
-            false;
-        IsPersistent when is_boolean(IsPersistent) ->
-            IsPersistent
-    end.
+deliver_to_queues({Delivery = #delivery{message    = Message = #basic_message{
+                                                       exchange_name = XName},
+                                        msg_seq_no = MsgSeqNo},
+                   QNames}, State) ->
+    {RoutingRes, DeliveredQPids} = rabbit_router:deliver(QNames, Delivery),
+    State1 = process_routing_result(RoutingRes, DeliveredQPids,
+                                    XName, MsgSeqNo, Message, State),
+    maybe_incr_stats([{XName, 1} |
+                      [{{QPid, XName}, 1} ||
+                          QPid <- DeliveredQPids]], publish, State1),
+    State1.
 
 process_routing_result(unroutable,    _, XName,  MsgSeqNo, Msg, State) ->
-    ok = basic_return(Msg, State#ch.writer_pid, no_route),
+    ok = basic_return(Msg, State, no_route),
+    maybe_incr_stats([{Msg#basic_message.exchange_name, 1}],
+                     return_unroutable, State),
     record_confirm(MsgSeqNo, XName, State);
 process_routing_result(not_delivered, _, XName,  MsgSeqNo, Msg, State) ->
-    ok = basic_return(Msg, State#ch.writer_pid, no_consumers),
+    ok = basic_return(Msg, State, no_consumers),
+    maybe_incr_stats([{XName, 1}], return_not_delivered, State),
     record_confirm(MsgSeqNo, XName, State);
 process_routing_result(routed,       [], XName,  MsgSeqNo,   _, State) ->
     record_confirm(MsgSeqNo, XName, State);
 process_routing_result(routed,        _,     _, undefined,   _, State) ->
     State;
 process_routing_result(routed,    QPids, XName,  MsgSeqNo,   _, State) ->
-    #ch{unconfirmed = UC} = State,
-    [maybe_monitor(QPid) || QPid <- QPids],
-    UC1 = gb_trees:insert(MsgSeqNo, {XName, sets:from_list(QPids)}, UC),
-    State#ch{unconfirmed = UC1}.
+    #ch{unconfirmed_mq = UMQ, unconfirmed_qm = UQM} = State,
+    UMQ1 = gb_trees:insert(MsgSeqNo, {XName, gb_sets:from_list(QPids)}, UMQ),
+    SingletonSet = gb_sets:singleton(MsgSeqNo),
+    UQM1 = lists:foldl(
+             fun (QPid, UQM2) ->
+                     maybe_monitor(QPid),
+                     case gb_trees:lookup(QPid, UQM2) of
+                         {value, MsgSeqNos} ->
+                             MsgSeqNos1 = gb_sets:insert(MsgSeqNo, MsgSeqNos),
+                             gb_trees:update(QPid, MsgSeqNos1, UQM2);
+                         none ->
+                             gb_trees:insert(QPid, SingletonSet, UQM2)
+                     end
+             end, UQM, QPids),
+    State#ch{unconfirmed_mq = UMQ1, unconfirmed_qm = UQM1}.
 
 lock_message(true, MsgStruct, State = #ch{unacked_message_q = UAMQ}) ->
     State#ch{unacked_message_q = queue:in(MsgStruct, UAMQ)};
@@ -1262,20 +1363,25 @@ lock_message(false, _MsgStruct, State) ->
 
 send_nacks([], State) ->
     State;
-send_nacks(MXs, State) ->
+send_nacks(MXs, State = #ch{tx_status = none}) ->
     MsgSeqNos = [ MsgSeqNo || {MsgSeqNo, _} <- MXs ],
     coalesce_and_send(MsgSeqNos,
                       fun(MsgSeqNo, Multiple) ->
                               #'basic.nack'{delivery_tag = MsgSeqNo,
                                             multiple = Multiple}
-                      end, State).
+                      end, State);
+send_nacks(_, State) ->
+    maybe_complete_tx(State#ch{tx_status = failed}).
 
-send_confirms(State = #ch{confirmed = C}) ->
+send_confirms(State = #ch{tx_status = none, confirmed = C}) ->
     C1 = lists:append(C),
     MsgSeqNos = [ begin maybe_incr_stats([{ExchangeName, 1}], confirm, State),
                         MsgSeqNo
                   end || {MsgSeqNo, ExchangeName} <- C1 ],
-    send_confirms(MsgSeqNos, State #ch{confirmed = []}).
+    send_confirms(MsgSeqNos, State #ch{confirmed = []});
+send_confirms(State) ->
+    maybe_complete_tx(State).
+
 send_confirms([], State) ->
     State;
 send_confirms([MsgSeqNo], State = #ch{writer_pid = WriterPid}) ->
@@ -1289,11 +1395,11 @@ send_confirms(Cs, State) ->
                           end, State).
 
 coalesce_and_send(MsgSeqNos, MkMsgFun,
-                  State = #ch{writer_pid = WriterPid, unconfirmed = UC}) ->
+                  State = #ch{writer_pid = WriterPid, unconfirmed_mq = UMQ}) ->
     SMsgSeqNos = lists:usort(MsgSeqNos),
-    CutOff = case gb_trees:is_empty(UC) of
+    CutOff = case gb_trees:is_empty(UMQ) of
                  true  -> lists:last(SMsgSeqNos) + 1;
-                 false -> {SeqNo, _XQ} = gb_trees:smallest(UC), SeqNo
+                 false -> {SeqNo, _XQ} = gb_trees:smallest(UMQ), SeqNo
              end,
     {Ms, Ss} = lists:splitwith(fun(X) -> X < CutOff end, SMsgSeqNos),
     case Ms of
@@ -1305,28 +1411,44 @@ coalesce_and_send(MsgSeqNos, MkMsgFun,
             WriterPid, MkMsgFun(SeqNo, false)) || SeqNo <- Ss],
     State.
 
-terminate(_State) ->
-    pg_local:leave(rabbit_channels, self()),
-    rabbit_event:notify(channel_closed, [{pid, self()}]).
+maybe_complete_tx(State = #ch{tx_status = in_progress}) ->
+    State;
+maybe_complete_tx(State = #ch{unconfirmed_mq = UMQ}) ->
+    case gb_trees:is_empty(UMQ) of
+        false -> State;
+        true  -> complete_tx(State#ch{confirmed = []})
+    end.
+
+complete_tx(State = #ch{tx_status = committing}) ->
+    ok = rabbit_writer:send_command(State#ch.writer_pid, #'tx.commit_ok'{}),
+    State#ch{tx_status = in_progress};
+complete_tx(State = #ch{tx_status = failed}) ->
+    {noreply, State1} = send_exception(
+                          rabbit_misc:amqp_error(
+                            precondition_failed, "partial tx completion", [],
+                            'tx.commit'),
+                          State),
+    State1#ch{tx_status = in_progress}.
 
 infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].
 
-i(pid,            _)                                 -> self();
-i(connection,     #ch{reader_pid       = ReaderPid}) -> ReaderPid;
-i(number,         #ch{channel          = Channel})   -> Channel;
-i(user,           #ch{user             = User})      -> User#user.username;
-i(vhost,          #ch{virtual_host     = VHost})     -> VHost;
-i(transactional,  #ch{transaction_id   = TxnKey})    -> TxnKey =/= none;
-i(confirm,        #ch{confirm_enabled  = CE})        -> CE;
+i(pid,            _)                               -> self();
+i(connection,     #ch{conn_pid         = ConnPid}) -> ConnPid;
+i(number,         #ch{channel          = Channel}) -> Channel;
+i(user,           #ch{user             = User})    -> User#user.username;
+i(vhost,          #ch{virtual_host     = VHost})   -> VHost;
+i(transactional,  #ch{tx_status        = TE})      -> TE =/= none;
+i(confirm,        #ch{confirm_enabled  = CE})      -> CE;
 i(consumer_count, #ch{consumer_mapping = ConsumerMapping}) ->
     dict:size(ConsumerMapping);
-i(messages_unconfirmed, #ch{unconfirmed = UC}) ->
-    gb_trees:size(UC);
-i(messages_unacknowledged, #ch{unacked_message_q = UAMQ,
-                               uncommitted_ack_q = UAQ}) ->
-    queue:len(UAMQ) + queue:len(UAQ);
-i(acks_uncommitted, #ch{uncommitted_ack_q = UAQ}) ->
-    queue:len(UAQ);
+i(messages_unconfirmed, #ch{unconfirmed_mq = UMQ}) ->
+    gb_trees:size(UMQ);
+i(messages_unacknowledged, #ch{unacked_message_q = UAMQ}) ->
+    queue:len(UAMQ);
+i(messages_uncommitted, #ch{uncommitted_message_q = TMQ}) ->
+    queue:len(TMQ);
+i(acks_uncommitted, #ch{uncommitted_ack_q = TAQ}) ->
+    queue:len(TAQ);
 i(prefetch_count, #ch{limiter_pid = LimiterPid}) ->
     rabbit_limiter:get_limit(LimiterPid);
 i(client_flow_blocked, #ch{limiter_pid = LimiterPid}) ->
@@ -1334,6 +1456,11 @@ i(client_flow_blocked, #ch{limiter_pid = LimiterPid}) ->
 i(Item, _) ->
     throw({bad_argument, Item}).
 
+maybe_incr_redeliver_stats(true, QPid, State) ->
+    maybe_incr_stats([{QPid, 1}], redeliver, State);
+maybe_incr_redeliver_stats(_, _, _) ->
+    ok.
+
 maybe_incr_stats(QXIncs, Measure, #ch{stats_timer = StatsTimer}) ->
     case rabbit_event:stats_level(StatsTimer) of
         fine -> [incr_stats(QX, Inc, Measure) || {QX, Inc} <- QXIncs];
diff --git a/src/rabbit_channel_sup.erl b/src/rabbit_channel_sup.erl
index d21cfdb7..65ccca02 100644
--- a/src/rabbit_channel_sup.erl
+++ b/src/rabbit_channel_sup.erl
@@ -31,11 +31,13 @@
 -export_type([start_link_args/0]).
 
 -type(start_link_args() ::
-        {'tcp', rabbit_types:protocol(), rabbit_net:socket(),
-         rabbit_channel:channel_number(), non_neg_integer(), pid(),
-         rabbit_types:user(), rabbit_types:vhost(), pid()} |
-        {'direct', rabbit_channel:channel_number(), pid(), rabbit_types:user(),
-         rabbit_types:vhost(), pid()}).
+        {'tcp', rabbit_net:socket(), rabbit_channel:channel_number(),
+         non_neg_integer(), pid(), rabbit_types:protocol(), rabbit_types:user(),
+         rabbit_types:vhost(), rabbit_framing:amqp_table(),
+         pid()} |
+        {'direct', rabbit_channel:channel_number(), %% next: client, conn pids
+         pid(), pid(), rabbit_types:protocol(), rabbit_types:user(),
+         rabbit_types:vhost(), rabbit_framing:amqp_table(), pid()}).
 
 -spec(start_link/1 :: (start_link_args()) -> {'ok', pid(), {pid(), any()}}).
 
@@ -43,8 +45,8 @@
 
 %%----------------------------------------------------------------------------
 
-start_link({tcp, Protocol, Sock, Channel, FrameMax, ReaderPid, User, VHost,
-            Collector}) ->
+start_link({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol, User, VHost,
+            Capabilities, Collector}) ->
     {ok, SupPid} = supervisor2:start_link(?MODULE, []),
     {ok, WriterPid} =
         supervisor2:start_child(
@@ -56,20 +58,23 @@ start_link({tcp, Protocol, Sock, Channel, FrameMax, ReaderPid, User, VHost,
         supervisor2:start_child(
           SupPid,
           {channel, {rabbit_channel, start_link,
-                     [Channel, ReaderPid, WriterPid, User, VHost,
-                      Collector, start_limiter_fun(SupPid)]},
+                     [Channel, ReaderPid, WriterPid, ReaderPid, Protocol,
+                      User, VHost, Capabilities, Collector,
+                      start_limiter_fun(SupPid)]},
            intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}),
     {ok, AState} = rabbit_command_assembler:init(Protocol),
     {ok, SupPid, {ChannelPid, AState}};
-start_link({direct, Channel, ClientChannelPid, User, VHost, Collector}) ->
+start_link({direct, Channel, ClientChannelPid, ConnPid, Protocol, User, VHost,
+            Capabilities, Collector}) ->
     {ok, SupPid} = supervisor2:start_link(?MODULE, []),
     {ok, ChannelPid} =
         supervisor2:start_child(
-            SupPid,
-            {channel, {rabbit_channel, start_link,
-                       [Channel, ClientChannelPid, ClientChannelPid,
-                        User, VHost, Collector, start_limiter_fun(SupPid)]},
-             intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}),
+          SupPid,
+          {channel, {rabbit_channel, start_link,
+                     [Channel, ClientChannelPid, ClientChannelPid, ConnPid,
+                      Protocol, User, VHost, Capabilities, Collector,
+                      start_limiter_fun(SupPid)]},
+           intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}),
     {ok, SupPid, {ChannelPid, none}}.
 
 %%----------------------------------------------------------------------------
diff --git a/src/rabbit_client_sup.erl b/src/rabbit_client_sup.erl
index dbdc6cd4..15e92542 100644
--- a/src/rabbit_client_sup.erl
+++ b/src/rabbit_client_sup.erl
@@ -29,9 +29,9 @@
 -ifdef(use_specs).
 
 -spec(start_link/1 :: (mfa()) ->
-                          rabbit_types:ok_pid_or_error()).
+                           rabbit_types:ok_pid_or_error()).
 -spec(start_link/2 :: ({'local', atom()}, mfa()) ->
-                          rabbit_types:ok_pid_or_error()).
+                           rabbit_types:ok_pid_or_error()).
 
 -endif.
 
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl
index 80483097..6eb1aaba 100644
--- a/src/rabbit_control.erl
+++ b/src/rabbit_control.erl
@@ -20,11 +20,25 @@
 -export([start/0, stop/0, action/5, diagnostics/1]).
 
 -define(RPC_TIMEOUT, infinity).
+-define(WAIT_FOR_VM_ATTEMPTS, 5).
 
 -define(QUIET_OPT, "-q").
 -define(NODE_OPT, "-n").
 -define(VHOST_OPT, "-p").
 
+-define(GLOBAL_QUERIES,
+        [{"Connections", rabbit_networking, connection_info_all,
+          connection_info_keys},
+         {"Channels",  rabbit_channel,  info_all, info_keys}]).
+
+-define(VHOST_QUERIES,
+        [{"Queues",    rabbit_amqqueue, info_all, info_keys},
+         {"Exchanges", rabbit_exchange, info_all, info_keys},
+         {"Bindings",  rabbit_binding,  info_all, info_keys},
+         {"Consumers", rabbit_amqqueue, consumers_all, consumer_info_keys},
+         {"Permissions", rabbit_auth_backend_internal, list_vhost_permissions,
+          vhost_perms_info_keys}]).
+
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).
@@ -44,22 +58,18 @@
 
 start() ->
     {ok, [[NodeStr|_]|_]} = init:get_argument(nodename),
-    FullCommand = init:get_plain_arguments(),
-    case FullCommand of
-        [] -> usage();
-        _ -> ok
-    end,
     {[Command0 | Args], Opts} =
-        rabbit_misc:get_options(
-          [{flag, ?QUIET_OPT}, {option, ?NODE_OPT, NodeStr},
-           {option, ?VHOST_OPT, "/"}],
-          FullCommand),
-    Opts1 = lists:map(fun({K, V}) ->
-                              case K of
-                                  ?NODE_OPT -> {?NODE_OPT, rabbit_misc:makenode(V)};
-                                  _    -> {K, V}
-                              end
-                      end, Opts),
+        case rabbit_misc:get_options([{flag, ?QUIET_OPT},
+                                      {option, ?NODE_OPT, NodeStr},
+                                      {option, ?VHOST_OPT, "/"}],
+                                     init:get_plain_arguments()) of
+            {[], _Opts}    -> usage();
+            CmdArgsAndOpts -> CmdArgsAndOpts
+        end,
+    Opts1 = [case K of
+                 ?NODE_OPT -> {?NODE_OPT, rabbit_misc:makenode(V)};
+                 _         -> {K, V}
+             end || {K, V} <- Opts],
     Command = list_to_atom(Command0),
     Quiet = proplists:get_bool(?QUIET_OPT, Opts1),
     Node = proplists:get_value(?NODE_OPT, Opts1),
@@ -99,6 +109,23 @@ start() ->
 
 fmt_stderr(Format, Args) -> rabbit_misc:format_stderr(Format ++ "~n", Args).
 
+print_report(Node, {Descr, Module, InfoFun, KeysFun}) -> %% query with no args
+    io:format("~s:~n", [Descr]),
+    print_report0(Node, {Module, InfoFun, KeysFun}, []).
+
+print_report(Node, {Descr, Module, InfoFun, KeysFun}, VHostArg) ->
+    io:format("~s on ~s:~n", [Descr, VHostArg]), %% heading names the argument
+    print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg).
+
+print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg) ->
+    case Results = rpc_call(Node, Module, InfoFun, VHostArg) of %% arg list
+        [_|_] -> InfoItems = rpc_call(Node, Module, KeysFun, []),
+                 display_row([atom_to_list(I) || I <- InfoItems]), %% header
+                 display_info_list(Results, InfoItems);
+        _     -> ok %% anything but a non-empty list: print no table
+    end,
+    io:nl().
+
 print_error(Format, Args) -> fmt_stderr("Error: " ++ Format, Args).
 
 print_badrpc_diagnostics(Node) ->
@@ -106,24 +133,22 @@ print_badrpc_diagnostics(Node) ->
 
 diagnostics(Node) ->
     {_NodeName, NodeHost} = rabbit_misc:nodeparts(Node),
-    [
-        {"diagnostics:", []},
-        case net_adm:names(NodeHost) of
-            {error, EpmdReason} ->
-                {"- unable to connect to epmd on ~s: ~w",
-                    [NodeHost, EpmdReason]};
-            {ok, NamePorts} ->
-                {"- nodes and their ports on ~s: ~p",
-                              [NodeHost, [{list_to_atom(Name), Port} ||
-                                          {Name, Port} <- NamePorts]]}
-        end,
-        {"- current node: ~w", [node()]},
-        case init:get_argument(home) of
-            {ok, [[Home]]} -> {"- current node home dir: ~s", [Home]};
-            Other          -> {"- no current node home dir: ~p", [Other]}
-        end,
-        {"- current node cookie hash: ~s", [rabbit_misc:cookie_hash()]}
-    ].
+    [{"diagnostics:", []},
+     case net_adm:names(NodeHost) of
+         {error, EpmdReason} ->
+             {"- unable to connect to epmd on ~s: ~w",
+              [NodeHost, EpmdReason]};
+         {ok, NamePorts} ->
+             {"- nodes and their ports on ~s: ~p",
+              [NodeHost, [{list_to_atom(Name), Port} ||
+                             {Name, Port} <- NamePorts]]}
+     end,
+     {"- current node: ~w", [node()]},
+     case init:get_argument(home) of
+         {ok, [[Home]]} -> {"- current node home dir: ~s", [Home]};
+         Other          -> {"- no current node home dir: ~p", [Other]}
+     end,
+     {"- current node cookie hash: ~s", [rabbit_misc:cookie_hash()]}].
 
 stop() ->
     ok.
@@ -132,6 +157,8 @@ usage() ->
     io:format("~s", [rabbit_ctl_usage:usage()]),
     quit(1).
 
+%%----------------------------------------------------------------------------
+
 action(stop, Node, [], _Opts, Inform) ->
     Inform("Stopping and halting node ~p", [Node]),
     call(Node, {rabbit, stop_and_halt, []});
@@ -155,22 +182,30 @@ action(force_reset, Node, [], _Opts, Inform) ->
 action(cluster, Node, ClusterNodeSs, _Opts, Inform) ->
     ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs),
     Inform("Clustering node ~p with ~p",
-              [Node, ClusterNodes]),
+           [Node, ClusterNodes]),
     rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]);
 
 action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) ->
     ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs),
     Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)",
-              [Node, ClusterNodes]),
+           [Node, ClusterNodes]),
     rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]);
 
+action(wait, Node, [], _Opts, Inform) ->
+    Inform("Waiting for ~p", [Node]),
+    wait_for_application(Node, ?WAIT_FOR_VM_ATTEMPTS);
+
 action(status, Node, [], _Opts, Inform) ->
     Inform("Status of node ~p", [Node]),
-    case call(Node, {rabbit, status, []}) of
-        {badrpc, _} = Res -> Res;
-        Res               -> io:format("~p~n", [Res]),
-                             ok
-    end;
+    display_call_result(Node, {rabbit, status, []});
+
+action(cluster_status, Node, [], _Opts, Inform) ->
+    Inform("Cluster status of node ~p", [Node]),
+    display_call_result(Node, {rabbit_mnesia, status, []});
+
+action(environment, Node, _App, _Opts, Inform) ->
+    Inform("Application environment of node ~p", [Node]),
+    display_call_result(Node, {rabbit, environment, []});
 
 action(rotate_logs, Node, [], _Opts, Inform) ->
     Inform("Reopening logs for node ~p", [Node]),
@@ -200,17 +235,17 @@ action(clear_password, Node, Args = [Username], _Opts, Inform) ->
     Inform("Clearing password for user ~p", [Username]),
     call(Node, {rabbit_auth_backend_internal, clear_password, Args});
 
-action(set_admin, Node, [Username], _Opts, Inform) ->
-    Inform("Setting administrative status for user ~p", [Username]),
-    call(Node, {rabbit_auth_backend_internal, set_admin, [Username]});
-
-action(clear_admin, Node, [Username], _Opts, Inform) ->
-    Inform("Clearing administrative status for user ~p", [Username]),
-    call(Node, {rabbit_auth_backend_internal, clear_admin, [Username]});
+action(set_user_tags, Node, [Username | TagsStr], _Opts, Inform) ->
+    Tags = [list_to_atom(T) || T <- TagsStr],
+    Inform("Setting tags for user ~p to ~p", [Username, Tags]),
+    rpc_call(Node, rabbit_auth_backend_internal, set_tags,
+             [list_to_binary(Username), Tags]);
 
 action(list_users, Node, [], _Opts, Inform) ->
     Inform("Listing users", []),
-    display_list(call(Node, {rabbit_auth_backend_internal, list_users, []}));
+    display_info_list(
+      call(Node, {rabbit_auth_backend_internal, list_users, []}),
+      rabbit_auth_backend_internal:user_info_keys());
 
 action(add_vhost, Node, Args = [_VHostPath], _Opts, Inform) ->
     Inform("Creating vhost ~p", Args),
@@ -220,14 +255,16 @@ action(delete_vhost, Node, Args = [_VHostPath], _Opts, Inform) ->
     Inform("Deleting vhost ~p", Args),
     call(Node, {rabbit_vhost, delete, Args});
 
-action(list_vhosts, Node, [], _Opts, Inform) ->
+action(list_vhosts, Node, Args, _Opts, Inform) ->
     Inform("Listing vhosts", []),
-    display_list(call(Node, {rabbit_vhost, list, []}));
+    ArgAtoms = default_if_empty(Args, [name]),
+    display_info_list(call(Node, {rabbit_vhost, info_all, []}), ArgAtoms);
 
 action(list_user_permissions, Node, Args = [_Username], _Opts, Inform) ->
     Inform("Listing permissions for user ~p", Args),
-    display_list(call(Node, {rabbit_auth_backend_internal,
-                             list_user_permissions, Args}));
+    display_info_list(call(Node, {rabbit_auth_backend_internal,
+                                  list_user_permissions, Args}),
+                      rabbit_auth_backend_internal:user_perms_info_keys());
 
 action(list_queues, Node, Args, Opts, Inform) ->
     Inform("Listing queues", []),
@@ -264,7 +301,7 @@ action(list_connections, Node, Args, _Opts, Inform) ->
 
 action(list_channels, Node, Args, _Opts, Inform) ->
     Inform("Listing channels", []),
-    ArgAtoms = default_if_empty(Args, [pid, user, transactional, consumer_count,
+    ArgAtoms = default_if_empty(Args, [pid, user, consumer_count,
                                        messages_unacknowledged]),
     display_info_list(rpc_call(Node, rabbit_channel, info_all, [ArgAtoms]),
                       ArgAtoms);
@@ -272,14 +309,18 @@ action(list_channels, Node, Args, _Opts, Inform) ->
 action(list_consumers, Node, _Args, Opts, Inform) ->
     Inform("Listing consumers", []),
     VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)),
-    InfoKeys = [queue_name, channel_pid, consumer_tag, ack_required],
-    case rpc_call(Node, rabbit_amqqueue, consumers_all, [VHostArg]) of
-        L when is_list(L) -> display_info_list(
-                               [lists:zip(InfoKeys, tuple_to_list(X)) ||
-                                   X <- L],
-                               InfoKeys);
-        Other             -> Other
-    end;
+    display_info_list(rpc_call(Node, rabbit_amqqueue, consumers_all, [VHostArg]),
+                      rabbit_amqqueue:consumer_info_keys());
+
+action(trace_on, Node, [], Opts, Inform) ->
+    VHost = proplists:get_value(?VHOST_OPT, Opts),
+    Inform("Starting tracing for vhost ~p", [VHost]),
+    rpc_call(Node, rabbit_trace, start, [list_to_binary(VHost)]);
+
+action(trace_off, Node, [], Opts, Inform) ->
+    VHost = proplists:get_value(?VHOST_OPT, Opts),
+    Inform("Stopping tracing for vhost ~p", [VHost]),
+    rpc_call(Node, rabbit_trace, stop, [list_to_binary(VHost)]);
 
 action(set_permissions, Node, [Username, CPerm, WPerm, RPerm], Opts, Inform) ->
     VHost = proplists:get_value(?VHOST_OPT, Opts),
@@ -296,14 +337,44 @@ action(clear_permissions, Node, [Username], Opts, Inform) ->
 action(list_permissions, Node, [], Opts, Inform) ->
     VHost = proplists:get_value(?VHOST_OPT, Opts),
     Inform("Listing permissions in vhost ~p", [VHost]),
-    display_list(call(Node, {rabbit_auth_backend_internal,
-                             list_vhost_permissions, [VHost]})).
+    display_info_list(call(Node, {rabbit_auth_backend_internal,
+                             list_vhost_permissions, [VHost]}),
+                      rabbit_auth_backend_internal:vhost_perms_info_keys());
+
+action(report, Node, _Args, _Opts, Inform) -> %% args and opts are ignored
+    io:format("Reporting server status on ~p~n~n", [erlang:universaltime()]),
+    [begin ok = action(Action, N, [], [], Inform), io:nl() end ||
+        N      <- unsafe_rpc(Node, rabbit_mnesia, running_clustered_nodes, []),
+        Action <- [status, cluster_status, environment]],
+    VHosts = unsafe_rpc(Node, rabbit_vhost, list, []), %% throws on badrpc
+    [print_report(Node, Q)      || Q <- ?GLOBAL_QUERIES], %% one per query
+    [print_report(Node, Q, [V]) || Q <- ?VHOST_QUERIES, V <- VHosts],
+    io:format("End of server status report~n"),
+    ok.
+
+%%----------------------------------------------------------------------------
+
+wait_for_application(Node, Attempts) -> %% poll Node until rabbit is listed
+    case rpc_call(Node, application, which_applications, [infinity]) of
+        {badrpc, _} = E -> case Attempts of
+                               0 -> E; %% no retries left: return the badrpc
+                               _ -> wait_for_application0(Node, Attempts - 1)
+                           end;
+        Apps            -> case proplists:is_defined(rabbit, Apps) of
+                               %% The node answered, so it is up: retry with
+                               %% 0 attempts so a later badrpc is returned.
+                               true  -> ok;
+                               false -> wait_for_application0(Node, 0)
+                           end
+    end.
+
+wait_for_application0(Node, Attempts) ->
+    timer:sleep(1000), %% wait one second before polling again
+    wait_for_application(Node, Attempts).
 
 default_if_empty(List, Default) when is_list(List) ->
-    if List == [] ->
-        Default;
-       true ->
-        [list_to_atom(X) || X <- List]
+    if List == [] -> Default;
+       true       -> [list_to_atom(X) || X <- List]
     end.
 
 display_info_list(Results, InfoItemKeys) when is_list(Results) ->
@@ -342,19 +413,27 @@ format_info_item([{TableEntryKey, TableEntryType, _TableEntryValue} | _] =
                      Value) when is_binary(TableEntryKey) andalso
                                  is_atom(TableEntryType) ->
     io_lib:format("~1000000000000p", [prettify_amqp_table(Value)]);
+format_info_item([T | _] = Value)
+  when is_tuple(T) orelse is_pid(T) orelse is_binary(T) orelse is_atom(T) orelse
+       is_list(T) ->
+    "[" ++
+        lists:nthtail(2, lists:append(
+                           [", " ++ format_info_item(E) || E <- Value])) ++ "]";
 format_info_item(Value) ->
     io_lib:format("~w", [Value]).
 
-display_list(L) when is_list(L) ->
-    lists:foreach(fun (I) when is_binary(I) ->
-                          io:format("~s~n", [escape(I)]);
-                      (I) when is_tuple(I) ->
-                          display_row([escape(V)
-                                       || V <- tuple_to_list(I)])
-                  end,
-                  lists:sort(L)),
-    ok;
-display_list(Other) -> Other.
+display_call_result(Node, MFA) -> %% run call/2 and pretty-print its result
+    case call(Node, MFA) of
+        {badrpc, _} = Res -> throw(Res); %% RPC failure is thrown, not returned
+        Res               -> io:format("~p~n", [Res]),
+                             ok
+    end.
+
+unsafe_rpc(Node, Mod, Fun, Args) -> %% rpc_call/4 that throws on badrpc
+    case rpc_call(Node, Mod, Fun, Args) of
+        {badrpc, _} = Res -> throw(Res);
+        Normal            -> Normal %% any other reply is passed through
+    end.
 
 call(Node, {Mod, Fun, Args}) ->
     rpc_call(Node, Mod, Fun, lists:map(fun list_to_binary/1, Args)).
@@ -366,12 +445,9 @@ rpc_call(Node, Mod, Fun, Args) ->
 %% characters.  We don't escape characters above 127, since they may
 %% form part of UTF-8 strings.
 
-escape(Atom) when is_atom(Atom) ->
-    escape(atom_to_list(Atom));
-escape(Bin) when is_binary(Bin) ->
-    escape(binary_to_list(Bin));
-escape(L) when is_list(L) ->
-    escape_char(lists:reverse(L), []).
+escape(Atom) when is_atom(Atom)  -> escape(atom_to_list(Atom));
+escape(Bin)  when is_binary(Bin) -> escape(binary_to_list(Bin));
+escape(L)    when is_list(L)     -> escape_char(lists:reverse(L), []).
 
 escape_char([$\\ | T], Acc) ->
     escape_char(T, [$\\, $\\ | Acc]);
@@ -386,19 +462,15 @@ escape_char([], Acc) ->
 prettify_amqp_table(Table) ->
     [{escape(K), prettify_typed_amqp_value(T, V)} || {K, T, V} <- Table].
 
-prettify_typed_amqp_value(Type, Value) ->
-    case Type of
-        longstr -> escape(Value);
-        table   -> prettify_amqp_table(Value);
-        array   -> [prettify_typed_amqp_value(T, V) || {T, V} <- Value];
-        _       -> Value
-    end.
+prettify_typed_amqp_value(longstr, Value) -> escape(Value);
+prettify_typed_amqp_value(table,   Value) -> prettify_amqp_table(Value);
+prettify_typed_amqp_value(array,   Value) -> [prettify_typed_amqp_value(T, V) ||
+                                                 {T, V} <- Value];
+prettify_typed_amqp_value(_Type,   Value) -> Value.
 
-% the slower shutdown on windows required to flush stdout
+%% the slower shutdown on windows is required to flush stdout
 quit(Status) ->
     case os:type() of
-        {unix, _} ->
-            halt(Status);
-        {win32, _} ->
-            init:stop(Status)
+        {unix,  _} -> halt(Status);
+        {win32, _} -> init:stop(Status)
     end.
diff --git a/src/rabbit_direct.erl b/src/rabbit_direct.erl
index 3b8c9fba..7ff534ee 100644
--- a/src/rabbit_direct.erl
+++ b/src/rabbit_direct.erl
@@ -16,7 +16,7 @@
 
 -module(rabbit_direct).
 
--export([boot/0, connect/3, start_channel/5]).
+-export([boot/0, connect/4, start_channel/8, disconnect/1]).
 
 -include("rabbit.hrl").
 
@@ -25,12 +25,16 @@
 -ifdef(use_specs).
 
 -spec(boot/0 :: () -> 'ok').
--spec(connect/3 :: (binary(), binary(), binary()) ->
-                       {'ok', {rabbit_types:user(),
-                               rabbit_framing:amqp_table()}}).
--spec(start_channel/5 :: (rabbit_channel:channel_number(), pid(),
-                          rabbit_types:user(), rabbit_types:vhost(), pid()) ->
-                             {'ok', pid()}).
+-spec(connect/4 :: (rabbit_types:username(), rabbit_types:vhost(),
+                    rabbit_types:protocol(), rabbit_event:event_props()) ->
+                        {'ok', {rabbit_types:user(),
+                                rabbit_framing:amqp_table()}}).
+-spec(start_channel/8 ::
+        (rabbit_channel:channel_number(), pid(), pid(), rabbit_types:protocol(),
+         rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(),
+         pid()) -> {'ok', pid()}).
+
+-spec(disconnect/1 :: (rabbit_event:event_props()) -> 'ok').
 
 -endif.
 
@@ -39,37 +43,44 @@
 boot() ->
     {ok, _} =
         supervisor2:start_child(
-            rabbit_sup,
-            {rabbit_direct_client_sup,
-             {rabbit_client_sup, start_link,
-              [{local, rabbit_direct_client_sup},
-               {rabbit_channel_sup, start_link, []}]},
-             transient, infinity, supervisor, [rabbit_client_sup]}),
+          rabbit_sup,
+          {rabbit_direct_client_sup,
+           {rabbit_client_sup, start_link,
+            [{local, rabbit_direct_client_sup},
+             {rabbit_channel_sup, start_link, []}]},
+           transient, infinity, supervisor, [rabbit_client_sup]}),
     ok.
 
 %%----------------------------------------------------------------------------
 
-connect(Username, Password, VHost) ->
+connect(Username, VHost, Protocol, Infos) ->
     case lists:keymember(rabbit, 1, application:which_applications()) of
         true  ->
-            try rabbit_access_control:user_pass_login(Username, Password) of
-                #user{} = User ->
+            case rabbit_access_control:check_user_login(Username, []) of
+                {ok, User} ->
                     try rabbit_access_control:check_vhost_access(User, VHost) of
-                        ok -> {ok, {User, rabbit_reader:server_properties()}}
+                        ok -> rabbit_event:notify(connection_created, Infos),
+                              {ok, {User,
+                                    rabbit_reader:server_properties(Protocol)}}
                     catch
                         exit:#amqp_error{name = access_refused} ->
                             {error, access_refused}
-                    end
-            catch
-                exit:#amqp_error{name = access_refused} -> {error, auth_failure}
+                    end;
+                {refused, _Msg, _Args} ->
+                    {error, auth_failure}
             end;
         false ->
             {error, broker_not_found_on_node}
     end.
 
-start_channel(Number, ClientChannelPid, User, VHost, Collector) ->
+start_channel(Number, ClientChannelPid, ConnPid, Protocol, User, VHost,
+              Capabilities, Collector) ->
     {ok, _, {ChannelPid, _}} =
         supervisor2:start_child(
-            rabbit_direct_client_sup,
-            [{direct, Number, ClientChannelPid, User, VHost, Collector}]),
+          rabbit_direct_client_sup,
+          [{direct, Number, ClientChannelPid, ConnPid, Protocol, User, VHost,
+            Capabilities, Collector}]),
     {ok, ChannelPid}.
+
+disconnect(Infos) -> %% emit a connection_closed event carrying Infos
+    rabbit_event:notify(connection_closed, Infos).
diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl
index 0120f0d6..93aad9e3 100644
--- a/src/rabbit_error_logger.erl
+++ b/src/rabbit_error_logger.erl
@@ -67,8 +67,12 @@ publish(_Other, _Format, _Data, _State) ->
     ok.
 
 publish1(RoutingKey, Format, Data, LogExch) ->
+    %% 0-9-1 says the timestamp is a "64 bit POSIX timestamp". That's
+    %% second resolution, not millisecond.
+    Timestamp = rabbit_misc:now_ms() div 1000,
     {ok, _RoutingRes, _DeliveredQPids} =
-        rabbit_basic:publish(LogExch, RoutingKey, false, false, none,
-                             #'P_basic'{content_type = <<"text/plain">>},
+        rabbit_basic:publish(LogExch, RoutingKey, false, false,
+                             #'P_basic'{content_type = <<"text/plain">>,
+                                        timestamp    = Timestamp},
                              list_to_binary(io_lib:format(Format, Data))),
     ok.
diff --git a/src/rabbit_event.erl b/src/rabbit_event.erl
index f4ee279b..887e4a1f 100644
--- a/src/rabbit_event.erl
+++ b/src/rabbit_event.erl
@@ -26,7 +26,7 @@
 
 %%----------------------------------------------------------------------------
 
--record(state, {level, timer}).
+-record(state, {level, interval, timer}).
 
 %%----------------------------------------------------------------------------
 
@@ -49,6 +49,7 @@
 
 -opaque(state() :: #state {
                level :: level(),
+               interval :: integer(),
                timer :: atom()
               }).
 
@@ -95,12 +96,14 @@ start_link() ->
 
 init_stats_timer() ->
     {ok, StatsLevel} = application:get_env(rabbit, collect_statistics),
-    #state{level = StatsLevel, timer = undefined}.
+    {ok, Interval} = application:get_env(rabbit, collect_statistics_interval),
+    #state{level = StatsLevel, interval = Interval, timer = undefined}.
 
 ensure_stats_timer(State = #state{level = none}, _Pid, _Msg) ->
     State;
-ensure_stats_timer(State = #state{timer = undefined}, Pid, Msg) ->
-    TRef = erlang:send_after(?STATS_INTERVAL, Pid, Msg),
+ensure_stats_timer(State = #state{interval = Interval,
+                                  timer    = undefined}, Pid, Msg) ->
+    TRef = erlang:send_after(Interval, Pid, Msg),
     State#state{timer = TRef};
 ensure_stats_timer(State, _Pid, _Msg) ->
     State.
@@ -129,15 +132,8 @@ notify_if(true,   Type,  Props) -> notify(Type, Props);
 notify_if(false, _Type, _Props) -> ok.
 
 notify(Type, Props) ->
-    try
-        %% TODO: switch to os:timestamp() when we drop support for
-        %% Erlang/OTP < R13B01
-        gen_event:notify(rabbit_event, #event{type = Type,
-                                              props = Props,
-                                              timestamp = now()})
-    catch error:badarg ->
-            %% badarg means rabbit_event is no longer registered. We never
-            %% unregister it so the great likelihood is that we're shutting
-            %% down the broker but some events were backed up. Ignore it.
-            ok
-    end.
+    %% TODO: switch to os:timestamp() when we drop support for
+    %% Erlang/OTP < R13B01
+    gen_event:notify(rabbit_event, #event{type = Type,
+                                          props = Props,
+                                          timestamp = now()}).
diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl
index 92259195..afa48355 100644
--- a/src/rabbit_exchange.erl
+++ b/src/rabbit_exchange.erl
@@ -18,12 +18,13 @@
 -include("rabbit.hrl").
 -include("rabbit_framing.hrl").
 
--export([recover/0, declare/6, lookup/1, lookup_or_die/1, list/1, info_keys/0,
-         info/1, info/2, info_all/1, info_all/2, publish/2, delete/2]).
--export([callback/3]).
-%% this must be run inside a mnesia tx
--export([maybe_auto_delete/1]).
--export([assert_equivalence/6, assert_args_equivalence/2, check_type/1]).
+-export([recover/0, callback/3, declare/6,
+         assert_equivalence/6, assert_args_equivalence/2, check_type/1,
+         lookup/1, lookup_or_die/1, list/1, update_scratch/2,
+         info_keys/0, info/1, info/2, info_all/1, info_all/2,
+         route/2, delete/2]).
+%% these must be run inside a mnesia tx
+-export([maybe_auto_delete/1, serial/1, peek_serial/1]).
 
 %%----------------------------------------------------------------------------
 
@@ -33,8 +34,10 @@
 
 -type(name() :: rabbit_types:r('exchange')).
 -type(type() :: atom()).
+-type(fun_name() :: atom()).
 
--spec(recover/0 :: () -> 'ok').
+-spec(recover/0 :: () -> [name()]).
+-spec(callback/3:: (rabbit_types:exchange(), fun_name(), [any()]) -> 'ok').
 -spec(declare/6 ::
         (name(), type(), boolean(), boolean(), boolean(),
          rabbit_framing:amqp_table())
@@ -55,6 +58,7 @@
         (name()) -> rabbit_types:exchange() |
                     rabbit_types:channel_exit()).
 -spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]).
+-spec(update_scratch/2 :: (name(), fun((any()) -> any())) -> 'ok').
 -spec(info_keys/0 :: () -> rabbit_types:info_keys()).
 -spec(info/1 :: (rabbit_types:exchange()) -> rabbit_types:infos()).
 -spec(info/2 ::
@@ -62,9 +66,9 @@
         -> rabbit_types:infos()).
 -spec(info_all/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]).
 -spec(info_all/2 ::(rabbit_types:vhost(), rabbit_types:info_keys())
-                    -> [rabbit_types:infos()]).
--spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery())
-                   -> {rabbit_router:routing_result(), [pid()]}).
+                   -> [rabbit_types:infos()]).
+-spec(route/2 :: (rabbit_types:exchange(), rabbit_types:delivery())
+                 -> [rabbit_amqqueue:name()]).
 -spec(delete/2 ::
         (name(), boolean())-> 'ok' |
                               rabbit_types:error('not_found') |
@@ -72,7 +76,8 @@
 -spec(maybe_auto_delete/1::
         (rabbit_types:exchange())
         -> 'not_deleted' | {'deleted', rabbit_binding:deletions()}).
--spec(callback/3:: (rabbit_types:exchange(), atom(), [any()]) -> 'ok').
+-spec(serial/1 :: (rabbit_types:exchange()) -> 'none' | pos_integer()).
+-spec(peek_serial/1 :: (name()) -> pos_integer() | 'undefined').
 
 -endif.
 
@@ -81,25 +86,22 @@
 -define(INFO_KEYS, [name, type, durable, auto_delete, internal, arguments]).
 
 recover() ->
-    Xs = rabbit_misc:table_fold(
-           fun (X, Acc) ->
-                   ok = mnesia:write(rabbit_exchange, X, write),
-                   [X | Acc]
-           end, [], rabbit_durable_exchange),
-    Bs = rabbit_binding:recover(),
-    recover_with_bindings(
-      lists:keysort(#binding.source, Bs),
-      lists:keysort(#exchange.name, Xs), []).
-
-recover_with_bindings([B = #binding{source = XName} | Rest],
-                      Xs = [#exchange{name = XName} | _],
-                      Bindings) ->
-    recover_with_bindings(Rest, Xs, [B | Bindings]);
-recover_with_bindings(Bs, [X = #exchange{type = Type} | Xs], Bindings) ->
-    (type_to_module(Type)):recover(X, Bindings),
-    recover_with_bindings(Bs, Xs, []);
-recover_with_bindings([], [], []) ->
-    ok.
+    Xs = rabbit_misc:table_filter(
+           fun (#exchange{name = XName}) ->
+                   mnesia:read({rabbit_exchange, XName}) =:= []
+           end,
+           fun (X, Tx) ->
+                   case Tx of
+                       true  -> store(X);
+                       false -> ok
+                   end,
+                   rabbit_exchange:callback(X, create, [map_create_tx(Tx), X])
+           end,
+           rabbit_durable_exchange),
+    [XName || #exchange{name = XName} <- Xs].
+
+callback(#exchange{type = XType}, Fun, Args) ->
+    apply(type_to_module(XType), Fun, Args). %% call Fun in the type's module
 
 declare(XName, Type, Durable, AutoDelete, Internal, Args) ->
     X = #exchange{name        = XName,
@@ -108,13 +110,14 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) ->
                   auto_delete = AutoDelete,
                   internal    = Internal,
                   arguments   = Args},
+    XT = type_to_module(Type),
     %% We want to upset things if it isn't ok
-    ok = (type_to_module(Type)):validate(X),
+    ok = XT:validate(X),
     rabbit_misc:execute_mnesia_transaction(
       fun () ->
               case mnesia:wread({rabbit_exchange, XName}) of
                   [] ->
-                      ok = mnesia:write(rabbit_exchange, X, write),
+                      store(X),
                       ok = case Durable of
                                true  -> mnesia:write(rabbit_durable_exchange,
                                                      X, write);
@@ -126,7 +129,7 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) ->
               end
       end,
       fun ({new, Exchange}, Tx) ->
-              callback(Exchange, create, [Tx, Exchange]),
+              ok = XT:create(map_create_tx(Tx), Exchange),
               rabbit_event:notify_if(not Tx, exchange_created, info(Exchange)),
               Exchange;
           ({existing, Exchange}, _Tx) ->
@@ -135,10 +138,16 @@ declare(XName, Type, Durable, AutoDelete, Internal, Args) ->
               Err
       end).
 
-%% Used with atoms from records; e.g., the type is expected to exist.
-type_to_module(T) ->
-    {ok, Module} = rabbit_registry:lookup_module(exchange, T),
-    Module.
+map_create_tx(true)  -> transaction;  % boolean Tx flag -> atom handed to the type's create/2
+map_create_tx(false) -> none.         % any other argument fails with function_clause
+
+store(Exchange = #exchange{name = XName, type = XType}) ->  % write the exchange, plus a serial row if needed
+    ok = mnesia:write(rabbit_exchange, Exchange, write),
+    case (type_to_module(XType)):serialise_events() of
+        false -> ok;
+        true  -> Serial = #exchange_serial{name = XName, next = 1},  % counter starts at 1
+                 ok = mnesia:write(rabbit_exchange_serial, Serial, write)
+    end.
 
 %% Used with binaries sent over the wire; the type may not exist.
 check_type(TypeBin) ->
@@ -191,6 +200,23 @@ list(VHostPath) ->
       rabbit_exchange,
       #exchange{name = rabbit_misc:r(VHostPath, exchange), _ = '_'}).
 
+update_scratch(Name, Fun) ->  % replace the scratch field with Fun(OldScratch); no-op if absent
+    rabbit_misc:execute_mnesia_transaction(
+      fun() ->
+              case mnesia:wread({rabbit_exchange, Name}) of
+                  [] ->
+                      ok;
+                  [X0 = #exchange{durable = IsDurable, scratch = Old}] ->
+                      X1 = X0#exchange{scratch = Fun(Old)},
+                      ok = mnesia:write(rabbit_exchange, X1, write),
+                      case IsDurable of  % durable exchanges are mirrored in a second table
+                          true -> ok = mnesia:write(rabbit_durable_exchange,
+                                                    X1, write);
+                          _    -> ok
+                      end
+              end
+      end).
+
 info_keys() -> ?INFO_KEYS.
 
 map(VHostPath, F) ->
@@ -216,21 +242,19 @@ info_all(VHostPath) -> map(VHostPath, fun (X) -> info(X) end).
 
 info_all(VHostPath, Items) -> map(VHostPath, fun (X) -> info(X, Items) end).
 
-publish(X = #exchange{name = XName}, Delivery) ->
-    rabbit_router:deliver(
-      route(Delivery, {queue:from_list([X]), XName, []}),
-      Delivery).
+route(X = #exchange{name = XName}, Delivery) ->  % routing starts with X as the only work item
+    route1(Delivery, {queue:from_list([X]), XName, []}).  % state is {WorkList, SeenXs, QNames}
 
-route(Delivery, {WorkList, SeenXs, QNames}) ->
+route1(Delivery, {WorkList, SeenXs, QNames}) ->  % work-list loop behind route/2
     case queue:out(WorkList) of
         {empty, _WorkList} ->
             lists:usort(QNames);
         {{value, X = #exchange{type = Type}}, WorkList1} ->
             DstNames = process_alternate(
                          X, ((type_to_module(Type)):route(X, Delivery))),
-            route(Delivery,
-                  lists:foldl(fun process_route/2, {WorkList1, SeenXs, QNames},
-                              DstNames))
+            route1(Delivery,  % loop again with the state folded over DstNames
+                   lists:foldl(fun process_route/2, {WorkList1, SeenXs, QNames},
+                               DstNames))
     end.
 
 process_alternate(#exchange{name = XName, arguments = Args}, []) ->
@@ -263,27 +287,30 @@ process_route(#resource{kind = queue} = QName,
               {WorkList, SeenXs, QNames}) ->
     {WorkList, SeenXs, [QName | QNames]}.
 
-call_with_exchange(XName, Fun, PrePostCommitFun) ->
-    rabbit_misc:execute_mnesia_transaction(
+call_with_exchange(XName, Fun) ->  % Fun is applied to the exchange record when one exists
+    rabbit_misc:execute_mnesia_tx_with_tail(  % NOTE(review): presumably runs the returned fun after the tx - confirm
       fun () -> case mnesia:read({rabbit_exchange, XName}) of
-                   []  -> {error, not_found};
-                   [X] -> Fun(X)
-               end
-      end, PrePostCommitFun).
+                    []  -> rabbit_misc:const({error, not_found});  % no row for XName
+                    [X] -> Fun(X)
+                end
+      end).
 
 delete(XName, IfUnused) ->
+    Fun = case IfUnused of  % choose the delete flavour before entering the transaction
+              true  -> fun conditional_delete/1;
+              false -> fun unconditional_delete/1
+          end,
     call_with_exchange(
       XName,
-      case IfUnused of
-          true  -> fun conditional_delete/1;
-          false -> fun unconditional_delete/1
-      end,
-      fun ({deleted, X, Bs, Deletions}, Tx) ->
-              ok = rabbit_binding:process_deletions(
-                     rabbit_binding:add_deletion(
-                       XName, {X, deleted, Bs}, Deletions), Tx);
-          (Error = {error, _InUseOrNotFound}, _Tx) ->
-              Error
+      fun (X) ->
+              case Fun(X) of
+                  {deleted, X, Bs, Deletions} ->  % X is already bound, so this also asserts equality
+                      rabbit_binding:process_deletions(
+                        rabbit_binding:add_deletion(
+                          XName, {X, deleted, Bs}, Deletions));
+                  {error, _InUseOrNotFound} = E ->
+                      rabbit_misc:const(E)  % NOTE(review): presumably wraps E in a fun - confirm
+              end
       end).
 
 maybe_auto_delete(#exchange{auto_delete = false}) ->
@@ -294,9 +321,6 @@ maybe_auto_delete(#exchange{auto_delete = true} = X) ->
         {deleted, X, [], Deletions} -> {deleted, Deletions}
     end.
 
-callback(#exchange{type = XType}, Fun, Args) ->
-    apply(type_to_module(XType), Fun, Args).
-
 conditional_delete(X = #exchange{name = XName}) ->
     case rabbit_binding:has_for_source(XName) of
         false  -> unconditional_delete(X);
@@ -306,5 +330,30 @@ conditional_delete(X = #exchange{name = XName}) ->
 unconditional_delete(X = #exchange{name = XName}) ->
     ok = mnesia:delete({rabbit_durable_exchange, XName}),
     ok = mnesia:delete({rabbit_exchange, XName}),
+    ok = mnesia:delete({rabbit_exchange_serial, XName}),  % also drop the serial counter row
     Bindings = rabbit_binding:remove_for_source(XName),
     {deleted, X, Bindings, rabbit_binding:remove_for_destination(XName)}.
+
+serial(#exchange{name = Name, type = XType}) ->  % 'none' unless the exchange type serialises events
+    case (type_to_module(XType)):serialise_events() of
+        false -> none;
+        true  -> next_serial(Name)
+    end.
+
+next_serial(XName) ->  % fetch-and-increment; badmatch if XName has no serial row
+    [#exchange_serial{next = Serial}] =
+        mnesia:read(rabbit_exchange_serial, XName, write),  % read taking a write lock
+    ok = mnesia:write(rabbit_exchange_serial,
+                      #exchange_serial{name = XName, next = Serial + 1}, write),
+    Serial.  % the value before the increment
+
+peek_serial(Name) ->  % look at the next serial without advancing it
+    case mnesia:read({rabbit_exchange_serial, Name}) of
+        [#exchange_serial{next = Next}] -> Next;
+        _Other                          -> undefined
+    end.
+
+%% Used with atoms from records; i.e., the type is expected to exist.
+type_to_module(T) ->
+    {ok, Module} = rabbit_registry:lookup_module(exchange, T),  % badmatch on any other result
+    Module.
diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl
index 547583e9..ab3d00dc 100644
--- a/src/rabbit_exchange_type.erl
+++ b/src/rabbit_exchange_type.erl
@@ -21,21 +21,25 @@
 behaviour_info(callbacks) ->
     [
      {description, 0},
+
+     %% Should Rabbit ensure that all binding events that are
+     %% delivered to an individual exchange can be serialised? (they
+     %% might still be delivered out of order, but there'll be a
+     %% serial number).
+     {serialise_events, 0},
+
      {route, 2},
 
      %% called BEFORE declaration, to check args etc; may exit with #amqp_error{}
      {validate, 1},
 
-     %% called after declaration when previously absent
+     %% called after declaration and recovery
      {create, 2},
 
-     %% called when recovering
-     {recover, 2},
-
-     %% called after exchange deletion.
+     %% called after exchange (auto)deletion.
      {delete, 3},
 
-     %% called after a binding has been added
+     %% called after a binding has been added or recovered
      {add_binding, 3},
 
      %% called after bindings have been deleted.
diff --git a/src/rabbit_exchange_type_direct.erl b/src/rabbit_exchange_type_direct.erl
index c51b0913..b485e31f 100644
--- a/src/rabbit_exchange_type_direct.erl
+++ b/src/rabbit_exchange_type_direct.erl
@@ -19,8 +19,8 @@
 
 -behaviour(rabbit_exchange_type).
 
--export([description/0, route/2]).
--export([validate/1, create/2, recover/2, delete/3,
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, create/2, delete/3,
          add_binding/3, remove_bindings/3, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
@@ -35,13 +35,14 @@ description() ->
     [{name, <<"direct">>},
      {description, <<"AMQP direct exchange, as per the AMQP specification">>}].
 
+serialise_events() -> false.  % direct exchanges do not ask for serialised binding events
+
 route(#exchange{name = Name},
-      #delivery{message = #basic_message{routing_key = RoutingKey}}) ->
-    rabbit_router:match_routing_key(Name, RoutingKey).
+      #delivery{message = #basic_message{routing_keys = Routes}}) ->
+    rabbit_router:match_routing_key(Name, Routes).  % Routes is the message's routing_keys field
 
 validate(_X) -> ok.
 create(_Tx, _X) -> ok.
-recover(_X, _Bs) -> ok.
 delete(_Tx, _X, _Bs) -> ok.
 add_binding(_Tx, _X, _B) -> ok.
 remove_bindings(_Tx, _X, _Bs) -> ok.
diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl
index 382fb627..3c029722 100644
--- a/src/rabbit_exchange_type_fanout.erl
+++ b/src/rabbit_exchange_type_fanout.erl
@@ -19,8 +19,8 @@
 
 -behaviour(rabbit_exchange_type).
 
--export([description/0, route/2]).
--export([validate/1, create/2, recover/2, delete/3, add_binding/3,
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, create/2, delete/3, add_binding/3,
          remove_bindings/3, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
@@ -35,12 +35,13 @@ description() ->
     [{name, <<"fanout">>},
      {description, <<"AMQP fanout exchange, as per the AMQP specification">>}].
 
+serialise_events() -> false.  % fanout exchanges do not ask for serialised binding events
+
 route(#exchange{name = Name}, _Delivery) ->
-    rabbit_router:match_routing_key(Name, '_').
+    rabbit_router:match_routing_key(Name, ['_']).  % NOTE(review): '_' presumably matches any key - confirm
 
 validate(_X) -> ok.
 create(_Tx, _X) -> ok.
-recover(_X, _Bs) -> ok.
 delete(_Tx, _X, _Bs) -> ok.
 add_binding(_Tx, _X, _B) -> ok.
 remove_bindings(_Tx, _X, _Bs) -> ok.
diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl
index d3529b06..f09e4aae 100644
--- a/src/rabbit_exchange_type_headers.erl
+++ b/src/rabbit_exchange_type_headers.erl
@@ -20,8 +20,8 @@
 
 -behaviour(rabbit_exchange_type).
 
--export([description/0, route/2]).
--export([validate/1, create/2, recover/2, delete/3, add_binding/3,
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, create/2, delete/3, add_binding/3,
          remove_bindings/3, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
@@ -41,6 +41,8 @@ description() ->
     [{name, <<"headers">>},
      {description, <<"AMQP headers exchange, as per the AMQP specification">>}].
 
+serialise_events() -> false.  % headers exchanges do not ask for serialised binding events
+
 route(#exchange{name = Name},
       #delivery{message = #basic_message{content = Content}}) ->
     Headers = case (Content#content.properties)#'P_basic'.headers of
@@ -114,7 +116,6 @@ headers_match([{PK, PT, PV} | PRest], [{DK, DT, DV} | DRest],
 
 validate(_X) -> ok.
 create(_Tx, _X) -> ok.
-recover(_X, _Bs) -> ok.
 delete(_Tx, _X, _Bs) -> ok.
 add_binding(_Tx, _X, _B) -> ok.
 remove_bindings(_Tx, _X, _Bs) -> ok.
diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl
index 9cbf8100..348655b1 100644
--- a/src/rabbit_exchange_type_topic.erl
+++ b/src/rabbit_exchange_type_topic.erl
@@ -15,12 +15,13 @@
 %%
 
 -module(rabbit_exchange_type_topic).
+
 -include("rabbit.hrl").
 
 -behaviour(rabbit_exchange_type).
 
--export([description/0, route/2]).
--export([validate/1, create/2, recover/2, delete/3, add_binding/3,
+-export([description/0, serialise_events/0, route/2]).
+-export([validate/1, create/2, delete/3, add_binding/3,
          remove_bindings/3, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
@@ -31,58 +32,247 @@
                     {requires,    rabbit_registry},
                     {enables,     kernel_ready}]}).
 
--export([topic_matches/2]).
-
--ifdef(use_specs).
-
--spec(topic_matches/2 :: (binary(), binary()) -> boolean()).
-
--endif.
+%%----------------------------------------------------------------------------
 
 description() ->
     [{name, <<"topic">>},
      {description, <<"AMQP topic exchange, as per the AMQP specification">>}].
 
-route(#exchange{name = Name},
-        #delivery{message = #basic_message{routing_key = RoutingKey}}) ->
-    rabbit_router:match_bindings(Name,
-                                 fun (#binding{key = BindingKey}) ->
-                                         topic_matches(BindingKey, RoutingKey)
-                                 end).
+serialise_events() -> false.  % topic exchanges do not ask for serialised binding events
 
-split_topic_key(Key) ->
-    string:tokens(binary_to_list(Key), ".").
-
-topic_matches(PatternKey, RoutingKey) ->
-    P = split_topic_key(PatternKey),
-    R = split_topic_key(RoutingKey),
-    topic_matches1(P, R).
-
-topic_matches1(["#"], _R) ->
-    true;
-topic_matches1(["#" | PTail], R) ->
-    last_topic_match(PTail, [], lists:reverse(R));
-topic_matches1([], []) ->
-    true;
-topic_matches1(["*" | PatRest], [_ | ValRest]) ->
-    topic_matches1(PatRest, ValRest);
-topic_matches1([PatElement | PatRest], [ValElement | ValRest])
-  when PatElement == ValElement ->
-    topic_matches1(PatRest, ValRest);
-topic_matches1(_, _) ->
-    false.
-
-last_topic_match(P, R, []) ->
-    topic_matches1(P, R);
-last_topic_match(P, R, [BacktrackNext | BacktrackList]) ->
-    topic_matches1(P, R) or
-        last_topic_match(P, [BacktrackNext | R], BacktrackList).
+%% NB: duplicate destinations are possible in some situations (that's ok)
+route(#exchange{name = XName},
+      #delivery{message = #basic_message{routing_keys = RKeys}}) ->
+    lists:flatmap(fun (RKey) ->  % one dirty trie lookup per routing key
+                          mnesia:async_dirty(fun trie_match/2,
+                                             [XName, split_topic_key(RKey)])
+                  end, RKeys).
 
 validate(_X) -> ok.
 create(_Tx, _X) -> ok.
-recover(_X, _Bs) -> ok.
-delete(_Tx, _X, _Bs) -> ok.
-add_binding(_Tx, _X, _B) -> ok.
-remove_bindings(_Tx, _X, _Bs) -> ok.
+
+delete(transaction, #exchange{name = X}, _Bs) ->  % 'transaction' call: purge this exchange's trie
+    trie_remove_all_edges(X),
+    trie_remove_all_bindings(X),
+    ok;
+delete(none, _Exchange, _Bs) ->  % 'none' call: nothing to do
+    ok.
+
+add_binding(transaction, _Exchange, Binding) ->  % 'transaction' call: insert the binding into the trie
+    internal_add_binding(Binding);
+add_binding(none, _Exchange, _Binding) ->  % 'none' call: nothing to do
+    ok.
+
+remove_bindings(transaction, #exchange{name = X}, Bs) ->
+    %% The remove process is split into two distinct phases. In the
+    %% first phase we gather the lists of bindings and edges to
+    %% delete, then in the second phase we process all the
+    %% deletions. This is to prevent interleaving of read/write
+    %% operations in mnesia that can adversely affect performance.
+    {ToDelete, Paths} =
+       lists:foldl(
+         fun(#binding{source = S, key = K, destination = D}, {Acc, PathAcc}) ->
+                 Path = [{FinalNode, _} | _] =  % path is leaf-first: its head is the final node
+                     follow_down_get_path(S, split_topic_key(K)),
+                 {[{FinalNode, D} | Acc],
+                  decrement_bindings(X, Path, maybe_add_path(X, Path, PathAcc))}
+         end, {[], gb_trees:empty()}, Bs),  % Paths is a gb_tree keyed by node id
+
+    [trie_remove_binding(X, FinalNode, D) || {FinalNode, D} <- ToDelete],
+    [trie_remove_edge(X, Parent, Node, W) ||  % only nodes whose counts reached {0, 0}
+        {Node, {Parent, W, {0, 0}}} <- gb_trees:to_list(Paths)],
+    ok;
+remove_bindings(none, _X, _Bs) ->
+    ok.
+
+maybe_add_path(_X, [{root, none}], PathAcc) ->  % path is only the root: nothing to record
+    PathAcc;
+maybe_add_path(X, [{Node, W}, {Parent, _} | _], PathAcc) ->  % record the head node once
+    case gb_trees:is_defined(Node, PathAcc) of
+        true  -> PathAcc;
+        false -> gb_trees:insert(Node, {Parent, W, {trie_binding_count(X, Node),  % {Bindings, Edges}
+                                                    trie_child_count(X, Node)}},
+                                 PathAcc)
+    end.
+
+decrement_bindings(X, Path, PathAcc) ->  % one binding fewer on the head node of Path
+    with_path_acc(X, fun({B, E}) -> {B - 1, E} end,
+                  Path, PathAcc).
+
+decrement_edges(X, Path, PathAcc) ->  % one child edge fewer on the head node of Path
+    with_path_acc(X, fun({B, E}) -> {B, E - 1} end,
+                  Path, PathAcc).
+
+with_path_acc(_X, _Fun, [{root, none}], PathAcc) ->  % reached the root: nothing to update
+    PathAcc;
+with_path_acc(X, Fun, [{Node, _} | ParentPath], PathAcc) ->
+    {Parent, W, Counts} = gb_trees:get(Node, PathAcc),
+    NewCounts = Fun(Counts),  % Counts is {Bindings, Edges}
+    NewPathAcc = gb_trees:update(Node, {Parent, W, NewCounts}, PathAcc),
+    case NewCounts of
+        {0, 0} -> decrement_edges(X, ParentPath,  % node is now empty: its parent loses an edge
+                                  maybe_add_path(X, ParentPath, NewPathAcc));
+        _      -> NewPathAcc
+    end.
+
+
 assert_args_equivalence(X, Args) ->
     rabbit_exchange:assert_args_equivalence(X, Args).
+
+%%----------------------------------------------------------------------------
+
+internal_add_binding(#binding{source = X, key = K, destination = D}) ->
+    FinalNode = follow_down_create(X, split_topic_key(K)),  % node for the key, created if missing
+    trie_add_binding(X, FinalNode, D),
+    ok.
+
+trie_match(X, Words) ->  % collect destinations for Words, starting at the root node
+    trie_match(X, root, Words, []).
+
+trie_match(X, Node, [], ResAcc) ->  % words exhausted: take Node's bindings, also try a "#" child
+    trie_match_part(X, Node, "#", fun trie_match_skip_any/4, [],
+                    trie_bindings(X, Node) ++ ResAcc);
+trie_match(X, Node, [W | RestW] = Words, ResAcc) ->
+    lists:foldl(fun ({WArg, MatchFun, RestWArg}, Acc) ->
+                        trie_match_part(X, Node, WArg, MatchFun, RestWArg, Acc)
+                end, ResAcc, [{W, fun trie_match/4, RestW},  % edge labelled with the word itself
+                              {"*", fun trie_match/4, RestW},  % "*" edge consumes exactly one word
+                              {"#", fun trie_match_skip_any/4, Words}]).  % "#" edge: any number of words
+
+trie_match_part(X, Node, Search, MatchFun, RestW, ResAcc) ->  % follow the Search edge, if present
+    case trie_child(X, Node, Search) of
+        error       -> ResAcc;
+        {ok, Child} -> MatchFun(X, Child, RestW, ResAcc)
+    end.
+
+trie_match_skip_any(X, Node, [], ResAcc) ->
+    trie_match(X, Node, [], ResAcc);
+trie_match_skip_any(X, Node, [_ | RestW] = Words, ResAcc) ->  % try a match at every suffix of Words
+    trie_match_skip_any(X, Node, RestW,
+                        trie_match(X, Node, Words, ResAcc)).
+
+follow_down_create(X, Words) ->  % return the node reached by Words, adding missing edges
+    case follow_down_last_node(X, Words) of
+        {ok, FinalNode}      -> FinalNode;
+        {error, Node, RestW} -> lists:foldl(  % walk stopped at Node: one new edge per remaining word
+                                  fun (W, CurNode) ->
+                                          NewNode = new_node_id(),
+                                          trie_add_edge(X, CurNode, NewNode, W),
+                                          NewNode
+                                  end, Node, RestW)
+    end.
+
+follow_down_last_node(X, Words) ->  % {ok, Node} | {error, LastNodeReached, UnmatchedWords}
+    follow_down(X, fun (_, Node, _) -> Node end, root, Words).  % accumulator is the latest node
+
+follow_down_get_path(X, Words) ->  % leaf-first [{Node, Word}] path; badmatch if an edge is missing
+    {ok, Path} =
+        follow_down(X, fun (W, Node, PathAcc) -> [{Node, W} | PathAcc] end,
+                    [{root, none}], Words),  % the path always ends with {root, none}
+    Path.
+
+follow_down(X, AccFun, Acc0, Words) ->  % walk from the root, folding AccFun over each step taken
+    follow_down(X, root, AccFun, Acc0, Words).
+
+follow_down(_X, _CurNode, _AccFun, Acc, []) ->
+    {ok, Acc};
+follow_down(X, CurNode, AccFun, Acc, Words = [W | RestW]) ->
+    case trie_child(X, CurNode, W) of
+        {ok, NextNode} -> follow_down(X, NextNode, AccFun,
+                                      AccFun(W, NextNode, Acc), RestW);
+        error          -> {error, Acc, Words}  % no edge for W: report Acc and the unmatched words
+    end.
+
+trie_child(X, Node, Word) ->  % {ok, Child} for the edge Node --Word--> Child, else error
+    case mnesia:read({rabbit_topic_trie_edge,
+                      #trie_edge{exchange_name = X,
+                                 node_id       = Node,
+                                 word          = Word}}) of
+        []                                  -> error;
+        [#topic_trie_edge{node_id = Child}] -> {ok, Child}
+    end.
+
+trie_bindings(X, Node) ->  % destinations bound at Node in exchange X
+    MatchHead = #topic_trie_binding{
+      trie_binding = #trie_binding{exchange_name = X,
+                                   node_id       = Node,
+                                   destination   = '$1'}},
+    mnesia:select(rabbit_topic_trie_binding, [{MatchHead, [], ['$1']}]).  % '$1' is the destination
+
+trie_add_edge(X, FromNode, ToNode, W) ->  % write the edge FromNode --W--> ToNode
+    trie_edge_op(X, FromNode, ToNode, W, fun mnesia:write/3).
+
+trie_remove_edge(X, FromNode, ToNode, W) ->  % delete exactly that edge record
+    trie_edge_op(X, FromNode, ToNode, W, fun mnesia:delete_object/3).
+
+trie_edge_op(X, FromNode, ToNode, W, Op) ->  % Op is a 3-ary mnesia fun: (Table, Record, LockKind)
+    ok = Op(rabbit_topic_trie_edge,
+            #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
+                                                    node_id       = FromNode,
+                                                    word          = W},
+                             node_id   = ToNode},
+            write).
+
+trie_add_binding(X, Node, D) ->  % write a binding of destination D at Node
+    trie_binding_op(X, Node, D, fun mnesia:write/3).
+
+trie_remove_binding(X, Node, D) ->  % delete exactly that binding record
+    trie_binding_op(X, Node, D, fun mnesia:delete_object/3).
+
+trie_binding_op(X, Node, D, Op) ->  % Op is a 3-ary mnesia fun: (Table, Record, LockKind)
+    ok = Op(rabbit_topic_trie_binding,
+            #topic_trie_binding{
+              trie_binding = #trie_binding{exchange_name = X,
+                                           node_id       = Node,
+                                           destination   = D}},
+            write).
+
+trie_child_count(X, Node) ->  % number of edges leaving Node in exchange X
+    count(rabbit_topic_trie_edge,
+            #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
+                                                    node_id       = Node,
+                                                    _             = '_'},
+                             _         = '_'}).
+
+trie_binding_count(X, Node) ->  % number of bindings stored at Node in exchange X
+    count(rabbit_topic_trie_binding,
+            #topic_trie_binding{
+              trie_binding = #trie_binding{exchange_name = X,
+                                           node_id       = Node,
+                                           _             = '_'},
+              _            = '_'}).
+
+count(Table, Match) ->  % counts by fetching every matching record
+    length(mnesia:match_object(Table, Match, read)).
+
+trie_remove_all_edges(X) ->  % delete every trie edge of exchange X
+    remove_all(rabbit_topic_trie_edge,
+               #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
+                                                       _             = '_'},
+                                _         = '_'}).
+
+trie_remove_all_bindings(X) ->  % delete every trie binding of exchange X
+    remove_all(rabbit_topic_trie_binding,
+               #topic_trie_binding{
+                 trie_binding = #trie_binding{exchange_name = X, _ = '_'},
+                 _            = '_'}).
+
+remove_all(Table, Pattern) ->  % match with a write lock, then delete each record found
+    lists:foreach(fun (R) -> mnesia:delete_object(Table, R, write) end,
+                  mnesia:match_object(Table, Pattern, write)).
+
+new_node_id() ->  % node ids for new trie nodes come from rabbit_guid
+    rabbit_guid:guid().
+
+split_topic_key(Key) ->  % <<"a.b">> -> ["a", "b"]; each word is a list of bytes
+    split_topic_key(Key, [], []).
+
+split_topic_key(<<>>, [], []) ->
+    [];
+split_topic_key(<<>>, RevWord, RevWords) ->
+    lists:reverse([lists:reverse(RevWord) | RevWords]);
+split_topic_key(<<$., Tail/binary>>, RevWord, RevWords) ->
+    split_topic_key(Tail, [], [lists:reverse(RevWord) | RevWords]);
+split_topic_key(<<Byte:8, Tail/binary>>, RevWord, RevWords) ->
+    split_topic_key(Tail, [Byte | RevWord], RevWords).
+
diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl
index 86ea7282..8f9ab032 100644
--- a/src/rabbit_limiter.erl
+++ b/src/rabbit_limiter.erl
@@ -49,7 +49,7 @@
 -record(lim, {prefetch_count = 0,
               ch_pid,
               blocked = false,
-              queues = dict:new(), % QPid -> {MonitorRef, Notify}
+              queues = orddict:new(), % QPid -> {MonitorRef, Notify}
               volume = 0}).
 %% 'Notify' is a boolean that indicates whether a queue should be
 %% notified of a change in the limit or volume that may allow it to
@@ -65,7 +65,7 @@ start_link(ChPid, UnackedMsgCount) ->
 limit(undefined, 0) ->
     ok;
 limit(LimiterPid, PrefetchCount) ->
-    gen_server2:call(LimiterPid, {limit, PrefetchCount}).
+    gen_server2:call(LimiterPid, {limit, PrefetchCount}, infinity).  % NOTE(review): 'infinity' presumably disables the call timeout - confirm
 
 %% Ask the limiter whether the queue can deliver a message without
 %% breaching a limit
@@ -120,9 +120,9 @@ init([ChPid, UnackedMsgCount]) ->
 prioritise_call(get_limit, _From, _State) -> 9;
 prioritise_call(_Msg,      _From, _State) -> 0.
 
-handle_call({can_send, _QPid, _AckRequired}, _From,
+handle_call({can_send, QPid, _AckRequired}, _From,
             State = #lim{blocked = true}) ->
-    {reply, false, State};
+    {reply, false, limit_queue(QPid, State)};
 handle_call({can_send, QPid, AckRequired}, _From,
             State = #lim{volume = Volume}) ->
     case limit_reached(State) of
@@ -196,31 +196,30 @@ limit_reached(#lim{prefetch_count = Limit, volume = Volume}) ->
 blocked(#lim{blocked = Blocked}) -> Blocked.
 
 remember_queue(QPid, State = #lim{queues = Queues}) ->
-    case dict:is_key(QPid, Queues) of
+    case orddict:is_key(QPid, Queues) of  % only monitor a queue the first time it is seen
         false -> MRef = erlang:monitor(process, QPid),
-                 State#lim{queues = dict:store(QPid, {MRef, false}, Queues)};
+                 State#lim{queues = orddict:store(QPid, {MRef, false}, Queues)};  % Notify starts false
         true  -> State
     end.
 
 forget_queue(QPid, State = #lim{ch_pid = ChPid, queues = Queues}) ->
-    case dict:find(QPid, Queues) of
-        {ok, {MRef, _}} ->
-            true = erlang:demonitor(MRef),
-            ok = rabbit_amqqueue:unblock(QPid, ChPid),
-            State#lim{queues = dict:erase(QPid, Queues)};
-        error -> State
+    case orddict:find(QPid, Queues) of
+        {ok, {MRef, _}} -> true = erlang:demonitor(MRef),  % stop monitoring before dropping the entry
+                           ok = rabbit_amqqueue:unblock(QPid, ChPid),
+                           State#lim{queues = orddict:erase(QPid, Queues)};
+        error           -> State  % unknown queue: state is returned unchanged
     end.
 
 limit_queue(QPid, State = #lim{queues = Queues}) ->
     UpdateFun = fun ({MRef, _}) -> {MRef, true} end,
-    State#lim{queues = dict:update(QPid, UpdateFun, Queues)}.
+    State#lim{queues = orddict:update(QPid, UpdateFun, Queues)}.  % update/3 raises if QPid is not a key
 
 notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) ->
     {QList, NewQueues} =
-        dict:fold(fun (_QPid, {_, false}, Acc) -> Acc;
-                      (QPid, {MRef, true}, {L, D}) ->
-                          {[QPid | L], dict:store(QPid, {MRef, false}, D)}
-                  end, {[], Queues}, Queues),
+        orddict:fold(fun (_QPid, {_, false}, Acc) -> Acc;
+                         (QPid, {MRef, true}, {L, D}) ->
+                             {[QPid | L], orddict:store(QPid, {MRef, false}, D)}
+                     end, {[], Queues}, Queues),
     case length(QList) of
         0 -> ok;
         L ->
diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl
index 2f8c940b..996b0a98 100644
--- a/src/rabbit_memory_monitor.erl
+++ b/src/rabbit_memory_monitor.erl
@@ -111,11 +111,11 @@ stop() ->
 
 init([]) ->
     MemoryLimit = trunc(?MEMORY_LIMIT_SCALING *
-                        (try
-                             vm_memory_monitor:get_memory_limit()
-                         catch
-                             exit:{noproc, _} -> ?MEMORY_SIZE_FOR_DISABLED_VMM
-                         end)),
+                            (try
+                                 vm_memory_monitor:get_memory_limit()
+                             catch
+                                 exit:{noproc, _} -> ?MEMORY_SIZE_FOR_DISABLED_VMM
+                             end)),
 
     {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL,
                                       ?SERVER, update, []),
diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl
new file mode 100644
index 00000000..f6664a27
--- /dev/null
+++ b/src/rabbit_mirror_queue_coordinator.erl
@@ -0,0 +1,395 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2010-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_coordinator).
+
+-export([start_link/3, get_gm/1, ensure_monitoring/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3]).
+
+-export([joined/2, members_changed/3, handle_msg/3]).
+
+-behaviour(gen_server2).
+-behaviour(gm).
+
+-include("rabbit.hrl").
+-include("gm_specs.hrl").
+
+-record(state, { q,
+                 gm,
+                 monitors,
+                 death_fun
+               }).
+
+-define(ONE_SECOND, 1000).
+
+-ifdef(use_specs).
+
+-spec(start_link/3 :: (rabbit_types:amqqueue(), pid() | 'undefined',
+                       rabbit_mirror_queue_master:death_fun()) ->
+                           rabbit_types:ok_pid_or_error()).
+-spec(get_gm/1 :: (pid()) -> pid()).
+-spec(ensure_monitoring/2 :: (pid(), [pid()]) -> 'ok').
+
+-endif.
+
+%%----------------------------------------------------------------------------
+%%
+%% Mirror Queues
+%%
+%% A queue with mirrors consists of the following:
+%%
+%%  #amqqueue{ pid, mirror_pids }
+%%             |    |
+%%  +----------+    +-------+--------------+-----------...etc...
+%%  |                       |              |
+%%  V                       V              V
+%% amqqueue_process---+    slave-----+    slave-----+  ...etc...
+%% | BQ = master----+ |    | BQ = vq |    | BQ = vq |
+%% |      | BQ = vq | |    +-+-------+    +-+-------+
+%% |      +-+-------+ |      |              |
+%% +-++-----|---------+      |              |  (some details elided)
+%%   ||     |                |              |
+%%   ||   coordinator-+      |              |
+%%   ||   +-+---------+      |              |
+%%   ||     |                |              |
+%%   ||     gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc...
+%%   ||     +--+             +--+           +--+
+%%   ||
+%%  consumers
+%%
+%% The master is merely an implementation of bq, and thus is invoked
+%% through the normal bq interface by the amqqueue_process. The slaves
+%% meanwhile are processes in their own right (as is the
+%% coordinator). The coordinator and all slaves belong to the same gm
+%% group. Every member of a gm group receives messages sent to the gm
+%% group. Because the master is the bq of amqqueue_process, it doesn't
+%% have sole control over its mailbox, and as a result, the master
+%% itself cannot be passed messages directly (well, it could be via
+%% the amqqueue:run_backing_queue callback but that would induce
+%% additional unnecessary loading on the master queue process), yet it
+%% needs to react to gm events, such as the death of slaves. Thus the
+%% master creates the coordinator, and it is the coordinator that is
+%% the gm callback module and event handler for the master.
+%%
+%% Consumers are only attached to the master. Thus the master is
+%% responsible for informing all slaves when messages are fetched from
+%% the bq, when they're acked, and when they're requeued.
+%%
+%% The basic goal is to ensure that all slaves perform actions on
+%% their bqs in the same order as the master. Thus the master
+%% intercepts all events going to its bq, and suitably broadcasts
+%% these events on the gm. The slaves thus receive two streams of
+%% events: one stream is via the gm, and one stream is from channels
+%% directly. Whilst the stream via gm is guaranteed to be consistently
+%% seen by all slaves, the same is not true of the stream via
+%% channels. For example, in the event of an unexpected death of a
+%% channel during a publish, only some of the mirrors may receive that
+%% publish. As a result of this problem, the messages broadcast over
+%% the gm contain published content, and thus slaves can operate
+%% successfully on messages that they only receive via the gm. The key
+%% purpose of also sending messages directly from the channels to the
+%% slaves is that without this, in the event of the death of the
+%% master, messages could be lost until a suitable slave is promoted.
+%%
+%% However, that is not the only reason. For example, if confirms are
+%% in use, then there is no guarantee that every slave will see the
+%% delivery with the same msg_seq_no. As a result, the slaves have to
+%% wait until they've seen both the publish via gm, and the publish
+%% via the channel before they have enough information to be able to
+%% perform the publish to their own bq, and subsequently issue the
+%% confirm, if necessary. Either form of publish can arrive first, and
+%% a slave can be upgraded to the master at any point during this
+%% process. Confirms continue to be issued correctly, however.
+%%
+%% Because the slave is a full process, it impersonates parts of the
+%% amqqueue API. However, it does not need to implement all parts: for
+%% example, no ack or consumer-related message can arrive directly at
+%% a slave from a channel: it is only publishes that pass both
+%% directly to the slaves and go via gm.
+%%
+%% Slaves can be added dynamically. When this occurs, there is no
+%% attempt made to sync the current contents of the master with the
+%% new slave, thus the slave will start empty, regardless of the state
+%% of the master. Thus the slave needs to be able to detect and ignore
+%% operations which are for messages it has not received: because of
+%% the strict FIFO nature of queues in general, this is
+%% straightforward - all new publishes that the new slave receives via
+%% gm should be processed as normal, but fetches which are for
+%% messages the slave has never seen should be ignored. Similarly,
+%% acks for messages the slave never fetched should be
+%% ignored. Eventually, as the master is consumed from, the messages
+%% at the head of the queue which were there before the slave joined
+%% will disappear, and the slave will become fully synced with the
+%% state of the master. The detection of the sync-status of a slave is
+%% done entirely based on length: if the slave and the master both
+%% agree on the length of the queue after the fetch of the head of the
+%% queue, then the queues must be in sync. The only other possibility
+%% is that the slave's queue is shorter, and thus the fetch should be
+%% ignored.
+%%
+%% Because acktags are issued by the bq independently, and because
+%% there is no requirement for the master and all slaves to use the
+%% same bq, all references to msgs going over gm are by msg_id. Thus
+%% upon acking, the master must convert the acktags back to msg_ids
+%% (which happens to be what bq:ack returns), then sends the msg_ids
+%% over gm, the slaves must convert the msg_ids to acktags (a mapping
+%% the slaves themselves must maintain).
+%%
+%% When the master dies, a slave gets promoted. This will be the
+%% eldest slave, and thus the hope is that that slave is most likely
+%% to be sync'd with the master. The design of gm is that the
+%% notification of the death of the master will only appear once all
+%% messages in-flight from the master have been fully delivered to all
+%% members of the gm group. Thus at this point, the slave that gets
+%% promoted cannot broadcast different events in a different order
+%% than the master for the same msgs: there is no possibility for the
+%% same msg to be processed by the old master and the new master - if
+%% it was processed by the old master then it will have been processed
+%% by the slave before the slave was promoted, and vice versa.
+%%
+%% Upon promotion, all msgs pending acks are requeued as normal, the
+%% slave constructs state suitable for use in the master module, and
+%% then dynamically changes into an amqqueue_process with the master
+%% as the bq, and the slave's bq as the master's bq. Thus the very
+%% same process that was the slave is now a full amqqueue_process.
+%%
+%% It is important that we avoid memory leaks due to the death of
+%% senders (i.e. channels) and partial publications. A sender
+%% publishing a message may fail mid way through the publish and thus
+%% only some of the mirrors will receive the message. We need the
+%% mirrors to be able to detect this and tidy up as necessary to avoid
+%% leaks. If we just had the master monitoring all senders then we
+%% would have the possibility that a sender appears and only sends the
+%% message to a few of the slaves before dying. Those slaves would
+%% then hold on to the message, assuming they'll receive some
+%% instruction eventually from the master. Thus we have both slaves
+%% and the master monitor all senders they become aware of. But there
+%% is a race: if the slave receives a DOWN of a sender, how does it
+%% know whether or not the master is going to send it instructions
+%% regarding those messages?
+%%
+%% Whilst the master monitors senders, it can't access its mailbox
+%% directly, so it delegates monitoring to the coordinator. When the
+%% coordinator receives a DOWN message from a sender, it informs the
+%% master via a callback. This allows the master to do any tidying
+%% necessary, but more importantly allows the master to broadcast a
+%% sender_death message to all the slaves, saying the sender has
+%% died. Once the slaves receive the sender_death message, they know
+%% that they're not going to receive any more instructions from the gm
+%% regarding that sender, thus they throw away any publications from
+%% the sender pending publication instructions. However, it is
+%% possible that the coordinator receives the DOWN and communicates
+%% that to the master before the master has finished receiving and
+%% processing publishes from the sender. This turns out not to be a
+%% problem: the sender has actually died, and so will not need to
+%% receive confirms or other feedback, and should further messages be
+%% "received" from the sender, the master will ask the coordinator to
+%% set up a new monitor, and will continue to process the messages
+%% normally. Slaves may thus receive publishes via gm from previously
+%% declared "dead" senders, but again, this is fine: should the slave
+%% have just thrown out the message it had received directly from the
+%% sender (due to receiving a sender_death message via gm), it will be
+%% able to cope with the publication purely from the master via gm.
+%%
+%% When a slave receives a DOWN message for a sender, if it has not
+%% received the sender_death message from the master via gm already,
+%% then it will wait 20 seconds before broadcasting a request for
+%% confirmation from the master that the sender really has died.
+%% Should a sender have only sent a publish to slaves, this allows
+%% slaves to inform the master of the previous existence of the
+%% sender. The master will thus monitor the sender, receive the DOWN,
+%% and subsequently broadcast the sender_death message, allowing the
+%% slaves to tidy up. This process can repeat for the same sender:
+%% consider one slave receives the publication, then the DOWN, then
+%% asks for confirmation of death, then the master broadcasts the
+%% sender_death message. Only then does another slave receive the
+%% publication and thus set up its monitoring. Eventually that slave
+%% too will receive the DOWN, ask for confirmation and the master will
+%% monitor the sender again, receive another DOWN, and send out
+%% another sender_death message. Given the 20 second delay before
+%% requesting death confirmation, this is highly unlikely, but it is a
+%% possibility.
+%%
+%% When the 20 second timer expires, the slave first checks to see
+%% whether it still needs confirmation of the death before requesting
+%% it. This prevents unnecessary traffic on gm as it allows one
+%% broadcast of the sender_death message to satisfy many slaves.
+%%
+%% If we consider the promotion of a slave at this point, we have two
+%% possibilities: that of the slave that has received the DOWN and is
+%% thus waiting for confirmation from the master that the sender
+%% really is down; and that of the slave that has not received the
+%% DOWN. In the first case, in the act of promotion to master, the new
+%% master will monitor again the dead sender, and after it has
+%% finished promoting itself, it should find another DOWN waiting,
+%% which it will then broadcast. This will allow slaves to tidy up as
+%% normal. In the second case, we have the possibility that
+%% a confirmation-of-sender-death request has been broadcast, but that
+%% it was broadcast before the master failed, and that the slave being
+%% promoted does not know anything about that sender, and so will not
+%% monitor it on promotion. Thus a slave that broadcasts such a
+%% request, at the point of broadcasting it, recurses, setting another
+%% 20 second timer. As before, on expiry of the timer, the slave
+%% checks to see whether it still has not received a sender_death
+%% message for the dead sender, and if not, broadcasts a death
+%% confirmation request. Thus this ensures that even when a master
+%% dies and the new master has no knowledge of the dead sender, it will
+%% eventually receive a death confirmation request, shall monitor the
+%% dead sender, receive the DOWN and broadcast the sender_death
+%% message.
+%%
+%% The preceding commentary deals with the possibility of slaves
+%% receiving publications from senders which the master does not, and
+%% the need to prevent memory leaks in such scenarios. The inverse is
+%% also possible: a partial publication may cause only the master to
+%% receive a publication. It will then publish the message via gm. The
+%% slaves will receive it via gm, will publish it to their BQ and will
+%% set up monitoring on the sender. They will then receive the DOWN
+%% message and the master will eventually publish the corresponding
+%% sender_death message. The slave will then be able to tidy up its
+%% state as normal.
+%%
+%% Recovery of mirrored queues is straightforward: as nodes die, the
+%% remaining nodes record this, and eventually a situation is reached
+%% in which only one node is alive, which is the master. This is the
+%% only node which, upon recovery, will resurrect a mirrored queue:
+%% nodes which die and then rejoin as a slave will start off empty as
+%% if they have no mirrored content at all. This is not surprising: to
+%% achieve anything more sophisticated would require the master and
+%% recovering slave to be able to check to see whether they agree on
+%% the last seen state of the queue: checking length alone is not
+%% sufficient in this case.
+%%
+%% For more documentation see the comments in bug 23554.
+%%
+%%----------------------------------------------------------------------------
+
+start_link(Queue, GM, DeathFun) ->
+    gen_server2:start_link(?MODULE, [Queue, GM, DeathFun], []).
+
+get_gm(CPid) ->
+    gen_server2:call(CPid, get_gm, infinity).
+
+ensure_monitoring(CPid, Pids) ->
+    gen_server2:cast(CPid, {ensure_monitoring, Pids}).
+
+%% ---------------------------------------------------------------------------
+%% gen_server
+%% ---------------------------------------------------------------------------
+
+%% With no gm supplied we start one and block until it reports that it has
+%% joined the group; otherwise we just link to the existing gm. Either way
+%% a heartbeat is then broadcast over the gm once a second.
+init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) ->
+    GM1 = case GM of
+              undefined -> {ok, GM2} = gm:start_link(QueueName, ?MODULE,
+                                                     [self()]),
+                           receive {joined, GM2, _Members} -> ok end,
+                           GM2;
+              _         -> true = link(GM),
+                           GM
+          end,
+    {ok, _TRef} =
+        timer:apply_interval(?ONE_SECOND, gm, broadcast, [GM1, heartbeat]),
+    State = #state { q         = Q,
+                     gm        = GM1,
+                     monitors  = dict:new(),
+                     death_fun = DeathFun },
+    {ok, State, hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+handle_call(get_gm, _From, State = #state { gm = GM }) ->
+    reply(GM, State).
+
+%% Logs the mirror deaths reported by gm and removes them from the queue.
+handle_cast({gm_deaths, Deaths},
+            State = #state { q = #amqqueue { name = QueueName } }) ->
+    DeadPids = [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths],
+    rabbit_log:info("Mirrored-queue (~s): Master ~s saw deaths of mirrors ~s~n",
+                    [rabbit_misc:rs(QueueName),
+                     rabbit_misc:pid_to_string(self()), DeadPids]),
+    case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of
+        {error, not_found}                  -> {stop, normal, State};
+        {ok, Pid} when node(Pid) =:= node() -> noreply(State)
+    end;
+
+%% Monitors every pid in Pids that is not already in the monitors dict.
+handle_cast({ensure_monitoring, Pids},
+            State = #state { monitors = Monitors }) ->
+    Monitor = fun (Pid, Acc) ->
+                      case dict:is_key(Pid, Acc) of
+                          false -> Ref = erlang:monitor(process, Pid),
+                                   dict:store(Pid, Ref, Acc);
+                          true  -> Acc
+                      end
+              end,
+    Monitors1 = lists:foldl(Monitor, Monitors, Pids),
+    noreply(State #state { monitors = Monitors1 }).
+
+%% A monitored pid went down: call death_fun with it and stop tracking it.
+handle_info({'DOWN', _MonitorRef, process, Pid, _Reason},
+            State = #state { monitors = Monitors, death_fun = Fun }) ->
+    State1 = case dict:is_key(Pid, Monitors) of
+                 true  -> ok = Fun(Pid),
+                          State #state { monitors = dict:erase(Pid, Monitors) };
+                 false -> State
+             end,
+    noreply(State1);
+
+handle_info(Msg, State) ->
+    {stop, {unexpected_info, Msg}, State}.
+
+terminate(_Reason, #state{}) ->
+    %% gen_server case
+    ok;
+terminate([_CPid], _Reason) ->
+    %% gm case
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% ---------------------------------------------------------------------------
+%% GM
+%% ---------------------------------------------------------------------------
+
+joined([CPid], Members) ->
+    CPid ! {joined, self(), Members},
+    ok.
+
+members_changed([_CPid], _Births, []) ->
+    ok;
+members_changed([CPid], _Births, Deaths) ->
+    ok = gen_server2:cast(CPid, {gm_deaths, Deaths}).
+
+handle_msg([_CPid], _From, heartbeat) ->
+    ok;
+handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) ->
+    ok = gen_server2:cast(CPid, Msg);
+handle_msg([_CPid], _From, _Msg) ->
+    ok.
+
+%% ---------------------------------------------------------------------------
+%% Others
+%% ---------------------------------------------------------------------------
+
+noreply(State) ->
+    {noreply, State, hibernate}.
+
+reply(Reply, State) ->
+    {reply, Reply, State, hibernate}.
diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl
new file mode 100644
index 00000000..532911f2
--- /dev/null
+++ b/src/rabbit_mirror_queue_master.erl
@@ -0,0 +1,390 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2010-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_master).
+
+-export([init/3, terminate/2, delete_and_terminate/2,
+         purge/1, publish/4, publish_delivered/5, fetch/2, ack/2,
+         requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2,
+         set_ram_duration_target/2, ram_duration/1,
+         needs_timeout/1, timeout/1, handle_pre_hibernate/1,
+         status/1, invoke/3, is_duplicate/2, discard/3]).
+
+-export([start/1, stop/0]).
+
+-export([promote_backing_queue_state/6, sender_death_fun/0]).
+
+-behaviour(rabbit_backing_queue).
+
+-include("rabbit.hrl").
+
+-record(state, { gm,
+                 coordinator,
+                 backing_queue,
+                 backing_queue_state,
+                 set_delivered,
+                 seen_status,
+                 confirmed,
+                 ack_msg_id,
+                 known_senders
+               }).
+
+-ifdef(use_specs).
+
+-export_type([death_fun/0]).
+
+-type(death_fun() :: fun ((pid()) -> 'ok')).
+-type(master_state() :: #state { gm                  :: pid(),
+                                 coordinator         :: pid(),
+                                 backing_queue       :: atom(),
+                                 backing_queue_state :: any(),
+                                 set_delivered       :: non_neg_integer(),
+                                 seen_status         :: dict(),
+                                 confirmed           :: [rabbit_guid:guid()],
+                                 ack_msg_id          :: dict(),
+                                 known_senders       :: set()
+                               }).
+
+-spec(promote_backing_queue_state/6 ::
+        (pid(), atom(), any(), pid(), dict(), [pid()]) -> master_state()).
+-spec(sender_death_fun/0 :: () -> death_fun()).
+
+-endif.
+
+%% For general documentation of HA design, see
+%% rabbit_mirror_queue_coordinator
+
+%% ---------------------------------------------------------------------------
+%% Backing queue
+%% ---------------------------------------------------------------------------
+
+start(_DurableQueues) ->
+    %% This will never get called as this module will never be
+    %% installed as the default BQ implementation.
+    exit({not_valid_for_generic_backing_queue, ?MODULE}).
+
+stop() ->
+    %% Same as start/1.
+    exit({not_valid_for_generic_backing_queue, ?MODULE}).
+
+%% Starts a coordinator (passing 'undefined' so that it creates the gm
+%% group itself), requests a mirror on every configured node other than
+%% this one, and then initialises the configured backing queue module.
+init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover,
+     AsyncCallback) ->
+    {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(
+                   Q, undefined, sender_death_fun()),
+    GM = rabbit_mirror_queue_coordinator:get_gm(CPid),
+    MNodes1 = case MNodes of
+                  undefined -> [];
+                  all       -> rabbit_mnesia:all_clustered_nodes();
+                  _         -> [list_to_atom(binary_to_list(N)) || N <- MNodes]
+              end,
+    lists:foreach(fun (Node) ->
+                          rabbit_mirror_queue_misc:add_mirror(QName, Node)
+                  end, MNodes1 -- [node()]),
+    {ok, BQ} = application:get_env(backing_queue_module),
+    BQS = BQ:init(Q, Recover, AsyncCallback),
+    #state { gm = GM, coordinator = CPid,
+             backing_queue = BQ, backing_queue_state = BQS,
+             set_delivered = 0, confirmed = [],
+             seen_status = dict:new(), ack_msg_id = dict:new(),
+             known_senders = sets:new() }.
+
+terminate({shutdown, dropped} = Reason,
+          State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    %% Backing queue termination - this node has been explicitly
+    %% dropped. Normally, non-durable queues would be tidied up on
+    %% startup, but there's a possibility that we will be added back
+    %% in without this node being restarted. Thus we must do the full
+    %% blown delete_and_terminate now, but only locally: we do not
+    %% broadcast delete_and_terminate.
+    State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS),
+                   set_delivered       = 0 };
+terminate(Reason,
+          State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    %% Backing queue termination. The queue is going down but
+    %% shouldn't be deleted. Most likely safe shutdown of this
+    %% node. Thus just let some other slave take over.
+    State #state { backing_queue_state = BQ:terminate(Reason, BQS) }.
+
+%% Broadcasts the deletion over gm before deleting the local backing queue.
+delete_and_terminate(Reason, State = #state { gm = GM, backing_queue = BQ,
+                                              backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {delete_and_terminate, Reason}),
+    BQS1 = BQ:delete_and_terminate(Reason, BQS),
+    State #state { backing_queue_state = BQS1, set_delivered = 0 }.
+
+%% Broadcasts {set_length, 0} over gm, then purges the local backing queue.
+purge(State = #state { gm = GM, backing_queue = BQ,
+                       backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {set_length, 0}),
+    {Count, BQS1} = BQ:purge(BQS),
+    State1 = State #state { backing_queue_state = BQS1, set_delivered = 0 },
+    {Count, State1}.
+
+publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid,
+        State = #state { gm                  = GM,
+                         seen_status         = SS,
+                         backing_queue       = BQ,
+                         backing_queue_state = BQS }) ->
+    false = dict:is_key(MsgId, SS), %% ASSERTION
+    ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}),
+    BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS),
+    ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }).
+
+publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps,
+                  ChPid, State = #state { gm                  = GM,
+                                          seen_status         = SS,
+                                          backing_queue       = BQ,
+                                          backing_queue_state = BQS,
+                                          ack_msg_id          = AM }) ->
+    false = dict:is_key(MsgId, SS), %% ASSERTION
+    %% Must use confirmed_broadcast here in order to guarantee that
+    %% all slaves are forced to interpret this publish_delivered at
+    %% the same point, especially if we die and a slave is promoted.
+    ok = gm:confirmed_broadcast(
+           GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}),
+    {AckTag, BQS1} =
+        BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS),
+    AM1 = maybe_store_acktag(AckTag, MsgId, AM),
+    {AckTag,
+     ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1,
+                                             ack_msg_id          = AM1 })}.
+
+%% Delegates to BQ:dropwhile/2, shrinks set_delivered by the number dropped
+%% and broadcasts the new length (queried once, then reused) over gm.
+dropwhile(Fun, State = #state { gm = GM, backing_queue = BQ,
+                                backing_queue_state = BQS,
+                                set_delivered = SetDelivered }) ->
+    Len = BQ:len(BQS),
+    BQS1 = BQ:dropwhile(Fun, BQS),
+    Len1 = BQ:len(BQS1),
+    SetDelivered1 = lists:max([0, SetDelivered - (Len - Len1)]),
+    ok = gm:broadcast(GM, {set_length, Len1}),
+    State #state { backing_queue_state = BQS1, set_delivered = SetDelivered1 }.
+
+drain_confirmed(State = #state { backing_queue       = BQ,
+                                 backing_queue_state = BQS,
+                                 seen_status         = SS,
+                                 confirmed           = Confirmed }) ->
+    {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
+    {MsgIds1, SS1} =
+        lists:foldl(
+          fun (MsgId, {MsgIdsN, SSN}) ->
+                  %% We will never see 'discarded' here
+                  case dict:find(MsgId, SSN) of
+                      error ->
+                          {[MsgId | MsgIdsN], SSN};
+                      {ok, published} ->
+                          %% It was published when we were a slave,
+                          %% and we were promoted before we saw the
+                          %% publish from the channel. We still
+                          %% haven't seen the channel publish, and
+                          %% consequently we need to filter out the
+                          %% confirm here. We will issue the confirm
+                          %% when we see the publish from the channel.
+                          {MsgIdsN, dict:store(MsgId, confirmed, SSN)};
+                      {ok, confirmed} ->
+                          %% Well, confirms are racy by definition.
+                          {[MsgId | MsgIdsN], SSN}
+                  end
+          end, {[], SS}, MsgIds),
+    {Confirmed ++ MsgIds1, State #state { backing_queue_state = BQS1,
+                                          seen_status         = SS1,
+                                          confirmed           = [] }}.
+
+%% Fetches from the backing queue; a non-empty result is broadcast over gm
+%% and is reported as delivered while set_delivered is still positive.
+fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ,
+                                    backing_queue_state = BQS,
+                                    set_delivered = SetDelivered,
+                                    ack_msg_id = AM }) ->
+    {Result, BQS1} = BQ:fetch(AckRequired, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    case Result of
+        {#basic_message { id = MsgId } = Message, IsDelivered, AckTag,
+         Remaining} ->
+            ok = gm:broadcast(GM, {fetch, AckRequired, MsgId, Remaining}),
+            Delivered = IsDelivered orelse SetDelivered > 0,
+            AM1 = maybe_store_acktag(AckTag, MsgId, AM),
+            {{Message, Delivered, AckTag, Remaining},
+             State1 #state { set_delivered = lists:max([0, SetDelivered - 1]),
+                             ack_msg_id = AM1 }};
+        empty ->
+            {empty, State1}
+    end.
+
+%% Acks on the backing queue, erases the acked tags from ack_msg_id and, if
+%% any msg ids came back, broadcasts them over gm.
+ack(AckTags, State = #state { gm = GM, backing_queue = BQ,
+                              backing_queue_state = BQS, ack_msg_id = AM }) ->
+    {MsgIds, BQS1} = BQ:ack(AckTags, BQS),
+    Forget = fun (AckTag, Acc) -> dict:erase(AckTag, Acc) end,
+    State1 = State #state { backing_queue_state = BQS1,
+                            ack_msg_id = lists:foldl(Forget, AM, AckTags) },
+    if MsgIds =:= [] -> ok;
+       true          -> ok = gm:broadcast(GM, {ack, MsgIds})
+    end,
+    {MsgIds, State1}.
+
+requeue(AckTags, MsgPropsFun, State = #state { gm                  = GM,
+                                               backing_queue       = BQ,
+                                               backing_queue_state = BQS }) ->
+    {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS),
+    ok = gm:broadcast(GM, {requeue, MsgPropsFun, MsgIds}),
+    {MsgIds, State #state { backing_queue_state = BQS1 }}.
+
+len(#state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:len(BQS).
+
+is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:is_empty(BQS).
+
+set_ram_duration_target(Target, State = #state { backing_queue       = BQ,
+                                                 backing_queue_state = BQS }) ->
+    State #state { backing_queue_state =
+                       BQ:set_ram_duration_target(Target, BQS) }.
+
+ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    {Result, BQS1} = BQ:ram_duration(BQS),
+    {Result, State #state { backing_queue_state = BQS1 }}.
+
+needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQ:needs_timeout(BQS).
+
+timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    State #state { backing_queue_state = BQ:timeout(BQS) }.
+
+%% Give the wrapped backing queue the chance to prepare for
+%% hibernation and keep the state it returns.
+handle_pre_hibernate(State = #state { backing_queue_state = BQS0,
+                                      backing_queue       = Module }) ->
+    BQS1 = Module:handle_pre_hibernate(BQS0),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Delegates status/1 to the wrapped backing queue module.
+status(#state { backing_queue_state = BQS, backing_queue = Module }) ->
+    Module:status(BQS).
+
+%% Requests addressed to ?MODULE are run against our own state; any
+%% other module's request is handed down to the wrapped backing queue.
+invoke(?MODULE, Fun, State) ->
+    Fun(?MODULE, State);
+invoke(Target, Fun, State = #state { backing_queue_state = BQS0,
+                                     backing_queue       = Module }) ->
+    BQS1 = Module:invoke(Target, Fun, BQS0),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Decide whether Message, arriving from a channel, was already seen
+%% via gm while we were still a slave. Returns {Result, NewState},
+%% where Result is 'published', 'discarded', or whatever the wrapped
+%% backing queue answers when we hold no record of the message.
+%%
+%% If we did see the message as a slave, there may be a confirm
+%% waiting to be issued for it.
+%%
+%% We will never see {published, ChPid, MsgSeqNo} here.
+is_duplicate(Message = #basic_message { id = MsgId },
+             State = #state { confirmed           = Confirmed,
+                              backing_queue_state = BQS,
+                              backing_queue       = BQ,
+                              seen_status         = SS }) ->
+    case dict:find(MsgId, SS) of
+        {ok, published} ->
+            %% Published while we were a slave, with no confirmation
+            %% waiting. amqqueue_process will have, in its
+            %% msg_id_to_channel mapping, the entry for dealing with
+            %% the confirm when that comes back in (it's added
+            %% immediately after calling is_duplicate). The msg is
+            %% invalid. We will not see this again, nor will we be
+            %% further involved in confirming this message, so erase.
+            Forgotten = dict:erase(MsgId, SS),
+            {published, State #state { seen_status = Forgotten }};
+        {ok, confirmed} ->
+            %% Published via gm while we were a slave and confirmed
+            %% some time after that (maybe even after promotion), but
+            %% before the publish arrived from the channel, so we
+            %% couldn't previously know the msg_seq_no (and thus
+            %% confirm as a slave). So we need to confirm now. As
+            %% above, amqqueue_process will have the entry for the
+            %% msg_id_to_channel mapping added immediately after
+            %% calling is_duplicate/2.
+            Forgotten = dict:erase(MsgId, SS),
+            {published, State #state { seen_status = Forgotten,
+                                       confirmed   = [MsgId | Confirmed] }};
+        {ok, discarded} ->
+            %% Don't erase from SS here because discard/2 is about to
+            %% be called and we need to be able to detect this case
+            {discarded, State};
+        error ->
+            %% We permit the underlying BQ to have a peek at it, but
+            %% only if we ourselves are not filtering out the msg.
+            {Verdict, BQS1} = BQ:is_duplicate(Message, BQS),
+            {Verdict, State #state { backing_queue_state = BQS1 }}
+    end.
+
+%% Discard a message on behalf of the queue process.
+%%
+%% Msg is the #basic_message being dropped and ChPid the channel pid
+%% that is included in the gm broadcast. Returns the updated #state.
+%%
+%% It's a massive error if we get told to discard something that's
+%% already been published or published-and-confirmed. To do that
+%% would require non FIFO access. Hence we should not find
+%% 'published' or 'confirmed' in this dict:find (the case expression
+%% has no clause for them, so it would crash with case_clause).
+discard(Msg = #basic_message { id = MsgId }, ChPid,
+        State = #state { gm                  = GM,
+                         backing_queue       = BQ,
+                         backing_queue_state = BQS,
+                         seen_status         = SS }) ->
+    case dict:find(MsgId, SS) of
+        error ->
+            %% We hold no record of this message, so seen_status has
+            %% nothing to erase: tell the mirrors and the underlying
+            %% backing queue about the discard.
+            ok = gm:broadcast(GM, {discard, ChPid, Msg}),
+            State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS) };
+        {ok, discarded} ->
+            %% The discard was already recorded in seen_status;
+            %% is_duplicate/2 deliberately left the entry in place so
+            %% that this case could be detected. Erase it now,
+            %% otherwise the entry would never be removed.
+            State #state { seen_status = dict:erase(MsgId, SS) }
+    end.
+
+%% ---------------------------------------------------------------------------
+%% Other exported functions
+%% ---------------------------------------------------------------------------
+
+%% Build the master's #state from what a promoted slave hands over:
+%% coordinator pid, backing queue module and state, gm pid, the
+%% seen_status dict and the list of known sender pids. set_delivered
+%% starts at the current BQ:len/1 and there are no pending confirms or
+%% ack tags.
+promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) ->
+    Depth   = BQ:len(BQS),
+    Senders = sets:from_list(KS),
+    #state { coordinator         = CPid,
+             gm                  = GM,
+             backing_queue       = BQ,
+             backing_queue_state = BQS,
+             seen_status         = SeenStatus,
+             set_delivered       = Depth,
+             known_senders       = Senders,
+             ack_msg_id          = dict:new(),
+             confirmed           = [] }.
+
+%% Returns a fun that takes the pid of a dead sender. When called it
+%% asks the process that created it (captured via self() here) to run
+%% a backing-queue callback which broadcasts {sender_death, DeadPid}
+%% over gm and removes DeadPid from known_senders.
+sender_death_fun() ->
+    QueuePid = self(),
+    fun (DeadPid) ->
+            Forget =
+                fun (?MODULE, State = #state { known_senders = KS,
+                                               gm            = GM }) ->
+                        ok = gm:broadcast(GM, {sender_death, DeadPid}),
+                        State #state {
+                          known_senders = sets:del_element(DeadPid, KS) }
+                end,
+            rabbit_amqqueue:run_backing_queue(QueuePid, ?MODULE, Forget)
+    end.
+
+%% ---------------------------------------------------------------------------
+%% Helpers
+%% ---------------------------------------------------------------------------
+
+%% Record AckTag -> MsgId in the dict AM, unless there is no ack tag
+%% (AckTag is 'undefined'), in which case AM is returned untouched.
+maybe_store_acktag(AckTag, MsgId, AM) ->
+    case AckTag of
+        undefined -> AM;
+        _         -> dict:store(AckTag, MsgId, AM)
+    end.
+
+%% Make sure the coordinator is monitoring ChPid. Pids already in
+%% known_senders are left alone; new ones are passed to the
+%% coordinator and then added to the set.
+ensure_monitoring(ChPid, State = #state { known_senders = KS,
+                                          coordinator   = CPid }) ->
+    case sets:is_element(ChPid, KS) of
+        false ->
+            ok = rabbit_mirror_queue_coordinator:ensure_monitoring(
+                   CPid, [ChPid]),
+            State #state { known_senders = sets:add_element(ChPid, KS) };
+        true ->
+            State
+    end.
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl
new file mode 100644
index 00000000..6a9f733e
--- /dev/null
+++ b/src/rabbit_mirror_queue_misc.erl
@@ -0,0 +1,135 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2010-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_misc).
+
+-export([remove_from_queue/2, on_node_up/0,
+         drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3]).
+
+-include("rabbit.hrl").
+
+%% Remove from the queue's mnesia record every pid that lives on the
+%% same node as one of DeadPids. Runs in an mnesia transaction.
+%%
+%% Returns {error, not_found} if the queue record no longer exists,
+%% otherwise {ok, QPid1} where QPid1 is the first surviving pid of
+%% [master | slaves], i.e. the pid that is, or is due to become, the
+%% master. Note that {ok, QPid1} is returned even when the record was
+%% deliberately left unmodified.
+%%
+%% If the dead pids include the queue pid (i.e. the master has died)
+%% then only remove that if we are about to be promoted. Otherwise we
+%% can have the situation where a slave updates the mnesia record for
+%% a queue, promoting another slave before that slave realises it has
+%% become the new master, which is bad because it could then mean the
+%% slave (now master) receives messages it's not ready for (for
+%% example, new consumers).
+remove_from_queue(QueueName, DeadPids) ->
+    %% Liveness is decided per node, not per pid.
+    DeadNodes = [node(DeadPid) || DeadPid <- DeadPids],
+    rabbit_misc:execute_mnesia_transaction(
+      fun () ->
+              %% Someone else could have deleted the queue before we
+              %% get here.
+              case mnesia:read({rabbit_queue, QueueName}) of
+                  [] -> {error, not_found};
+                  [Q = #amqqueue { pid          = QPid,
+                                   slave_pids   = SPids }] ->
+                      %% NOTE(review): this match fails with badmatch
+                      %% if no pid survives the filter; presumably the
+                      %% caller is always a live member of the record
+                      %% - confirm.
+                      [QPid1 | SPids1] =
+                          [Pid || Pid <- [QPid | SPids],
+                                  not lists:member(node(Pid), DeadNodes)],
+                      case {{QPid, SPids}, {QPid1, SPids1}} of
+                          {Same, Same} ->
+                              %% Nothing was filtered out.
+                              ok;
+                          _ when QPid =:= QPid1 orelse node(QPid1) =:= node() ->
+                              %% Either master hasn't changed, so
+                              %% we're ok to update mnesia; or we have
+                              %% become the master.
+                              Q1 = Q #amqqueue { pid        = QPid1,
+                                                 slave_pids = SPids1 },
+                              ok = rabbit_amqqueue:store_queue(Q1);
+                          _ ->
+                              %% Master has changed, and we're not it,
+                              %% so leave alone to allow the promoted
+                              %% slave to find it and make its
+                              %% promotion atomic.
+                              ok
+                      end,
+                      {ok, QPid1}
+              end
+      end).
+
+%% Collect, in one mnesia transaction, the names of all queues whose
+%% mirror_nodes is 'all' or a list containing this node, then call
+%% add_mirror/2 for each of them with this node. Queues with
+%% mirror_nodes = undefined are skipped. Always returns ok.
+on_node_up() ->
+    Here = node(),
+    Collect =
+        fun (#amqqueue { mirror_nodes = undefined }, Acc) ->
+                Acc;
+            (#amqqueue { mirror_nodes = all,
+                         name         = QName }, Acc) ->
+                [QName | Acc];
+            (#amqqueue { mirror_nodes = MNodes,
+                         name         = QName }, Acc) ->
+                case lists:member(Here, MNodes) of
+                    true  -> [QName | Acc];
+                    false -> Acc
+                end
+        end,
+    QNames = rabbit_misc:execute_mnesia_transaction(
+               fun () -> mnesia:foldl(Collect, [], rabbit_queue) end),
+    %% Results of add_mirror/2 are deliberately ignored.
+    lists:foreach(fun (QName) -> add_mirror(QName, Here) end, QNames),
+    ok.
+
+%% Convenience wrapper: build the queue resource name from the vhost
+%% path and queue name, then defer to drop_mirror/2.
+drop_mirror(VHostPath, QueueName, MirrorNode) ->
+    Resource = rabbit_misc:r(VHostPath, queue, QueueName),
+    drop_mirror(Resource, MirrorNode).
+
+%% Stop the queue process (master or slave) that lives on MirrorNode
+%% by sending it an exit signal with reason {shutdown, dropped}.
+%% Returns {error, {queue_not_mirrored_on_node, MirrorNode}} when no
+%% queue pid is on that node, and {error, cannot_drop_only_mirror}
+%% when the pid on that node is the master and there are no slaves.
+drop_mirror(Queue, MirrorNode) ->
+    OnNode = fun (Pid) -> node(Pid) =:= MirrorNode end,
+    if_mirrored_queue(
+      Queue,
+      fun (#amqqueue { slave_pids = SPids, pid = QPid, name = Name }) ->
+              case lists:filter(OnNode, [QPid | SPids]) of
+                  [] ->
+                      {error, {queue_not_mirrored_on_node, MirrorNode}};
+                  [QPid] when SPids =:= [] ->
+                      {error, cannot_drop_only_mirror};
+                  [Victim] ->
+                      rabbit_log:info(
+                        "Dropping queue mirror on node ~p for ~s~n",
+                        [MirrorNode, rabbit_misc:rs(Name)]),
+                      exit(Victim, {shutdown, dropped}),
+                      ok
+              end
+      end).
+
+%% Convenience wrapper: build the queue resource name from the vhost
+%% path and queue name, then defer to add_mirror/2.
+add_mirror(VHostPath, QueueName, MirrorNode) ->
+    Resource = rabbit_misc:r(VHostPath, queue, QueueName),
+    add_mirror(Resource, MirrorNode).
+
+%% Start a slave for Queue on MirrorNode via
+%% rabbit_mirror_queue_slave_sup, unless one of the queue's pids is
+%% already on that node, in which case
+%% {error, {queue_already_mirrored_on_node, MirrorNode}} is returned.
+%% A successful start yields ok; any other start_child result is
+%% returned as is.
+add_mirror(Queue, MirrorNode) ->
+    OnNode = fun (Pid) -> node(Pid) =:= MirrorNode end,
+    if_mirrored_queue(
+      Queue,
+      fun (Q = #amqqueue { slave_pids = SPids, pid = QPid, name = Name }) ->
+              case lists:filter(OnNode, [QPid | SPids]) of
+                  [_] ->
+                      {error, {queue_already_mirrored_on_node, MirrorNode}};
+                  [] ->
+                      Outcome = rabbit_mirror_queue_slave_sup:start_child(
+                                  MirrorNode, [Q]),
+                      rabbit_log:info(
+                        "Adding mirror of queue ~s on node ~p: ~p~n",
+                        [rabbit_misc:rs(Name), MirrorNode, Outcome]),
+                      case Outcome of
+                          {ok, _Pid} -> ok;
+                          _          -> Outcome
+                      end
+              end
+      end).
+
+%% Look Queue up with rabbit_amqqueue:with/2 and apply Fun to its
+%% record, but only when the queue's arguments contain an
+%% "x-ha-policy" entry; for any other queue the result is plain ok.
+if_mirrored_queue(Queue, Fun) ->
+    Guarded =
+        fun (Q = #amqqueue { arguments = Args }) ->
+                case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of
+                    undefined -> ok;
+                    _         -> Fun(Q)
+                end
+        end,
+    rabbit_amqqueue:with(Queue, Guarded).
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
new file mode 100644
index 00000000..b38a8967
--- /dev/null
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -0,0 +1,850 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2010-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_slave).
+
+%% For general documentation of HA design, see
+%% rabbit_mirror_queue_coordinator
+%%
+%% We join the GM group before we add ourselves to the amqqueue
+%% record. As a result:
+%% 1. We can receive msgs from GM that correspond to messages we will
+%% never receive from publishers.
+%% 2. When we receive a message from publishers, we must receive a
+%% message from the GM group for it.
+%% 3. However, that instruction from the GM group can arrive either
+%% before or after the actual message. We need to be able to
+%% distinguish between GM instructions arriving early, and case (1)
+%% above.
+%%
+%% All instructions from the GM group must be processed in the order
+%% in which they're received.
+
+-export([start_link/1, set_maximum_since_use/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3, handle_pre_hibernate/1, prioritise_call/3,
+         prioritise_cast/2]).
+
+-export([joined/2, members_changed/3, handle_msg/3]).
+
+-behaviour(gen_server2).
+-behaviour(gm).
+
+-include("rabbit.hrl").
+-include("gm_specs.hrl").
+
+-define(SYNC_INTERVAL,                 25). %% milliseconds
+-define(RAM_DURATION_UPDATE_INTERVAL,  5000).
+-define(DEATH_TIMEOUT,                 20000). %% 20 seconds
+
+%% State of a mirrored-queue slave process.
+-record(state, { q,                   %% the #amqqueue record given to init/1
+                 gm,                  %% pid returned by gm:start_link/3
+                 master_pid,          %% pid of the current master (monitored)
+                 backing_queue,       %% backing queue module
+                 backing_queue_state, %% its state; terminate/2 treats
+                                      %% 'undefined' as already torn down
+                 sync_timer_ref,      %% 'undefined' when no sync timer is set
+                 rate_timer_ref,      %% 'undefined' initially; set to
+                                      %% 'just_measured' by update_ram_duration
+
+                 sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId}
+                 msg_id_ack,    %% :: MsgId -> {Num, AckTag}
+                 ack_num,       %% starts at 0; presumably the source of
+                                %% Num above - TODO confirm
+
+                 %% :: MsgId -> {published, ChPid}
+                 %%           | {published, ChPid, MsgSeqNo}
+                 %%           | {confirmed, ChPid}
+                 %%           | discarded
+                 msg_id_status,
+                 known_senders  %% :: Pid -> monitor reference
+               }).
+
+%% Start a slave process for the queue record Q, linked to the caller.
+start_link(Q) ->
+    InitArgs = [Q],
+    gen_server2:start_link(?MODULE, InitArgs, []).
+
+%% Asynchronously pass Age to the slave QPid as a
+%% {set_maximum_since_use, Age} cast.
+set_maximum_since_use(QPid, Age) ->
+    Request = {set_maximum_since_use, Age},
+    gen_server2:cast(QPid, Request).
+
+%% gen_server2 callback. Brings a slave up in this order:
+%%  1. join the queue's gm group and block until the gm process
+%%     reports {joined, GM} (sent from joined/2);
+%%  2. in an mnesia transaction, assert that no pid of this queue is
+%%     already on this node and append ourselves to slave_pids;
+%%  3. monitor the master pid read in that transaction;
+%%  4. register with file_handle_cache and rabbit_memory_monitor;
+%%  5. initialise the configured backing queue with Recover = false.
+%% Returns the initial #state with a hibernate timeout and backoff
+%% settings.
+init([#amqqueue { name = QueueName } = Q]) ->
+    process_flag(trap_exit, true), %% amqqueue_process traps exits too.
+    {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]),
+    %% NOTE(review): there is no 'after' clause, so this waits
+    %% indefinitely for the join notification.
+    receive {joined, GM} ->
+            ok
+    end,
+    Self = self(),
+    Node = node(),
+    {ok, MPid} =
+        rabbit_misc:execute_mnesia_transaction(
+          fun () ->
+                  [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] =
+                      mnesia:read({rabbit_queue, QueueName}),
+                  %% ASSERTION
+                  [] = [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node],
+                  MPids1 = MPids ++ [Self],
+                  mnesia:write(rabbit_queue,
+                               Q1 #amqqueue { slave_pids = MPids1 },
+                               write),
+                  {ok, QPid}
+          end),
+    erlang:monitor(process, MPid),
+    ok = file_handle_cache:register_callback(
+           rabbit_amqqueue, set_maximum_since_use, [self()]),
+    ok = rabbit_memory_monitor:register(
+           self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}),
+    {ok, BQ} = application:get_env(backing_queue_module),
+    BQS = bq_init(BQ, Q, false),
+    {ok, #state { q                   = Q,
+                  gm                  = GM,
+                  master_pid          = MPid,
+                  backing_queue       = BQ,
+                  backing_queue_state = BQS,
+                  rate_timer_ref      = undefined,
+                  sync_timer_ref      = undefined,
+
+                  sender_queues       = dict:new(),
+                  msg_id_ack          = dict:new(),
+                  ack_num             = 0,
+
+                  msg_id_status       = dict:new(),
+                  known_senders       = dict:new()
+                }, hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% gen_server2 call handler. Three requests are understood:
+%%  {deliver_immediately, Delivery} - reply false, then hand the
+%%      delivery to maybe_enqueue_message/3 with the flag false;
+%%  {deliver, Delivery}             - reply true, then hand the
+%%      delivery to maybe_enqueue_message/3 with the flag true;
+%%  {gm_deaths, Deaths}             - sent by inform_deaths/2; updates
+%%      the queue record and, depending on who the master now is,
+%%      carries on, promotes this slave, follows the new master, or
+%%      stops because the queue has gone.
+%% In the delivery clauses the caller is answered before the message
+%% is processed.
+handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) ->
+    %% Synchronous, "immediate" delivery mode
+
+    %% It is safe to reply 'false' here even if a) we've not seen the
+    %% msg via gm, or b) the master dies before we receive the msg via
+    %% gm. In the case of (a), we will eventually receive the msg via
+    %% gm, and it's only the master's result to the channel that is
+    %% important. In the case of (b), if the master does die and we do
+    %% get promoted then at that point we have no consumers, thus
+    %% 'false' is precisely the correct answer. However, we must be
+    %% careful to _not_ enqueue the message in this case.
+
+    %% Note this is distinct from the case where we receive the msg
+    %% via gm first, then we're promoted to master, and only then do
+    %% we receive the msg from the channel.
+    gen_server2:reply(From, false), %% master may deliver it, not us
+    noreply(maybe_enqueue_message(Delivery, false, State));
+
+handle_call({deliver, Delivery = #delivery {}}, From, State) ->
+    %% Synchronous, "mandatory" delivery mode
+    gen_server2:reply(From, true), %% amqqueue throws away the result anyway
+    noreply(maybe_enqueue_message(Delivery, true, State));
+
+handle_call({gm_deaths, Deaths}, From,
+            State = #state { q          = #amqqueue { name = QueueName },
+                             gm         = GM,
+                             master_pid = MPid }) ->
+    rabbit_log:info("Mirrored-queue (~s): Slave ~s saw deaths of mirrors ~s~n",
+                    [rabbit_misc:rs(QueueName),
+                     rabbit_misc:pid_to_string(self()),
+                     [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]),
+    %% The GM has told us about deaths, which means we're not going to
+    %% receive any more messages from GM
+    case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of
+        {ok, Pid} when node(Pid) =:= node(MPid) ->
+            %% master hasn't changed
+            reply(ok, State);
+        {ok, Pid} when node(Pid) =:= node() ->
+            %% we've become master
+            promote_me(From, State);
+        {ok, Pid} ->
+            %% master has changed to not us.
+            gen_server2:reply(From, ok),
+            %% Start monitoring the new master and remember it.
+            erlang:monitor(process, Pid),
+            ok = gm:broadcast(GM, heartbeat),
+            noreply(State #state { master_pid = Pid });
+        {error, not_found} ->
+            %% The queue record no longer exists, so stop normally.
+            gen_server2:reply(From, ok),
+            {stop, normal, State}
+    end.
+
+%% gen_server2 cast handler: backing-queue callbacks, gm instructions,
+%% asynchronous deliveries, resource-monitor notifications and the
+%% sync timer.
+handle_cast({run_backing_queue, Mod, Fun}, State) ->
+    State1 = run_backing_queue(Mod, Fun, State),
+    noreply(State1);
+
+handle_cast({gm, Instruction}, State) ->
+    Outcome = process_instruction(Instruction, State),
+    handle_process_result(Outcome);
+
+handle_cast({deliver, Delivery = #delivery {}}, State) ->
+    %% Asynchronous, non-"mandatory", non-"immediate" deliver mode.
+    State1 = maybe_enqueue_message(Delivery, true, State),
+    noreply(State1);
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+    ok = file_handle_cache:set_maximum_since_use(Age),
+    noreply(State);
+
+handle_cast({set_ram_duration_target, Duration},
+            State = #state { backing_queue_state = BQS0,
+                             backing_queue       = BQ }) ->
+    Retargeted = BQ:set_ram_duration_target(Duration, BQS0),
+    noreply(State #state { backing_queue_state = Retargeted });
+
+handle_cast(update_ram_duration,
+            State = #state { backing_queue_state = BQS0,
+                             backing_queue       = BQ }) ->
+    %% Measure, report to the memory monitor, and apply the duration
+    %% it asks for in return.
+    {Measured, BQS1} = BQ:ram_duration(BQS0),
+    Desired = rabbit_memory_monitor:report_ram_duration(self(), Measured),
+    BQS2 = BQ:set_ram_duration_target(Desired, BQS1),
+    noreply(State #state { backing_queue_state = BQS2,
+                           rate_timer_ref      = just_measured });
+
+handle_cast(sync_timeout, State) ->
+    Cleared = State #state { sync_timer_ref = undefined },
+    noreply(backing_queue_timeout(Cleared)).
+
+%% gen_server2 info handler.
+%%  timeout        - let the backing queue do its timeout work;
+%%  'DOWN' (master)- announce the master's death to the gm group;
+%%  'DOWN' (other) - treat the pid as a dead local sender;
+%%  'EXIT'         - stop with the same reason;
+%%  anything else  - stop with {unexpected_info, Msg}.
+handle_info(timeout, State) ->
+    State1 = backing_queue_timeout(State),
+    noreply(State1);
+
+handle_info({'DOWN', _MonitorRef, process, Pid, _Reason},
+            State = #state { master_pid = MPid, gm = GM })
+  when Pid =:= MPid ->
+    ok = gm:broadcast(GM, {process_death, MPid}),
+    noreply(State);
+
+handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) ->
+    State1 = local_sender_death(ChPid, State),
+    noreply(State1);
+
+handle_info({'EXIT', _Pid, Reason}, State) ->
+    {stop, Reason, State};
+
+handle_info(Unexpected, State) ->
+    {stop, {unexpected_info, Unexpected}, State}.
+
+%% terminate/2 serves two behaviours: the first three clauses are the
+%% gen_server2 callback (Reason, State); the last one is the gm
+%% callback, whose first argument is the [SPid] argument list given to
+%% gm:start_link/3.
+%%
+%% If the Reason is shutdown, or {shutdown, _}, it is not the queue
+%% being deleted: it's just the node going down. Even though we're a
+%% slave, we have no idea whether or not we'll be the only copy coming
+%% back up. Thus we must assume we will be, and preserve anything we
+%% have on disk.
+terminate(_Reason, #state { backing_queue_state = undefined }) ->
+    %% We've received a delete_and_terminate from gm, thus nothing to
+    %% do here.
+    ok;
+terminate({shutdown, dropped} = R, #state { backing_queue       = BQ,
+                                            backing_queue_state = BQS }) ->
+    %% This is the exit reason rabbit_mirror_queue_misc:drop_mirror/2
+    %% sends; the backing queue's contents are deleted.
+    %% See rabbit_mirror_queue_master:terminate/2
+    BQ:delete_and_terminate(R, BQS);
+terminate(Reason, #state { q                   = Q,
+                           gm                  = GM,
+                           backing_queue       = BQ,
+                           backing_queue_state = BQS,
+                           rate_timer_ref      = RateTRef }) ->
+    %% Leave the gm group, then build an amqqueue_process state around
+    %% our backing queue (no ack tags, no deliveries, empty
+    %% msg-id-to-channel dict) and let rabbit_amqqueue_process do the
+    %% actual termination.
+    ok = gm:leave(GM),
+    QueueState = rabbit_amqqueue_process:init_with_backing_queue_state(
+                   Q, BQ, BQS, RateTRef, [], [], dict:new()),
+    rabbit_amqqueue_process:terminate(Reason, QueueState);
+terminate([_SPid], _Reason) ->
+    %% gm case
+    ok.
+
+%% gen_server2 callback: no state migration is needed on code upgrade.
+code_change(_FromVsn, Current, _Extra) ->
+    {ok, Current}.
+
+%% gen_server2 callback run just before hibernating: measure the RAM
+%% duration, report it to the memory monitor, apply the target it
+%% returns, let the backing queue prepare for hibernation and stop the
+%% rate timer.
+handle_pre_hibernate(State = #state { backing_queue_state = BQS0,
+                                      backing_queue       = BQ }) ->
+    {Measured, BQS1} = BQ:ram_duration(BQS0),
+    Desired = rabbit_memory_monitor:report_ram_duration(self(), Measured),
+    BQS2 = BQ:set_ram_duration_target(Desired, BQS1),
+    Prepared = State #state {
+                 backing_queue_state = BQ:handle_pre_hibernate(BQS2) },
+    {hibernate, stop_rate_timer(Prepared)}.
+
+%% gen_server2 callback: gm death notifications get priority 5, every
+%% other call the default priority 0.
+prioritise_call({gm_deaths, _Deaths}, _From, _State) -> 5;
+prioritise_call(_Other,               _From, _State) -> 0.
+
+%% gen_server2 callback: priority of each cast. Resource-monitor
+%% notifications come first (8), then backing-queue callbacks and the
+%% sync timer (6), gm instructions (5), post_commit (4); everything
+%% else gets the default priority 0.
+prioritise_cast(update_ram_duration,                  _State) -> 8;
+prioritise_cast({set_ram_duration_target, _Duration}, _State) -> 8;
+prioritise_cast({set_maximum_since_use, _Age},        _State) -> 8;
+prioritise_cast({run_backing_queue, _Mod, _Fun},      _State) -> 6;
+prioritise_cast(sync_timeout,                         _State) -> 6;
+prioritise_cast({gm, _Msg},                           _State) -> 5;
+prioritise_cast({post_commit, _Txn, _AckTags},        _State) -> 4;
+prioritise_cast(_Other,                               _State) -> 0.
+
+%% ---------------------------------------------------------------------------
+%% GM
+%% ---------------------------------------------------------------------------
+
+%% gm callback: tell the slave process SPid that the calling process
+%% has joined, by sending it {joined, self()}.
+joined([SPid], _Members) ->
+    Joiner = self(),
+    SPid ! {joined, Joiner},
+    ok.
+
+%% gm callback: births are ignored; a non-empty list of deaths is
+%% reported to the slave process through inform_deaths/2.
+members_changed([SPid], _Births, Deaths) ->
+    case Deaths of
+        [] -> ok;
+        _  -> inform_deaths(SPid, Deaths)
+    end.
+
+%% gm callback for messages broadcast in the group. Heartbeats and
+%% ensure_monitoring requests are dropped, a process_death is handled
+%% like a gm death, and everything else is forwarded to the slave
+%% process as a {gm, Msg} cast.
+handle_msg([SPid], _From, Msg) ->
+    case Msg of
+        heartbeat ->
+            ok;
+        {ensure_monitoring, _Pid} ->
+            %% This is only of value to the master
+            ok;
+        {process_death, Pid} ->
+            inform_deaths(SPid, [Pid]);
+        _ ->
+            ok = gen_server2:cast(SPid, {gm, Msg})
+    end.
+
+%% Synchronously tell the slave process SPid about Deaths. Returns ok,
+%% or {become, rabbit_mirror_queue_coordinator, [CPid]} when the slave
+%% answers {promote, CPid}. The call is wrapped in
+%% rabbit_misc:with_exit_handler/2 with a handler that yields
+%% {stop, normal}.
+inform_deaths(SPid, Deaths) ->
+    OnExit = fun () -> {stop, normal} end,
+    Notify =
+        fun () ->
+                case gen_server2:call(SPid, {gm_deaths, Deaths}, infinity) of
+                    {promote, CPid} ->
+                        {become, rabbit_mirror_queue_coordinator, [CPid]};
+                    ok ->
+                        ok
+                end
+        end,
+    rabbit_misc:with_exit_handler(OnExit, Notify).
+
+%% ---------------------------------------------------------------------------
+%% Others
+%% ---------------------------------------------------------------------------
+
+%% Initialise the backing queue module BQ for queue Q. The callback
+%% handed to BQ:init/3 routes {Mod, Fun} requests back to the calling
+%% process via rabbit_amqqueue:run_backing_queue/3.
+bq_init(BQ, Q, Recover) ->
+    Owner = self(),
+    AsyncCallback =
+        fun (Mod, Fun) ->
+                rabbit_amqqueue:run_backing_queue(Owner, Mod, Fun)
+        end,
+    BQ:init(Q, Recover, AsyncCallback).
+
+%% Run a backing-queue callback. Requests addressed to
+%% rabbit_mirror_queue_master are applied to the slave's own state;
+%% all others are passed to the backing queue's invoke/3.
+run_backing_queue(rabbit_mirror_queue_master, Fun, State) ->
+    %% Yes, this might look a little crazy, but see comments in
+    %% confirm_sender_death/1
+    Fun(?MODULE, State);
+run_backing_queue(Target, Fun, State = #state { backing_queue_state = BQS0,
+                                                backing_queue       = BQ }) ->
+    BQS1 = BQ:invoke(Target, Fun, BQS0),
+    State #state { backing_queue_state = BQS1 }.
+
+%% When does a delivery need confirming?
+%%  never       - it carries no msg_seq_no;
+%%  eventually  - a persistent message on a durable queue;
+%%  immediately - everything else.
+needs_confirming(Delivery, State) ->
+    case {Delivery, State} of
+        {#delivery { msg_seq_no = undefined }, _} ->
+            never;
+        {#delivery { message = #basic_message { is_persistent = true } },
+         #state { q = #amqqueue { durable = true } }} ->
+            eventually;
+        _ ->
+            immediately
+    end.
+
+%% Process message ids the backing queue has confirmed. Messages seen
+%% from both gm and the channel are confirmed to their channel (one
+%% rabbit_channel:confirm/2 call per channel) and forgotten; messages
+%% seen via gm only are marked {confirmed, ChPid}. Returns the state
+%% with the updated msg_id_status.
+confirm_messages(MsgIds, State = #state { msg_id_status = MS }) ->
+    Classify =
+        fun (MsgId, {StatusAcc, PendingAcc} = Unchanged) ->
+                %% We will never see 'discarded' here
+                case dict:find(MsgId, StatusAcc) of
+                    {ok, {published, ChPid, MsgSeqNo}} ->
+                        %% Seen from both GM and Channel. Can now
+                        %% confirm.
+                        {dict:erase(MsgId, StatusAcc),
+                         gb_trees_cons(ChPid, MsgSeqNo, PendingAcc)};
+                    {ok, {published, ChPid}} ->
+                        %% Still not seen it from the channel, just
+                        %% record that it's been confirmed.
+                        {dict:store(MsgId, {confirmed, ChPid}, StatusAcc),
+                         PendingAcc};
+                    {ok, {confirmed, _ChPid}} ->
+                        %% It's already been confirmed. This is
+                        %% probably because it's been both sync'd to
+                        %% disk and then delivered and ack'd before
+                        %% we've seen the publish from the channel.
+                        %% Nothing to do here.
+                        Unchanged;
+                    error ->
+                        %% If it needed confirming, it'll have
+                        %% already been done.
+                        Unchanged
+                end
+        end,
+    {MS1, Pending} = lists:foldl(Classify, {MS, gb_trees:empty()}, MsgIds),
+    lists:foreach(
+      fun ({ChPid, MsgSeqNos}) ->
+              ok = rabbit_channel:confirm(ChPid, MsgSeqNos)
+      end, gb_trees:to_list(Pending)),
+    State #state { msg_id_status = MS1 }.
+
+%% Prepend Value to the list stored under Key in the gb_tree Tree,
+%% starting a new one-element list when Key is not present yet.
+gb_trees_cons(Key, Value, Tree) ->
+    case gb_trees:lookup(Key, Tree) of
+        none ->
+            gb_trees:insert(Key, [Value], Tree);
+        {value, Existing} ->
+            gb_trees:update(Key, [Value | Existing], Tree)
+    end.
+
+%% Turn the {ok | stop, State} result of processing a gm instruction
+%% into a gen_server2 return value.
+handle_process_result({stop, Final}) -> {stop, normal, Final};
+handle_process_result({ok,   Next})  -> noreply(Next).
+
+%% Turn this slave into the queue's master. From is the pending
+%% {gm_deaths, _} caller, which is answered with {promote, CPid}.
+%%
+%% In order: start a coordinator for the queue (with pid = self()),
+%% unlink from gm, reply to From, send a confirmed heartbeat through
+%% gm, hand all monitored senders over to the coordinator, build the
+%% master's backing-queue state and the amqqueue_process state, and
+%% finally return {become, rabbit_amqqueue_process, QueueState,
+%% hibernate} so that gen_server2 switches callback module.
+promote_me(From, #state { q                   = Q,
+                          gm                  = GM,
+                          backing_queue       = BQ,
+                          backing_queue_state = BQS,
+                          rate_timer_ref      = RateTRef,
+                          sender_queues       = SQ,
+                          msg_id_ack          = MA,
+                          msg_id_status       = MS,
+                          known_senders       = KS }) ->
+    rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n",
+                    [rabbit_misc:rs(Q #amqqueue.name),
+                     rabbit_misc:pid_to_string(self())]),
+    Q1 = Q #amqqueue { pid = self() },
+    {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(
+                   Q1, GM, rabbit_mirror_queue_master:sender_death_fun()),
+    true = unlink(GM),
+    gen_server2:reply(From, {promote, CPid}),
+    ok = gm:confirmed_broadcast(GM, heartbeat),
+
+    %% Everything that we're monitoring, we need to ensure our new
+    %% coordinator is monitoring.
+
+    MonitoringPids = [begin true = erlang:demonitor(MRef),
+                            Pid
+                      end || {Pid, MRef} <- dict:to_list(KS)],
+    ok = rabbit_mirror_queue_coordinator:ensure_monitoring(
+           CPid, MonitoringPids),
+
+    %% We find all the messages that we've received from channels but
+    %% not from gm, and if they're due to be enqueued on promotion
+    %% then we pass them to the
+    %% queue_process:init_with_backing_queue_state to be enqueued.
+    %%
+    %% We also have to requeue messages which are pending acks: the
+    %% consumers from the master queue have been lost and so these
+    %% messages need requeuing. They might also be pending
+    %% confirmation, and indeed they might also be pending arrival of
+    %% the publication from the channel itself, if we received both
+    %% the publication and the fetch via gm first! Requeuing doesn't
+    %% affect confirmations: if the message was previously pending a
+    %% confirmation then it still will be, under the same msg_id. So
+    %% as a master, we need to be prepared to filter out the
+    %% publication of said messages from the channel (is_duplicate
+    %% (thus such requeued messages must remain in the msg_id_status
+    %% (MS) which becomes seen_status (SS) in the master)).
+    %%
+    %% Then there are messages we already have in the queue, which are
+    %% not currently pending acknowledgement:
+    %% 1. Messages we've only received via gm:
+    %%    Filter out subsequent publication from channel through
+    %%    validate_message. Might have to issue confirms then or
+    %%    later, thus queue_process state will have to know that
+    %%    there's a pending confirm.
+    %% 2. Messages received via both gm and channel:
+    %%    Queue will have to deal with issuing confirms if necessary.
+    %%
+    %% MS contains the following four entry types:
+    %%
+    %% a) {published, ChPid}:
+    %%   published via gm only; pending arrival of publication from
+    %%   channel, maybe pending confirm.
+    %%
+    %% b) {published, ChPid, MsgSeqNo}:
+    %%   published via gm and channel; pending confirm.
+    %%
+    %% c) {confirmed, ChPid}:
+    %%   published via gm only, and confirmed; pending publication
+    %%   from channel.
+    %%
+    %% d) discarded
+    %%   seen via gm only as discarded. Pending publication from
+    %%   channel
+    %%
+    %% The forms a, c and d only, need to go to the master state
+    %% seen_status (SS).
+    %%
+    %% The form b only, needs to go through to the queue_process
+    %% state to form the msg_id_to_channel mapping (MTC).
+    %%
+    %% No messages that are enqueued from SQ at this point will have
+    %% entries in MS.
+    %%
+    %% Messages that are extracted from MA may have entries in MS, and
+    %% those messages are then requeued. However, as discussed above,
+    %% this does not affect MS, nor which bits go through to SS in
+    %% Master, or MTC in queue_process.
+    %%
+    %% Everything that's in MA gets requeued. Consequently the new
+    %% master should start with a fresh AM as there are no messages
+    %% pending acks.
+
+    %% Forms a, c and d, with the ChPid stripped from a and c.
+    MSList = dict:to_list(MS),
+    SS = dict:from_list(
+           [E || E = {_MsgId, discarded} <- MSList] ++
+               [{MsgId, Status}
+                || {MsgId, {Status, _ChPid}} <- MSList,
+                   Status =:= published orelse Status =:= confirmed]),
+
+    MasterState = rabbit_mirror_queue_master:promote_backing_queue_state(
+                    CPid, BQ, BQS, GM, SS, MonitoringPids),
+
+    %% Form b only.
+    MTC = dict:from_list(
+            [{MsgId, {ChPid, MsgSeqNo}} ||
+                {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]),
+    %% MA values are {Num, AckTag} pairs; sorting them orders the ack
+    %% tags by Num before the Num is dropped.
+    NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)],
+    AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)],
+    %% Only the queued deliveries flagged 'true' are passed on.
+    Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ),
+                              {Delivery, true} <- queue:to_list(PubQ)],
+    QueueState = rabbit_amqqueue_process:init_with_backing_queue_state(
+                   Q1, rabbit_mirror_queue_master, MasterState, RateTRef,
+                   AckTags, Deliveries, MTC),
+    {become, rabbit_amqqueue_process, QueueState, hibernate}.
+
+%% Refresh the state via next_state/1 and build a noreply result from it.
+noreply(State) ->
+    {Next, Wait} = next_state(State), {noreply, Next, Wait}.
+
+%% Refresh the state via next_state/1 and build a reply result carrying Reply.
+reply(Reply, State) ->
+    {Next, Wait} = next_state(State), {reply, Reply, Next, Wait}.
+
+%% Drain and issue BQ confirms, then pick {State, Timeout} from needs_timeout.
+next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) ->
+    {Confirmed, BQS1} = BQ:drain_confirmed(BQS),
+    Drained = State #state { backing_queue_state = BQS1 },
+    Timed = ensure_rate_timer(confirm_messages(Confirmed, Drained)),
+    case BQ:needs_timeout(BQS1) of
+        timed -> {ensure_sync_timer(Timed), 0};
+        idle  -> {stop_sync_timer(Timed), 0};
+        false -> {stop_sync_timer(Timed), hibernate}
+    end.
+
+backing_queue_timeout(State = #state { backing_queue = BQ }) ->
+    run_backing_queue(BQ, fun (Mod, S) -> Mod:timeout(S) end, State).
+
+%% Start the sync timer unless sync_timer_ref is already set.
+ensure_sync_timer(State = #state { sync_timer_ref = undefined }) ->
+    {M, F, A} = {rabbit_amqqueue, sync_timeout, [self()]},
+    {ok, Ref} = timer:apply_after(?SYNC_INTERVAL, M, F, A),
+    State #state { sync_timer_ref = Ref };
+ensure_sync_timer(State) -> State.
+
+%% Cancel the sync timer, if one is held, and clear sync_timer_ref.
+stop_sync_timer(State = #state { sync_timer_ref = undefined }) -> State;
+stop_sync_timer(State = #state { sync_timer_ref = Ref }) ->
+    {ok, cancel} = timer:cancel(Ref),
+    State #state { sync_timer_ref = undefined }.
+
+%% Arm the rate timer when rate_timer_ref is undefined. The marker
+%% just_measured is reset to undefined without arming a timer; any
+%% other reference is left untouched.
+ensure_rate_timer(State = #state { rate_timer_ref = undefined }) ->
+    {M, F, A} = {rabbit_amqqueue, update_ram_duration, [self()]},
+    {ok, Ref} = timer:apply_after(?RAM_DURATION_UPDATE_INTERVAL, M, F, A),
+    State #state { rate_timer_ref = Ref };
+ensure_rate_timer(State = #state { rate_timer_ref = just_measured }) ->
+    State #state { rate_timer_ref = undefined };
+ensure_rate_timer(State) -> State.
+
+%% Clear rate_timer_ref, cancelling the timer if a real one is held.
+stop_rate_timer(State = #state { rate_timer_ref = undefined }) -> State;
+stop_rate_timer(State = #state { rate_timer_ref = just_measured }) ->
+    State #state { rate_timer_ref = undefined };
+stop_rate_timer(State = #state { rate_timer_ref = Ref }) ->
+    {ok, cancel} = timer:cancel(Ref),
+    State #state { rate_timer_ref = undefined }.
+
+ensure_monitoring(ChPid, State = #state { known_senders = KS }) ->
+    case dict:find(ChPid, KS) of
+        {ok, _} -> State;                        % already monitored
+        error   -> Ref = erlang:monitor(process, ChPid),
+                   State #state { known_senders = dict:store(ChPid, Ref, KS) }
+    end.
+
+local_sender_death(ChPid, State = #state { known_senders = KS }) ->
+    ok = case dict:find(ChPid, KS) of
+             {ok, _MRef} -> confirm_sender_death(ChPid);
+             error       -> ok               % unknown sender: nothing to do
+         end,
+    State.
+
+confirm_sender_death(Pid) ->
+    %% We might be promoted to master before the timer below fires, so
+    %% the callback is registered against rabbit_mirror_queue_master:
+    %% if we have become a rabbit_amqqueue_process by then, sane
+    %% things will still happen.
+    Recheck =
+        fun (?MODULE, State = #state { known_senders = Senders,
+                                       gm            = GM }) ->
+                %% Still running as a slave when the timer fired.
+                ok = case dict:is_key(Pid, Senders) of
+                         true  -> gm:broadcast(GM, {ensure_monitoring, [Pid]}),
+                                  confirm_sender_death(Pid);
+                         false -> ok
+                     end,
+                State;
+            (rabbit_mirror_queue_master, State) ->
+                %% We are a master now and State is opaque to us. Had
+                %% Pid still been known at promotion time, monitoring
+                %% would have been set up then, so there is nothing
+                %% left to do.
+                State
+        end,
+    %% Our knowledge of Pid is deliberately kept here; it is only
+    %% dropped when the sender_death instruction arrives from GM.
+    {ok, _TRef} = timer:apply_after(
+                    ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue,
+                    [self(), rabbit_mirror_queue_master, Recheck]),
+    ok.
+
+maybe_enqueue_message(
+  Delivery = #delivery { message    = #basic_message { id = MsgId },
+                         msg_seq_no = MsgSeqNo,
+                         sender     = ChPid },
+  EnqueueOnPromotion,
+  State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+    Monitored = ensure_monitoring(ChPid, State),
+    %% {published, ChPid, MsgSeqNo} can never be found in MS here.
+    case dict:find(MsgId, MS) of
+        {ok, discarded} ->
+            %% GM already told us this msg is to be discarded, and we
+            %% will not see it again.
+            Pending = remove_from_pending_ch(MsgId, ChPid, SQ),
+            Monitored #state { sender_queues = Pending,
+                               msg_id_status = dict:erase(MsgId, MS) };
+        {ok, {confirmed, ChPid}} ->
+            %% The BQ confirmed it before we knew the msg_seq_no; now
+            %% that we know it, the confirm can go out.
+            ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]),
+            Pending = remove_from_pending_ch(MsgId, ChPid, SQ),
+            Monitored #state { sender_queues = Pending,
+                               msg_id_status = dict:erase(MsgId, MS) };
+        {ok, {published, ChPid}} ->
+            %% Already published to the BQ, but without a msg_seq_no
+            %% there was no way to confirm it back then.
+            case needs_confirming(Delivery, Monitored) of
+                immediately ->
+                    ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]),
+                    Pending = remove_from_pending_ch(MsgId, ChPid, SQ),
+                    Monitored #state { sender_queues = Pending,
+                                       msg_id_status = dict:erase(MsgId, MS) };
+                eventually ->
+                    Awaiting = {published, ChPid, MsgSeqNo},
+                    Monitored #state {
+                      msg_id_status = dict:store(MsgId, Awaiting, MS) };
+                never ->
+                    Pending = remove_from_pending_ch(MsgId, ChPid, SQ),
+                    Monitored #state { sender_queues = Pending,
+                                       msg_id_status = dict:erase(MsgId, MS) }
+            end;
+        error ->
+            {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
+            Entry = {Delivery, EnqueueOnPromotion},
+            SQ1 = dict:store(ChPid, {queue:in(Entry, MQ), PendingCh}, SQ),
+            Monitored #state { sender_queues = SQ1 }
+    end.
+
+get_sender_queue(ChPid, SQ) ->
+    case dict:is_key(ChPid, SQ) of
+        true  -> dict:fetch(ChPid, SQ);
+        false -> {queue:new(), sets:new()}   % no entry yet for this sender
+    end.
+
+%% Drop MsgId from ChPid's pending set; SQ is unchanged for unknown senders.
+remove_from_pending_ch(MsgId, ChPid, SQ) ->
+    case dict:find(ChPid, SQ) of
+        {ok, {MQ, Pending}} ->
+            dict:store(ChPid, {MQ, sets:del_element(MsgId, Pending)}, SQ);
+        error -> SQ
+    end.
+
+process_instruction(
+  {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }},
+  State = #state { sender_queues       = SQ,
+                   backing_queue       = BQ,
+                   backing_queue_state = BQS,
+                   msg_id_status       = MS }) ->
+
+    %% We publish to the backing queue right now, even if we have not
+    %% yet seen the message directly from the channel. Consequently
+    %% we may know that a message needs confirming without knowing
+    %% its msg_seq_no, and the confirmation may come back from the
+    %% backing queue while the msg_seq_no is still unknown. When
+    %% that happens we have to remember that the msg_id has been
+    %% confirmed until the publication from the channel arrives and
+    %% lets us associate it with a msg_seq_no.
+    State1 = ensure_monitoring(ChPid, State),
+    {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
+    {MQ1, PendingCh1, MS1} =
+        case queue:out(MQ) of
+            {empty, _MQ2} ->
+                {MQ, sets:add_element(MsgId, PendingCh),
+                 dict:store(MsgId, {published, ChPid}, MS)};
+            {{value, {Delivery = #delivery {
+                        msg_seq_no = MsgSeqNo,
+                        message    = #basic_message { id = MsgId } },
+                      _EnqueueOnPromotion}}, MQ2} ->
+                %% The channel's copy arrived before this instruction,
+                %% so any confirm has to be dealt with here.
+                case needs_confirming(Delivery, State1) of
+                    never ->
+                        {MQ2, PendingCh, MS};
+                    eventually ->
+                        {MQ2, sets:add_element(MsgId, PendingCh),
+                         dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)};
+                    immediately ->
+                        ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]),
+                        {MQ2, PendingCh, MS}
+                end;
+            {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} ->
+                %% The head of the queue is a different message: this
+                %% instruction was sent before we appeared in the
+                %% slave_pids of the #amqqueue{} record. We'll never
+                %% receive the message directly from the channel, and
+                %% the channel will not expect any confirms from us.
+                {MQ, PendingCh, MS}
+        end,
+
+    SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ),
+    State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 },
+
+    {ok,
+     case Deliver of
+         false ->
+             BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS),
+             State2 #state { backing_queue_state = BQS1 };
+         {true, AckRequired} ->
+             {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps,
+                                                   ChPid, BQS),
+             maybe_store_ack(AckRequired, MsgId, AckTag,
+                             State2 #state { backing_queue_state = BQS1 })
+     end};
+process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }},
+                    State = #state { sender_queues       = SQ,
+                                     backing_queue       = BQ,
+                                     backing_queue_state = BQS,
+                                     msg_id_status       = MS }) ->
+    %% Most of the comments on the publish clause above apply here
+    %% too.
+    State1 = ensure_monitoring(ChPid, State),
+    {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
+    {MQ1, PendingCh1, MS1} =
+        case queue:out(MQ) of
+            {empty, _MQ} ->
+                {MQ, sets:add_element(MsgId, PendingCh),
+                 dict:store(MsgId, discarded, MS)};
+            {{value, {#delivery { message = #basic_message { id = MsgId } },
+                      _EnqueueOnPromotion}}, MQ2} ->
+                %% We've already seen it from the channel and won't
+                %% see it again, so there is no need to add it to MS.
+                {MQ2, PendingCh, MS};
+            {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} ->
+                %% The head of the queue is a different message: this
+                %% instruction was sent before we appeared in the
+                %% slave_pids of the #amqqueue{} record. We'll never
+                %% receive the message directly from the channel.
+                {MQ, PendingCh, MS}
+        end,
+    SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ),
+    BQS1 = BQ:discard(Msg, ChPid, BQS),
+    {ok, State1 #state { sender_queues       = SQ1,
+                         msg_id_status       = MS1,
+                         backing_queue_state = BQS1 }};
+process_instruction({set_length, Length},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS }) ->
+    QLen = BQ:len(BQS),
+    ToDrop = QLen - Length,
+    {ok, case ToDrop > 0 of
+             true  -> BQS1 =
+                          lists:foldl(
+                            fun (const, BQSN) ->
+                                    {{_Msg, _IsDelivered, _AckTag, _Remaining},
+                                     BQSN1} = BQ:fetch(false, BQSN),
+                                    BQSN1
+                            end, BQS, lists:duplicate(ToDrop, const)),
+                      State #state { backing_queue_state = BQS1 };
+             false -> State
+         end};
+process_instruction({fetch, AckRequired, MsgId, Remaining},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS }) ->
+    QLen = BQ:len(BQS),
+    {ok, case QLen - 1 of
+             Remaining ->
+                 {{#basic_message{id = MsgId}, _IsDelivered,
+                   AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS),
+                 maybe_store_ack(AckRequired, MsgId, AckTag,
+                                 State #state { backing_queue_state = BQS1 });
+             Other when Other < Remaining ->
+                 %% We are shorter than the master, so nothing is fetched.
+                 State
+         end};
+process_instruction({ack, MsgIds},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS,
+                                     msg_id_ack          = MA }) ->
+    {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+    {MsgIds1, BQS1} = BQ:ack(AckTags, BQS),
+    [] = MsgIds1 -- MsgIds, %% ASSERTION: only requested ids were acked
+    {ok, State #state { msg_id_ack          = MA1,
+                        backing_queue_state = BQS1 }};
+process_instruction({requeue, MsgPropsFun, MsgIds},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS,
+                                     msg_id_ack          = MA }) ->
+    {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+    {ok, case length(AckTags) =:= length(MsgIds) of
+             true ->
+                 {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS),
+                 State #state { msg_id_ack          = MA1,
+                                backing_queue_state = BQS1 };
+             false ->
+                 %% Some MsgIds are unknown to us. The only safe thing
+                 %% to do is nuke our BQ and MA. The interaction between
+                 %% this and confirms doesn't bear thinking about...
+                 {_Count, BQS1} = BQ:purge(BQS),
+                 {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1),
+                 State #state { msg_id_ack          = dict:new(),
+                                backing_queue_state = BQS2 }
+         end};
+process_instruction({sender_death, ChPid},
+                    State = #state { sender_queues = SQ,
+                                     msg_id_status = MS,
+                                     known_senders = KS }) ->
+    {ok, case dict:find(ChPid, KS) of
+             error ->
+                 State;
+             {ok, MRef} ->
+                 true = erlang:demonitor(MRef),
+                 MS1 = case dict:find(ChPid, SQ) of
+                           error ->
+                               MS;
+                           {ok, {_MQ, PendingCh}} ->
+                               lists:foldl(fun dict:erase/2, MS,
+                                           sets:to_list(PendingCh))
+                       end,
+                 State #state { sender_queues = dict:erase(ChPid, SQ),
+                                msg_id_status = MS1,
+                                known_senders = dict:erase(ChPid, KS) }
+         end};
+process_instruction({delete_and_terminate, Reason},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS }) ->
+    BQ:delete_and_terminate(Reason, BQS),
+    {stop, State #state { backing_queue_state = undefined }}.
+
+%% Map MsgIds to their ack tags in MA (unknown ids skipped); also return
+%% MA without the ids that were found.
+msg_ids_to_acktags(MsgIds, MA) ->
+    Step = fun (MsgId, {Tags, Rest}) ->
+               case dict:find(MsgId, MA) of
+                   {ok, {_, Tag}} -> {[Tag | Tags], dict:erase(MsgId, Rest)};
+                   error          -> {Tags, Rest}
+               end
+           end,
+    {RevTags, MA1} = lists:foldl(Step, {[], MA}, MsgIds),
+    {lists:reverse(RevTags), MA1}.
+
+%% Ack every tag recorded in MA against the backing queue.
+ack_all(BQ, MA, BQS) -> BQ:ack([T || {_, {_, T}} <- dict:to_list(MA)], BQS).
+
+%% If an ack is required, store {ack_num, AckTag} under MsgId; bump ack_num.
+maybe_store_ack(false, _MsgId, _AckTag, State) -> State;
+maybe_store_ack(true, MsgId, AckTag,
+                State = #state { msg_id_ack = MA, ack_num = Seq }) ->
+    State #state { msg_id_ack = dict:store(MsgId, {Seq, AckTag}, MA),
+                   ack_num    = Seq + 1 }.
diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl
new file mode 100644
index 00000000..fc04ec79
--- /dev/null
+++ b/src/rabbit_mirror_queue_slave_sup.erl
@@ -0,0 +1,48 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2010-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_slave_sup).
+
+-behaviour(supervisor2).
+
+-export([start/0, start_link/0, start_child/2]).
+
+-export([init/1]).
+
+-include_lib("rabbit.hrl").
+
+-define(SERVER, ?MODULE).
+
+%% Start this supervisor as a transient child of rabbit_sup. Crashes
+%% with a badmatch unless supervisor2:start_child/2 returns {ok, _}.
+start() ->
+    Id = ?MODULE,
+    ChildSpec = {Id, {?MODULE, start_link, []},
+                 transient, infinity, supervisor, [?MODULE]},
+    {ok, _} = supervisor2:start_child(rabbit_sup, ChildSpec),
+    ok.
+
+%% Start the supervisor, locally registered as ?SERVER.
+start_link() -> supervisor2:start_link({local, ?SERVER}, ?MODULE, []).
+
+%% Ask the supervisor registered as ?SERVER on Node to start a child.
+start_child(Node, Args) -> supervisor2:start_child({?SERVER, Node}, Args).
+
+%% Supervisor callback: children are temporary rabbit_mirror_queue_slave workers.
+init([]) ->
+    Mod = rabbit_mirror_queue_slave,
+    Child = {Mod, {Mod, start_link, []}, temporary, ?MAX_WAIT, worker, [Mod]},
+    {ok, {{simple_one_for_one_terminate, 10, 10}, [Child]}}.
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index 7d916797..3bbfb1d7 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -25,7 +25,7 @@
          protocol_error/3, protocol_error/4, protocol_error/1]).
 -export([not_found/1, assert_args_equivalence/4]).
 -export([dirty_read/1]).
--export([table_lookup/2]).
+-export([table_lookup/2, set_table_value/4]).
 -export([r/3, r/2, r_arg/4, rs/1]).
 -export([enable_cover/0, report_cover/0]).
 -export([enable_cover/1, report_cover/1]).
@@ -38,9 +38,9 @@
 -export([ensure_ok/2]).
 -export([makenode/1, nodeparts/1, cookie_hash/0, tcp_name/3]).
 -export([upmap/2, map_in_order/2]).
--export([table_fold/3]).
+-export([table_filter/3]).
 -export([dirty_read_all/1, dirty_foreach_key/2, dirty_dump_log/1]).
--export([read_term_file/1, write_term_file/2]).
+-export([read_term_file/1, write_term_file/2, write_file/2, write_file/3]).
 -export([append_file/2, ensure_parent_dirs_exist/1]).
 -export([format_stderr/2]).
 -export([start_applications/1, stop_applications/1]).
@@ -48,24 +48,25 @@
 -export([sort_field_table/1]).
 -export([pid_to_string/1, string_to_pid/1]).
 -export([version_compare/2, version_compare/3]).
--export([recursive_delete/1, recursive_copy/2, dict_cons/3, orddict_cons/3,
-         unlink_and_capture_exit/1]).
+-export([recursive_delete/1, recursive_copy/2, dict_cons/3, orddict_cons/3]).
 -export([get_options/2]).
 -export([all_module_attributes/1, build_acyclic_graph/3]).
 -export([now_ms/0]).
 -export([lock_file/1]).
--export([const_ok/1, const/1]).
+-export([const_ok/0, const/1]).
 -export([ntoa/1, ntoab/1]).
+-export([is_process_alive/1]).
+-export([pget/2, pget/3, pget_or_die/2]).
+-export([format_message_queue/2]).
 
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
--export_type([resource_name/0, thunk/1, const/1]).
+-export_type([resource_name/0, thunk/1]).
 
 -type(ok_or_error() :: rabbit_types:ok_or_error(any())).
 -type(thunk(T) :: fun(() -> T)).
--type(const(T) :: fun((any()) -> T)).
 -type(resource_name() :: binary()).
 -type(optdef() :: {flag, string()} | {option, string(), any()}).
 -type(channel_or_connection_exit()
@@ -104,7 +105,12 @@
         ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')).
 -spec(table_lookup/2 ::
         (rabbit_framing:amqp_table(), binary())
-         -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}).
+        -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}).
+-spec(set_table_value/4 ::
+        (rabbit_framing:amqp_table(), binary(),
+         rabbit_framing:amqp_field_type(), rabbit_framing:amqp_value())
+        -> rabbit_framing:amqp_table()).
+
 -spec(r/2 :: (rabbit_types:vhost(), K)
              -> rabbit_types:r3(rabbit_types:vhost(), K, '_')
                     when is_subtype(K, atom())).
@@ -145,7 +151,8 @@
         -> atom()).
 -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]).
 -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]).
--spec(table_fold/3 :: (fun ((any(), A) -> A), A, atom()) -> A).
+-spec(table_filter/3:: (fun ((A) -> boolean()), fun ((A, boolean()) -> 'ok'),
+                                                    atom()) -> [A]).
 -spec(dirty_read_all/1 :: (atom()) -> [any()]).
 -spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom())
                              -> 'ok' | 'aborted').
@@ -153,6 +160,8 @@
 -spec(read_term_file/1 ::
         (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any())).
 -spec(write_term_file/2 :: (file:filename(), [any()]) -> ok_or_error()).
+-spec(write_file/2 :: (file:filename(), iodata()) -> ok_or_error()).
+-spec(write_file/3 :: (file:filename(), iodata(), [any()]) -> ok_or_error()).
 -spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()).
 -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok').
 -spec(format_stderr/2 :: (string(), [any()]) -> 'ok').
@@ -177,7 +186,6 @@
         -> rabbit_types:ok_or_error({file:filename(), file:filename(), any()})).
 -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()).
 -spec(orddict_cons/3 :: (any(), any(), orddict:orddict()) -> orddict:orddict()).
--spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok').
 -spec(get_options/2 :: ([optdef()], [string()])
                        -> {[string()], [{string(), any()}]}).
 -spec(all_module_attributes/1 :: (atom()) -> [{atom(), [term()]}]).
@@ -190,10 +198,15 @@
                                       digraph:vertex(), digraph:vertex()})).
 -spec(now_ms/0 :: () -> non_neg_integer()).
 -spec(lock_file/1 :: (file:filename()) -> rabbit_types:ok_or_error('eexist')).
--spec(const_ok/1 :: (any()) -> 'ok').
--spec(const/1 :: (A) -> const(A)).
+-spec(const_ok/0 :: () -> 'ok').
+-spec(const/1 :: (A) -> thunk(A)).
 -spec(ntoa/1 :: (inet:ip_address()) -> string()).
 -spec(ntoab/1 :: (inet:ip_address()) -> string()).
+-spec(is_process_alive/1 :: (pid()) -> boolean()).
+-spec(pget/2 :: (term(), [term()]) -> term()).
+-spec(pget/3 :: (term(), [term()], term()) -> term()).
+-spec(pget_or_die/2 :: (term(), [term()]) -> term() | no_return()).
+-spec(format_message_queue/2 :: (any(), priority_queue:q()) -> term()).
 
 -endif.
 
@@ -266,6 +279,10 @@ table_lookup(Table, Key) ->
         false                           -> undefined
     end.
 
+%% Store {Key, Type, Value} in Table (replacing any entry for Key), then sort.
+set_table_value(Table, Key, Type, Value) ->
+    sort_field_table(lists:keystore(Key, 1, Table, {Key, Type, Value})).
+
 r(#resource{virtual_host = VHostPath}, Kind, Name)
   when is_binary(Name) ->
     #resource{virtual_host = VHostPath, kind = Kind, name = Name};
@@ -350,8 +367,11 @@ throw_on_error(E, Thunk) ->
 with_exit_handler(Handler, Thunk) ->
     try
         Thunk()
-    catch exit:{R, _} when R =:= noproc; R =:= nodedown;
-                           R =:= normal; R =:= shutdown ->
+    catch
+        exit:{R, _} when R =:= noproc; R =:= nodedown;
+                         R =:= normal; R =:= shutdown ->
+            Handler();
+        exit:{{R, _}, _} when R =:= nodedown; R =:= shutdown ->
             Handler()
     end.
 
@@ -400,17 +420,12 @@ execute_mnesia_transaction(TxFun, PrePostCommitFun) ->
                        end), false).
 
 %% Like execute_mnesia_transaction/2, but TxFun is expected to return a
-%% TailFun which gets called immediately before and after the tx commit
+%% TailFun which gets called (only) immediately after the tx commit
 execute_mnesia_tx_with_tail(TxFun) ->
     case mnesia:is_transaction() of
         true  -> execute_mnesia_transaction(TxFun);
-        false -> TailFun = execute_mnesia_transaction(
-                             fun () ->
-                                     TailFun1 = TxFun(),
-                                     TailFun1(true),
-                                     TailFun1
-                             end),
-                 TailFun(false)
+        false -> TailFun = execute_mnesia_transaction(TxFun),
+                 TailFun()
     end.
 
 ensure_ok(ok, _) -> ok;
@@ -456,20 +471,23 @@ map_in_order(F, L) ->
     lists:reverse(
       lists:foldl(fun (E, Acc) -> [F(E) | Acc] end, [], L)).
 
-%% Fold over each entry in a table, executing the cons function in a
-%% transaction.  This is often far more efficient than wrapping a tx
-%% around the lot.
+%% Apply a pre-post-commit function to all entries in a table that
+%% satisfy a predicate, and return those entries.
 %%
 %% We ignore entries that have been modified or removed.
-table_fold(F, Acc0, TableName) ->
+table_filter(Pred, PrePostCommitFun, TableName) ->
     lists:foldl(
-      fun (E, Acc) -> execute_mnesia_transaction(
-                   fun () -> case mnesia:match_object(TableName, E, read) of
-                                 [] -> Acc;
-                                 _  -> F(E, Acc)
-                             end
-                   end)
-      end, Acc0, dirty_read_all(TableName)).
+      fun (E, Acc) ->
+              case execute_mnesia_transaction(
+                     fun () -> mnesia:match_object(TableName, E, read) =/= []
+                                   andalso Pred(E) end,
+                     fun (false, _Tx) -> false;
+                         (true,   Tx) -> PrePostCommitFun(E, Tx), true
+                     end) of
+                  false -> Acc;
+                  true  -> [E | Acc]
+              end
+      end, [], dirty_read_all(TableName)).
 
 dirty_read_all(TableName) ->
     mnesia:dirty_select(TableName, [{'$1',[],['$1']}]).
@@ -508,8 +526,42 @@ dirty_dump_log1(LH, {K, Terms, BadBytes}) ->
 read_term_file(File) -> file:consult(File).
 
 write_term_file(File, Terms) ->
-    file:write_file(File, list_to_binary([io_lib:format("~w.~n", [Term]) ||
-                                             Term <- Terms])).
+    write_file(File, list_to_binary([io_lib:format("~w.~n", [Term]) ||
+                                        Term <- Terms])).
+
+%% Same as write_file/3 with an empty list of extra modes.
+write_file(Path, Data) -> write_file(Path, Data, []).
+
+%% write_file/3 and make_binary/1 are both based on the corresponding
+%% functions in the kernel/file.erl module of the Erlang R14B02
+%% release, which is licensed under the EPL. That implementation of
+%% write_file/3 closes the file without doing an fsync first, which
+%% is why this version exists. The APIs are otherwise identical.
+write_file(Path, Data, Modes) ->
+    OpenModes = [binary, write | (Modes -- [binary, write])],
+    case make_binary(Data) of
+        {error, _} = ConvErr -> ConvErr;
+        Bin when is_binary(Bin) ->
+            case file:open(Path, OpenModes) of
+                {error, _} = OpenErr -> OpenErr;
+                {ok, Fd} -> try file:write(Fd, Bin) of
+                                {error, _} = WriteErr -> WriteErr;
+                                ok                    -> file:sync(Fd)
+                            after
+                                file:close(Fd)
+                            end
+            end
+    end.
+
+%% Flatten iodata into a binary; conversion errors become {error, Reason}.
+make_binary(Bin) when is_binary(Bin) -> Bin;
+make_binary(IoData) ->
+    try iolist_to_binary(IoData) of
+        Flat -> Flat
+    catch
+        error:Reason -> {error, Reason}
+    end.
+
 
 append_file(File, Suffix) ->
     case file:read_file_info(File) of
@@ -527,7 +579,7 @@ append_file(File, 0, Suffix) ->
     end;
 append_file(File, _, Suffix) ->
     case file:read_file(File) of
-        {ok, Data} -> file:write_file([File, Suffix], Data, [append]);
+        {ok, Data} -> write_file([File, Suffix], Data, [append]);
         Error      -> Error
     end.
 
@@ -744,18 +796,12 @@ dict_cons(Key, Value, Dict) ->
 orddict_cons(Key, Value, Dict) ->
     orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict).
 
-unlink_and_capture_exit(Pid) ->
-    unlink(Pid),
-    receive {'EXIT', Pid, _} -> ok
-    after 0 -> ok
-    end.
-
-% Separate flags and options from arguments.
-% get_options([{flag, "-q"}, {option, "-p", "/"}],
-%             ["set_permissions","-p","/","guest",
-%              "-q",".*",".*",".*"])
-% == {["set_permissions","guest",".*",".*",".*"],
-%     [{"-q",true},{"-p","/"}]}
+%% Separate flags and options from arguments.
+%% get_options([{flag, "-q"}, {option, "-p", "/"}],
+%%             ["set_permissions","-p","/","guest",
+%%              "-q",".*",".*",".*"])
+%% == {["set_permissions","guest",".*",".*",".*"],
+%%     [{"-q",true},{"-p","/"}]}
 get_options(Defs, As) ->
     lists:foldl(fun(Def, {AsIn, RsIn}) ->
                         {AsOut, Value} = case Def of
@@ -842,8 +888,8 @@ lock_file(Path) ->
                  ok = file:close(Lock)
     end.
 
-const_ok(_) -> ok.
-const(X) -> fun (_) -> X end.
+const_ok() -> ok.
+const(X) -> fun () -> X end.
 
 %% Format IPv4-mapped IPv6 addresses as IPv4, since they're what we see
 %% when IPv6 is enabled but not used (i.e. 99% of the time).
@@ -858,3 +904,41 @@ ntoab(IP) ->
         0 -> Str;
         _ -> "[" ++ Str ++ "]"
     end.
+
+%% Like erlang:is_process_alive/1 but also usable for remote pids, which
+%% are checked via rpc; any rpc result other than true counts as dead.
+is_process_alive(Pid) when node(Pid) =:= node() ->
+    erlang:is_process_alive(Pid);
+is_process_alive(Pid) ->
+    Node = node(Pid),
+    true =:= rpc:call(Node, erlang, is_process_alive, [Pid]).
+
+pget(Key, Props) -> proplists:get_value(Key, Props).
+pget(Key, Props, Default) -> proplists:get_value(Key, Props, Default).
+
+%% Like proplists:get_value/2, but exit with {error, key_missing, K} on undefined.
+pget_or_die(K, P) ->
+    Value = proplists:get_value(K, P),
+    Value =:= undefined andalso exit({error, key_missing, K}),
+    Value.
+
+%% Return {Len, Entries}, or {Len, {summary, Counts}} beyond 100 entries.
+format_message_queue(_Opt, MQ) ->
+    Len = priority_queue:len(MQ),
+    Entries = priority_queue:to_list(MQ),
+    Tally = fun ({Prio, Msg}, Acc) ->
+                    Shape = {Prio, format_message_queue_entry(Msg)},
+                    orddict:update_counter(Shape, 1, Acc)
+            end,
+    case Len > 100 of
+        false -> {Len, Entries};
+        true  -> Counts = lists:foldl(Tally, orddict:new(), Entries),
+                 {Len, {summary, orddict:to_list(Counts)}}
+    end.
+
+%% Keep atoms, recurse into tuples, and mask every other term as '_'.
+format_message_queue_entry(V) when is_atom(V) -> V;
+format_message_queue_entry(V) when is_tuple(V) ->
+    Fields = lists:map(fun format_message_queue_entry/1, tuple_to_list(V)),
+    list_to_tuple(Fields);
+format_message_queue_entry(_V) -> '_'.
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index a9b4e177..ab553a8b 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -18,9 +18,13 @@
 -module(rabbit_mnesia).
 
 -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0,
-         cluster/1, force_cluster/1, reset/0, force_reset/0,
+         cluster/1, force_cluster/1, reset/0, force_reset/0, init_db/3,
          is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0,
-         empty_ram_only_tables/0, copy_db/1]).
+         empty_ram_only_tables/0, copy_db/1, wait_for_tables/1,
+         create_cluster_nodes_config/1, read_cluster_nodes_config/0,
+         record_running_nodes/0, read_previously_running_nodes/0,
+         delete_previously_running_nodes/0, running_nodes_filename/0,
+         is_disc_node/0]).
 
 -export([table_names/0]).
 
@@ -42,6 +46,7 @@
 -spec(dir/0 :: () -> file:filename()).
 -spec(ensure_mnesia_dir/0 :: () -> 'ok').
 -spec(init/0 :: () -> 'ok').
+-spec(init_db/3 :: ([node()], boolean(), rabbit_misc:thunk('ok')) -> 'ok').
 -spec(is_db_empty/0 :: () -> boolean()).
 -spec(cluster/1 :: ([node()]) -> 'ok').
 -spec(force_cluster/1 :: ([node()]) -> 'ok').
@@ -54,6 +59,14 @@
 -spec(empty_ram_only_tables/0 :: () -> 'ok').
 -spec(create_tables/0 :: () -> 'ok').
 -spec(copy_db/1 :: (file:filename()) ->  rabbit_types:ok_or_error(any())).
+-spec(wait_for_tables/1 :: ([atom()]) -> 'ok').
+-spec(create_cluster_nodes_config/1 :: ([node()]) ->  'ok').
+-spec(read_cluster_nodes_config/0 :: () ->  [node()]).
+-spec(record_running_nodes/0 :: () ->  'ok').
+-spec(read_previously_running_nodes/0 :: () ->  [node()]).
+-spec(delete_previously_running_nodes/0 :: () ->  'ok').
+-spec(running_nodes_filename/0 :: () -> file:filename()).
+-spec(is_disc_node/0 :: () -> boolean()).
 
 -endif.
 
@@ -77,9 +90,14 @@ status() ->
      {running_nodes, running_clustered_nodes()}].
 
 init() ->
-    ok = ensure_mnesia_running(),
-    ok = ensure_mnesia_dir(),
-    ok = init_db(read_cluster_nodes_config(), true),
+    ensure_mnesia_running(),
+    ensure_mnesia_dir(),
+    ok = init_db(read_cluster_nodes_config(), true,
+                 fun maybe_upgrade_local_or_record_desired/0),
+    %% We intuitively expect the global name server to be synced when
+    %% Mnesia is up. In fact that's not guaranteed to be the case - let's
+    %% make it so.
+    ok = global:sync(),
     ok.
 
 is_db_empty() ->
@@ -97,14 +115,49 @@ force_cluster(ClusterNodes) ->
 %% node.  If Force is false, only connections to online nodes are
 %% allowed.
 cluster(ClusterNodes, Force) ->
-    ok = ensure_mnesia_not_running(),
-    ok = ensure_mnesia_dir(),
-    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    ensure_mnesia_not_running(),
+    ensure_mnesia_dir(),
+
+    %% Wipe mnesia if we're changing type from disc to ram
+    case {is_disc_node(), should_be_disc_node(ClusterNodes)} of
+        {true, false} -> error_logger:warning_msg(
+                           "changing node type; wiping mnesia...~n~n"),
+                         rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+                                               cannot_delete_schema);
+        _             -> ok
+    end,
+
+    %% Pre-emptively leave the cluster
+    %%
+    %% We're trying to handle the following two cases:
+    %% 1. We have a two-node cluster, where both nodes are disc nodes.
+    %% One node is re-clustered as a ram node.  When it tries to
+    %% re-join the cluster, but before it has time to update its
+    %% table definitions, the other node will order it to re-create
+    %% its disc tables.  So, we need to leave the cluster before we
+    %% can join it again.
+    %% 2. We have a two-node cluster, where both nodes are disc nodes.
+    %% One node is forcefully reset (so, the other node thinks it's
+    %% still a part of the cluster).  The reset node is re-clustered
+    %% as a ram node.  Same as above, we need to leave the cluster
+    %% before we can join it.  But, since we don't know if we're in a
+    %% cluster or not, we just pre-emptively leave it before joining.
+    ProperClusterNodes = ClusterNodes -- [node()],
+    try
+        ok = leave_cluster(ProperClusterNodes, ProperClusterNodes)
+    catch
+        {error, {no_running_cluster_nodes, _, _}} when Force ->
+            ok
+    end,
+
+    %% Join the cluster
+    start_mnesia(),
     try
-        ok = init_db(ClusterNodes, Force),
+        ok = init_db(ClusterNodes, Force,
+                     fun maybe_upgrade_local_or_record_desired/0),
         ok = create_cluster_nodes_config(ClusterNodes)
     after
-        mnesia:stop()
+        stop_mnesia()
     end,
     ok.
 
@@ -128,10 +181,10 @@ empty_ram_only_tables() ->
     Node = node(),
     lists:foreach(
       fun (TabName) ->
-          case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of
-              true  -> {atomic, ok} = mnesia:clear_table(TabName);
-              false -> ok
-          end
+              case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of
+                  true  -> {atomic, ok} = mnesia:clear_table(TabName);
+                  false -> ok
+              end
       end, table_names()),
     ok.
 
@@ -141,10 +194,13 @@ nodes_of_type(Type) ->
     %% This function should return the nodes of a certain type (ram,
     %% disc or disc_only) in the current cluster.  The type of nodes
     %% is determined when the cluster is initially configured.
-    %% Specifically, we check whether a certain table, which we know
-    %% will be written to disk on a disc node, is stored on disk or in
-    %% RAM.
-    mnesia:table_info(rabbit_durable_exchange, Type).
+    mnesia:table_info(schema, Type).
+
+%% The tables aren't supposed to be on disk on a ram node
+table_definitions(disc) ->
+    table_definitions();
+table_definitions(ram) ->
+    [{Tab, copy_type_to_ram(TabDef)} || {Tab, TabDef} <- table_definitions()].
 
 table_definitions() ->
     [{rabbit_user,
@@ -174,6 +230,11 @@ table_definitions() ->
        {attributes, record_info(fields, route)},
        {disc_copies, [node()]},
        {match, #route{binding = binding_match(), _='_'}}]},
+     {rabbit_semi_durable_route,
+      [{record_name, route},
+       {attributes, record_info(fields, route)},
+       {type, ordered_set},
+       {match, #route{binding = binding_match(), _='_'}}]},
      {rabbit_route,
       [{record_name, route},
        {attributes, record_info(fields, route)},
@@ -185,8 +246,17 @@ table_definitions() ->
        {type, ordered_set},
        {match, #reverse_route{reverse_binding = reverse_binding_match(),
                               _='_'}}]},
-     %% Consider the implications to nodes_of_type/1 before altering
-     %% the next entry.
+     {rabbit_topic_trie_edge,
+      [{record_name, topic_trie_edge},
+       {attributes, record_info(fields, topic_trie_edge)},
+       {type, ordered_set},
+       {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]},
+     {rabbit_topic_trie_binding,
+      [{record_name, topic_trie_binding},
+       {attributes, record_info(fields, topic_trie_binding)},
+       {type, ordered_set},
+       {match, #topic_trie_binding{trie_binding = trie_binding_match(),
+                                   _='_'}}]},
      {rabbit_durable_exchange,
       [{record_name, exchange},
        {attributes, record_info(fields, exchange)},
@@ -196,6 +266,10 @@ table_definitions() ->
       [{record_name, exchange},
        {attributes, record_info(fields, exchange)},
        {match, #exchange{name = exchange_name_match(), _='_'}}]},
+     {rabbit_exchange_serial,
+      [{record_name, exchange_serial},
+       {attributes, record_info(fields, exchange_serial)},
+       {match, #exchange_serial{name = exchange_name_match(), _='_'}}]},
      {rabbit_durable_queue,
       [{record_name, amqqueue},
        {attributes, record_info(fields, amqqueue)},
@@ -204,7 +278,8 @@ table_definitions() ->
      {rabbit_queue,
       [{record_name, amqqueue},
        {attributes, record_info(fields, amqqueue)},
-       {match, #amqqueue{name = queue_name_match(), _='_'}}]}].
+       {match, #amqqueue{name = queue_name_match(), _='_'}}]}]
+        ++ gm:table_definitions().
 
 binding_match() ->
     #binding{source = exchange_name_match(),
@@ -216,6 +291,12 @@ reverse_binding_match() ->
                      _='_'}.
 binding_destination_match() ->
     resource_match('_').
+trie_edge_match() ->
+    #trie_edge{exchange_name = exchange_name_match(),
+               _='_'}.
+trie_binding_match() ->
+    #trie_binding{exchange_name = exchange_name_match(),
+                  _='_'}.
 exchange_name_match() ->
     resource_match(exchange).
 queue_name_match() ->
@@ -264,45 +345,52 @@ ensure_schema_integrity() ->
 
 check_schema_integrity() ->
     Tables = mnesia:system_info(tables),
-    case [Error || {Tab, TabDef} <- table_definitions(),
-                   case lists:member(Tab, Tables) of
-                       false ->
-                           Error = {table_missing, Tab},
-                           true;
-                       true  ->
-                           {_, ExpAttrs} = proplists:lookup(attributes, TabDef),
-                           Attrs = mnesia:table_info(Tab, attributes),
-                           Error = {table_attributes_mismatch, Tab,
-                                    ExpAttrs, Attrs},
-                           Attrs /= ExpAttrs
-                   end] of
-        []     -> check_table_integrity();
-        Errors -> {error, Errors}
+    case check_tables(fun (Tab, TabDef) ->
+                              case lists:member(Tab, Tables) of
+                                  false -> {error, {table_missing, Tab}};
+                                  true  -> check_table_attributes(Tab, TabDef)
+                              end
+                      end) of
+        ok     -> ok = wait_for_tables(),
+                  check_tables(fun check_table_content/2);
+        Other  -> Other
     end.
 
-check_table_integrity() ->
-    ok = wait_for_tables(),
-    case lists:all(fun ({Tab, TabDef}) ->
-                           {_, Match} = proplists:lookup(match, TabDef),
-                           read_test_table(Tab, Match)
-                   end, table_definitions()) of
-        true  -> ok;
-        false -> {error, invalid_table_content}
+check_table_attributes(Tab, TabDef) ->
+    {_, ExpAttrs} = proplists:lookup(attributes, TabDef),
+    case mnesia:table_info(Tab, attributes) of
+        ExpAttrs -> ok;
+        Attrs    -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}}
     end.
 
-read_test_table(Tab, Match) ->
+check_table_content(Tab, TabDef) ->
+    {_, Match} = proplists:lookup(match, TabDef),
     case mnesia:dirty_first(Tab) of
         '$end_of_table' ->
-            true;
+            ok;
         Key ->
             ObjList = mnesia:dirty_read(Tab, Key),
             MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]),
             case ets:match_spec_run(ObjList, MatchComp) of
-                ObjList -> true;
-                _       -> false
+                ObjList -> ok;
+                _       -> {error, {table_content_invalid, Tab, Match, ObjList}}
             end
     end.
 
+check_tables(Fun) ->
+    case [Error || {Tab, TabDef} <- table_definitions(
+                                      case is_disc_node() of
+                                          true  -> disc;
+                                          false -> ram
+                                      end),
+                   case Fun(Tab, TabDef) of
+                       ok             -> Error = none, false;
+                       {error, Error} -> true
+                   end] of
+        []     -> ok;
+        Errors -> {error, Errors}
+    end.
+
 %% The cluster node config file contains some or all of the disk nodes
 %% that are members of the cluster this node is / should be a part of.
 %%
@@ -346,11 +434,40 @@ delete_cluster_nodes_config() ->
                            FileName, Reason}})
     end.
 
+running_nodes_filename() ->
+    filename:join(dir(), "nodes_running_at_shutdown").
+
+record_running_nodes() ->
+    FileName = running_nodes_filename(),
+    Nodes = running_clustered_nodes() -- [node()],
+    %% Don't check the result: we're shutting down anyway and this is
+    %% done on a best-effort basis.
+    rabbit_misc:write_term_file(FileName, [Nodes]),
+    ok.
+
+read_previously_running_nodes() ->
+    FileName = running_nodes_filename(),
+    case rabbit_misc:read_term_file(FileName) of
+        {ok, [Nodes]}   -> Nodes;
+        {error, enoent} -> [];
+        {error, Reason} -> throw({error, {cannot_read_previous_nodes_file,
+                                          FileName, Reason}})
+    end.
+
+delete_previously_running_nodes() ->
+    FileName = running_nodes_filename(),
+    case file:delete(FileName) of
+        ok              -> ok;
+        {error, enoent} -> ok;
+        {error, Reason} -> throw({error, {cannot_delete_previous_nodes_file,
+                                          FileName, Reason}})
+    end.
+
 %% Take a cluster node config and create the right kind of node - a
 %% standalone disk node, or disk or ram node connected to the
 %% specified cluster nodes.  If Force is false, don't allow
 %% connections to offline nodes.
-init_db(ClusterNodes, Force) ->
+init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) ->
     UClusterNodes = lists:usort(ClusterNodes),
     ProperClusterNodes = UClusterNodes -- [node()],
     case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of
@@ -366,37 +483,49 @@ init_db(ClusterNodes, Force) ->
                          end;
                 true  -> ok
             end,
-            case {Nodes, mnesia:system_info(use_dir), all_clustered_nodes()} of
-                {[], true, [_]} ->
-                    %% True single disc node, attempt upgrade
-                    ok = wait_for_tables(),
-                    case rabbit_upgrade:maybe_upgrade() of
-                        ok                    -> ensure_schema_ok();
-                        version_not_available -> schema_ok_or_move()
-                    end;
-                {[], true, _} ->
-                    %% "Master" (i.e. without config) disc node in cluster,
-                    %% verify schema
-                    ok = wait_for_tables(),
-                    ensure_version_ok(rabbit_upgrade:read_version()),
-                    ensure_schema_ok();
-                {[], false, _} ->
+            WantDiscNode = should_be_disc_node(ClusterNodes),
+            WasDiscNode = is_disc_node(),
+            %% We create a new db (on disk, or in ram) in the first
+            %% two cases and attempt to upgrade it in the other two
+            case {Nodes, WasDiscNode, WantDiscNode} of
+                {[], _, false} ->
+                    %% New ram node; start from scratch
+                    ok = create_schema(ram);
+                {[], false, true} ->
                     %% Nothing there at all, start from scratch
-                    ok = create_schema();
+                    ok = create_schema(disc);
+                {[], true, true} ->
+                    %% We're the first node up
+                    case rabbit_upgrade:maybe_upgrade_local() of
+                        ok                    -> ensure_schema_integrity();
+                        version_not_available -> ok = schema_ok_or_move()
+                    end;
                 {[AnotherNode|_], _, _} ->
                     %% Subsequent node in cluster, catch up
-                    ensure_version_ok(rabbit_upgrade:read_version()),
                     ensure_version_ok(
-                      rpc:call(AnotherNode, rabbit_upgrade, read_version, [])),
-                    IsDiskNode = ClusterNodes == [] orelse
-                        lists:member(node(), ClusterNodes),
+                      rpc:call(AnotherNode, rabbit_version, recorded, [])),
+                    {CopyType, CopyTypeAlt} =
+                        case WantDiscNode of
+                            true  -> {disc, disc_copies};
+                            false -> {ram, ram_copies}
+                        end,
                     ok = wait_for_replicated_tables(),
-                    ok = create_local_table_copy(schema, disc_copies),
-                    ok = create_local_table_copies(case IsDiskNode of
-                                                       true  -> disc;
-                                                       false -> ram
-                                                   end),
-                    ensure_schema_ok()
+                    ok = create_local_table_copy(schema, CopyTypeAlt),
+                    ok = create_local_table_copies(CopyType),
+
+                    ok = SecondaryPostMnesiaFun(),
+                    %% We've taken down mnesia, so ram nodes will need
+                    %% to re-sync
+                    case is_disc_node() of
+                        false -> start_mnesia(),
+                                 mnesia:change_config(extra_db_nodes,
+                                                      ProperClusterNodes),
+                                 wait_for_replicated_tables();
+                        true  -> ok
+                    end,
+
+                    ensure_schema_integrity(),
+                    ok
             end;
         {error, Reason} ->
             %% one reason we may end up here is if we try to join
@@ -405,6 +534,14 @@ init_db(ClusterNodes, Force) ->
             throw({error, {unable_to_join_cluster, ClusterNodes, Reason}})
     end.
 
+maybe_upgrade_local_or_record_desired() ->
+    case rabbit_upgrade:maybe_upgrade_local() of
+        ok                    -> ok;
+        %% If we're just starting up a new node we won't have a
+        %% version
+        version_not_available -> ok = rabbit_version:record_desired()
+    end.
+
 schema_ok_or_move() ->
     case check_schema_integrity() of
         ok ->
@@ -417,37 +554,39 @@ schema_ok_or_move() ->
                                      "and recreating schema from scratch~n",
                                      [Reason]),
             ok = move_db(),
-            ok = create_schema()
+            ok = create_schema(disc)
     end.
 
 ensure_version_ok({ok, DiscVersion}) ->
-    case rabbit_upgrade:desired_version() of
-        DiscVersion    ->  ok;
-        DesiredVersion ->  throw({error, {schema_mismatch,
-                                          DesiredVersion, DiscVersion}})
+    DesiredVersion = rabbit_version:desired(),
+    case rabbit_version:matches(DesiredVersion, DiscVersion) of
+        true  -> ok;
+        false -> throw({error, {version_mismatch, DesiredVersion, DiscVersion}})
     end;
 ensure_version_ok({error, _}) ->
-    ok = rabbit_upgrade:write_version().
+    ok = rabbit_version:record_desired().
+
+create_schema(Type) ->
+    stop_mnesia(),
+    case Type of
+        disc -> rabbit_misc:ensure_ok(mnesia:create_schema([node()]),
+                                      cannot_create_schema);
+        ram  -> %% remove the disc schema since this is a ram node
+                rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+                                      cannot_delete_schema)
+    end,
+    start_mnesia(),
+    ok = create_tables(Type),
+    ensure_schema_integrity(),
+    ok = rabbit_version:record_desired().
 
-ensure_schema_ok() ->
-    case check_schema_integrity() of
-        ok              -> ok;
-        {error, Reason} -> throw({error, {schema_invalid, Reason}})
-    end.
+is_disc_node() -> mnesia:system_info(use_dir).
 
-create_schema() ->
-    mnesia:stop(),
-    rabbit_misc:ensure_ok(mnesia:create_schema([node()]),
-                          cannot_create_schema),
-    rabbit_misc:ensure_ok(mnesia:start(),
-                          cannot_start_mnesia),
-    ok = create_tables(),
-    ok = ensure_schema_integrity(),
-    ok = wait_for_tables(),
-    ok = rabbit_upgrade:write_version().
+should_be_disc_node(ClusterNodes) ->
+    ClusterNodes == [] orelse lists:member(node(), ClusterNodes).
 
 move_db() ->
-    mnesia:stop(),
+    stop_mnesia(),
     MnesiaDir = filename:dirname(dir() ++ "/"),
     {{Year, Month, Day}, {Hour, Minute, Second}} = erlang:universaltime(),
     BackupDir = lists:flatten(
@@ -464,21 +603,17 @@ move_db() ->
         {error, Reason} -> throw({error, {cannot_backup_mnesia,
                                           MnesiaDir, BackupDir, Reason}})
     end,
-    ok = ensure_mnesia_dir(),
-    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    ensure_mnesia_dir(),
+    start_mnesia(),
     ok.
 
 copy_db(Destination) ->
-    mnesia:stop(),
-    case rabbit_misc:recursive_copy(dir(), Destination) of
-        ok ->
-            rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
-            ok = wait_for_tables();
-        {error, E} ->
-            {error, E}
-    end.
+    ok = ensure_mnesia_not_running(),
+    rabbit_misc:recursive_copy(dir(), Destination).
+
+create_tables() -> create_tables(disc).
 
-create_tables() ->
+create_tables(Type) ->
     lists:foreach(fun ({Tab, TabDef}) ->
                           TabDef1 = proplists:delete(match, TabDef),
                           case mnesia:create_table(Tab, TabDef1) of
@@ -488,9 +623,13 @@ create_tables() ->
                                                  Tab, TabDef1, Reason}})
                           end
                   end,
-                  table_definitions()),
+                  table_definitions(Type)),
     ok.
 
+copy_type_to_ram(TabDef) ->
+    [{disc_copies, []}, {ram_copies, [node()]}
+     | proplists:delete(ram_copies, proplists:delete(disc_copies, TabDef))].
+
 table_has_copy_type(TabDef, DiscType) ->
     lists:member(node(), proplists:get_value(DiscType, TabDef, [])).
 
@@ -508,19 +647,19 @@ create_local_table_copies(Type) ->
                               HasDiscOnlyCopies -> disc_only_copies;
                               true              -> ram_copies
                           end;
-%% unused code - commented out to keep dialyzer happy
-%%                      Type =:= disc_only ->
-%%                          if
-%%                              HasDiscCopies or HasDiscOnlyCopies ->
-%%                                  disc_only_copies;
-%%                              true -> ram_copies
-%%                          end;
+%%% unused code - commented out to keep dialyzer happy
+%%%                      Type =:= disc_only ->
+%%%                          if
+%%%                              HasDiscCopies or HasDiscOnlyCopies ->
+%%%                                  disc_only_copies;
+%%%                              true -> ram_copies
+%%%                          end;
                       Type =:= ram ->
                           ram_copies
                   end,
               ok = create_local_table_copy(Tab, StorageType)
       end,
-      table_definitions()),
+      table_definitions(Type)),
     ok.
 
 create_local_table_copy(Tab, Type) ->
@@ -541,7 +680,8 @@ wait_for_tables() -> wait_for_tables(table_names()).
 
 wait_for_tables(TableNames) ->
     case mnesia:wait_for_tables(TableNames, 30000) of
-        ok -> ok;
+        ok ->
+            ok;
         {timeout, BadTabs} ->
             throw({error, {timeout_waiting_for_tables, BadTabs}});
         {error, Reason} ->
@@ -549,20 +689,20 @@ wait_for_tables(TableNames) ->
     end.
 
 reset(Force) ->
-    ok = ensure_mnesia_not_running(),
+    ensure_mnesia_not_running(),
     Node = node(),
     case Force of
         true  -> ok;
         false ->
-            ok = ensure_mnesia_dir(),
-            rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+            ensure_mnesia_dir(),
+            start_mnesia(),
             {Nodes, RunningNodes} =
                 try
                     ok = init(),
                     {all_clustered_nodes() -- [Node],
                      running_clustered_nodes() -- [Node]}
                 after
-                    mnesia:stop()
+                    stop_mnesia()
                 end,
             leave_cluster(Nodes, RunningNodes),
             rabbit_misc:ensure_ok(mnesia:delete_schema([Node]),
@@ -585,6 +725,7 @@ leave_cluster(Nodes, RunningNodes) ->
                                  [schema, node()]) of
                        {atomic, ok} -> true;
                        {badrpc, nodedown} -> false;
+                       {aborted, {node_not_running, _}} -> false;
                        {aborted, Reason} ->
                            throw({error, {failed_to_leave_cluster,
                                           Nodes, RunningNodes, Reason}})
@@ -595,3 +736,11 @@ leave_cluster(Nodes, RunningNodes) ->
         false -> throw({error, {no_running_cluster_nodes,
                                 Nodes, RunningNodes}})
     end.
+
+start_mnesia() ->
+    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    ensure_mnesia_running().
+
+stop_mnesia() ->
+    stopped = mnesia:stop(),
+    ensure_mnesia_not_running().
diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl
index cfea4982..b7de27d4 100644
--- a/src/rabbit_msg_file.erl
+++ b/src/rabbit_msg_file.erl
@@ -16,7 +16,7 @@
 
 -module(rabbit_msg_file).
 
--export([append/3, read/2, scan/2]).
+-export([append/3, read/2, scan/4]).
 
 %%----------------------------------------------------------------------------
 
@@ -27,8 +27,8 @@
 -define(WRITE_OK_SIZE_BITS,      8).
 -define(WRITE_OK_MARKER,         255).
 -define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)).
--define(GUID_SIZE_BYTES,         16).
--define(GUID_SIZE_BITS,          (8 * ?GUID_SIZE_BYTES)).
+-define(MSG_ID_SIZE_BYTES,       16).
+-define(MSG_ID_SIZE_BITS,        (8 * ?MSG_ID_SIZE_BYTES)).
 -define(SCAN_BLOCK_SIZE,         4194304). %% 4MB
 
 %%----------------------------------------------------------------------------
@@ -39,83 +39,87 @@
 -type(position() :: non_neg_integer()).
 -type(msg_size() :: non_neg_integer()).
 -type(file_size() :: non_neg_integer()).
+-type(message_accumulator(A) ::
+        fun (({rabbit_types:msg_id(), msg_size(), position(), binary()}, A) ->
+                    A)).
 
--spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) ->
+-spec(append/3 :: (io_device(), rabbit_types:msg_id(), msg()) ->
                        rabbit_types:ok_or_error2(msg_size(), any())).
 -spec(read/2 :: (io_device(), msg_size()) ->
-                     rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()},
+                     rabbit_types:ok_or_error2({rabbit_types:msg_id(), msg()},
                                                any())).
--spec(scan/2 :: (io_device(), file_size()) ->
-                     {'ok', [{rabbit_guid:guid(), msg_size(), position()}],
-                      position()}).
+-spec(scan/4 :: (io_device(), file_size(), message_accumulator(A), A) ->
+                     {'ok', A, position()}).
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-append(FileHdl, Guid, MsgBody)
-  when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES ->
+append(FileHdl, MsgId, MsgBody)
+  when is_binary(MsgId) andalso size(MsgId) =:= ?MSG_ID_SIZE_BYTES ->
     MsgBodyBin  = term_to_binary(MsgBody),
     MsgBodyBinSize = size(MsgBodyBin),
-    Size = MsgBodyBinSize + ?GUID_SIZE_BYTES,
+    Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES,
     case file_handle_cache:append(FileHdl,
                                   <<Size:?INTEGER_SIZE_BITS,
-                                   Guid:?GUID_SIZE_BYTES/binary,
-                                   MsgBodyBin:MsgBodyBinSize/binary,
-                                   ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of
+                                    MsgId:?MSG_ID_SIZE_BYTES/binary,
+                                    MsgBodyBin:MsgBodyBinSize/binary,
+                                    ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of
         ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT};
         KO -> KO
     end.
 
 read(FileHdl, TotalSize) ->
     Size = TotalSize - ?FILE_PACKING_ADJUSTMENT,
-    BodyBinSize = Size - ?GUID_SIZE_BYTES,
+    BodyBinSize = Size - ?MSG_ID_SIZE_BYTES,
     case file_handle_cache:read(FileHdl, TotalSize) of
         {ok, <<Size:?INTEGER_SIZE_BITS,
-              Guid:?GUID_SIZE_BYTES/binary,
-              MsgBodyBin:BodyBinSize/binary,
-              ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
-            {ok, {Guid, binary_to_term(MsgBodyBin)}};
+               MsgId:?MSG_ID_SIZE_BYTES/binary,
+               MsgBodyBin:BodyBinSize/binary,
+               ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
+            {ok, {MsgId, binary_to_term(MsgBodyBin)}};
         KO -> KO
     end.
 
-scan(FileHdl, FileSize) when FileSize >= 0 ->
-    scan(FileHdl, FileSize, <<>>, 0, [], 0).
+scan(FileHdl, FileSize, Fun, Acc) when FileSize >= 0 ->
+    scan(FileHdl, FileSize, <<>>, 0, 0, Fun, Acc).
 
-scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) ->
+scan(_FileHdl, FileSize, _Data, FileSize, ScanOffset, _Fun, Acc) ->
     {ok, Acc, ScanOffset};
-scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) ->
+scan(FileHdl, FileSize, Data, ReadOffset, ScanOffset, Fun, Acc) ->
     Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]),
     case file_handle_cache:read(FileHdl, Read) of
         {ok, Data1} ->
             {Data2, Acc1, ScanOffset1} =
-                scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset),
+                scanner(<<Data/binary, Data1/binary>>, ScanOffset, Fun, Acc),
             ReadOffset1 = ReadOffset + size(Data1),
-            scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1);
+            scan(FileHdl, FileSize, Data2, ReadOffset1, ScanOffset1, Fun, Acc1);
         _KO ->
             {ok, Acc, ScanOffset}
     end.
 
-scan(<<>>, Acc, Offset) ->
+scanner(<<>>, Offset, _Fun, Acc) ->
     {<<>>, Acc, Offset};
-scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) ->
+scanner(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Offset, _Fun, Acc) ->
     {<<>>, Acc, Offset}; %% Nothing to do other than stop.
-scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary,
-       WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) ->
+scanner(<<Size:?INTEGER_SIZE_BITS, MsgIdAndMsg:Size/binary,
+          WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Offset, Fun, Acc) ->
     TotalSize = Size + ?FILE_PACKING_ADJUSTMENT,
     case WriteMarker of
         ?WRITE_OK_MARKER ->
             %% Here we take option 5 from
             %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in
-            %% which we read the Guid as a number, and then convert it
+            %% which we read the MsgId as a number, and then convert it
             %% back to a binary in order to work around bugs in
             %% Erlang's GC.
-            <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> =
-                <<GuidAndMsg:Size/binary>>,
-            <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>,
-            scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize);
+            <<MsgIdNum:?MSG_ID_SIZE_BITS, Msg/binary>> =
+                <<MsgIdAndMsg:Size/binary>>,
+            <<MsgId:?MSG_ID_SIZE_BYTES/binary>> =
+                <<MsgIdNum:?MSG_ID_SIZE_BITS>>,
+            scanner(Rest, Offset + TotalSize, Fun,
+                    Fun({MsgId, TotalSize, Offset, Msg}, Acc));
         _ ->
-            scan(Rest, Acc, Offset + TotalSize)
+            scanner(Rest, Offset + TotalSize, Fun, Acc)
     end;
-scan(Data, Acc, Offset) ->
+scanner(Data, Offset, _Fun, Acc) ->
     {Data, Acc, Offset}.
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 75ca0b8b..6c5035a0 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -21,14 +21,16 @@
 -export([start_link/4, successfully_recovered_state/1,
          client_init/4, client_terminate/1, client_delete_and_terminate/1,
          client_ref/1, close_all_indicated/1,
-         write/3, read/2, contains/2, remove/2, release/2, sync/3]).
+         write/3, read/2, contains/2, remove/2, sync/3]).
 
 -export([set_maximum_since_use/2, has_readers/2, combine_files/3,
          delete_file/2]). %% internal
 
+-export([transform_dir/3, force_recovery/2]). %% upgrade
+
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
          code_change/3, prioritise_call/3, prioritise_cast/2,
-         prioritise_info/2]).
+         prioritise_info/2, format_message_queue/2]).
 
 %%----------------------------------------------------------------------------
 
@@ -37,6 +39,7 @@
 -define(SYNC_INTERVAL,  5).   %% milliseconds
 -define(CLEAN_FILENAME, "clean.dot").
 -define(FILE_SUMMARY_FILENAME, "file_summary.ets").
+-define(TRANSFORM_TMP, "transform_tmp").
 
 -define(BINARY_MODE,     [raw, binary]).
 -define(READ_MODE,       [read]).
@@ -65,15 +68,14 @@
           gc_pid,                 %% pid of our GC
           file_handles_ets,       %% tid of the shared file handles table
           file_summary_ets,       %% tid of the file summary table
-          dedup_cache_ets,        %% tid of dedup cache table
           cur_file_cache_ets,     %% tid of current file cache table
           dying_clients,          %% set of dying clients
           clients,                %% map of references of all registered clients
                                   %% to callbacks
           successfully_recovered, %% boolean: did we recover state?
           file_size_limit,        %% how big are our files allowed to get?
-          cref_to_guids           %% client ref to synced messages mapping
-         }).
+          cref_to_msg_ids         %% client ref to not-yet-confirmed msg ids
+        }).
 
 -record(client_msstate,
         { server,
@@ -85,9 +87,8 @@
           gc_pid,
           file_handles_ets,
           file_summary_ets,
-          dedup_cache_ets,
           cur_file_cache_ets
-         }).
+        }).
 
 -record(file_summary,
         {file, valid_total_size, left, right, file_size, locked, readers}).
@@ -128,38 +129,39 @@
                       gc_pid             :: pid(),
                       file_handles_ets   :: ets:tid(),
                       file_summary_ets   :: ets:tid(),
-                      dedup_cache_ets    :: ets:tid(),
                       cur_file_cache_ets :: ets:tid()}).
--type(startup_fun_state() ::
-        {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})),
-         A}).
--type(maybe_guid_fun() :: 'undefined' | fun ((gb_set()) -> any())).
+-type(msg_ref_delta_gen(A) ::
+        fun ((A) -> 'finished' |
+                    {rabbit_types:msg_id(), non_neg_integer(), A})).
+-type(maybe_msg_id_fun() :: 'undefined' | fun ((gb_set()) -> any())).
 -type(maybe_close_fds_fun() :: 'undefined' | fun (() -> 'ok')).
 -type(deletion_thunk() :: fun (() -> boolean())).
 
 -spec(start_link/4 ::
         (atom(), file:filename(), [binary()] | 'undefined',
-         startup_fun_state()) -> rabbit_types:ok_pid_or_error()).
+         {msg_ref_delta_gen(A), A}) -> rabbit_types:ok_pid_or_error()).
 -spec(successfully_recovered_state/1 :: (server()) -> boolean()).
--spec(client_init/4 :: (server(), client_ref(), maybe_guid_fun(),
+-spec(client_init/4 :: (server(), client_ref(), maybe_msg_id_fun(),
                         maybe_close_fds_fun()) -> client_msstate()).
 -spec(client_terminate/1 :: (client_msstate()) -> 'ok').
 -spec(client_delete_and_terminate/1 :: (client_msstate()) -> 'ok').
 -spec(client_ref/1 :: (client_msstate()) -> client_ref()).
--spec(write/3 :: (rabbit_guid:guid(), msg(), client_msstate()) -> 'ok').
--spec(read/2 :: (rabbit_guid:guid(), client_msstate()) ->
-             {rabbit_types:ok(msg()) | 'not_found', client_msstate()}).
--spec(contains/2 :: (rabbit_guid:guid(), client_msstate()) -> boolean()).
--spec(remove/2 :: ([rabbit_guid:guid()], client_msstate()) -> 'ok').
--spec(release/2 :: ([rabbit_guid:guid()], client_msstate()) -> 'ok').
--spec(sync/3 :: ([rabbit_guid:guid()], fun (() -> any()), client_msstate()) ->
-             'ok').
+-spec(write/3 :: (rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok').
+-spec(read/2 :: (rabbit_types:msg_id(), client_msstate()) ->
+                     {rabbit_types:ok(msg()) | 'not_found', client_msstate()}).
+-spec(contains/2 :: (rabbit_types:msg_id(), client_msstate()) -> boolean()).
+-spec(remove/2 :: ([rabbit_types:msg_id()], client_msstate()) -> 'ok').
+-spec(sync/3 ::
+        ([rabbit_types:msg_id()], fun (() -> any()), client_msstate()) -> 'ok').
 
 -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok').
 -spec(has_readers/2 :: (non_neg_integer(), gc_state()) -> boolean()).
 -spec(combine_files/3 :: (non_neg_integer(), non_neg_integer(), gc_state()) ->
                               deletion_thunk()).
 -spec(delete_file/2 :: (non_neg_integer(), gc_state()) -> deletion_thunk()).
+-spec(force_recovery/2 :: (file:filename(), server()) -> 'ok').
+-spec(transform_dir/3 :: (file:filename(), server(),
+        fun ((any()) -> (rabbit_types:ok_or_error2(msg(), any())))) -> 'ok').
 
 -endif.
 
@@ -171,8 +173,8 @@
 
 %% The components:
 %%
-%% Index: this is a mapping from Guid to #msg_location{}:
-%%        {Guid, RefCount, File, Offset, TotalSize}
+%% Index: this is a mapping from MsgId to #msg_location{}:
+%%        {MsgId, RefCount, File, Offset, TotalSize}
 %%        By default, it's in ets, but it's also pluggable.
 %% FileSummary: this is an ets table which maps File to #file_summary{}:
 %%        {File, ValidTotalSize, Left, Right, FileSize, Locked, Readers}
@@ -273,7 +275,7 @@
 %% alternating full files and files with only one tiny message in
 %% them).
 %%
-%% Messages are reference-counted. When a message with the same guid
+%% Messages are reference-counted. When a message with the same msg id
 %% is written several times we only store it once, and only remove it
 %% from the store when it has been removed the same number of times.
 %%
@@ -390,7 +392,7 @@ successfully_recovered_state(Server) ->
 
 client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) ->
     {IState, IModule, Dir, GCPid,
-     FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} =
+     FileHandlesEts, FileSummaryEts, CurFileCacheEts} =
         gen_server2:call(
           Server, {new_client_state, Ref, MsgOnDiskFun, CloseFDsFun}, infinity),
     #client_msstate { server             = Server,
@@ -402,7 +404,6 @@ client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) ->
                       gc_pid             = GCPid,
                       file_handles_ets   = FileHandlesEts,
                       file_summary_ets   = FileSummaryEts,
-                      dedup_cache_ets    = DedupCacheEts,
                       cur_file_cache_ets = CurFileCacheEts }.
 
 client_terminate(CState = #client_msstate { client_ref = Ref }) ->
@@ -416,44 +417,31 @@ client_delete_and_terminate(CState = #client_msstate { client_ref = Ref }) ->
 
 client_ref(#client_msstate { client_ref = Ref }) -> Ref.
 
-write(Guid, Msg,
+write(MsgId, Msg,
       CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts,
                                  client_ref         = CRef }) ->
-    ok = update_msg_cache(CurFileCacheEts, Guid, Msg),
-    ok = server_cast(CState, {write, CRef, Guid}).
-
-read(Guid,
-     CState = #client_msstate { dedup_cache_ets    = DedupCacheEts,
-                                cur_file_cache_ets = CurFileCacheEts }) ->
-    %% 1. Check the dedup cache
-    case fetch_and_increment_cache(DedupCacheEts, Guid) of
-        not_found ->
-            %% 2. Check the cur file cache
-            case ets:lookup(CurFileCacheEts, Guid) of
-                [] ->
-                    Defer = fun() ->
-                                    {server_call(CState, {read, Guid}), CState}
-                            end,
-                    case index_lookup_positive_ref_count(Guid, CState) of
-                        not_found   -> Defer();
-                        MsgLocation -> client_read1(MsgLocation, Defer, CState)
-                    end;
-                [{Guid, Msg, _CacheRefCount}] ->
-                    %% Although we've found it, we don't know the
-                    %% refcount, so can't insert into dedup cache
-                    {{ok, Msg}, CState}
+    ok = update_msg_cache(CurFileCacheEts, MsgId, Msg),
+    ok = server_cast(CState, {write, CRef, MsgId}).
+
+read(MsgId,
+     CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
+    %% Check the current file cache first, then fall back to the index
+    case ets:lookup(CurFileCacheEts, MsgId) of
+        [] ->
+            Defer = fun() -> {server_call(CState, {read, MsgId}), CState} end,
+            case index_lookup_positive_ref_count(MsgId, CState) of
+                not_found   -> Defer();
+                MsgLocation -> client_read1(MsgLocation, Defer, CState)
             end;
-        Msg ->
+        [{MsgId, Msg, _CacheRefCount}] ->
             {{ok, Msg}, CState}
     end.
 
-contains(Guid, CState) -> server_call(CState, {contains, Guid}).
+contains(MsgId, CState) -> server_call(CState, {contains, MsgId}).
 remove([],    _CState) -> ok;
-remove(Guids, CState = #client_msstate { client_ref = CRef }) ->
-    server_cast(CState, {remove, CRef, Guids}).
-release([],   _CState) -> ok;
-release(Guids, CState) -> server_cast(CState, {release, Guids}).
-sync(Guids, K, CState) -> server_cast(CState, {sync, Guids, K}).
+remove(MsgIds, CState = #client_msstate { client_ref = CRef }) ->
+    server_cast(CState, {remove, CRef, MsgIds}).
+sync(MsgIds, K, CState) -> server_cast(CState, {sync, MsgIds, K}).
 
 set_maximum_since_use(Server, Age) ->
     gen_server2:cast(Server, {set_maximum_since_use, Age}).
@@ -468,11 +456,11 @@ server_call(#client_msstate { server = Server }, Msg) ->
 server_cast(#client_msstate { server = Server }, Msg) ->
     gen_server2:cast(Server, Msg).
 
-client_read1(#msg_location { guid = Guid, file = File } = MsgLocation, Defer,
+client_read1(#msg_location { msg_id = MsgId, file = File } = MsgLocation, Defer,
              CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
     case ets:lookup(FileSummaryEts, File) of
         [] -> %% File has been GC'd and no longer exists. Go around again.
-            read(Guid, CState);
+            read(MsgId, CState);
         [#file_summary { locked = Locked, right = Right }] ->
             client_read2(Locked, Right, MsgLocation, Defer, CState)
     end.
@@ -494,7 +482,7 @@ client_read2(true, _Right, _MsgLocation, Defer, _CState) ->
     %% the safest and simplest thing to do.
     Defer();
 client_read2(false, _Right,
-             MsgLocation = #msg_location { guid = Guid, file = File },
+             MsgLocation = #msg_location { msg_id = MsgId, file = File },
              Defer,
              CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
     %% It's entirely possible that everything we're doing from here on
@@ -503,12 +491,11 @@ client_read2(false, _Right,
     safe_ets_update_counter(
       FileSummaryEts, File, {#file_summary.readers, +1},
       fun (_) -> client_read3(MsgLocation, Defer, CState) end,
-      fun () -> read(Guid, CState) end).
+      fun () -> read(MsgId, CState) end).
 
-client_read3(#msg_location { guid = Guid, file = File }, Defer,
+client_read3(#msg_location { msg_id = MsgId, file = File }, Defer,
              CState = #client_msstate { file_handles_ets = FileHandlesEts,
                                         file_summary_ets = FileSummaryEts,
-                                        dedup_cache_ets  = DedupCacheEts,
                                         gc_pid           = GCPid,
                                         client_ref       = Ref }) ->
     Release =
@@ -530,7 +517,7 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer,
     %% too).
     case ets:lookup(FileSummaryEts, File) of
         [] -> %% GC has deleted our file, just go round again.
-            read(Guid, CState);
+            read(MsgId, CState);
         [#file_summary { locked = true }] ->
             %% If we get a badarg here, then the GC has finished and
             %% deleted our file. Try going around again. Otherwise,
@@ -540,8 +527,8 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer,
             %% GC ends, we +1 readers, msg_store ets:deletes (and
             %% unlocks the dest)
             try Release(),
-                Defer()
-            catch error:badarg -> read(Guid, CState)
+                 Defer()
+            catch error:badarg -> read(MsgId, CState)
             end;
         [#file_summary { locked = false }] ->
             %% Ok, we're definitely safe to continue - a GC involving
@@ -554,7 +541,7 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer,
             %% us doing the lookup and the +1 on the readers. (Same as
             %% badarg scenario above, but we don't have a missing file
             %% - we just have the /wrong/ file).
-            case index_lookup(Guid, CState) of
+            case index_lookup(MsgId, CState) of
                 #msg_location { file = File } = MsgLocation ->
                     %% Still the same file.
                     {ok, CState1} = close_all_indicated(CState),
@@ -565,8 +552,8 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer,
                     %% Could the msg_store now mark the file to be
                     %% closed? No: marks for closing are issued only
                     %% when the msg_store has locked the file.
-                    {Msg, CState2} = %% This will never be the current file
-                        read_from_disk(MsgLocation, CState1, DedupCacheEts),
+                    %% This will never be the current file
+                    {Msg, CState2} = read_from_disk(MsgLocation, CState1),
                     Release(), %% this MUST NOT fail with badarg
                     {{ok, Msg}, CState2};
                 #msg_location {} = MsgLocation -> %% different file!
@@ -580,9 +567,9 @@ client_read3(#msg_location { guid = Guid, file = File }, Defer,
             end
     end.
 
-clear_client(CRef, State = #msstate { cref_to_guids = CTG,
+clear_client(CRef, State = #msstate { cref_to_msg_ids = CTM,
                                       dying_clients = DyingClients }) ->
-    State #msstate { cref_to_guids = dict:erase(CRef, CTG),
+    State #msstate { cref_to_msg_ids = dict:erase(CRef, CTM),
                      dying_clients = sets:del_element(CRef, DyingClients) }.
 
 
@@ -630,13 +617,21 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) ->
     %% CleanShutdown <=> msg location index and file_summary both
     %% recovered correctly.
 
-    DedupCacheEts   = ets:new(rabbit_msg_store_dedup_cache, [set, public]),
     FileHandlesEts  = ets:new(rabbit_msg_store_shared_file_handles,
                               [ordered_set, public]),
     CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]),
 
     {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit),
 
+    {ok, GCPid} = rabbit_msg_store_gc:start_link(
+                    #gc_state { dir              = Dir,
+                                index_module     = IndexModule,
+                                index_state      = IndexState,
+                                file_summary_ets = FileSummaryEts,
+                                file_handles_ets = FileHandlesEts,
+                                msg_store        = self()
+                              }),
+
     State = #msstate { dir                    = Dir,
                        index_module           = IndexModule,
                        index_state            = IndexState,
@@ -648,17 +643,16 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) ->
                        sum_valid_data         = 0,
                        sum_file_size          = 0,
                        pending_gc_completion  = orddict:new(),
-                       gc_pid                 = undefined,
+                       gc_pid                 = GCPid,
                        file_handles_ets       = FileHandlesEts,
                        file_summary_ets       = FileSummaryEts,
-                       dedup_cache_ets        = DedupCacheEts,
                        cur_file_cache_ets     = CurFileCacheEts,
                        dying_clients          = sets:new(),
                        clients                = Clients,
                        successfully_recovered = CleanShutdown,
                        file_size_limit        = FileSizeLimit,
-                       cref_to_guids          = dict:new()
-                      },
+                       cref_to_msg_ids        = dict:new()
+                     },
 
     %% If we didn't recover the msg location index then we need to
     %% rebuild it now.
@@ -671,17 +665,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) ->
     {ok, Offset} = file_handle_cache:position(CurHdl, Offset),
     ok = file_handle_cache:truncate(CurHdl),
 
-    {ok, GCPid} = rabbit_msg_store_gc:start_link(
-                    #gc_state { dir              = Dir,
-                                index_module     = IndexModule,
-                                index_state      = IndexState,
-                                file_summary_ets = FileSummaryEts,
-                                file_handles_ets = FileHandlesEts,
-                                msg_store        = self()
-                              }),
-
-    {ok, maybe_compact(
-           State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }),
+    {ok, maybe_compact(State1 #msstate { current_file_handle = CurHdl }),
      hibernate,
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
 
@@ -689,7 +673,7 @@ prioritise_call(Msg, _From, _State) ->
     case Msg of
         successfully_recovered_state                  -> 7;
         {new_client_state, _Ref, _MODC, _CloseFDsFun} -> 7;
-        {read, _Guid}                                 -> 2;
+        {read, _MsgId}                                -> 2;
         _                                             -> 0
     end.
 
@@ -712,29 +696,27 @@ handle_call(successfully_recovered_state, _From, State) ->
     reply(State #msstate.successfully_recovered, State);
 
 handle_call({new_client_state, CRef, MsgOnDiskFun, CloseFDsFun}, _From,
-            State = #msstate { dir                    = Dir,
-                               index_state            = IndexState,
-                               index_module           = IndexModule,
-                               file_handles_ets       = FileHandlesEts,
-                               file_summary_ets       = FileSummaryEts,
-                               dedup_cache_ets        = DedupCacheEts,
-                               cur_file_cache_ets     = CurFileCacheEts,
-                               clients                = Clients,
-                               gc_pid                 = GCPid }) ->
+            State = #msstate { dir                = Dir,
+                               index_state        = IndexState,
+                               index_module       = IndexModule,
+                               file_handles_ets   = FileHandlesEts,
+                               file_summary_ets   = FileSummaryEts,
+                               cur_file_cache_ets = CurFileCacheEts,
+                               clients            = Clients,
+                               gc_pid             = GCPid }) ->
     Clients1 = dict:store(CRef, {MsgOnDiskFun, CloseFDsFun}, Clients),
-    reply({IndexState, IndexModule, Dir, GCPid,
-           FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts},
-          State #msstate { clients = Clients1 });
+    reply({IndexState, IndexModule, Dir, GCPid, FileHandlesEts, FileSummaryEts,
+           CurFileCacheEts}, State #msstate { clients = Clients1 });
 
 handle_call({client_terminate, CRef}, _From, State) ->
     reply(ok, clear_client(CRef, State));
 
-handle_call({read, Guid}, From, State) ->
-    State1 = read_message(Guid, From, State),
+handle_call({read, MsgId}, From, State) ->
+    State1 = read_message(MsgId, From, State),
     noreply(State1);
 
-handle_call({contains, Guid}, From, State) ->
-    State1 = contains_message(Guid, From, State),
+handle_call({contains, MsgId}, From, State) ->
+    State1 = contains_message(MsgId, From, State),
     noreply(State1).
 
 handle_cast({client_dying, CRef},
@@ -747,53 +729,47 @@ handle_cast({client_delete, CRef}, State = #msstate { clients = Clients }) ->
     State1 = State #msstate { clients = dict:erase(CRef, Clients) },
     noreply(remove_message(CRef, CRef, clear_client(CRef, State1)));
 
-handle_cast({write, CRef, Guid},
+handle_cast({write, CRef, MsgId},
             State = #msstate { cur_file_cache_ets = CurFileCacheEts }) ->
-    true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}),
-    [{Guid, Msg, _CacheRefCount}] = ets:lookup(CurFileCacheEts, Guid),
+    true = 0 =< ets:update_counter(CurFileCacheEts, MsgId, {3, -1}),
+    [{MsgId, Msg, _CacheRefCount}] = ets:lookup(CurFileCacheEts, MsgId),
     noreply(
-      case write_action(should_mask_action(CRef, Guid, State), Guid, State) of
+      case write_action(should_mask_action(CRef, MsgId, State), MsgId, State) of
           {write, State1} ->
-              write_message(CRef, Guid, Msg, State1);
+              write_message(CRef, MsgId, Msg, State1);
           {ignore, CurFile, State1 = #msstate { current_file = CurFile }} ->
               State1;
           {ignore, _File, State1} ->
-              true = ets:delete_object(CurFileCacheEts, {Guid, Msg, 0}),
+              true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}),
               State1;
           {confirm, CurFile, State1 = #msstate { current_file = CurFile }}->
-              record_pending_confirm(CRef, Guid, State1);
+              record_pending_confirm(CRef, MsgId, State1);
           {confirm, _File, State1} ->
-              true = ets:delete_object(CurFileCacheEts, {Guid, Msg, 0}),
+              true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}),
               update_pending_confirms(
-                fun (MsgOnDiskFun, CTG) ->
-                        MsgOnDiskFun(gb_sets:singleton(Guid), written),
-                        CTG
+                fun (MsgOnDiskFun, CTM) ->
+                        MsgOnDiskFun(gb_sets:singleton(MsgId), written),
+                        CTM
                 end, CRef, State1)
       end);
 
-handle_cast({remove, CRef, Guids}, State) ->
+handle_cast({remove, CRef, MsgIds}, State) ->
     State1 = lists:foldl(
-               fun (Guid, State2) -> remove_message(Guid, CRef, State2) end,
-               State, Guids),
-    noreply(maybe_compact(
-              client_confirm(CRef, gb_sets:from_list(Guids), removed, State1)));
-
-handle_cast({release, Guids}, State =
-                #msstate { dedup_cache_ets = DedupCacheEts }) ->
-    lists:foreach(
-      fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids),
-    noreply(State);
+               fun (MsgId, State2) -> remove_message(MsgId, CRef, State2) end,
+               State, MsgIds),
+    noreply(maybe_compact(client_confirm(CRef, gb_sets:from_list(MsgIds),
+                                         removed, State1)));
 
-handle_cast({sync, Guids, K},
+handle_cast({sync, MsgIds, K},
             State = #msstate { current_file        = CurFile,
                                current_file_handle = CurHdl,
                                on_sync             = Syncs }) ->
     {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl),
-    case lists:any(fun (Guid) ->
+    case lists:any(fun (MsgId) ->
                            #msg_location { file = File, offset = Offset } =
-                               index_lookup(Guid, State),
+                               index_lookup(MsgId, State),
                            File =:= CurFile andalso Offset >= SyncOffset
-                   end, Guids) of
+                   end, MsgIds) of
         false -> K(),
                  noreply(State);
         true  -> noreply(State #msstate { on_sync = [K | Syncs] })
@@ -837,7 +813,6 @@ terminate(_Reason, State = #msstate { index_state         = IndexState,
                                       gc_pid              = GCPid,
                                       file_handles_ets    = FileHandlesEts,
                                       file_summary_ets    = FileSummaryEts,
-                                      dedup_cache_ets     = DedupCacheEts,
                                       cur_file_cache_ets  = CurFileCacheEts,
                                       clients             = Clients,
                                       dir                 = Dir }) ->
@@ -847,22 +822,24 @@ terminate(_Reason, State = #msstate { index_state         = IndexState,
     State1 = case CurHdl of
                  undefined -> State;
                  _         -> State2 = internal_sync(State),
-                              file_handle_cache:close(CurHdl),
+                              ok = file_handle_cache:close(CurHdl),
                               State2
              end,
     State3 = close_all_handles(State1),
-    store_file_summary(FileSummaryEts, Dir),
-    [ets:delete(T) ||
-        T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]],
+    ok = store_file_summary(FileSummaryEts, Dir),
+    [true = ets:delete(T) ||
+        T <- [FileSummaryEts, FileHandlesEts, CurFileCacheEts]],
     IndexModule:terminate(IndexState),
-    store_recovery_terms([{client_refs, dict:fetch_keys(Clients)},
-                          {index_module, IndexModule}], Dir),
+    ok = store_recovery_terms([{client_refs, dict:fetch_keys(Clients)},
+                               {index_module, IndexModule}], Dir),
     State3 #msstate { index_state         = undefined,
                       current_file_handle = undefined }.
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
 
+format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).
+
 %%----------------------------------------------------------------------------
 %% general helper functions
 %%----------------------------------------------------------------------------
@@ -875,16 +852,16 @@ reply(Reply, State) ->
     {State1, Timeout} = next_state(State),
     {reply, Reply, State1, Timeout}.
 
-next_state(State = #msstate { sync_timer_ref = undefined,
-                              on_sync        = Syncs,
-                              cref_to_guids  = CTG }) ->
-    case {Syncs, dict:size(CTG)} of
+next_state(State = #msstate { sync_timer_ref  = undefined,
+                              on_sync         = Syncs,
+                              cref_to_msg_ids = CTM }) ->
+    case {Syncs, dict:size(CTM)} of
         {[], 0} -> {State, hibernate};
         _       -> {start_sync_timer(State), 0}
     end;
-next_state(State = #msstate { on_sync       = Syncs,
-                              cref_to_guids = CTG }) ->
-    case {Syncs, dict:size(CTG)} of
+next_state(State = #msstate { on_sync         = Syncs,
+                              cref_to_msg_ids = CTM }) ->
+    case {Syncs, dict:size(CTM)} of
         {[], 0} -> {stop_sync_timer(State), hibernate};
         _       -> {State, 0}
     end.
@@ -901,66 +878,69 @@ stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) ->
 
 internal_sync(State = #msstate { current_file_handle = CurHdl,
                                  on_sync             = Syncs,
-                                 cref_to_guids       = CTG }) ->
+                                 cref_to_msg_ids     = CTM }) ->
     State1 = stop_sync_timer(State),
-    CGs = dict:fold(fun (CRef, Guids, NS) ->
-                            case gb_sets:is_empty(Guids) of
+    CGs = dict:fold(fun (CRef, MsgIds, NS) ->
+                            case gb_sets:is_empty(MsgIds) of
                                 true  -> NS;
-                                false -> [{CRef, Guids} | NS]
+                                false -> [{CRef, MsgIds} | NS]
                             end
-                    end, [], CTG),
-    case {Syncs, CGs} of
-        {[], []} -> ok;
-        _        -> file_handle_cache:sync(CurHdl)
-    end,
+                    end, [], CTM),
+    ok = case {Syncs, CGs} of
+             {[], []} -> ok;
+             _        -> file_handle_cache:sync(CurHdl)
+         end,
     [K() || K <- lists:reverse(Syncs)],
-    [client_confirm(CRef, Guids, written, State1) || {CRef, Guids} <- CGs],
-    State1 #msstate { cref_to_guids = dict:new(), on_sync = [] }.
+    State2 = lists:foldl(
+               fun ({CRef, MsgIds}, StateN) ->
+                       client_confirm(CRef, MsgIds, written, StateN)
+               end, State1, CGs),
+    State2 #msstate { on_sync = [] }.
 
-write_action({true, not_found}, _Guid, State) ->
+write_action({true, not_found}, _MsgId, State) ->
     {ignore, undefined, State};
-write_action({true, #msg_location { file = File }}, _Guid, State) ->
+write_action({true, #msg_location { file = File }}, _MsgId, State) ->
     {ignore, File, State};
-write_action({false, not_found}, _Guid, State) ->
+write_action({false, not_found}, _MsgId, State) ->
     {write, State};
 write_action({Mask, #msg_location { ref_count = 0, file = File,
                                     total_size = TotalSize }},
-             Guid, State = #msstate { file_summary_ets = FileSummaryEts }) ->
+             MsgId, State = #msstate { file_summary_ets = FileSummaryEts }) ->
     case {Mask, ets:lookup(FileSummaryEts, File)} of
         {false, [#file_summary { locked = true }]} ->
-            ok = index_delete(Guid, State),
+            ok = index_delete(MsgId, State),
             {write, State};
         {false_if_increment, [#file_summary { locked = true }]} ->
-            %% The msg for Guid is older than the client death
+            %% The msg for MsgId is older than the client death
             %% message, but as it is being GC'd currently we'll have
             %% to write a new copy, which will then be younger, so
             %% ignore this write.
             {ignore, File, State};
         {_Mask, [#file_summary {}]} ->
-            ok = index_update_ref_count(Guid, 1, State),
+            ok = index_update_ref_count(MsgId, 1, State),
             State1 = adjust_valid_total_size(File, TotalSize, State),
             {confirm, File, State1}
     end;
 write_action({_Mask, #msg_location { ref_count = RefCount, file = File }},
-             Guid, State) ->
-    ok = index_update_ref_count(Guid, RefCount + 1, State),
+             MsgId, State) ->
+    ok = index_update_ref_count(MsgId, RefCount + 1, State),
     %% We already know about it, just update counter. Only update
     %% field otherwise bad interaction with concurrent GC
     {confirm, File, State}.
 
-write_message(CRef, Guid, Msg, State) ->
-    write_message(Guid, Msg, record_pending_confirm(CRef, Guid, State)).
+write_message(CRef, MsgId, Msg, State) ->
+    write_message(MsgId, Msg, record_pending_confirm(CRef, MsgId, State)).
 
-write_message(Guid, Msg,
+write_message(MsgId, Msg,
               State = #msstate { current_file_handle = CurHdl,
                                  current_file        = CurFile,
                                  sum_valid_data      = SumValid,
                                  sum_file_size       = SumFileSize,
                                  file_summary_ets    = FileSummaryEts }) ->
     {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
-    {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg),
+    {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg),
     ok = index_insert(
-           #msg_location { guid = Guid, ref_count = 1, file = CurFile,
+           #msg_location { msg_id = MsgId, ref_count = 1, file = CurFile,
                            offset = CurOffset, total_size = TotalSize }, State),
     [#file_summary { right = undefined, locked = false }] =
         ets:lookup(FileSummaryEts, CurFile),
@@ -972,31 +952,23 @@ write_message(Guid, Msg,
                              sum_valid_data = SumValid    + TotalSize,
                              sum_file_size  = SumFileSize + TotalSize }).
 
-read_message(Guid, From,
-             State = #msstate { dedup_cache_ets = DedupCacheEts }) ->
-    case index_lookup_positive_ref_count(Guid, State) of
-        not_found ->
-            gen_server2:reply(From, not_found),
-            State;
-        MsgLocation ->
-            case fetch_and_increment_cache(DedupCacheEts, Guid) of
-                not_found -> read_message1(From, MsgLocation, State);
-                Msg       -> gen_server2:reply(From, {ok, Msg}),
-                             State
-            end
+%% Replies not_found to From when the index lookup fails; else read_message1/3.
+read_message(MsgId, From, State) ->
+    case index_lookup_positive_ref_count(MsgId, State) of
+        not_found -> gen_server2:reply(From, not_found), State;
+        Location  -> read_message1(From, Location, State)
     end.
 
-read_message1(From, #msg_location { guid = Guid, ref_count = RefCount,
-                                    file = File, offset = Offset } = MsgLoc,
+read_message1(From, #msg_location { msg_id = MsgId, file = File,
+                                    offset = Offset } = MsgLoc,
               State = #msstate { current_file        = CurFile,
                                  current_file_handle = CurHdl,
                                  file_summary_ets    = FileSummaryEts,
-                                 dedup_cache_ets     = DedupCacheEts,
                                  cur_file_cache_ets  = CurFileCacheEts }) ->
     case File =:= CurFile of
         true  -> {Msg, State1} =
                      %% can return [] if msg in file existed on startup
-                     case ets:lookup(CurFileCacheEts, Guid) of
+                     case ets:lookup(CurFileCacheEts, MsgId) of
                          [] ->
                              {ok, RawOffSet} =
                                  file_handle_cache:current_raw_offset(CurHdl),
@@ -1004,10 +976,8 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount,
                                       true  -> file_handle_cache:flush(CurHdl);
                                       false -> ok
                                   end,
-                             read_from_disk(MsgLoc, State, DedupCacheEts);
-                         [{Guid, Msg1, _CacheRefCount}] ->
-                             ok = maybe_insert_into_cache(
-                                    DedupCacheEts, RefCount, Guid, Msg1),
+                             read_from_disk(MsgLoc, State);
+                         [{MsgId, Msg1, _CacheRefCount}] ->
                              {Msg1, State}
                      end,
                  gen_server2:reply(From, {ok, Msg}),
@@ -1015,56 +985,51 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount,
         false -> [#file_summary { locked = Locked }] =
                      ets:lookup(FileSummaryEts, File),
                  case Locked of
-                     true  -> add_to_pending_gc_completion({read, Guid, From},
+                     true  -> add_to_pending_gc_completion({read, MsgId, From},
                                                            File, State);
-                     false -> {Msg, State1} =
-                                  read_from_disk(MsgLoc, State, DedupCacheEts),
+                     false -> {Msg, State1} = read_from_disk(MsgLoc, State),
                               gen_server2:reply(From, {ok, Msg}),
                               State1
                  end
     end.
 
-read_from_disk(#msg_location { guid = Guid, ref_count = RefCount,
-                               file = File, offset = Offset,
-                               total_size = TotalSize },
-               State, DedupCacheEts) ->
+read_from_disk(#msg_location { msg_id = MsgId, file = File, offset = Offset,
+                               total_size = TotalSize }, State) ->
     {Hdl, State1} = get_read_handle(File, State),
     {ok, Offset} = file_handle_cache:position(Hdl, Offset),
-    {ok, {Guid, Msg}} =
+    {ok, {MsgId, Msg}} =
         case rabbit_msg_file:read(Hdl, TotalSize) of
-            {ok, {Guid, _}} = Obj ->
+            {ok, {MsgId, _}} = Obj ->
                 Obj;
             Rest ->
                 {error, {misread, [{old_state, State},
                                    {file_num,  File},
                                    {offset,    Offset},
-                                   {guid,      Guid},
+                                   {msg_id,    MsgId},
                                    {read,      Rest},
                                    {proc_dict, get()}
                                   ]}}
         end,
-    ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg),
     {Msg, State1}.
 
-contains_message(Guid, From,
+contains_message(MsgId, From,
                  State = #msstate { pending_gc_completion = Pending }) ->
-    case index_lookup_positive_ref_count(Guid, State) of
+    case index_lookup_positive_ref_count(MsgId, State) of
         not_found ->
             gen_server2:reply(From, false),
             State;
         #msg_location { file = File } ->
             case orddict:is_key(File, Pending) of
                 true  -> add_to_pending_gc_completion(
-                           {contains, Guid, From}, File, State);
+                           {contains, MsgId, From}, File, State);
                 false -> gen_server2:reply(From, true),
                          State
             end
     end.
 
-remove_message(Guid, CRef,
-               State = #msstate { file_summary_ets = FileSummaryEts,
-                                  dedup_cache_ets  = DedupCacheEts }) ->
-    case should_mask_action(CRef, Guid, State) of
+remove_message(MsgId, CRef,
+               State = #msstate { file_summary_ets = FileSummaryEts }) ->
+    case should_mask_action(CRef, MsgId, State) of
         {true, _Location} ->
             State;
         {false_if_increment, #msg_location { ref_count = 0 }} ->
@@ -1077,25 +1042,24 @@ remove_message(Guid, CRef,
                                 total_size = TotalSize }} when RefCount > 0 ->
             %% only update field, otherwise bad interaction with
             %% concurrent GC
-            Dec =
-                fun () -> index_update_ref_count(Guid, RefCount - 1, State) end,
+            Dec = fun () ->
+                          index_update_ref_count(MsgId, RefCount - 1, State)
+                  end,
             case RefCount of
                 %% don't remove from CUR_FILE_CACHE_ETS_NAME here
                 %% because there may be further writes in the mailbox
                 %% for the same msg.
-                1 -> ok = remove_cache_entry(DedupCacheEts, Guid),
-                     case ets:lookup(FileSummaryEts, File) of
+                1 -> case ets:lookup(FileSummaryEts, File) of
                          [#file_summary { locked = true }] ->
                              add_to_pending_gc_completion(
-                               {remove, Guid, CRef}, File, State);
+                               {remove, MsgId, CRef}, File, State);
                          [#file_summary {}] ->
                              ok = Dec(),
                              delete_file_if_empty(
                                File, adjust_valid_total_size(File, -TotalSize,
                                                              State))
                      end;
-                _ -> ok = decrement_cache(DedupCacheEts, Guid),
-                     ok = Dec(),
+                _ -> ok = Dec(),
                      State
             end
     end.
@@ -1115,12 +1079,12 @@ run_pending(Files, State) ->
                 lists:reverse(orddict:fetch(File, Pending)))
       end, State, Files).
 
-run_pending_action({read, Guid, From}, State) ->
-    read_message(Guid, From, State);
-run_pending_action({contains, Guid, From}, State) ->
-    contains_message(Guid, From, State);
-run_pending_action({remove, Guid, CRef}, State) ->
-    remove_message(Guid, CRef, State).
+%% Dispatches one pending action tuple - {read, ...}, {contains, ...} or
+%% {remove, ...} - to the handler for its tag; any other shape is a
+%% function_clause error.
+run_pending_action({read, Id, From}, S)     -> read_message(Id, From, S);
+run_pending_action({contains, Id, From}, S) -> contains_message(Id, From, S);
+run_pending_action({remove, Id, CRef}, S)   -> remove_message(Id, CRef, S).
 
 safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
     try
@@ -1142,44 +1106,46 @@ orddict_store(Key, Val, Dict) ->
     false = orddict:is_key(Key, Dict),
     orddict:store(Key, Val, Dict).
 
-update_pending_confirms(Fun, CRef, State = #msstate { clients       = Clients,
-                                                      cref_to_guids = CTG }) ->
+update_pending_confirms(Fun, CRef,
+                        State = #msstate { clients         = Clients,
+                                           cref_to_msg_ids = CTM }) ->
     case dict:fetch(CRef, Clients) of
         {undefined,    _CloseFDsFun} -> State;
-        {MsgOnDiskFun, _CloseFDsFun} -> CTG1 = Fun(MsgOnDiskFun, CTG),
-                                        State #msstate { cref_to_guids = CTG1 }
+        {MsgOnDiskFun, _CloseFDsFun} -> CTM1 = Fun(MsgOnDiskFun, CTM),
+                                        State #msstate {
+                                          cref_to_msg_ids = CTM1 }
     end.
 
-record_pending_confirm(CRef, Guid, State) ->
+record_pending_confirm(CRef, MsgId, State) ->
     update_pending_confirms(
-      fun (_MsgOnDiskFun, CTG) ->
-              dict:update(CRef, fun (Guids) -> gb_sets:add(Guid, Guids) end,
-                          gb_sets:singleton(Guid), CTG)
+      fun (_MsgOnDiskFun, CTM) ->
+              dict:update(CRef, fun (MsgIds) -> gb_sets:add(MsgId, MsgIds) end,
+                          gb_sets:singleton(MsgId), CTM)
       end, CRef, State).
 
-client_confirm(CRef, Guids, ActionTaken, State) ->
+client_confirm(CRef, MsgIds, ActionTaken, State) ->
     update_pending_confirms(
-      fun (MsgOnDiskFun, CTG) ->
-              MsgOnDiskFun(Guids, ActionTaken),
-              case dict:find(CRef, CTG) of
-                  {ok, Gs} -> Guids1 = gb_sets:difference(Gs, Guids),
-                              case gb_sets:is_empty(Guids1) of
-                                  true  -> dict:erase(CRef, CTG);
-                                  false -> dict:store(CRef, Guids1, CTG)
+      fun (MsgOnDiskFun, CTM) ->
+              MsgOnDiskFun(MsgIds, ActionTaken),
+              case dict:find(CRef, CTM) of
+                  {ok, Gs} -> MsgIds1 = gb_sets:difference(Gs, MsgIds),
+                              case gb_sets:is_empty(MsgIds1) of
+                                  true  -> dict:erase(CRef, CTM);
+                                  false -> dict:store(CRef, MsgIds1, CTM)
                               end;
-                  error    -> CTG
+                  error    -> CTM
               end
       end, CRef, State).
 
-%% Detect whether the Guid is older or younger than the client's death
+%% Detect whether the MsgId is older or younger than the client's death
 %% msg (if there is one). If the msg is older than the client death
 %% msg, and it has a 0 ref_count we must only alter the ref_count, not
 %% rewrite the msg - rewriting it would make it younger than the death
 %% msg and thus should be ignored. Note that this (correctly) returns
 %% false when testing to remove the death msg itself.
-should_mask_action(CRef, Guid,
+should_mask_action(CRef, MsgId,
                    State = #msstate { dying_clients = DyingClients }) ->
-    case {sets:is_element(CRef, DyingClients), index_lookup(Guid, State)} of
+    case {sets:is_element(CRef, DyingClients), index_lookup(MsgId, State)} of
         {false, Location} ->
             {false, Location};
         {true, not_found} ->
@@ -1252,7 +1218,7 @@ safe_file_delete(File, Dir, FileHandlesEts) ->
 
 close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts,
                                       client_ref       = Ref } =
-                    CState) ->
+                        CState) ->
     Objs = ets:match_object(FileHandlesEts, {{Ref, '_'}, close}),
     {ok, lists:foldl(fun ({Key = {_Ref, File}, close}, CStateM) ->
                              true = ets:delete(FileHandlesEts, Key),
@@ -1316,48 +1282,14 @@ list_sorted_file_names(Dir, Ext) ->
 %% message cache helper functions
 %%----------------------------------------------------------------------------
 
-maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg)
-  when RefCount > 1 ->
-    update_msg_cache(DedupCacheEts, Guid, Msg);
-maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) ->
-    ok.
-
-update_msg_cache(CacheEts, Guid, Msg) ->
-    case ets:insert_new(CacheEts, {Guid, Msg, 1}) of
+update_msg_cache(CacheEts, MsgId, Msg) ->
+    case ets:insert_new(CacheEts, {MsgId, Msg, 1}) of
         true  -> ok;
         false -> safe_ets_update_counter_ok(
-                   CacheEts, Guid, {3, +1},
-                   fun () -> update_msg_cache(CacheEts, Guid, Msg) end)
+                   CacheEts, MsgId, {3, +1},
+                   fun () -> update_msg_cache(CacheEts, MsgId, Msg) end)
     end.
 
-remove_cache_entry(DedupCacheEts, Guid) ->
-    true = ets:delete(DedupCacheEts, Guid),
-    ok.
-
-fetch_and_increment_cache(DedupCacheEts, Guid) ->
-    case ets:lookup(DedupCacheEts, Guid) of
-        [] ->
-            not_found;
-        [{_Guid, Msg, _RefCount}] ->
-            safe_ets_update_counter_ok(
-              DedupCacheEts, Guid, {3, +1},
-              %% someone has deleted us in the meantime, insert us
-              fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end),
-            Msg
-    end.
-
-decrement_cache(DedupCacheEts, Guid) ->
-    true = safe_ets_update_counter(
-             DedupCacheEts, Guid, {3, -1},
-             fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid);
-                 (_N)            -> true
-             end,
-             %% Guid is not in there because although it's been
-             %% delivered, it's never actually been read (think:
-             %% persistent message held in RAM)
-             fun () -> true end),
-    ok.
-
 %%----------------------------------------------------------------------------
 %% index
 %%----------------------------------------------------------------------------
@@ -1460,8 +1392,8 @@ recover_file_summary(false, _Dir) ->
 recover_file_summary(true, Dir) ->
     Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
     case ets:file2tab(Path) of
-        {ok, Tid}       -> file:delete(Path),
-                          {true, Tid};
+        {ok, Tid}       -> ok = file:delete(Path),
+                           {true, Tid};
         {error, _Error} -> recover_file_summary(false, Dir)
     end.
 
@@ -1469,19 +1401,19 @@ count_msg_refs(Gen, Seed, State) ->
     case Gen(Seed) of
         finished ->
             ok;
-        {_Guid, 0, Next} ->
+        {_MsgId, 0, Next} ->
             count_msg_refs(Gen, Next, State);
-        {Guid, Delta, Next} ->
-            ok = case index_lookup(Guid, State) of
+        {MsgId, Delta, Next} ->
+            ok = case index_lookup(MsgId, State) of
                      not_found ->
-                         index_insert(#msg_location { guid = Guid,
+                         index_insert(#msg_location { msg_id = MsgId,
                                                       file = undefined,
                                                       ref_count = Delta },
                                       State);
                      #msg_location { ref_count = RefCount } = StoreEntry ->
                          NewRefCount = RefCount + Delta,
                          case NewRefCount of
-                             0 -> index_delete(Guid, State);
+                             0 -> index_delete(MsgId, State);
                              _ -> index_update(StoreEntry #msg_location {
                                                  ref_count = NewRefCount },
                                                State)
@@ -1525,15 +1457,17 @@ scan_file_for_valid_messages(Dir, FileName) ->
     case open_file(Dir, FileName, ?READ_MODE) of
         {ok, Hdl}       -> Valid = rabbit_msg_file:scan(
                                      Hdl, filelib:file_size(
-                                            form_filename(Dir, FileName))),
-                           %% if something really bad has happened,
-                           %% the close could fail, but ignore
-                           file_handle_cache:close(Hdl),
+                                            form_filename(Dir, FileName)),
+                                     fun scan_fun/2, []),
+                           ok = file_handle_cache:close(Hdl),
                            Valid;
         {error, enoent} -> {ok, [], 0};
         {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}}
     end.
 
+%% Fold step passed to rabbit_msg_file:scan/4: keeps every field but the 4th.
+scan_fun({Id, Size, Offset, _Msg}, Acc) -> [{Id, Size, Offset} | Acc].
+
 %% Takes the list in *ascending* order (i.e. eldest message
 %% first). This is the opposite of what scan_file_for_valid_messages
 %% produces. The list of msgs that is produced is youngest first.
@@ -1581,8 +1515,8 @@ build_index(Gatherer, Left, [],
                                sum_file_size    = SumFileSize }) ->
     case gatherer:out(Gatherer) of
         empty ->
+            unlink(Gatherer),
             ok = gatherer:stop(Gatherer),
-            ok = rabbit_misc:unlink_and_capture_exit(Gatherer),
             ok = index_delete_by_file(undefined, State),
             Offset = case ets:lookup(FileSummaryEts, Left) of
                          []                                       -> 0;
@@ -1611,8 +1545,8 @@ build_index_worker(Gatherer, State = #msstate { dir = Dir },
         scan_file_for_valid_messages(Dir, filenum_to_name(File)),
     {ValidMessages, ValidTotalSize} =
         lists:foldl(
-          fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
-                  case index_lookup(Guid, State) of
+          fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
+                  case index_lookup(MsgId, State) of
                       #msg_location { file = undefined } = StoreEntry ->
                           ok = index_update(StoreEntry #msg_location {
                                               file = File, offset = Offset,
@@ -1630,7 +1564,7 @@ build_index_worker(Gatherer, State = #msstate { dir = Dir },
             %% file size.
             []    -> {undefined, case ValidMessages of
                                      [] -> 0;
-                                     _  -> {_Guid, TotalSize, Offset} =
+                                     _  -> {_MsgId, TotalSize, Offset} =
                                                lists:last(ValidMessages),
                                            Offset + TotalSize
                                  end};
@@ -1685,8 +1619,8 @@ maybe_compact(State = #msstate { sum_valid_data        = SumValid,
                                  pending_gc_completion = Pending,
                                  file_summary_ets      = FileSummaryEts,
                                  file_size_limit       = FileSizeLimit })
-  when (SumFileSize > 2 * FileSizeLimit andalso
-        (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) ->
+  when SumFileSize > 2 * FileSizeLimit andalso
+       (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION ->
     %% TODO: the algorithm here is sub-optimal - it may result in a
     %% complete traversal of FileSummaryEts.
     case ets:first(FileSummaryEts) of
@@ -1749,10 +1683,10 @@ delete_file_if_empty(File, State = #msstate {
                      locked           = false }] =
         ets:lookup(FileSummaryEts, File),
     case ValidData of
-        0 -> %% don't delete the file_summary_ets entry for File here
-             %% because we could have readers which need to be able to
-             %% decrement the readers count.
-             true = ets:update_element(FileSummaryEts, File,
+        %% don't delete the file_summary_ets entry for File here
+        %% because we could have readers which need to be able to
+        %% decrement the readers count.
+        0 -> true = ets:update_element(FileSummaryEts, File,
                                        {#file_summary.locked, true}),
              ok = rabbit_msg_store_gc:delete(GCPid, File),
              Pending1 = orddict_store(File, [], Pending),
@@ -1805,17 +1739,17 @@ combine_files(Source, Destination,
                                   dir              = Dir,
                                   msg_store        = Server }) ->
     [#file_summary {
-       readers          = 0,
-       left             = Destination,
-       valid_total_size = SourceValid,
-       file_size        = SourceFileSize,
-       locked           = true }] = ets:lookup(FileSummaryEts, Source),
+        readers          = 0,
+        left             = Destination,
+        valid_total_size = SourceValid,
+        file_size        = SourceFileSize,
+        locked           = true }] = ets:lookup(FileSummaryEts, Source),
     [#file_summary {
-       readers          = 0,
-       right            = Source,
-       valid_total_size = DestinationValid,
-       file_size        = DestinationFileSize,
-       locked           = true }] = ets:lookup(FileSummaryEts, Destination),
+        readers          = 0,
+        right            = Source,
+        valid_total_size = DestinationValid,
+        file_size        = DestinationFileSize,
+        locked           = true }] = ets:lookup(FileSummaryEts, Destination),
 
     SourceName           = filenum_to_name(Source),
     DestinationName      = filenum_to_name(Destination),
@@ -1895,8 +1829,8 @@ load_and_vacuum_message_file(File, #gc_state { dir          = Dir,
         scan_file_for_valid_messages(Dir, filenum_to_name(File)),
     %% foldl will reverse so will end up with msgs in ascending offset order
     lists:foldl(
-      fun ({Guid, TotalSize, Offset}, Acc = {List, Size}) ->
-              case Index:lookup(Guid, IndexState) of
+      fun ({MsgId, TotalSize, Offset}, Acc = {List, Size}) ->
+              case Index:lookup(MsgId, IndexState) of
                   #msg_location { file = File, total_size = TotalSize,
                                   offset = Offset, ref_count = 0 } = Entry ->
                       ok = Index:delete_object(Entry, IndexState),
@@ -1921,13 +1855,13 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl,
            end,
     case
         lists:foldl(
-          fun (#msg_location { guid = Guid, offset = Offset,
+          fun (#msg_location { msg_id = MsgId, offset = Offset,
                                total_size = TotalSize },
                {CurOffset, Block = {BlockStart, BlockEnd}}) ->
                   %% CurOffset is in the DestinationFile.
                   %% Offset, BlockStart and BlockEnd are in the SourceFile
                   %% update MsgLocation to reflect change of file and offset
-                  ok = Index:update_fields(Guid,
+                  ok = Index:update_fields(MsgId,
                                            [{#msg_location.file, Destination},
                                             {#msg_location.offset, CurOffset}],
                                            IndexState),
@@ -1958,3 +1892,95 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl,
                         {got, FinalOffsetZ},
                         {destination, Destination}]}
     end.
+
+%% Deletes the ?CLEAN_FILENAME file of the store Store under BaseDir (a
+%% missing file is fine; any other delete failure crashes) and then
+%% runs recover_crashed_compactions/1. Always returns ok.
+%%
+%% NOTE(review): recover_crashed_compactions/1 is handed BaseDir, not
+%% the per-store directory computed here - confirm that is intended.
+force_recovery(BaseDir, Store) ->
+    StoreDir  = filename:join(BaseDir, atom_to_list(Store)),
+    CleanFile = filename:join(StoreDir, ?CLEAN_FILENAME),
+    ok = case file:delete(CleanFile) of
+             {error, enoent} -> ok;
+             ok              -> ok
+         end,
+    recover_crashed_compactions(BaseDir),
+    ok.
+
+%% Calls Fun on each name in Files joined onto directory D, insisting
+%% (badmatch otherwise) that every call returns ok. Yields one ok per
+%% file.
+foreach_file(D, Fun, Files) ->
+    [begin
+         Path = filename:join(D, File),
+         ok = Fun(Path)
+     end || File <- Files].
+
+%% Two-directory variant: Fun receives each name joined onto D1 and
+%% onto D2, and must likewise return ok.
+foreach_file(D1, D2, Fun, Files) ->
+    [begin
+         From = filename:join(D1, File),
+         To   = filename:join(D2, File),
+         ok = Fun(From, To)
+     end || File <- Files].
+
+%% Runs TransformFun over every file that list_sorted_file_names/2
+%% returns for ?FILE_EXTENSION in the directory of store Store under
+%% BaseDir: each file is transformed into the ?TRANSFORM_TMP
+%% subdirectory, the originals are deleted, the transformed files are
+%% copied back, the temporary copies are deleted and the temporary
+%% directory is removed. Throws {error, transform_failed_previously}
+%% if that directory already exists on entry.
+transform_dir(BaseDir, Store, TransformFun) ->
+    StoreDir   = filename:join(BaseDir, atom_to_list(Store)),
+    ScratchDir = filename:join(StoreDir, ?TRANSFORM_TMP),
+    case filelib:is_dir(ScratchDir) of
+        true ->
+            throw({error, transform_failed_previously});
+        false ->
+            Transform = fun (Old, New) ->
+                                transform_msg_file(Old, New, TransformFun)
+                        end,
+            Copy      = fun (Src, Dst) ->
+                                {ok, _Bytes} = file:copy(Src, Dst),
+                                ok
+                        end,
+            Delete    = fun file:delete/1,
+            Names     = list_sorted_file_names(StoreDir, ?FILE_EXTENSION),
+            foreach_file(StoreDir, ScratchDir, Transform, Names),
+            foreach_file(StoreDir,             Delete,    Names),
+            foreach_file(ScratchDir, StoreDir, Copy,      Names),
+            foreach_file(ScratchDir,           Delete,    Names),
+            ok = file:del_dir(ScratchDir)
+    end.
+
+%% Writes a transformed copy of the message file FileOld to FileNew.
+%% The fold fun given to rabbit_msg_file:scan/4 decodes each binary
+%% with binary_to_term/1 and passes it to TransformFun, which must
+%% return {ok, NewMsg}; the result is appended to FileNew under the
+%% same MsgId. Both handles are closed before ok is returned.
+transform_msg_file(FileOld, FileNew, TransformFun) ->
+    ok = rabbit_misc:ensure_parent_dirs_exist(FileNew),
+    ReadModes    = [raw, binary, read],
+    WriteModes   = [raw, binary, write],
+    WriteOptions = [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}],
+    {ok, RefOld} = file_handle_cache:open(FileOld, ReadModes, []),
+    {ok, RefNew} = file_handle_cache:open(FileNew, WriteModes, WriteOptions),
+    Rewrite =
+        fun ({MsgId, _Size, _Offset, BinMsg}, ok) ->
+                {ok, MsgNew} =
+                    case binary_to_term(BinMsg) of
+                        %% dying client marker
+                        <<>> -> {ok, <<>>};
+                        Msg  -> TransformFun(Msg)
+                    end,
+                {ok, _} = rabbit_msg_file:append(RefNew, MsgId, MsgNew),
+                ok
+        end,
+    {ok, _Acc, _IgnoreSize} =
+        rabbit_msg_file:scan(RefOld, filelib:file_size(FileOld), Rewrite, ok),
+    ok = file_handle_cache:close(RefOld),
+    ok = file_handle_cache:close(RefNew),
+    ok.
diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl
index 077400d6..d6dc5568 100644
--- a/src/rabbit_msg_store_ets_index.erl
+++ b/src/rabbit_msg_store_ets_index.erl
@@ -31,7 +31,7 @@
 
 new(Dir) ->
     file:delete(filename:join(Dir, ?FILENAME)),
-    Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]),
+    Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]),
     #state { table = Tid, dir = Dir }.
 
 recover(Dir) ->
diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl
deleted file mode 100644
index ebd7fe8a..00000000
--- a/src/rabbit_multi.erl
+++ /dev/null
@@ -1,349 +0,0 @@
-%% The contents of this file are subject to the Mozilla Public License
-%% Version 1.1 (the "License"); you may not use this file except in
-%% compliance with the License. You may obtain a copy of the License
-%% at http://www.mozilla.org/MPL/
-%%
-%% Software distributed under the License is distributed on an "AS IS"
-%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
-%% the License for the specific language governing rights and
-%% limitations under the License.
-%%
-%% The Original Code is RabbitMQ.
-%%
-%% The Initial Developer of the Original Code is VMware, Inc.
-%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
-%%
-
--module(rabbit_multi).
--include("rabbit.hrl").
-
--export([start/0, stop/0]).
-
--define(RPC_SLEEP, 500).
-
-%%----------------------------------------------------------------------------
-
--ifdef(use_specs).
-
--spec(start/0 :: () -> no_return()).
--spec(stop/0 :: () -> 'ok').
--spec(usage/0 :: () -> no_return()).
-
--endif.
-
-%%----------------------------------------------------------------------------
-
-start() ->
-    RpcTimeout =
-        case init:get_argument(maxwait) of
-            {ok,[[N1]]} -> 1000 * list_to_integer(N1);
-            _           -> ?MAX_WAIT
-        end,
-    case init:get_plain_arguments() of
-        [] ->
-            usage();
-        FullCommand ->
-            {Command, Args} = parse_args(FullCommand),
-            case catch action(Command, Args, RpcTimeout) of
-                ok ->
-                    io:format("done.~n"),
-                    halt();
-                {'EXIT', {function_clause, [{?MODULE, action, _} | _]}} ->
-                    print_error("invalid command '~s'",
-                                [string:join(FullCommand, " ")]),
-                    usage();
-                timeout ->
-                    print_error("timeout starting some nodes.", []),
-                    halt(1);
-                Other ->
-                    print_error("~p", [Other]),
-                    halt(2)
-            end
-    end.
-
-print_error(Format, Args) ->
-    rabbit_misc:format_stderr("Error: " ++ Format ++ "~n", Args).
-
-parse_args([Command | Args]) ->
-    {list_to_atom(Command), Args}.
-
-stop() ->
-    ok.
-
-usage() ->
-    io:format("~s", [rabbit_multi_usage:usage()]),
-    halt(1).
-
-action(start_all, [NodeCount], RpcTimeout) ->
-    io:format("Starting all nodes...~n", []),
-    application:load(rabbit),
-    {_NodeNamePrefix, NodeHost} = NodeName = rabbit_misc:nodeparts(
-                                               getenv("RABBITMQ_NODENAME")),
-    case net_adm:names(NodeHost) of
-        {error, EpmdReason} ->
-            throw({cannot_connect_to_epmd, NodeHost, EpmdReason});
-        {ok, _} ->
-            ok
-    end,
-    {NodePids, Running} =
-        case list_to_integer(NodeCount) of
-            1 -> {NodePid, Started} = start_node(rabbit_misc:makenode(NodeName),
-                                                 RpcTimeout),
-                 {[NodePid], Started};
-            N -> start_nodes(N, N, [], true, NodeName,
-                             get_node_tcp_listener(), RpcTimeout)
-        end,
-    write_pids_file(NodePids),
-    case Running of
-        true  -> ok;
-        false -> timeout
-    end;
-
-action(status, [], RpcTimeout) ->
-    io:format("Status of all running nodes...~n", []),
-    call_all_nodes(
-      fun ({Node, Pid}) ->
-              RabbitRunning =
-                  case is_rabbit_running(Node, RpcTimeout) of
-                      false -> not_running;
-                      true  -> running
-                  end,
-              io:format("Node '~p' with Pid ~p: ~p~n",
-                        [Node, Pid, RabbitRunning])
-      end);
-
-action(stop_all, [], RpcTimeout) ->
-    io:format("Stopping all nodes...~n", []),
-    call_all_nodes(fun ({Node, Pid}) ->
-                           io:format("Stopping node ~p~n", [Node]),
-                           rpc:call(Node, rabbit, stop_and_halt, []),
-                           case kill_wait(Pid, RpcTimeout, false) of
-                               false -> kill_wait(Pid, RpcTimeout, true);
-                               true  -> ok
-                           end,
-                           io:format("OK~n", [])
-                   end),
-    delete_pids_file();
-
-action(rotate_logs, [], RpcTimeout) ->
-    action(rotate_logs, [""], RpcTimeout);
-
-action(rotate_logs, [Suffix], RpcTimeout) ->
-    io:format("Rotating logs for all nodes...~n", []),
-    BinarySuffix = list_to_binary(Suffix),
-    call_all_nodes(
-      fun ({Node, _}) ->
-              io:format("Rotating logs for node ~p", [Node]),
-              case rpc:call(Node, rabbit, rotate_logs,
-                            [BinarySuffix], RpcTimeout) of
-                  {badrpc, Error} -> io:format(": ~p.~n", [Error]);
-                  ok              -> io:format(": ok.~n", [])
-              end
-      end).
-
-%% PNodePid is the list of PIDs
-%% Running is a boolean exhibiting success at some moment
-start_nodes(0, _, PNodePid, Running, _, _, _) -> {PNodePid, Running};
-
-start_nodes(N, Total, PNodePid, Running, NodeNameBase, Listener, RpcTimeout) ->
-    {NodePre, NodeSuff} = NodeNameBase,
-    NodeNumber = Total - N,
-    NodePre1 = case NodeNumber of
-                   %% For compatibility with running a single node
-                   0 -> NodePre;
-                   _ -> NodePre ++ "_" ++ integer_to_list(NodeNumber)
-               end,
-    Node = rabbit_misc:makenode({NodePre1, NodeSuff}),
-    os:putenv("RABBITMQ_NODENAME", atom_to_list(Node)),
-    case Listener of
-        {NodeIpAddress, NodePortBase} ->
-            NodePort = NodePortBase + NodeNumber,
-            os:putenv("RABBITMQ_NODE_PORT", integer_to_list(NodePort)),
-            os:putenv("RABBITMQ_NODE_IP_ADDRESS", NodeIpAddress);
-        undefined ->
-            ok
-    end,
-    {NodePid, Started} = start_node(Node, RpcTimeout),
-    start_nodes(N - 1, Total, [NodePid | PNodePid],
-                Started and Running, NodeNameBase, Listener, RpcTimeout).
-
-start_node(Node, RpcTimeout) ->
-    io:format("Starting node ~s...~n", [Node]),
-    case rpc:call(Node, os, getpid, []) of
-        {badrpc, _} ->
-            Port = run_rabbitmq_server(),
-            Started = wait_for_rabbit_to_start(Node, RpcTimeout, Port),
-            Pid = case rpc:call(Node, os, getpid, []) of
-                      {badrpc, _} -> throw(cannot_get_pid);
-                      PidS -> list_to_integer(PidS)
-                  end,
-            io:format("~s~n", [case Started of
-                                   true  -> "OK";
-                                   false -> "timeout"
-                               end]),
-            {{Node, Pid}, Started};
-        PidS ->
-            Pid = list_to_integer(PidS),
-            throw({node_already_running, Node, Pid})
-    end.
-
-wait_for_rabbit_to_start(_ , RpcTimeout, _) when RpcTimeout < 0 ->
-    false;
-wait_for_rabbit_to_start(Node, RpcTimeout, Port) ->
-    case is_rabbit_running(Node, RpcTimeout) of
-        true  -> true;
-        false -> receive
-                     {'EXIT', Port, PosixCode} ->
-                         throw({node_start_failed, PosixCode})
-                 after ?RPC_SLEEP ->
-                         wait_for_rabbit_to_start(
-                           Node, RpcTimeout - ?RPC_SLEEP, Port)
-                 end
-    end.
-
-run_rabbitmq_server() ->
-    with_os([{unix, fun run_rabbitmq_server_unix/0},
-             {win32, fun run_rabbitmq_server_win32/0}]).
-
-run_rabbitmq_server_unix() ->
-    CmdLine = getenv("RABBITMQ_SCRIPT_HOME") ++ "/rabbitmq-server -noinput",
-    erlang:open_port({spawn, CmdLine}, [nouse_stdio]).
-
-run_rabbitmq_server_win32() ->
-    Cmd = filename:nativename(os:find_executable("cmd")),
-    CmdLine = "\"" ++ getenv("RABBITMQ_SCRIPT_HOME") ++
-              "\\rabbitmq-server.bat\" -noinput -detached",
-    erlang:open_port({spawn_executable, Cmd},
-                     [{arg0, Cmd}, {args, ["/q", "/s", "/c", CmdLine]},
-                      nouse_stdio]).
-
-is_rabbit_running(Node, RpcTimeout) ->
-    case rpc:call(Node, rabbit, status, [], RpcTimeout) of
-        {badrpc, _} -> false;
-        Status      -> case proplists:get_value(running_applications, Status) of
-                           undefined -> false;
-                           Apps      -> lists:keymember(rabbit, 1, Apps)
-                       end
-    end.
-
-with_os(Handlers) ->
-    {OsFamily, _} = os:type(),
-    case proplists:get_value(OsFamily, Handlers) of
-        undefined -> throw({unsupported_os, OsFamily});
-        Handler   -> Handler()
-    end.
-
-pids_file() -> getenv("RABBITMQ_PIDS_FILE").
-
-write_pids_file(Pids) ->
-    FileName = pids_file(),
-    Handle = case file:open(FileName, [write]) of
-                 {ok, Device} ->
-                     Device;
-                 {error, Reason} ->
-                     throw({cannot_create_pids_file, FileName, Reason})
-             end,
-    try
-        ok = io:write(Handle, Pids),
-        ok = io:put_chars(Handle, [$.])
-    after
-        case file:close(Handle) of
-            ok -> ok;
-            {error, Reason1} ->
-                throw({cannot_create_pids_file, FileName, Reason1})
-        end
-    end,
-    ok.
-
-delete_pids_file() ->
-    FileName = pids_file(),
-    case file:delete(FileName) of
-        ok              -> ok;
-        {error, enoent} -> ok;
-        {error, Reason} -> throw({cannot_delete_pids_file, FileName, Reason})
-    end.
-
-read_pids_file() ->
-    FileName = pids_file(),
-    case file:consult(FileName) of
-        {ok, [Pids]}    -> Pids;
-        {error, enoent} -> [];
-        {error, Reason} -> throw({cannot_read_pids_file, FileName, Reason})
-    end.
-
-kill_wait(Pid, TimeLeft, Forceful) when TimeLeft < 0 ->
-    Cmd = with_os([{unix, fun () -> if Forceful -> "kill -9";
-                                       true     -> "kill"
-                                    end
-                          end},
-                   %% Kill forcefully always on Windows, since erl.exe
-                   %% seems to completely ignore non-forceful killing
-                   %% even when everything is working
-                   {win32, fun () -> "taskkill /f /pid" end}]),
-    os:cmd(Cmd ++ " " ++ integer_to_list(Pid)),
-    false; % Don't assume what we did just worked!
-
-% Returns true if the process is dead, false otherwise.
-kill_wait(Pid, TimeLeft, Forceful) ->
-    timer:sleep(?RPC_SLEEP),
-    io:format(".", []),
-    is_dead(Pid) orelse kill_wait(Pid, TimeLeft - ?RPC_SLEEP, Forceful).
-
-% Test using some OS clunkiness since we shouldn't trust
-% rpc:call(os, getpid, []) at this point
-is_dead(Pid) ->
-    PidS = integer_to_list(Pid),
-    with_os([{unix, fun () ->
-                            system("kill -0 " ++ PidS
-                                   ++ " >/dev/null 2>&1") /= 0
-                    end},
-             {win32, fun () ->
-                             Res = os:cmd("tasklist /nh /fi \"pid eq " ++
-                                          PidS ++ "\" 2>&1"),
-                             case re:run(Res, "erl\\.exe", [{capture, none}]) of
-                                 match -> false;
-                                 _     -> true
-                             end
-                     end}]).
-
-% Like system(3)
-system(Cmd) ->
-    ShCmd = "sh -c '" ++ escape_quotes(Cmd) ++ "'",
-    Port = erlang:open_port({spawn, ShCmd}, [exit_status,nouse_stdio]),
-    receive {Port, {exit_status, Status}} -> Status end.
-
-% Escape the quotes in a shell command so that it can be used in "sh -c 'cmd'"
-escape_quotes(Cmd) ->
-    lists:flatten(lists:map(fun ($') -> "'\\''"; (Ch) -> Ch end, Cmd)).
-
-call_all_nodes(Func) ->
-    case read_pids_file() of
-        []       -> throw(no_nodes_running);
-        NodePids -> lists:foreach(Func, NodePids)
-    end.
-
-getenv(Var) ->
-    case os:getenv(Var) of
-        false -> throw({missing_env_var, Var});
-        Value -> Value
-    end.
-
-get_node_tcp_listener() ->
-    try
-        {getenv("RABBITMQ_NODE_IP_ADDRESS"),
-         list_to_integer(getenv("RABBITMQ_NODE_PORT"))}
-    catch _ ->
-            case application:get_env(rabbit, tcp_listeners) of
-                {ok, [{_IpAddy, _Port} = Listener]} ->
-                    Listener;
-                {ok, [Port]} when is_number(Port) ->
-                    {"0.0.0.0", Port};
-                {ok, []} ->
-                    undefined;
-                {ok, Other} ->
-                    throw({cannot_start_multiple_nodes, multiple_tcp_listeners,
-                           Other});
-                undefined ->
-                    throw({missing_configuration, tcp_listeners})
-            end
-    end.
diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl
index c500548a..b944ec81 100644
--- a/src/rabbit_net.erl
+++ b/src/rabbit_net.erl
@@ -18,7 +18,7 @@
 -include("rabbit.hrl").
 
 -export([is_ssl/1, ssl_info/1, controlling_process/2, getstat/2,
-         async_recv/3, port_command/2, send/2, close/1,
+         recv/1, async_recv/3, port_command/2, setopts/2, send/2, close/1,
          sockname/1, peername/1, peercert/1]).
 
 %%---------------------------------------------------------------------------
@@ -28,8 +28,8 @@
 -export_type([socket/0]).
 
 -type(stat_option() ::
-	'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' |
-	'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend').
+        'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' |
+        'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend').
 -type(ok_val_or_error(A) :: rabbit_types:ok_or_error2(A, any())).
 -type(ok_or_any_error() :: rabbit_types:ok_or_error(any())).
 -type(socket() :: port() | #ssl_socket{}).
@@ -42,9 +42,15 @@
 -spec(getstat/2 ::
         (socket(), [stat_option()])
         -> ok_val_or_error([{stat_option(), integer()}])).
+-spec(recv/1 :: (socket()) ->
+                     {'data', [char()] | binary()} | 'closed' |
+                     rabbit_types:error(any()) | {'other', any()}).
 -spec(async_recv/3 ::
         (socket(), integer(), timeout()) -> rabbit_types:ok(any())).
 -spec(port_command/2 :: (socket(), iolist()) -> 'true').
+-spec(setopts/2 :: (socket(), [{atom(), any()} |
+                               {raw, non_neg_integer(), non_neg_integer(),
+                                binary()}]) -> ok_or_any_error()).
 -spec(send/2 :: (socket(), binary() | iolist()) -> ok_or_any_error()).
 -spec(close/1 :: (socket()) -> ok_or_any_error()).
 -spec(sockname/1 ::
@@ -80,6 +86,19 @@ getstat(Sock, Stats) when ?IS_SSL(Sock) ->
 getstat(Sock, Stats) when is_port(Sock) ->
     inet:getstat(Sock, Stats).
 
+recv(Sock) when ?IS_SSL(Sock) -> % SSL: match on the inner ssl handle
+    recv(Sock#ssl_socket.ssl, {ssl, ssl_closed, ssl_error});
+recv(Sock) when is_port(Sock) -> % plain TCP port
+    recv(Sock, {tcp, tcp_closed, tcp_error}).
+
+recv(S, {DataTag, ClosedTag, ErrorTag}) ->
+    receive % no 'after' clause: waits until some message arrives
+        {DataTag, S, Data}    -> {data, Data};
+        {ClosedTag, S}        -> closed;
+        {ErrorTag, S, Reason} -> {error, Reason};
+        Other                 -> {other, Other} % anything else in the mailbox
+    end.
+
 async_recv(Sock, Length, Timeout) when ?IS_SSL(Sock) ->
     Pid = self(),
     Ref = make_ref(),
@@ -103,6 +122,11 @@ port_command(Sock, Data) when ?IS_SSL(Sock) ->
 port_command(Sock, Data) when is_port(Sock) ->
     erlang:port_command(Sock, Data).
 
+setopts(Sock, Options) when ?IS_SSL(Sock) -> % SSL: apply to the ssl handle
+    ssl:setopts(Sock#ssl_socket.ssl, Options);
+setopts(Sock, Options) when is_port(Sock) -> % plain TCP port
+    inet:setopts(Sock, Options).
+
 send(Sock, Data) when ?IS_SSL(Sock) -> ssl:send(Sock#ssl_socket.ssl, Data);
 send(Sock, Data) when is_port(Sock) -> gen_tcp:send(Sock, Data).
 
diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl
index 283d25c7..451e56e8 100644
--- a/src/rabbit_networking.erl
+++ b/src/rabbit_networking.erl
@@ -24,7 +24,8 @@
          close_connection/2]).
 
 %%used by TCP-based transports, e.g. STOMP adapter
--export([check_tcp_listener_address/2]).
+-export([check_tcp_listener_address/2,
+         ensure_ssl/0, ssl_transform_fun/1]).
 
 -export([tcp_listener_started/3, tcp_listener_stopped/3,
          start_client/1, start_ssl_client/2]).
@@ -32,16 +33,6 @@
 -include("rabbit.hrl").
 -include_lib("kernel/include/inet.hrl").
 
--define(RABBIT_TCP_OPTS, [
-        binary,
-        {packet, raw}, % no packaging
-        {reuseaddr, true}, % allow rebind without waiting
-        {backlog, 128}, % use the maximum listen(2) backlog value
-        %% {nodelay, true}, % TCP_NODELAY - disable Nagle's alg.
-        %% {delay_send, true},
-        {exit_on_close, false}
-    ]).
-
 -define(SSL_TIMEOUT, 5). %% seconds
 
 -define(FIRST_TEST_BIND_PORT, 10000).
@@ -52,6 +43,9 @@
 
 -export_type([ip_port/0, hostname/0]).
 
+-type(hostname() :: inet:hostname()).
+-type(ip_port() :: inet:ip_port()).
+
 -type(family() :: atom()).
 -type(listener_config() :: ip_port() |
                            {hostname(), ip_port()} |
@@ -98,19 +92,8 @@ boot_ssl() ->
         {ok, []} ->
             ok;
         {ok, SslListeners} ->
-            ok = rabbit_misc:start_applications([crypto, public_key, ssl]),
-            {ok, SslOptsConfig} = application:get_env(ssl_options),
-            % unknown_ca errors are silently ignored  prior to R14B unless we
-            % supply this verify_fun - remove when at least R14B is required
-            SslOpts =
-                case proplists:get_value(verify, SslOptsConfig, verify_none) of
-                    verify_none -> SslOptsConfig;
-                    verify_peer -> [{verify_fun, fun([])    -> true;
-                                                    ([_|_]) -> false
-                                                 end}
-                                   | SslOptsConfig]
-                end,
-            [start_ssl_listener(Listener, SslOpts) || Listener <- SslListeners],
+            [start_ssl_listener(Listener, ensure_ssl())
+             || Listener <- SslListeners],
             ok
     end.
 
@@ -157,6 +140,34 @@ resolve_family({_,_,_,_,_,_,_,_}, auto) -> inet6;
 resolve_family(IP,                auto) -> throw({error, {strange_family, IP}});
 resolve_family(_,                 F)    -> F.
 
+ensure_ssl() -> % returns ssl_options, plus a verify_fun for verify_peer
+    ok = rabbit_misc:start_applications([crypto, public_key, ssl]),
+    {ok, SslOptsConfig} = application:get_env(rabbit, ssl_options),
+
+    %% unknown_ca errors are silently ignored prior to R14B unless we
+    %% supply this verify_fun - remove when at least R14B is required
+    case proplists:get_value(verify, SslOptsConfig, verify_none) of
+        verify_none -> SslOptsConfig;
+        verify_peer -> [{verify_fun, fun([])    -> true;
+                                        ([_|_]) -> false
+                                     end}
+                        | SslOptsConfig]
+    end.
+
+ssl_transform_fun(SslOpts) -> % returns a fun that upgrades a TCP socket to SSL
+    fun (Sock) -> % handshake is limited to ?SSL_TIMEOUT seconds
+            case catch ssl:ssl_accept(Sock, SslOpts, ?SSL_TIMEOUT * 1000) of
+                {ok, SslSock} ->
+                    rabbit_log:info("upgraded TCP connection ~p to SSL~n",
+                                    [self()]),
+                    {ok, #ssl_socket{tcp = Sock, ssl = SslSock}};
+                {error, Reason} -> % ssl_accept returned an error
+                    {error, {ssl_upgrade_error, Reason}};
+                {'EXIT', Reason} -> % ssl_accept crashed; trapped by 'catch'
+                    {error, {ssl_upgrade_failure, Reason}}
+            end
+    end.
+
 check_tcp_listener_address(NamePrefix, Port) when is_integer(Port) ->
     check_tcp_listener_address_auto(NamePrefix, Port);
 
@@ -200,7 +211,7 @@ start_listener0({IPAddress, Port, Family, Name}, Protocol, Label, OnConnect) ->
                rabbit_sup,
                {Name,
                 {tcp_listener_sup, start_link,
-                 [IPAddress, Port, [Family | ?RABBIT_TCP_OPTS],
+                 [IPAddress, Port, [Family | tcp_opts()],
                   {?MODULE, tcp_listener_started, [Protocol]},
                   {?MODULE, tcp_listener_stopped, [Protocol]},
                   OnConnect, Label]},
@@ -256,21 +267,7 @@ start_client(Sock) ->
     start_client(Sock, fun (S) -> {ok, S} end).
 
 start_ssl_client(SslOpts, Sock) ->
-    start_client(
-      Sock,
-      fun (Sock1) ->
-              case catch ssl:ssl_accept(Sock1, SslOpts, ?SSL_TIMEOUT * 1000) of
-                  {ok, SslSock} ->
-                      rabbit_log:info("upgraded TCP connection ~p to SSL~n",
-                                      [self()]),
-                      {ok, #ssl_socket{tcp = Sock1, ssl = SslSock}};
-                  {error, Reason} ->
-                      {error, {ssl_upgrade_error, Reason}};
-                  {'EXIT', Reason} ->
-                      {error, {ssl_upgrade_failure, Reason}}
-
-              end
-      end).
+    start_client(Sock, ssl_transform_fun(SslOpts)).
 
 connections() ->
     [rabbit_connection_sup:reader(ConnSup) ||
@@ -315,6 +312,10 @@ hostname() ->
 
 cmap(F) -> rabbit_misc:filter_exit_map(F, connections()).
 
+tcp_opts() -> % TCP listen options from the rabbit app env; crashes if unset
+    {ok, Opts} = application:get_env(rabbit, tcp_listen_options),
+    Opts.
+
 %%--------------------------------------------------------------------
 
 %% There are three kinds of machine (for our purposes).
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index e4bc1cdc..1f30a2fc 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -22,14 +22,41 @@
 
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
+-export([notify_cluster/0, rabbit_running_on/1]).
 
 -define(SERVER, ?MODULE).
+-define(RABBIT_UP_RPC_TIMEOUT, 2000).
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-spec(rabbit_running_on/1 :: (node()) -> 'ok').
+-spec(notify_cluster/0 :: () -> 'ok').
+
+-endif.
 
 %%--------------------------------------------------------------------
 
 start_link() ->
     gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
 
+rabbit_running_on(Node) -> % asynchronous; processed in handle_cast/2
+    gen_server:cast(?SERVER, {rabbit_running_on, Node}).
+
+notify_cluster() ->
+    Self = node(),
+    Peers = rabbit_mnesia:running_clustered_nodes() -- [Self],
+    %% Tell every other running rabbit that this one is up ...
+    {_, Unreachable} = rpc:multicall(Peers, ?MODULE, rabbit_running_on,
+                                     [Self], ?RABBIT_UP_RPC_TIMEOUT),
+    case Unreachable of
+        [] -> ok;
+        _  -> rabbit_log:info("failed to contact nodes ~p~n", [Unreachable])
+    end,
+    %% ... and start watching each of them from this node.
+    lists:foreach(fun rabbit_running_on/1, Peers).
+
 %%--------------------------------------------------------------------
 
 init([]) ->
@@ -39,19 +66,21 @@ init([]) ->
 handle_call(_Request, _From, State) ->
     {noreply, State}.
 
+handle_cast({rabbit_running_on, Node}, State) ->
+    rabbit_log:info("node ~p up~n", [Node]),
+    erlang:monitor(process, {rabbit, Node}), % 'DOWN' lands in handle_info/2
+    ok = rabbit_alarm:on_node_up(Node),
+    {noreply, State};
 handle_cast(_Msg, State) ->
     {noreply, State}.
 
-handle_info({nodeup, Node}, State) ->
-    rabbit_log:info("node ~p up", [Node]),
-    {noreply, State};
 handle_info({nodedown, Node}, State) ->
-    rabbit_log:info("node ~p down", [Node]),
-    %% TODO: This may turn out to be a performance hog when there are
-    %% lots of nodes.  We really only need to execute this code on
-    %% *one* node, rather than all of them.
-    ok = rabbit_networking:on_node_down(Node),
-    ok = rabbit_amqqueue:on_node_down(Node),
+    rabbit_log:info("node ~p down~n", [Node]),
+    ok = handle_dead_rabbit(Node),
+    {noreply, State};
+handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, State) ->
+    rabbit_log:info("node ~p lost 'rabbit'~n", [Node]),
+    ok = handle_dead_rabbit(Node),
     {noreply, State};
 handle_info(_Info, State) ->
     {noreply, State}.
@@ -64,3 +93,10 @@ code_change(_OldVsn, State, _Extra) ->
 
 %%--------------------------------------------------------------------
 
+%% TODO: This may turn out to be a performance hog when there are lots
+%% of nodes.  We really only need to execute some of these statements
+%% on *one* node, rather than all of them.
+handle_dead_rabbit(Node) -> % called for nodedown and for 'DOWN' of rabbit
+    ok = rabbit_networking:on_node_down(Node),
+    ok = rabbit_amqqueue:on_node_down(Node),
+    ok = rabbit_alarm:on_node_down(Node).
diff --git a/src/rabbit_prelaunch.erl b/src/rabbit_prelaunch.erl
index d9d92788..92829e49 100644
--- a/src/rabbit_prelaunch.erl
+++ b/src/rabbit_prelaunch.erl
@@ -67,11 +67,15 @@ start() ->
              AppVersions},
 
     %% Write it out to $RABBITMQ_PLUGINS_EXPAND_DIR/rabbit.rel
-    file:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])),
+    rabbit_misc:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])),
+
+    %% We exclude mochiweb due to its optional use of fdsrv.
+    XRefExclude = [mochiweb],
 
     %% Compile the script
     ScriptFile = RootName ++ ".script",
-    case systools:make_script(RootName, [local, silent, exref]) of
+    case systools:make_script(RootName, [local, silent,
+                                         {exref, AllApps -- XRefExclude}]) of
         {ok, Module, Warnings} ->
             %% This gets lots of spurious no-source warnings when we
             %% have .ez files, so we want to supress them to prevent
@@ -93,7 +97,8 @@ start() ->
                                  end]),
             case length(WarningStr) of
                 0 -> ok;
-                _ -> io:format("~s", [WarningStr])
+                _ -> S = string:copies("*", 80),
+                     io:format("~n~s~n~s~s~n~n", [S, WarningStr, S])
             end,
             ok;
         {error, Module, Error} ->
@@ -235,7 +240,7 @@ post_process_script(ScriptFile) ->
             {error, {failed_to_load_script, Reason}}
     end.
 
-process_entry(Entry = {apply,{application,start_boot,[rabbit,permanent]}}) ->
+process_entry(Entry = {apply,{application,start_boot,[mnesia,permanent]}}) ->
     [{apply,{rabbit,prepare,[]}}, Entry];
 process_entry(Entry) ->
     [Entry].
@@ -250,16 +255,21 @@ duplicate_node_check(NodeStr) ->
     case net_adm:names(NodeHost) of
         {ok, NamePorts}  ->
             case proplists:is_defined(NodeName, NamePorts) of
-                     true -> io:format("node with name ~p "
-                                       "already running on ~p~n",
-                                       [NodeName, NodeHost]),
-                             [io:format(Fmt ++ "~n", Args) ||
-                              {Fmt, Args} <- rabbit_control:diagnostics(Node)],
-                             terminate(?ERROR_CODE);
-                     false -> ok
+                true -> io:format("node with name ~p "
+                                  "already running on ~p~n",
+                                  [NodeName, NodeHost]),
+                        [io:format(Fmt ++ "~n", Args) ||
+                            {Fmt, Args} <- rabbit_control:diagnostics(Node)],
+                        terminate(?ERROR_CODE);
+                false -> ok
             end;
-        {error, EpmdReason} -> terminate("unexpected epmd error: ~p~n",
-                                         [EpmdReason])
+        {error, EpmdReason} ->
+            terminate("epmd error for host ~p: ~p (~s)~n",
+                      [NodeHost, EpmdReason,
+                       case EpmdReason of
+                           address -> "unable to establish tcp connection";
+                           _       -> inet:format_error(EpmdReason)
+                       end])
     end.
 
 terminate(Fmt, Args) ->
diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 76b1136f..bf89cdb2 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -76,17 +76,16 @@
 %% the segment file combined with the journal, no writing needs to be
 %% done to the segment file either (in fact it is deleted if it exists
 %% at all). This is safe given that the set of acks is a subset of the
-%% set of publishes. When it's necessary to sync messages because of
-%% transactions, it's only necessary to fsync on the journal: when
-%% entries are distributed from the journal to segment files, those
-%% segments appended to are fsync'd prior to the journal being
-%% truncated.
+%% set of publishes. When it is necessary to sync messages, it is
+%% sufficient to fsync on the journal: when entries are distributed
+%% from the journal to segment files, those segments appended to are
+%% fsync'd prior to the journal being truncated.
 %%
 %% This module is also responsible for scanning the queue index files
 %% and seeding the message store on start up.
 %%
 %% Note that in general, the representation of a message's state as
-%% the tuple: {('no_pub'|{Guid, MsgProps, IsPersistent}),
+%% the tuple: {('no_pub'|{MsgId, MsgProps, IsPersistent}),
 %% ('del'|'no_del'), ('ack'|'no_ack')} is richer than strictly
 %% necessary for most operations. However, for startup, and to ensure
 %% the safe and correct combination of journal entries with entries
@@ -126,31 +125,33 @@
 %% (range: 0 - 16383)
 -define(REL_SEQ_ONLY_PREFIX, 00).
 -define(REL_SEQ_ONLY_PREFIX_BITS, 2).
--define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2).
+-define(REL_SEQ_ONLY_RECORD_BYTES, 2).
 
 %% publish record is binary 1 followed by a bit for is_persistent,
 %% then 14 bits of rel seq id, 64 bits for message expiry and 128 bits
 %% of md5sum msg id
--define(PUBLISH_PREFIX, 1).
--define(PUBLISH_PREFIX_BITS, 1).
+-define(PUB_PREFIX, 1).
+-define(PUB_PREFIX_BITS, 1).
 
 -define(EXPIRY_BYTES, 8).
 -define(EXPIRY_BITS, (?EXPIRY_BYTES * 8)).
 -define(NO_EXPIRY, 0).
 
--define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes
--define(GUID_BITS, (?GUID_BYTES * 8)).
-%% 16 bytes for md5sum + 8 for expiry + 2 for seq, bits and prefix
--define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + ?EXPIRY_BYTES + 2).
+-define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes
+-define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)).
+
+%% 16 bytes for md5sum + 8 for expiry
+-define(PUB_RECORD_BODY_BYTES, (?MSG_ID_BYTES + ?EXPIRY_BYTES)).
+%% + 2 for seq, bits and prefix
+-define(PUB_RECORD_BYTES, (?PUB_RECORD_BODY_BYTES + 2)).
 
 %% 1 publish, 1 deliver, 1 ack per msg
 -define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT *
-        (?PUBLISH_RECORD_LENGTH_BYTES +
-         (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))).
+            (?PUB_RECORD_BYTES + (2 * ?REL_SEQ_ONLY_RECORD_BYTES))).
 
 %% ---- misc ----
 
--define(PUB, {_, _, _}). %% {Guid, MsgProps, IsPersistent}
+-define(PUB, {_, _, _}). %% {MsgId, MsgProps, IsPersistent}
 
 -define(READ_MODE, [binary, raw, read]).
 -define(READ_AHEAD_MODE, [{read_ahead, ?SEGMENT_TOTAL_SIZE} | ?READ_MODE]).
@@ -159,7 +160,7 @@
 %%----------------------------------------------------------------------------
 
 -record(qistate, { dir, segments, journal_handle, dirty_count,
-                   max_journal_entries, on_sync, unsynced_guids }).
+                   max_journal_entries, on_sync, unsynced_msg_ids }).
 
 -record(segment, { num, path, journal_entries, unacked }).
 
@@ -167,7 +168,7 @@
 
 %%----------------------------------------------------------------------------
 
--rabbit_upgrade({add_queue_ttl, []}).
+-rabbit_upgrade({add_queue_ttl, local, []}).
 
 -ifdef(use_specs).
 
@@ -177,7 +178,7 @@
                                path            :: file:filename(),
                                journal_entries :: array(),
                                unacked         :: non_neg_integer()
-                              })).
+                             })).
 -type(seq_id() :: integer()).
 -type(seg_dict() :: {dict(), [segment()]}).
 -type(on_sync_fun() :: fun ((gb_set()) -> ok)).
@@ -187,21 +188,21 @@
                               dirty_count         :: integer(),
                               max_journal_entries :: non_neg_integer(),
                               on_sync             :: on_sync_fun(),
-                              unsynced_guids      :: [rabbit_guid:guid()]
-                             }).
--type(startup_fun_state() ::
-        {fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A}),
-         A}).
+                              unsynced_msg_ids    :: [rabbit_types:msg_id()]
+                            }).
+-type(contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean())).
+-type(walker(A) :: fun ((A) -> 'finished' |
+                               {rabbit_types:msg_id(), non_neg_integer(), A})).
 -type(shutdown_terms() :: [any()]).
 
 -spec(init/2 :: (rabbit_amqqueue:name(), on_sync_fun()) -> qistate()).
 -spec(shutdown_terms/1 :: (rabbit_amqqueue:name()) -> shutdown_terms()).
 -spec(recover/5 :: (rabbit_amqqueue:name(), shutdown_terms(), boolean(),
-                    fun ((rabbit_guid:guid()) -> boolean()), on_sync_fun()) ->
-             {'undefined' | non_neg_integer(), qistate()}).
+                    contains_predicate(), on_sync_fun()) ->
+                        {'undefined' | non_neg_integer(), qistate()}).
 -spec(terminate/2 :: ([any()], qistate()) -> qistate()).
 -spec(delete_and_terminate/1 :: (qistate()) -> qistate()).
--spec(publish/5 :: (rabbit_guid:guid(), seq_id(),
+-spec(publish/5 :: (rabbit_types:msg_id(), seq_id(),
                     rabbit_types:message_properties(), boolean(), qistate())
                    -> qistate()).
 -spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()).
@@ -209,14 +210,13 @@
 -spec(sync/2 :: ([seq_id()], qistate()) -> qistate()).
 -spec(flush/1 :: (qistate()) -> qistate()).
 -spec(read/3 :: (seq_id(), seq_id(), qistate()) ->
-                     {[{rabbit_guid:guid(), seq_id(),
+                     {[{rabbit_types:msg_id(), seq_id(),
                         rabbit_types:message_properties(),
                         boolean(), boolean()}], qistate()}).
 -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()).
 -spec(bounds/1 :: (qistate()) ->
-             {non_neg_integer(), non_neg_integer(), qistate()}).
--spec(recover/1 :: ([rabbit_amqqueue:name()]) ->
-                        {[[any()]], startup_fun_state()}).
+                       {non_neg_integer(), non_neg_integer(), qistate()}).
+-spec(recover/1 :: ([rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}).
 
 -spec(add_queue_ttl/0 :: () -> 'ok').
 
@@ -259,22 +259,22 @@ delete_and_terminate(State) ->
     ok = rabbit_misc:recursive_delete([Dir]),
     State1.
 
-publish(Guid, SeqId, MsgProps, IsPersistent,
-        State = #qistate { unsynced_guids = UnsyncedGuids })
-  when is_binary(Guid) ->
-    ?GUID_BYTES = size(Guid),
+publish(MsgId, SeqId, MsgProps, IsPersistent,
+        State = #qistate { unsynced_msg_ids = UnsyncedMsgIds })
+  when is_binary(MsgId) ->
+    ?MSG_ID_BYTES = size(MsgId),
     {JournalHdl, State1} = get_journal_handle(
                              State #qistate {
-                               unsynced_guids = [Guid | UnsyncedGuids] }),
+                               unsynced_msg_ids = [MsgId | UnsyncedMsgIds] }),
     ok = file_handle_cache:append(
            JournalHdl, [<<(case IsPersistent of
                                true  -> ?PUB_PERSIST_JPREFIX;
                                false -> ?PUB_TRANS_JPREFIX
                            end):?JPREFIX_BITS,
                           SeqId:?SEQ_BITS>>,
-                          create_pub_record_body(Guid, MsgProps)]),
+                        create_pub_record_body(MsgId, MsgProps)]),
     maybe_flush_journal(
-      add_to_journal(SeqId, {Guid, MsgProps, IsPersistent}, State1)).
+      add_to_journal(SeqId, {MsgId, MsgProps, IsPersistent}, State1)).
 
 deliver(SeqIds, State) ->
     deliver_or_ack(del, SeqIds, State).
@@ -284,18 +284,17 @@ ack(SeqIds, State) ->
 
 %% This is only called when there are outstanding confirms and the
 %% queue is idle.
-sync(State = #qistate { unsynced_guids = Guids }) ->
-    sync_if([] =/= Guids, State).
+sync(State = #qistate { unsynced_msg_ids = MsgIds }) ->
+    sync_if([] =/= MsgIds, State).
 
 sync(SeqIds, State) ->
-    %% The SeqIds here contains the SeqId of every publish and ack in
-    %% the transaction. Ideally we should go through these seqids and
-    %% only sync the journal if the pubs or acks appear in the
+    %% The SeqIds here contains the SeqId of every publish and ack to
+    %% be sync'ed. Ideally we should go through these seqids and only
+    %% sync the journal if the pubs or acks appear in the
     %% journal. However, this would be complex to do, and given that
     %% the variable queue publishes and acks to the qi, and then
     %% syncs, all in one operation, there is no possibility of the
-    %% seqids not being in the journal, provided the transaction isn't
-    %% emptied (handled by sync_if anyway).
+    %% seqids not being in the journal.
     sync_if([] =/= SeqIds, State).
 
 flush(State = #qistate { dirty_count = 0 }) -> State;
@@ -388,7 +387,7 @@ blank_state(QueueName) ->
                dirty_count         = 0,
                max_journal_entries = MaxJournal,
                on_sync             = fun (_) -> ok end,
-               unsynced_guids      = [] }.
+               unsynced_msg_ids    = [] }.
 
 clean_file_name(Dir) -> filename:join(Dir, ?CLEAN_FILENAME).
 
@@ -470,8 +469,9 @@ recover_segment(ContainsCheckFun, CleanShutdown,
     {SegEntries1, UnackedCountDelta} =
         segment_plus_journal(SegEntries, JEntries),
     array:sparse_foldl(
-      fun (RelSeq, {{Guid, _MsgProps, _IsPersistent}, Del, no_ack}, Segment1) ->
-              recover_message(ContainsCheckFun(Guid), CleanShutdown,
+      fun (RelSeq, {{MsgId, _MsgProps, _IsPersistent}, Del, no_ack},
+           Segment1) ->
+              recover_message(ContainsCheckFun(MsgId), CleanShutdown,
                               Del, RelSeq, Segment1)
       end,
       Segment #segment { unacked = UnackedCount + UnackedCountDelta },
@@ -512,20 +512,20 @@ queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) ->
 queue_index_walker({next, Gatherer}) when is_pid(Gatherer) ->
     case gatherer:out(Gatherer) of
         empty ->
+            unlink(Gatherer),
             ok = gatherer:stop(Gatherer),
-            ok = rabbit_misc:unlink_and_capture_exit(Gatherer),
             finished;
-        {value, {Guid, Count}} ->
-            {Guid, Count, {next, Gatherer}}
+        {value, {MsgId, Count}} ->
+            {MsgId, Count, {next, Gatherer}}
     end.
 
 queue_index_walker_reader(QueueName, Gatherer) ->
     State = #qistate { segments = Segments, dir = Dir } =
         recover_journal(blank_state(QueueName)),
     [ok = segment_entries_foldr(
-            fun (_RelSeq, {{Guid, _MsgProps, true}, _IsDelivered, no_ack},
+            fun (_RelSeq, {{MsgId, _MsgProps, true}, _IsDelivered, no_ack},
                  ok) ->
-                    gatherer:in(Gatherer, {Guid, 1});
+                    gatherer:in(Gatherer, {MsgId, 1});
                 (_RelSeq, _Value, Acc) ->
                     Acc
             end, ok, segment_find_or_new(Seg, Dir, Segments)) ||
@@ -537,27 +537,21 @@ queue_index_walker_reader(QueueName, Gatherer) ->
 %% expiry/binary manipulation
 %%----------------------------------------------------------------------------
 
-create_pub_record_body(Guid, #message_properties{expiry = Expiry}) ->
-    [Guid, expiry_to_binary(Expiry)].
+create_pub_record_body(MsgId, #message_properties { expiry = Expiry }) ->
+    [MsgId, expiry_to_binary(Expiry)].
 
 expiry_to_binary(undefined) -> <<?NO_EXPIRY:?EXPIRY_BITS>>;
 expiry_to_binary(Expiry)    -> <<Expiry:?EXPIRY_BITS>>.
 
-read_pub_record_body(Hdl) ->
-    case file_handle_cache:read(Hdl, ?GUID_BYTES + ?EXPIRY_BYTES) of
-        {ok, Bin} ->
-            %% work around for binary data fragmentation. See
-            %% rabbit_msg_file:read_next/2
-            <<GuidNum:?GUID_BITS, Expiry:?EXPIRY_BITS>> = Bin,
-            <<Guid:?GUID_BYTES/binary>> = <<GuidNum:?GUID_BITS>>,
-            Exp = case Expiry of
-                      ?NO_EXPIRY -> undefined;
-                      X          -> X
-                  end,
-            {Guid, #message_properties{expiry = Exp}};
-        Error ->
-            Error
-    end.
+parse_pub_record_body(<<MsgIdNum:?MSG_ID_BITS, Expiry:?EXPIRY_BITS>>) ->
+    %% work around for binary data fragmentation. See
+    %% rabbit_msg_file:read_next/2
+    <<MsgId:?MSG_ID_BYTES/binary>> = <<MsgIdNum:?MSG_ID_BITS>>,
+    Exp = case Expiry of
+              ?NO_EXPIRY -> undefined;
+              X          -> X
+          end,
+    {MsgId, #message_properties { expiry = Exp }}.
 
 %%----------------------------------------------------------------------------
 %% journal manipulation
@@ -666,8 +660,8 @@ recover_journal(State) ->
                       journal_minus_segment(JEntries, SegEntries),
                   Segment #segment { journal_entries = JEntries1,
                                      unacked = (UnackedCountInJournal +
-                                                UnackedCountInSeg -
-                                                UnackedCountDuplicates) }
+                                                    UnackedCountInSeg -
+                                                    UnackedCountDuplicates) }
           end, Segments),
     State1 #qistate { segments = Segments1 }.
 
@@ -680,15 +674,16 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
                 ?ACK_JPREFIX ->
                     load_journal_entries(add_to_journal(SeqId, ack, State));
                 _ ->
-                    case read_pub_record_body(Hdl) of
-                        {Guid, MsgProps} ->
-                            Publish = {Guid, MsgProps,
-                                       case Prefix of
-                                           ?PUB_PERSIST_JPREFIX -> true;
-                                           ?PUB_TRANS_JPREFIX   -> false
-                                       end},
+                    case file_handle_cache:read(Hdl, ?PUB_RECORD_BODY_BYTES) of
+                        {ok, Bin} ->
+                            {MsgId, MsgProps} = parse_pub_record_body(Bin),
+                            IsPersistent = case Prefix of
+                                               ?PUB_PERSIST_JPREFIX -> true;
+                                               ?PUB_TRANS_JPREFIX   -> false
+                                           end,
                             load_journal_entries(
-                              add_to_journal(SeqId, Publish, State));
+                              add_to_journal(
+                                SeqId, {MsgId, MsgProps, IsPersistent}, State));
                         _ErrOrEoF -> %% err, we've lost at least a publish
                             State
                     end
@@ -716,9 +711,9 @@ sync_if(true, State = #qistate { journal_handle = JournalHdl }) ->
     ok = file_handle_cache:sync(JournalHdl),
     notify_sync(State).
 
-notify_sync(State = #qistate { unsynced_guids = UG, on_sync = OnSyncFun }) ->
+notify_sync(State = #qistate { unsynced_msg_ids = UG, on_sync = OnSyncFun }) ->
     OnSyncFun(gb_sets:from_list(UG)),
-    State #qistate { unsynced_guids = [] }.
+    State #qistate { unsynced_msg_ids = [] }.
 
 %%----------------------------------------------------------------------------
 %% segment manipulation
@@ -796,19 +791,19 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) ->
     ok = case Pub of
              no_pub ->
                  ok;
-             {Guid, MsgProps, IsPersistent} ->
+             {MsgId, MsgProps, IsPersistent} ->
                  file_handle_cache:append(
-                   Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
-                          (bool_to_int(IsPersistent)):1,
-                          RelSeq:?REL_SEQ_BITS>>,
-                          create_pub_record_body(Guid, MsgProps)])
+                   Hdl, [<<?PUB_PREFIX:?PUB_PREFIX_BITS,
+                           (bool_to_int(IsPersistent)):1,
+                           RelSeq:?REL_SEQ_BITS>>,
+                         create_pub_record_body(MsgId, MsgProps)])
          end,
     ok = case {Del, Ack} of
              {no_del, no_ack} ->
                  ok;
              _ ->
                  Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-                           RelSeq:?REL_SEQ_BITS>>,
+                            RelSeq:?REL_SEQ_BITS>>,
                  file_handle_cache:append(
                    Hdl, case {Del, Ack} of
                             {del, ack} -> [Binary, Binary];
@@ -821,10 +816,10 @@ read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq},
                      {Messages, Segments}, Dir) ->
     Segment = segment_find_or_new(Seg, Dir, Segments),
     {segment_entries_foldr(
-       fun (RelSeq, {{Guid, MsgProps, IsPersistent}, IsDelivered, no_ack}, Acc)
+       fun (RelSeq, {{MsgId, MsgProps, IsPersistent}, IsDelivered, no_ack}, Acc)
              when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso
                   (Seg < EndSeg   orelse EndRelSeq   >= RelSeq) ->
-               [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), MsgProps,
+               [ {MsgId, reconstruct_seq_id(StartSeg, RelSeq), MsgProps,
                   IsPersistent, IsDelivered == del} | Acc ];
            (_RelSeq, _Value, Acc) ->
                Acc
@@ -845,36 +840,47 @@ load_segment(KeepAcked, #segment { path = Path }) ->
         false -> {array_new(), 0};
         true  -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_AHEAD_MODE, []),
                  {ok, 0} = file_handle_cache:position(Hdl, bof),
-                 Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0),
+                 %% A segment file that exists but is empty makes the
+                 %% read return eof rather than {ok, Data}; treat that
+                 %% as a segment with no entries instead of crashing
+                 %% on a badmatch.
+                 Res = case file_handle_cache:read(Hdl, ?SEGMENT_TOTAL_SIZE) of
+                           {ok, SegData} -> load_segment_entries(
+                                              KeepAcked, SegData,
+                                              array_new(), 0);
+                           eof           -> {array_new(), 0}
+                       end,
                  ok = file_handle_cache:close(Hdl),
                  Res
     end.
 
-load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) ->
-    case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
-        {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
-              IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
-            {Guid, MsgProps} = read_pub_record_body(Hdl),
-            Obj = {{Guid, MsgProps, 1 == IsPersistentNum}, no_del, no_ack},
-            SegEntries1 = array:set(RelSeq, Obj, SegEntries),
-            load_segment_entries(KeepAcked, Hdl, SegEntries1,
-                                 UnackedCount + 1);
-        {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-              RelSeq:?REL_SEQ_BITS>>} ->
-            {UnackedCountDelta, SegEntries1} =
-                case array:get(RelSeq, SegEntries) of
-                    {Pub, no_del, no_ack} ->
-                        { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
-                    {Pub, del, no_ack} when KeepAcked ->
-                        {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
-                    {_Pub, del, no_ack} ->
-                        {-1, array:reset(RelSeq, SegEntries)}
-                end,
-            load_segment_entries(KeepAcked, Hdl, SegEntries1,
-                                 UnackedCount + UnackedCountDelta);
-        _ErrOrEoF ->
-            {SegEntries, UnackedCount}
-    end.
+load_segment_entries(KeepAcked,
+                     <<?PUB_PREFIX:?PUB_PREFIX_BITS,
+                       IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
+                       PubRecordBody:?PUB_RECORD_BODY_BYTES/binary,
+                       SegData/binary>>,
+                     SegEntries, UnackedCount) ->
+    {MsgId, MsgProps} = parse_pub_record_body(PubRecordBody),
+    Obj = {{MsgId, MsgProps, 1 == IsPersistentNum}, no_del, no_ack},
+    SegEntries1 = array:set(RelSeq, Obj, SegEntries),
+    load_segment_entries(KeepAcked, SegData, SegEntries1, UnackedCount + 1);
+load_segment_entries(KeepAcked,
+                     <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                       RelSeq:?REL_SEQ_BITS, SegData/binary>>,
+                     SegEntries, UnackedCount) ->
+    {UnackedCountDelta, SegEntries1} =
+        case array:get(RelSeq, SegEntries) of
+            {Pub, no_del, no_ack} ->
+                { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
+            {Pub, del, no_ack} when KeepAcked ->
+                {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
+            {_Pub, del, no_ack} ->
+                {-1, array:reset(RelSeq, SegEntries)}
+        end,
+    load_segment_entries(KeepAcked, SegData, SegEntries1,
+                         UnackedCount + UnackedCountDelta);
+load_segment_entries(_KeepAcked, _SegData, SegEntries, UnackedCount) ->
+    {SegEntries, UnackedCount}.
 
 array_new() ->
     array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).
@@ -1002,17 +1001,19 @@ add_queue_ttl_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                         Rest/binary>>) ->
     {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
 add_queue_ttl_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
-                        Guid:?GUID_BYTES/binary, Rest/binary>>) ->
-    {[<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid,
+                        MsgId:?MSG_ID_BYTES/binary, Rest/binary>>) ->
+    {[<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, MsgId,
       expiry_to_binary(undefined)], Rest};
 add_queue_ttl_journal(_) ->
     stop.
 
-add_queue_ttl_segment(<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, IsPersistentNum:1,
-                        RelSeq:?REL_SEQ_BITS, Guid:?GUID_BYTES/binary,
+add_queue_ttl_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
+                        RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BYTES/binary,
                         Rest/binary>>) ->
-    {[<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, IsPersistentNum:1,
-        RelSeq:?REL_SEQ_BITS>>, Guid, expiry_to_binary(undefined)], Rest};
+    {[<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>,
+      MsgId, expiry_to_binary(undefined)], Rest};
+%% Rest must be matched as /binary: an untyped trailing segment is an
+%% 8-bit integer, so this clause would only match one extra byte.
 add_queue_ttl_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-                        RelSeq:?REL_SEQ_BITS, Rest>>) ->
+                        RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
     {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
@@ -1035,8 +1034,8 @@ foreach_queue_index(Funs) ->
                 end)
      end || QueueDirName <- QueueDirNames],
     empty = gatherer:out(Gatherer),
-    ok = gatherer:stop(Gatherer),
-    ok = rabbit_misc:unlink_and_capture_exit(Gatherer).
+    unlink(Gatherer),
+    ok = gatherer:stop(Gatherer).
 
 transform_queue(Dir, Gatherer, {JournalFun, SegmentFun}) ->
     ok = transform_file(filename:join(Dir, ?JOURNAL_FILENAME), JournalFun),
diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl
index 34883058..3bc0e389 100644
--- a/src/rabbit_reader.erl
+++ b/src/rabbit_reader.erl
@@ -24,7 +24,7 @@
 
 -export([init/4, mainloop/2]).
 
--export([conserve_memory/2, server_properties/0]).
+-export([conserve_memory/2, server_properties/1]).
 
 -export([process_channel_frame/5]). %% used by erlang-client
 
@@ -33,14 +33,13 @@
 -define(CLOSING_TIMEOUT, 1).
 -define(CHANNEL_TERMINATION_TIMEOUT, 3).
 -define(SILENT_CLOSE_DELAY, 3).
--define(FRAME_MAX, 131072). %% set to zero once QPid fix their negotiation
 
-%---------------------------------------------------------------------------
+%%--------------------------------------------------------------------------
 
--record(v1, {parent, sock, connection, callback, recv_length, recv_ref,
+-record(v1, {parent, sock, connection, callback, recv_len, pending_recv,
              connection_state, queue_collector, heartbeater, stats_timer,
-             channel_sup_sup_pid, start_heartbeat_fun, auth_mechanism,
-             auth_state}).
+             channel_sup_sup_pid, start_heartbeat_fun, buf, buf_len,
+             auth_mechanism, auth_state}).
 
 -define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt,
                           send_pend, state, channels]).
@@ -55,98 +54,12 @@
 
 -define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]).
 
-%% connection lifecycle
-%%
-%% all state transitions and terminations are marked with *...*
-%%
-%% The lifecycle begins with: start handshake_timeout timer, *pre-init*
-%%
-%% all states, unless specified otherwise:
-%%   socket error -> *exit*
-%%   socket close -> *throw*
-%%   writer send failure -> *throw*
-%%   forced termination -> *exit*
-%%   handshake_timeout -> *throw*
-%% pre-init:
-%%   receive protocol header -> send connection.start, *starting*
-%% starting:
-%%   receive connection.start_ok -> *securing*
-%% securing:
-%%   check authentication credentials
-%%     if authentication success -> send connection.tune, *tuning*
-%%     if more challenge needed -> send connection.secure,
-%%                                 receive connection.secure_ok *securing*
-%%     otherwise send close, *exit*
-%% tuning:
-%%   receive connection.tune_ok -> start heartbeats, *opening*
-%% opening:
-%%   receive connection.open -> send connection.open_ok, *running*
-%% running:
-%%   receive connection.close ->
-%%     tell channels to terminate gracefully
-%%     if no channels then send connection.close_ok, start
-%%        terminate_connection timer, *closed*
-%%     else *closing*
-%%   forced termination
-%%   -> wait for channels to terminate forcefully, start
-%%      terminate_connection timer, send close, *exit*
-%%   channel exit with hard error
-%%   -> log error, wait for channels to terminate forcefully, start
-%%      terminate_connection timer, send close, *closed*
-%%   channel exit with soft error
-%%   -> log error, mark channel as closing, *running*
-%%   handshake_timeout -> ignore, *running*
-%%   heartbeat timeout -> *throw*
-%%   conserve_memory=true -> *blocking*
-%% blocking:
-%%   conserve_memory=true -> *blocking*
-%%   conserve_memory=false -> *running*
-%%   receive a method frame for a content-bearing method
-%%   -> process, stop receiving, *blocked*
-%%   ...rest same as 'running'
-%% blocked:
-%%   conserve_memory=true -> *blocked*
-%%   conserve_memory=false -> resume receiving, *running*
-%%   ...rest same as 'running'
-%% closing:
-%%   socket close -> *terminate*
-%%   receive connection.close -> send connection.close_ok,
-%%     *closing*
-%%   receive frame -> ignore, *closing*
-%%   handshake_timeout -> ignore, *closing*
-%%   heartbeat timeout -> *throw*
-%%   channel exit with hard error
-%%   -> log error, wait for channels to terminate forcefully, start
-%%      terminate_connection timer, send close, *closed*
-%%   channel exit with soft error
-%%   -> log error, mark channel as closing
-%%      if last channel to exit then send connection.close_ok,
-%%         start terminate_connection timer, *closed*
-%%      else *closing*
-%%   channel exits normally
-%%   -> if last channel to exit then send connection.close_ok,
-%%      start terminate_connection timer, *closed*
-%% closed:
-%%   socket close -> *terminate*
-%%   receive connection.close -> send connection.close_ok,
-%%     *closed*
-%%   receive connection.close_ok -> self() ! terminate_connection,
-%%     *closed*
-%%   receive frame -> ignore, *closed*
-%%   terminate_connection timeout -> *terminate*
-%%   handshake_timeout -> ignore, *closed*
-%%   heartbeat timeout -> *throw*
-%%   channel exit -> log error, *closed*
-%%
-%%
-%% TODO: refactor the code so that the above is obvious
-
 -define(IS_RUNNING(State),
         (State#v1.connection_state =:= running orelse
          State#v1.connection_state =:= blocking orelse
          State#v1.connection_state =:= blocked)).
 
-%%----------------------------------------------------------------------------
+%%--------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
@@ -157,7 +70,8 @@
 -spec(info/2 :: (pid(), rabbit_types:info_keys()) -> rabbit_types:infos()).
 -spec(shutdown/2 :: (pid(), string()) -> 'ok').
 -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok').
--spec(server_properties/0 :: () -> rabbit_framing:amqp_table()).
+-spec(server_properties/1 :: (rabbit_types:protocol()) ->
+                                  rabbit_framing:amqp_table()).
 
-%% These specs only exists to add no_return() to keep dialyzer happy
+%% These specs only exist to add no_return() to keep dialyzer happy
 -spec(init/4 :: (pid(), pid(), pid(), rabbit_heartbeat:start_heartbeat_fun())
@@ -213,7 +127,7 @@ conserve_memory(Pid, Conserve) ->
     Pid ! {conserve_memory, Conserve},
     ok.
 
-server_properties() ->
+server_properties(Protocol) ->
     {ok, Product} = application:get_key(rabbit, id),
     {ok, Version} = application:get_key(rabbit, vsn),
 
@@ -224,22 +138,31 @@ server_properties() ->
-    %% Normalize the simplifed (2-tuple) and unsimplified (3-tuple) forms
+    %% Normalize the simplified (2-tuple) and unsimplified (3-tuple) forms
     %% from the config and merge them with the generated built-in properties
     NormalizedConfigServerProps =
-        [case X of
-             {KeyAtom, Value} -> {list_to_binary(atom_to_list(KeyAtom)),
-                                  longstr,
-                                  list_to_binary(Value)};
-             {BinKey, Type, Value} -> {BinKey, Type, Value}
-         end || X <- RawConfigServerProps ++
-                    [{product,     Product},
-                     {version,     Version},
-                     {platform,    "Erlang/OTP"},
-                     {copyright,   ?COPYRIGHT_MESSAGE},
-                     {information, ?INFORMATION_MESSAGE}]],
-
-    %% Filter duplicated properties in favor of config file provided values
+        [{<<"capabilities">>, table, server_capabilities(Protocol)} |
+         [case X of
+              {KeyAtom, Value} -> {list_to_binary(atom_to_list(KeyAtom)),
+                                   longstr,
+                                   list_to_binary(Value)};
+              {BinKey, Type, Value} -> {BinKey, Type, Value}
+          end || X <- RawConfigServerProps ++
+                     [{product,     Product},
+                      {version,     Version},
+                      {platform,    "Erlang/OTP"},
+                      {copyright,   ?COPYRIGHT_MESSAGE},
+                      {information, ?INFORMATION_MESSAGE}]]],
+
+    %% Filter duplicated properties in favour of config file provided values
     lists:usort(fun ({K1,_,_}, {K2,_,_}) -> K1 =< K2 end,
                 NormalizedConfigServerProps).
 
+server_capabilities(rabbit_framing_amqp_0_9_1) ->
+    [{<<"publisher_confirms">>,         bool, true},
+     {<<"exchange_exchange_bindings">>, bool, true},
+     {<<"basic.nack">>,                 bool, true},
+     {<<"consumer_cancel_notify">>,     bool, true}];
+server_capabilities(_) ->
+    [].
+
 inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F).
 
 socket_op(Sock, Fun) ->
@@ -263,7 +186,7 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb,
     erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(),
                       handshake_timeout),
     try
-        mainloop(Deb, switch_callback(
+        recvloop(Deb, switch_callback(
                         #v1{parent              = Parent,
                             sock                = ClientSock,
                             connection          = #connection{
@@ -272,10 +195,11 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb,
                               timeout_sec        = ?HANDSHAKE_TIMEOUT,
                               frame_max          = ?FRAME_MIN_SIZE,
                               vhost              = none,
-                              client_properties  = none},
+                              client_properties  = none,
+                              capabilities       = []},
                             callback            = uninitialized_callback,
-                            recv_length         = 0,
-                            recv_ref            = none,
+                            recv_len            = 0,
+                            pending_recv        = false,
                             connection_state    = pre_init,
                             queue_collector     = Collector,
                             heartbeater         = none,
@@ -283,6 +207,8 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb,
                                 rabbit_event:init_stats_timer(),
                             channel_sup_sup_pid = ChannelSupSupPid,
                             start_heartbeat_fun = StartHeartbeatFun,
+                            buf                 = [],
+                            buf_len             = 0,
                             auth_mechanism      = none,
                             auth_state          = none
                            },
@@ -307,88 +233,114 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb,
     end,
     done.
 
+%% The reader's event loop is split across recvloop/2, mainloop/2 and
+%% handle_other/3. recvloop/2 consumes buffered socket data: while at
+%% least recv_len bytes are buffered it passes exactly recv_len bytes
+%% to the current callback via handle_input/3; once the buffer runs
+%% short it sets the socket to {active, once} and marks a receive as
+%% pending. While a receive is pending, or the connection state is
+%% 'blocked', it goes straight to mainloop/2 without touching the
+%% socket. mainloop/2 waits on rabbit_net:recv/1: received data is
+%% buffered and control returns to recvloop/2, a closed socket or
+%% socket error ends the loop, anything else goes to handle_other/3.
-mainloop(Deb, State = #v1{parent = Parent, sock= Sock, recv_ref = Ref}) ->
-    receive
-        {inet_async, Sock, Ref, {ok, Data}} ->
-            mainloop(Deb, handle_input(State#v1.callback, Data,
-                                       State#v1{recv_ref = none}));
-        {inet_async, Sock, Ref, {error, closed}} ->
-            if State#v1.connection_state =:= closed ->
-                    State;
-               true ->
-                    throw(connection_closed_abruptly)
-            end;
-        {inet_async, Sock, Ref, {error, Reason}} ->
-            throw({inet_error, Reason});
-        {conserve_memory, Conserve} ->
-            mainloop(Deb, internal_conserve_memory(Conserve, State));
-        {'EXIT', Parent, Reason} ->
-            terminate(io_lib:format("broker forced connection closure "
-                                    "with reason '~w'", [Reason]), State),
-            %% this is what we are expected to do according to
-            %% http://www.erlang.org/doc/man/sys.html
-            %%
-            %% If we wanted to be *really* nice we should wait for a
-            %% while for clients to close the socket at their end,
-            %% just as we do in the ordinary error case. However,
-            %% since this termination is initiated by our parent it is
-            %% probably more important to exit quickly.
-            exit(Reason);
-        {channel_exit, _Channel, E = {writer, send_failed, _Error}} ->
-            throw(E);
-        {channel_exit, Channel, Reason} ->
-            mainloop(Deb, handle_exception(State, Channel, Reason));
-        {'DOWN', _MRef, process, ChPid, Reason} ->
-            mainloop(Deb, handle_dependent_exit(ChPid, Reason, State));
-        terminate_connection ->
-            State;
-        handshake_timeout ->
-            if ?IS_RUNNING(State) orelse
-               State#v1.connection_state =:= closing orelse
-               State#v1.connection_state =:= closed ->
-                    mainloop(Deb, State);
-               true ->
-                    throw({handshake_timeout, State#v1.callback})
-            end;
-        timeout ->
-            case State#v1.connection_state of
-                closed -> mainloop(Deb, State);
-                S      -> throw({timeout, S})
-            end;
-        {'$gen_call', From, {shutdown, Explanation}} ->
-            {ForceTermination, NewState} = terminate(Explanation, State),
-            gen_server:reply(From, ok),
-            case ForceTermination of
-                force  -> ok;
-                normal -> mainloop(Deb, NewState)
-            end;
-        {'$gen_call', From, info} ->
-            gen_server:reply(From, infos(?INFO_KEYS, State)),
-            mainloop(Deb, State);
-        {'$gen_call', From, {info, Items}} ->
-            gen_server:reply(From, try {ok, infos(Items, State)}
-                                   catch Error -> {error, Error}
-                                   end),
-            mainloop(Deb, State);
-        emit_stats ->
-            State1 = internal_emit_stats(State),
-            mainloop(Deb, State1);
-        {system, From, Request} ->
-            sys:handle_system_msg(Request, From,
-                                  Parent, ?MODULE, Deb, State);
-        Other ->
-            %% internal error -> something worth dying for
-            exit({unexpected_message, Other})
+recvloop(Deb, State = #v1{pending_recv = true}) ->
+    mainloop(Deb, State);
+recvloop(Deb, State = #v1{connection_state = blocked}) ->
+    mainloop(Deb, State);
+recvloop(Deb, State = #v1{sock = Sock, recv_len = RecvLen, buf_len = BufLen})
+  when BufLen < RecvLen ->
+    ok = rabbit_net:setopts(Sock, [{active, once}]),
+    mainloop(Deb, State#v1{pending_recv = true});
+recvloop(Deb, State = #v1{recv_len = RecvLen, buf = Buf, buf_len = BufLen}) ->
+    {Data, Rest} = split_binary(case Buf of
+                                    [B] -> B;
+                                    _   -> list_to_binary(lists:reverse(Buf))
+                                end, RecvLen),
+    recvloop(Deb, handle_input(State#v1.callback, Data,
+                               State#v1{buf = [Rest],
+                                        buf_len = BufLen - RecvLen})).
+
+mainloop(Deb, State = #v1{sock = Sock, buf = Buf, buf_len = BufLen}) ->
+    case rabbit_net:recv(Sock) of
+        {data, Data}    -> recvloop(Deb, State#v1{buf = [Data | Buf],
+                                                  buf_len = BufLen + size(Data),
+                                                  pending_recv = false});
+        closed          -> if State#v1.connection_state =:= closed ->
+                                   State;
+                              true ->
+                                   throw(connection_closed_abruptly)
+                           end;
+        {error, Reason} -> throw({inet_error, Reason});
+        {other, Other}  -> handle_other(Other, Deb, State)
     end.
 
+handle_other({conserve_memory, Conserve}, Deb, State) ->
+    recvloop(Deb, internal_conserve_memory(Conserve, State));
+handle_other({channel_closing, ChPid}, Deb, State) ->
+    ok = rabbit_channel:ready_for_close(ChPid),
+    channel_cleanup(ChPid),
+    mainloop(Deb, State);
+handle_other({'EXIT', Parent, Reason}, _Deb, State = #v1{parent = Parent}) ->
+    terminate(io_lib:format("broker forced connection closure "
+                            "with reason '~w'", [Reason]), State),
+    %% this is what we are expected to do according to
+    %% http://www.erlang.org/doc/man/sys.html
+    %%
+    %% If we wanted to be *really* nice we should wait for a while for
+    %% clients to close the socket at their end, just as we do in the
+    %% ordinary error case. However, since this termination is
+    %% initiated by our parent it is probably more important to exit
+    %% quickly.
+    exit(Reason);
+handle_other({channel_exit, _Channel, E = {writer, send_failed, _Error}},
+             _Deb, _State) ->
+    throw(E);
+handle_other({channel_exit, Channel, Reason}, Deb, State) ->
+    mainloop(Deb, handle_exception(State, Channel, Reason));
+handle_other({'DOWN', _MRef, process, ChPid, Reason}, Deb, State) ->
+    mainloop(Deb, handle_dependent_exit(ChPid, Reason, State));
+handle_other(terminate_connection, _Deb, State) ->
+    State;
+handle_other(handshake_timeout, Deb, State)
+  when ?IS_RUNNING(State) orelse
+       State#v1.connection_state =:= closing orelse
+       State#v1.connection_state =:= closed ->
+    mainloop(Deb, State);
+handle_other(handshake_timeout, _Deb, State) ->
+    throw({handshake_timeout, State#v1.callback});
+handle_other(timeout, Deb, State = #v1{connection_state = closed}) ->
+    mainloop(Deb, State);
+handle_other(timeout, _Deb, #v1{connection_state = S}) ->
+    throw({timeout, S});
+handle_other({'$gen_call', From, {shutdown, Explanation}}, Deb, State) ->
+    {ForceTermination, NewState} = terminate(Explanation, State),
+    gen_server:reply(From, ok),
+    case ForceTermination of
+        force  -> ok;
+        normal -> mainloop(Deb, NewState)
+    end;
+handle_other({'$gen_call', From, info}, Deb, State) ->
+    gen_server:reply(From, infos(?INFO_KEYS, State)),
+    mainloop(Deb, State);
+handle_other({'$gen_call', From, {info, Items}}, Deb, State) ->
+    gen_server:reply(From, try {ok, infos(Items, State)}
+                           catch Error -> {error, Error}
+                           end),
+    mainloop(Deb, State);
+handle_other(emit_stats, Deb, State) ->
+    mainloop(Deb, internal_emit_stats(State));
+handle_other({system, From, Request}, Deb, State = #v1{parent = Parent}) ->
+    sys:handle_system_msg(Request, From, Parent, ?MODULE, Deb, State);
+handle_other(Other, _Deb, _State) ->
+    %% internal error -> something worth dying for
+    exit({unexpected_message, Other}).
+
 switch_callback(State = #v1{connection_state = blocked,
                             heartbeater = Heartbeater}, Callback, Length) ->
     ok = rabbit_heartbeat:pause_monitor(Heartbeater),
-    State#v1{callback = Callback, recv_length = Length, recv_ref = none};
+    State#v1{callback = Callback, recv_len = Length};
 switch_callback(State, Callback, Length) ->
-    Ref = inet_op(fun () -> rabbit_net:async_recv(
-                              State#v1.sock, Length, infinity) end),
-    State#v1{callback = Callback, recv_length = Length, recv_ref = Ref}.
+    State#v1{callback = Callback, recv_len = Length}.
 
 terminate(Explanation, State) when ?IS_RUNNING(State) ->
     {normal, send_exception(State, 0,
@@ -402,12 +344,9 @@ internal_conserve_memory(true,  State = #v1{connection_state = running}) ->
 internal_conserve_memory(false, State = #v1{connection_state = blocking}) ->
     State#v1{connection_state = running};
 internal_conserve_memory(false, State = #v1{connection_state = blocked,
-                                            heartbeater      = Heartbeater,
-                                            callback         = Callback,
-                                            recv_length      = Length,
-                                            recv_ref         = none}) ->
+                                            heartbeater      = Heartbeater}) ->
     ok = rabbit_heartbeat:resume_monitor(Heartbeater),
-    switch_callback(State#v1{connection_state = running}, Callback, Length);
+    State#v1{connection_state = running};
 internal_conserve_memory(_Conserve, State) ->
     State.
 
@@ -429,32 +368,32 @@ close_connection(State = #v1{queue_collector = Collector,
     erlang:send_after(TimeoutMillisec, self(), terminate_connection),
     State#v1{connection_state = closed}.
 
-close_channel(Channel, State) ->
-    put({channel, Channel}, closing),
-    State.
-
 handle_dependent_exit(ChPid, Reason, State) ->
     case termination_kind(Reason) of
         controlled ->
-            erase({ch_pid, ChPid}),
+            channel_cleanup(ChPid),
             maybe_close(State);
         uncontrolled ->
             case channel_cleanup(ChPid) of
                 undefined -> exit({abnormal_dependent_exit, ChPid, Reason});
-                Channel   -> maybe_close(
+                Channel   -> rabbit_log:error(
+                               "connection ~p, channel ~p - error:~n~p~n",
+                               [self(), Channel, Reason]),
+                             maybe_close(
                                handle_exception(State, Channel, Reason))
             end
     end.
 
 channel_cleanup(ChPid) ->
     case get({ch_pid, ChPid}) of
-        undefined -> undefined;
-        Channel   -> erase({channel, Channel}),
-                     erase({ch_pid, ChPid}),
-                     Channel
+        undefined       -> undefined;
+        {Channel, MRef} -> erase({channel, Channel}),
+                           erase({ch_pid, ChPid}),
+                           erlang:demonitor(MRef, [flush]),
+                           Channel
     end.
 
-all_channels() -> [ChPid || {{ch_pid, ChPid}, _Channel} <- get()].
+all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()].
 
 terminate_channels() ->
     NChannels =
@@ -509,8 +448,8 @@ maybe_close(State = #v1{connection_state = closing,
 maybe_close(State) ->
     State.
 
-termination_kind(normal)            -> controlled;
-termination_kind(_)                 -> uncontrolled.
+termination_kind(normal) -> controlled;
+termination_kind(_)      -> uncontrolled.
 
 handle_frame(Type, 0, Payload,
              State = #v1{connection_state = CS,
@@ -546,8 +485,8 @@ handle_frame(Type, Channel, Payload,
                                   Channel, ChPid, FramingState),
                     put({channel, Channel}, {ChPid, NewAState}),
                     case AnalyzedFrame of
-                        {method, 'channel.close', _} ->
-                            erase({channel, Channel}),
+                        {method, 'channel.close_ok', _} ->
+                            channel_cleanup(ChPid),
                             State;
                         {method, MethodName, _} ->
                             case (State#v1.connection_state =:= blocking
@@ -559,25 +498,6 @@ handle_frame(Type, Channel, Payload,
                         _ ->
                             State
                     end;
-                closing ->
-                    %% According to the spec, after sending a
-                    %% channel.close we must ignore all frames except
-                    %% channel.close and channel.close_ok.  In the
-                    %% event of a channel.close, we should send back a
-                    %% channel.close_ok.
-                    case AnalyzedFrame of
-                        {method, 'channel.close_ok', _} ->
-                            erase({channel, Channel});
-                        {method, 'channel.close', _} ->
-                            %% We're already closing this channel, so
-                            %% there's no cleanup to do (notify
-                            %% queues, etc.)
-                            ok = rabbit_writer:internal_send_command(
-                                   State#v1.sock, Channel,
-                                   #'channel.close_ok'{}, Protocol);
-                        _ -> ok
-                    end,
-                    State;
                 undefined ->
                     case ?IS_RUNNING(State) of
                         true  -> send_to_new_channel(
@@ -598,8 +518,8 @@ handle_input({frame_payload, Type, Channel, PayloadSize},
              PayloadAndMarker, State) ->
     case PayloadAndMarker of
         <<Payload:PayloadSize/binary, ?FRAME_END>> ->
-            handle_frame(Type, Channel, Payload,
-                         switch_callback(State, frame_header, 7));
+            switch_callback(handle_frame(Type, Channel, Payload, State),
+                            frame_header, 7);
         _ ->
             throw({bad_payload, Type, Channel, PayloadSize, PayloadAndMarker})
     end;
@@ -649,8 +569,8 @@ start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision},
     Start = #'connection.start'{
       version_major = ProtocolMajor,
       version_minor = ProtocolMinor,
-      server_properties = server_properties(),
-      mechanisms = auth_mechanisms_binary(),
+      server_properties = server_properties(Protocol),
+      mechanisms = auth_mechanisms_binary(Sock),
       locales = <<"en_US">> },
     ok = send_on_channel0(Sock, Start, Protocol),
     switch_callback(State#v1{connection = Connection#connection{
@@ -676,14 +596,14 @@ handle_method0(MethodName, FieldsBin,
                State = #v1{connection = #connection{protocol = Protocol}}) ->
     HandleException =
         fun(R) ->
-            case ?IS_RUNNING(State) of
-                true  -> send_exception(State, 0, R);
-                %% We don't trust the client at this point - force
-                %% them to wait for a bit so they can't DOS us with
-                %% repeated failed logins etc.
-                false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000),
-                         throw({channel0_error, State#v1.connection_state, R})
-            end
+                case ?IS_RUNNING(State) of
+                    true  -> send_exception(State, 0, R);
+                    %% We don't trust the client at this point - force
+                    %% them to wait for a bit so they can't DOS us with
+                    %% repeated failed logins etc.
+                    false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000),
+                             throw({channel0_error, State#v1.connection_state, R})
+                end
         end,
     try
         handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin),
@@ -700,13 +620,19 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism,
                State0 = #v1{connection_state = starting,
                             connection       = Connection,
                             sock             = Sock}) ->
-    AuthMechanism = auth_mechanism_to_module(Mechanism),
+    AuthMechanism = auth_mechanism_to_module(Mechanism, Sock),
+    Capabilities =
+        case rabbit_misc:table_lookup(ClientProperties, <<"capabilities">>) of
+            {table, Capabilities1} -> Capabilities1;
+            _                      -> []
+        end,
     State = State0#v1{auth_mechanism   = AuthMechanism,
                       auth_state       = AuthMechanism:init(Sock),
                       connection_state = securing,
                       connection       =
                           Connection#connection{
-                            client_properties = ClientProperties}},
+                            client_properties = ClientProperties,
+                            capabilities      = Capabilities}},
     auth_phase(Response, State);
 
 handle_method0(#'connection.secure_ok'{response = Response},
@@ -719,14 +645,15 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax,
                            connection = Connection,
                            sock = Sock,
                            start_heartbeat_fun = SHF}) ->
-    if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) ->
+    ServerFrameMax = server_frame_max(),
+    if FrameMax /= 0 andalso FrameMax < ?FRAME_MIN_SIZE ->
             rabbit_misc:protocol_error(
               not_allowed, "frame_max=~w < ~w min size",
               [FrameMax, ?FRAME_MIN_SIZE]);
-       (?FRAME_MAX /= 0) and (FrameMax > ?FRAME_MAX) ->
+       ServerFrameMax /= 0 andalso FrameMax > ServerFrameMax ->
             rabbit_misc:protocol_error(
               not_allowed, "frame_max=~w > ~w max size",
-              [FrameMax, ?FRAME_MAX]);
+              [FrameMax, ServerFrameMax]);
        true ->
             Frame = rabbit_binary_generator:build_heartbeat_frame(),
             SendFun = fun() -> catch rabbit_net:send(Sock, Frame) end,
@@ -742,7 +669,6 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax,
     end;
 
 handle_method0(#'connection.open'{virtual_host = VHostPath},
-
                State = #v1{connection_state = opening,
                            connection = Connection = #connection{
                                           user = User,
@@ -757,7 +683,8 @@ handle_method0(#'connection.open'{virtual_host = VHostPath},
                State#v1{connection_state = running,
                         connection = NewConnection}),
     rabbit_event:notify(connection_created,
-                        infos(?CREATION_EVENT_KEYS, State1)),
+                        [{type, network} |
+                         infos(?CREATION_EVENT_KEYS, State1)]),
     rabbit_event:if_enabled(StatsTimer,
                             fun() -> internal_emit_stats(State1) end),
     State1;
@@ -784,17 +711,23 @@ handle_method0(_Method, #v1{connection_state = S}) ->
     rabbit_misc:protocol_error(
       channel_error, "unexpected method in connection state ~w", [S]).
 
+%% Read frame_max for this instance from the rabbit application
+%% environment. Could simply use 0, but that breaks the QPid Java client.
+server_frame_max() ->
+    {ok, FrameMax} = application:get_env(rabbit, frame_max),
+    FrameMax.
+
 send_on_channel0(Sock, Method, Protocol) ->
     ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol).
 
-auth_mechanism_to_module(TypeBin) ->
+auth_mechanism_to_module(TypeBin, Sock) ->
     case rabbit_registry:binary_to_type(TypeBin) of
         {error, not_found} ->
             rabbit_misc:protocol_error(
               command_invalid, "unknown authentication mechanism '~s'",
               [TypeBin]);
         T ->
-            case {lists:member(T, auth_mechanisms()),
+            case {lists:member(T, auth_mechanisms(Sock)),
                   rabbit_registry:lookup_module(auth_mechanism, T)} of
                 {true, {ok, Module}} ->
                     Module;
@@ -805,15 +738,14 @@ auth_mechanism_to_module(TypeBin) ->
             end
     end.
 
-auth_mechanisms() ->
+auth_mechanisms(Sock) ->
     {ok, Configured} = application:get_env(auth_mechanisms),
-    [Name || {Name, _Module} <- rabbit_registry:lookup_all(auth_mechanism),
-             lists:member(Name, Configured)].
+    [Name || {Name, Module} <- rabbit_registry:lookup_all(auth_mechanism),
+             Module:should_offer(Sock), lists:member(Name, Configured)].
 
-auth_mechanisms_binary() ->
+auth_mechanisms_binary(Sock) ->
     list_to_binary(
-            string:join(
-              [atom_to_list(A) || A <- auth_mechanisms()], " ")).
+      string:join([atom_to_list(A) || A <- auth_mechanisms(Sock)], " ")).
 
 auth_phase(Response,
            State = #v1{auth_mechanism = AuthMechanism,
@@ -835,7 +767,7 @@ auth_phase(Response,
             State#v1{auth_state = AuthState1};
         {ok, User} ->
             Tune = #'connection.tune'{channel_max = 0,
-                                      frame_max = ?FRAME_MAX,
+                                      frame_max = server_frame_max(),
                                       heartbeat = 0},
             ok = send_on_channel0(Sock, Tune, Protocol),
             State#v1{connection_state = tuning,
@@ -939,19 +871,20 @@ cert_info(F, Sock) ->
 send_to_new_channel(Channel, AnalyzedFrame, State) ->
     #v1{sock = Sock, queue_collector = Collector,
         channel_sup_sup_pid = ChanSupSup,
-        connection = #connection{protocol  = Protocol,
-                                 frame_max = FrameMax,
-                                 user      = User,
-                                 vhost     = VHost}} = State,
+        connection = #connection{protocol     = Protocol,
+                                 frame_max    = FrameMax,
+                                 user         = User,
+                                 vhost        = VHost,
+                                 capabilities = Capabilities}} = State,
     {ok, _ChSupPid, {ChPid, AState}} =
         rabbit_channel_sup_sup:start_channel(
-          ChanSupSup, {tcp, Protocol, Sock, Channel, FrameMax, self(), User,
-                       VHost, Collector}),
-    erlang:monitor(process, ChPid),
+          ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), Protocol, User,
+                       VHost, Capabilities, Collector}),
+    MRef = erlang:monitor(process, ChPid),
     NewAState = process_channel_frame(AnalyzedFrame, self(),
                                       Channel, ChPid, AState),
     put({channel, Channel}, {ChPid, NewAState}),
-    put({ch_pid, ChPid}, Channel),
+    put({ch_pid, ChPid}, {Channel, MRef}),
     State.
 
 process_channel_frame(Frame, ErrPid, Channel, ChPid, AState) ->
@@ -967,29 +900,20 @@ process_channel_frame(Frame, ErrPid, Channel, ChPid, AState) ->
                                             AState
     end.
 
-log_channel_error(ConnectionState, Channel, Reason) ->
-    rabbit_log:error("connection ~p (~p), channel ~p - error:~n~p~n",
-                     [self(), ConnectionState, Channel, Reason]).
-
-handle_exception(State = #v1{connection_state = closed}, Channel, Reason) ->
-    log_channel_error(closed, Channel, Reason),
+handle_exception(State = #v1{connection_state = closed}, _Channel, _Reason) ->
     State;
-handle_exception(State = #v1{connection_state = CS}, Channel, Reason) ->
-    log_channel_error(CS, Channel, Reason),
+handle_exception(State, Channel, Reason) ->
     send_exception(State, Channel, Reason).
 
 send_exception(State = #v1{connection = #connection{protocol = Protocol}},
                Channel, Reason) ->
-    {ShouldClose, CloseChannel, CloseMethod} =
+    {0, CloseMethod} =
         rabbit_binary_generator:map_exception(Channel, Reason, Protocol),
-    NewState = case ShouldClose of
-                   true  -> terminate_channels(),
-                            close_connection(State);
-                   false -> close_channel(Channel, State)
-               end,
+    terminate_channels(),
+    State1 = close_connection(State),
     ok = rabbit_writer:internal_send_command(
-           NewState#v1.sock, CloseChannel, CloseMethod, Protocol),
-    NewState.
+           State1#v1.sock, 0, CloseMethod, Protocol),
+    State1.
 
 internal_emit_stats(State = #v1{stats_timer = StatsTimer}) ->
     rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)),
diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl
index 795413aa..9821ae7b 100644
--- a/src/rabbit_registry.erl
+++ b/src/rabbit_registry.erl
@@ -48,7 +48,7 @@ start_link() ->
 %%---------------------------------------------------------------------------
 
 register(Class, TypeName, ModuleName) ->
-    gen_server:call(?SERVER, {register, Class, TypeName, ModuleName}).
+    gen_server:call(?SERVER, {register, Class, TypeName, ModuleName}, infinity).
 
 %% This is used with user-supplied arguments (e.g., on exchange
 %% declare), so we restrict it to existing atoms only.  This means it
diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl
index 692d2473..d453a870 100644
--- a/src/rabbit_router.erl
+++ b/src/rabbit_router.erl
@@ -37,7 +37,8 @@
                            fun ((rabbit_types:binding()) -> boolean())) ->
     match_result()).
 -spec(match_routing_key/2 :: (rabbit_types:binding_source(),
-                              routing_key() | '_') -> match_result()).
+                             [routing_key()] | ['_']) ->
+    match_result()).
 
 -endif.
 
@@ -58,7 +59,7 @@ deliver(QNames, Delivery = #delivery{mandatory = false,
     {routed, QPids};
 
 deliver(QNames, Delivery = #delivery{mandatory = Mandatory,
-                                    immediate = Immediate}) ->
+                                     immediate = Immediate}) ->
     QPids = lookup_qpids(QNames),
     {Success, _} =
         delegate:invoke(QPids,
@@ -66,7 +67,7 @@ deliver(QNames, Delivery = #delivery{mandatory = Mandatory,
                                 rabbit_amqqueue:deliver(Pid, Delivery)
                         end),
     {Routed, Handled} =
-         lists:foldl(fun fold_deliveries/2, {false, []}, Success),
+        lists:foldl(fun fold_deliveries/2, {false, []}, Success),
     check_delivery(Mandatory, Immediate, {Routed, Handled}).
 
 
@@ -82,12 +83,19 @@ match_bindings(SrcName, Match) ->
                       Match(Binding)]),
     mnesia:async_dirty(fun qlc:e/1, [Query]).
 
-match_routing_key(SrcName, RoutingKey) ->
-    MatchHead = #route{binding = #binding{source      = SrcName,
+match_routing_key(SrcName, [RoutingKey]) ->
+    find_routes(#route{binding = #binding{source      = SrcName,
                                           destination = '$1',
                                           key         = RoutingKey,
                                           _           = '_'}},
-    mnesia:dirty_select(rabbit_route, [{MatchHead, [], ['$1']}]).
+                []);
+match_routing_key(SrcName, [_|_] = RoutingKeys) ->
+    find_routes(#route{binding = #binding{source      = SrcName,
+                                          destination = '$1',
+                                          key         = '$2',
+                                          _           = '_'}},
+                [list_to_tuple(['orelse' | [{'=:=', '$2', RKey} ||
+                                               RKey <- RoutingKeys]])]).
 
 %%--------------------------------------------------------------------
 
@@ -102,7 +110,31 @@ check_delivery(_   , _   , {_    , Qs}) -> {routed, Qs}.
 lookup_qpids(QNames) ->
     lists:foldl(fun (QName, QPids) ->
                         case mnesia:dirty_read({rabbit_queue, QName}) of
-                            [#amqqueue{pid = QPid}] -> [QPid | QPids];
-                            []                      -> QPids
+                            [#amqqueue{pid = QPid, slave_pids = SPids}] ->
+                                [QPid | SPids ++ QPids];
+                            [] ->
+                                QPids
                         end
                 end, [], QNames).
+
+%% Return the '$1' binding of every rabbit_route entry that matches
+%% MatchHead and satisfies Conditions, selecting from ets directly.
+%% Normally we'd call mnesia:dirty_select/2 here, but that is quite
+%% expensive due to
+%%
+%% 1) general mnesia overheads (figuring out table types and
+%% locations, etc). We get away with bypassing these because we know
+%% that the table is not the schema table, has a local ram copy and
+%% does not have any indices
+%%
+%% 2) 'fixing' of the table with ets:safe_fixtable/2, which is wholly
+%% unnecessary. According to the ets docs (and the code in erl_db.c),
+%% 'select' is safe anyway ("Functions that internally traverse over a
+%% table, like select and match, will give the same guarantee as
+%% safe_fixtable.") and, furthermore, even the lower level iterators
+%% ('first' and 'next') are safe on ordered_set tables ("Note that for
+%% tables of the ordered_set type, safe_fixtable/2 is not necessary as
+%% calls to first/1 and next/2 will always succeed."), which
+%% rabbit_route is.
+find_routes(MatchHead, Conditions) ->
+    ets:select(rabbit_route, [{MatchHead, Conditions, ['$1']}]).
diff --git a/src/rabbit_ssl.erl b/src/rabbit_ssl.erl
index e831ee51..e0defa9e 100644
--- a/src/rabbit_ssl.erl
+++ b/src/rabbit_ssl.erl
@@ -87,10 +87,10 @@ cert_info(F, Cert) ->
 
 find_by_type(Type, {rdnSequence, RDNs}) ->
     case [V || #'AttributeTypeAndValue'{type = T, value = V}
-                     <- lists:flatten(RDNs),
-                 T == Type] of
-        [{printableString, S}] -> S;
-        []                     -> not_found
+                   <- lists:flatten(RDNs),
+               T == Type] of
+        [Val] -> format_asn1_value(Val);
+        []    -> not_found
     end.
 
 %%--------------------------------------------------------------------------
@@ -162,12 +162,85 @@ escape_rdn_value([C | S], middle) ->
 format_asn1_value({ST, S}) when ST =:= teletexString; ST =:= printableString;
                                 ST =:= universalString; ST =:= utf8String;
                                 ST =:= bmpString ->
-    if is_binary(S) -> binary_to_list(S);
-       true         -> S
-    end;
+    format_directory_string(ST, S);
 format_asn1_value({utcTime, [Y1, Y2, M1, M2, D1, D2, H1, H2,
-                            Min1, Min2, S1, S2, $Z]}) ->
+                             Min1, Min2, S1, S2, $Z]}) ->
     io_lib:format("20~c~c-~c~c-~c~cT~c~c:~c~c:~c~cZ",
                   [Y1, Y2, M1, M2, D1, D2, H1, H2, Min1, Min2, S1, S2]);
 format_asn1_value(V) ->
     io_lib:format("~p", [V]).
+
+%% DirectoryString { INTEGER : maxSize } ::= CHOICE {
+%%     teletexString     TeletexString (SIZE (1..maxSize)),
+%%     printableString   PrintableString (SIZE (1..maxSize)),
+%%     bmpString         BMPString (SIZE (1..maxSize)),
+%%     universalString   UniversalString (SIZE (1..maxSize)),
+%%     uTF8String        UTF8String (SIZE (1..maxSize)) }
+%%
+%% Precise definitions of printable / teletexString are hard to come
+%% by. This is what I reconstructed:
+%%
+%% printableString:
+%% "intended to represent the limited character sets available to
+%% mainframe input terminals"
+%% A-Z a-z 0-9 ' ( ) + , - . / : = ? [space]
+%% http://msdn.microsoft.com/en-us/library/bb540814(v=vs.85).aspx
+%%
+%% teletexString:
+%% "a sizable volume of software in the world treats TeletexString
+%% (T61String) as a simple 8-bit string with mostly Windows Latin 1
+%% (superset of iso-8859-1) encoding"
+%% http://www.mail-archive.com/asn1@asn1.org/msg00460.html
+%%
+%% (However according to that link X.680 actually defines
+%% TeletexString in some much more involved and crazy way. I suggest
+%% we treat it as ISO-8859-1 since Erlang does not support Windows
+%% Latin 1).
+%%
+%% bmpString:
+%% UCS-2 according to RFC 3641. Hence cannot represent Unicode
+%% characters above 65535 (outside the "Basic Multilingual Plane").
+%%
+%% universalString:
+%% UCS-4 according to RFC 3641.
+%%
+%% utf8String:
+%% UTF-8 according to RFC 3641.
+%%
+%% Within Rabbit we assume UTF-8 encoding. Since printableString is a
+%% subset of ASCII it is also a subset of UTF-8. The others need
+%% converting. Fortunately since the Erlang SSL library does the
+%% decoding for us (albeit into a weird format, see below), we just
+%% need to handle encoding into UTF-8. Note also that utf8Strings come
+%% back as binary.
+%%
+%% Note for testing: the default Ubuntu configuration for openssl will
+%% only create printableString or teletexString types no matter what
+%% you do. Edit string_mask in the [req] section of
+%% /etc/ssl/openssl.cnf to change this (see comments there). You
+%% probably also need to set utf8 = yes to get it to accept UTF-8 on
+%% the command line. Also note I could not get openssl to generate a
+%% universalString.
+
+format_directory_string(printableString, S) -> S; % ASCII subset, kept as is
+format_directory_string(teletexString,   S) -> utf8_list_from(S);
+format_directory_string(bmpString,       S) -> utf8_list_from(S);
+format_directory_string(universalString, S) -> utf8_list_from(S);
+format_directory_string(utf8String,      S) -> binary_to_list(S). % S: binary
+
+utf8_list_from(S) -> % UTF-8 encode S and return the bytes as a list
+    binary_to_list(
+          unicode:characters_to_binary(flatten_ssl_list(S), utf32, utf8)).
+
+%% The Erlang SSL implementation invents its own representation for
+%% non-ascii strings - looking like [97,{0,0,3,187}] (that's LATIN
+%% SMALL LETTER A followed by GREEK SMALL LETTER LAMDA). We convert
+%% this into a list of unicode characters, which we can tell
+%% unicode:characters_to_binary is utf32.
+
+flatten_ssl_list(L) -> [flatten_ssl_list_item(I) || I <- L].
+
+flatten_ssl_list_item({A, B, C, D}) -> % big-endian bytes of one code point
+    A * (1 bsl 24) + B * (1 bsl 16) + C * (1 bsl 8) + D;
+flatten_ssl_list_item(N) when is_integer(N) -> % already a code point
+    N.
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 49b09508..2a3ced92 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -26,6 +26,7 @@
 
 -define(PERSISTENT_MSG_STORE, msg_store_persistent).
 -define(TRANSIENT_MSG_STORE,  msg_store_transient).
+-define(CLEANUP_QUEUE_NAME, <<"cleanup-queue">>).
 
 test_content_prop_roundtrip(Datum, Binary) ->
     Types =  [element(1, E) || E <- Datum],
@@ -34,6 +35,7 @@ test_content_prop_roundtrip(Datum, Binary) ->
     Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion
 
 all_tests() ->
+    passed = gm_tests:all_tests(),
     application:set_env(rabbit, file_handles_high_watermark, 10, infinity),
     ok = file_handle_cache:set_limit(10),
     passed = test_file_handle_cache(),
@@ -55,6 +57,7 @@ all_tests() ->
     passed = test_cluster_management(),
     passed = test_user_management(),
     passed = test_server_status(),
+    passed = test_confirms(),
     passed = maybe_run_cluster_dependent_tests(),
     passed = test_configurable_server_properties(),
     passed.
@@ -80,20 +83,24 @@ run_cluster_dependent_tests(SecondaryNode) ->
     io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]),
     passed = test_delegates_async(SecondaryNode),
     passed = test_delegates_sync(SecondaryNode),
+    passed = test_queue_cleanup(SecondaryNode),
+    passed = test_declare_on_dead_queue(SecondaryNode),
 
     %% we now run the tests remotely, so that code coverage on the
     %% local node picks up more of the delegate
     Node = node(),
     Self = self(),
     Remote = spawn(SecondaryNode,
-                   fun () -> A = test_delegates_async(Node),
-                             B = test_delegates_sync(Node),
-                             Self ! {self(), {A, B}}
+                   fun () -> Rs = [ test_delegates_async(Node),
+                                    test_delegates_sync(Node),
+                                    test_queue_cleanup(Node),
+                                    test_declare_on_dead_queue(Node) ],
+                             Self ! {self(), Rs}
                    end),
     receive
         {Remote, Result} ->
-            Result = {passed, passed}
-    after 2000 ->
+            Result = lists:duplicate(length(Result), passed)
+    after 30000 ->
             throw(timeout)
     end,
 
@@ -196,6 +203,42 @@ test_priority_queue() ->
     {true, false, 3, [{1, baz}, {0, foo}, {0, bar}], [baz, foo, bar]} =
         test_priority_queue(Q15),
 
+    %% 1-element infinity priority Q
+    Q16 = priority_queue:in(foo, infinity, Q),
+    {true, false, 1, [{infinity, foo}], [foo]} = test_priority_queue(Q16),
+
+    %% add infinity to 0-priority Q
+    Q17 = priority_queue:in(foo, infinity, priority_queue:in(bar, Q)),
+    {true, false, 2, [{infinity, foo}, {0, bar}], [foo, bar]} =
+        test_priority_queue(Q17),
+
+    %% and the other way around
+    Q18 = priority_queue:in(bar, priority_queue:in(foo, infinity, Q)),
+    {true, false, 2, [{infinity, foo}, {0, bar}], [foo, bar]} =
+        test_priority_queue(Q18),
+
+    %% add infinity to mixed-priority Q
+    Q19 = priority_queue:in(qux, infinity, Q3),
+    {true, false, 3, [{infinity, qux}, {2, bar}, {1, foo}], [qux, bar, foo]} =
+        test_priority_queue(Q19),
+
+    %% merge the above with a negative priority Q
+    Q20 = priority_queue:join(Q19, Q4),
+    {true, false, 4, [{infinity, qux}, {2, bar}, {1, foo}, {-1, foo}],
+     [qux, bar, foo, foo]} = test_priority_queue(Q20),
+
+    %% merge two infinity priority queues
+    Q21 = priority_queue:join(priority_queue:in(foo, infinity, Q),
+                              priority_queue:in(bar, infinity, Q)),
+    {true, false, 2, [{infinity, foo}, {infinity, bar}], [foo, bar]} =
+        test_priority_queue(Q21),
+
+    %% merge two mixed priority with infinity queues
+    Q22 = priority_queue:join(Q18, Q20),
+    {true, false, 6, [{infinity, foo}, {infinity, qux}, {2, bar}, {1, foo},
+                      {0, bar}, {-1, foo}], [foo, qux, bar, foo, bar, foo]} =
+        test_priority_queue(Q22),
+
     passed.
 
 priority_queue_in_all(Q, L) ->
@@ -419,35 +462,35 @@ test_content_properties() ->
                                            [{<<"one">>, signedint, 1},
                                             {<<"two">>, signedint, 2}]}]}],
                                 <<
-                                 % property-flags
-                                 16#8000:16,
+                                  %% property-flags
+                                  16#8000:16,
 
-                                 % property-list:
+                                  %% property-list:
 
-                                 % table
-                                 117:32,                % table length in bytes
+                                  %% table
+                                  117:32,                % table length in bytes
 
-                                 11,"a signedint",        % name
-                                 "I",12345678:32,        % type and value
+                                  11,"a signedint",      % name
+                                  "I",12345678:32,       % type and value
 
-                                 9,"a longstr",
-                                 "S",10:32,"yes please",
+                                  9,"a longstr",
+                                  "S",10:32,"yes please",
 
-                                 9,"a decimal",
-                                 "D",123,12345678:32,
+                                  9,"a decimal",
+                                  "D",123,12345678:32,
 
-                                 11,"a timestamp",
-                                 "T", 123456789012345:64,
+                                  11,"a timestamp",
+                                  "T", 123456789012345:64,
 
-                                 14,"a nested table",
-                                 "F",
-                                        18:32,
+                                  14,"a nested table",
+                                  "F",
+                                  18:32,
 
-                                        3,"one",
-                                        "I",1:32,
+                                  3,"one",
+                                  "I",1:32,
 
-                                        3,"two",
-                                        "I",2:32 >>),
+                                  3,"two",
+                                  "I",2:32 >>),
     case catch rabbit_binary_parser:parse_properties([bit, bit, bit, bit], <<16#A0,0,1>>) of
         {'EXIT', content_properties_binary_overflow} -> passed;
         V -> exit({got_success_but_expected_failure, V})
@@ -474,28 +517,28 @@ test_field_values() ->
 
                ]}],
       <<
-       % property-flags
-       16#8000:16,
-       % table length in bytes
-       228:32,
-
-       7,"longstr",   "S", 21:32, "Here is a long string",      %      = 34
-       9,"signedint", "I", 12345:32/signed,                     % + 15 = 49
-       7,"decimal",   "D", 3, 123456:32,                        % + 14 = 63
-       9,"timestamp", "T", 109876543209876:64,                  % + 19 = 82
-       5,"table",     "F", 31:32, % length of table             % + 11 = 93
-                           3,"one", "I", 54321:32,              % +  9 = 102
-                           3,"two", "S", 13:32, "A long string",% + 22 = 124
-       4,"byte",      "b", 255:8,                               % +  7 = 131
-       4,"long",      "l", 1234567890:64,                       % + 14 = 145
-       5,"short",     "s", 655:16,                              % +  9 = 154
-       4,"bool",      "t", 1,                                   % +  7 = 161
-       6,"binary",    "x", 15:32, "a binary string",            % + 27 = 188
-       4,"void",      "V",                                      % +  6 = 194
-       5,"array",     "A", 23:32,                               % + 11 = 205
-                           "I", 54321:32,                       % +  5 = 210
-                           "S", 13:32, "A long string"          % + 18 = 228
-       >>),
+        %% property-flags
+        16#8000:16,
+        %% table length in bytes
+        228:32,
+
+        7,"longstr",   "S", 21:32, "Here is a long string",      %      = 34
+        9,"signedint", "I", 12345:32/signed,                     % + 15 = 49
+        7,"decimal",   "D", 3, 123456:32,                        % + 14 = 63
+        9,"timestamp", "T", 109876543209876:64,                  % + 19 = 82
+        5,"table",     "F", 31:32, % length of table             % + 11 = 93
+        3,"one", "I", 54321:32,                                  % +  9 = 102
+        3,"two", "S", 13:32, "A long string",                    % + 22 = 124
+        4,"byte",      "b", 255:8,                               % +  7 = 131
+        4,"long",      "l", 1234567890:64,                       % + 14 = 145
+        5,"short",     "s", 655:16,                              % +  9 = 154
+        4,"bool",      "t", 1,                                   % +  7 = 161
+        6,"binary",    "x", 15:32, "a binary string",            % + 27 = 188
+        4,"void",      "V",                                      % +  6 = 194
+        5,"array",     "A", 23:32,                               % + 11 = 205
+        "I", 54321:32,                                           % +  5 = 210
+        "S", 13:32, "A long string"                              % + 18 = 228
+      >>),
     passed.
 
 %% Test that content frames don't exceed frame-max
@@ -580,32 +623,134 @@ sequence_with_content(Sequence) ->
                   rabbit_framing_amqp_0_9_1),
                 Sequence).
 
-test_topic_match(P, R) ->
-    test_topic_match(P, R, true).
-
-test_topic_match(P, R, Expected) ->
-    case rabbit_exchange_type_topic:topic_matches(list_to_binary(P),
-                                                  list_to_binary(R)) of
-        Expected ->
-            passed;
-        _ ->
-            {topic_match_failure, P, R}
-    end.
-
 test_topic_matching() ->
-    passed = test_topic_match("#", "test.test"),
-    passed = test_topic_match("#", ""),
-    passed = test_topic_match("#.T.R", "T.T.R"),
-    passed = test_topic_match("#.T.R", "T.R.T.R"),
-    passed = test_topic_match("#.Y.Z", "X.Y.Z.X.Y.Z"),
-    passed = test_topic_match("#.test", "test"),
-    passed = test_topic_match("#.test", "test.test"),
-    passed = test_topic_match("#.test", "ignored.test"),
-    passed = test_topic_match("#.test", "more.ignored.test"),
-    passed = test_topic_match("#.test", "notmatched", false),
-    passed = test_topic_match("#.z", "one.two.three.four", false),
+    XName = #resource{virtual_host = <<"/">>,
+                      kind = exchange,
+                      name = <<"test_exchange">>},
+    X = #exchange{name = XName, type = topic, durable = false,
+                  auto_delete = false, arguments = []},
+    %% create
+    rabbit_exchange_type_topic:validate(X),
+    exchange_op_callback(X, create, []),
+
+    %% add some bindings
+    Bindings = [#binding{source = XName,
+                         key = list_to_binary(Key),
+                         destination = #resource{virtual_host = <<"/">>,
+                                                 kind = queue,
+                                                 name = list_to_binary(Q)}} ||
+                   {Key, Q} <- [{"a.b.c",         "t1"},
+                                {"a.*.c",         "t2"},
+                                {"a.#.b",         "t3"},
+                                {"a.b.b.c",       "t4"},
+                                {"#",             "t5"},
+                                {"#.#",           "t6"},
+                                {"#.b",           "t7"},
+                                {"*.*",           "t8"},
+                                {"a.*",           "t9"},
+                                {"*.b.c",         "t10"},
+                                {"a.#",           "t11"},
+                                {"a.#.#",         "t12"},
+                                {"b.b.c",         "t13"},
+                                {"a.b.b",         "t14"},
+                                {"a.b",           "t15"},
+                                {"b.c",           "t16"},
+                                {"",              "t17"},
+                                {"*.*.*",         "t18"},
+                                {"vodka.martini", "t19"},
+                                {"a.b.c",         "t20"},
+                                {"*.#",           "t21"},
+                                {"#.*.#",         "t22"},
+                                {"*.#.#",         "t23"},
+                                {"#.#.#",         "t24"},
+                                {"*",             "t25"},
+                                {"#.b.#",         "t26"}]],
+    lists:foreach(fun (B) -> exchange_op_callback(X, add_binding, [B]) end,
+                  Bindings),
+
+    %% test some matches
+    test_topic_expect_match(
+      X, [{"a.b.c",               ["t1", "t2", "t5", "t6", "t10", "t11", "t12",
+                                   "t18", "t20", "t21", "t22", "t23", "t24",
+                                   "t26"]},
+          {"a.b",                 ["t3", "t5", "t6", "t7", "t8", "t9", "t11",
+                                   "t12", "t15", "t21", "t22", "t23", "t24",
+                                   "t26"]},
+          {"a.b.b",               ["t3", "t5", "t6", "t7", "t11", "t12", "t14",
+                                   "t18", "t21", "t22", "t23", "t24", "t26"]},
+          {"",                    ["t5", "t6", "t17", "t24"]},
+          {"b.c.c",               ["t5", "t6", "t18", "t21", "t22", "t23",
+                                   "t24", "t26"]},
+          {"a.a.a.a.a",           ["t5", "t6", "t11", "t12", "t21", "t22",
+                                   "t23", "t24"]},
+          {"vodka.gin",           ["t5", "t6", "t8", "t21", "t22", "t23",
+                                   "t24"]},
+          {"vodka.martini",       ["t5", "t6", "t8", "t19", "t21", "t22", "t23",
+                                   "t24"]},
+          {"b.b.c",               ["t5", "t6", "t10", "t13", "t18", "t21",
+                                   "t22", "t23", "t24", "t26"]},
+          {"nothing.here.at.all", ["t5", "t6", "t21", "t22", "t23", "t24"]},
+          {"oneword",             ["t5", "t6", "t21", "t22", "t23", "t24",
+                                   "t25"]}]),
+
+    %% remove some bindings
+    RemovedBindings = [lists:nth(1, Bindings), lists:nth(5, Bindings),
+                       lists:nth(11, Bindings), lists:nth(19, Bindings),
+                       lists:nth(21, Bindings)],
+    exchange_op_callback(X, remove_bindings, [RemovedBindings]),
+    RemainingBindings = ordsets:to_list(
+                          ordsets:subtract(ordsets:from_list(Bindings),
+                                           ordsets:from_list(RemovedBindings))),
+
+    %% test some matches
+    test_topic_expect_match(
+      X,
+      [{"a.b.c",               ["t2", "t6", "t10", "t12", "t18", "t20", "t22",
+                                "t23", "t24", "t26"]},
+       {"a.b",                 ["t3", "t6", "t7", "t8", "t9", "t12", "t15",
+                                "t22", "t23", "t24", "t26"]},
+       {"a.b.b",               ["t3", "t6", "t7", "t12", "t14", "t18", "t22",
+                                "t23", "t24", "t26"]},
+       {"",                    ["t6", "t17", "t24"]},
+       {"b.c.c",               ["t6", "t18", "t22", "t23", "t24", "t26"]},
+       {"a.a.a.a.a",           ["t6", "t12", "t22", "t23", "t24"]},
+       {"vodka.gin",           ["t6", "t8", "t22", "t23", "t24"]},
+       {"vodka.martini",       ["t6", "t8", "t22", "t23", "t24"]},
+       {"b.b.c",               ["t6", "t10", "t13", "t18", "t22", "t23",
+                                "t24", "t26"]},
+       {"nothing.here.at.all", ["t6", "t22", "t23", "t24"]},
+       {"oneword",             ["t6", "t22", "t23", "t24", "t25"]}]),
+
+    %% remove the entire exchange
+    exchange_op_callback(X, delete, [RemainingBindings]),
+    %% none should match now
+    test_topic_expect_match(X, [{"a.b.c", []}, {"b.b.c", []}, {"", []}]),
     passed.
 
+exchange_op_callback(X, Fun, Args) ->    % invokes callback Fun for X twice:
+    rabbit_misc:execute_mnesia_transaction(    % 1) tagged 'transaction'
+      fun () -> rabbit_exchange:callback(X, Fun, [transaction, X] ++ Args) end),
+    rabbit_exchange:callback(X, Fun, [none, X] ++ Args).  % 2) tagged 'none'
+
+%% For each {Key, Expected} pair, route a message with routing key Key
+%% through rabbit_exchange_type_topic for exchange X and require that the
+%% queues named in Expected come back (order and duplicates are ignored).
+test_topic_expect_match(X, List) ->
+    QRes = fun (Q) -> #resource{virtual_host = <<"/">>,
+                                kind         = queue,
+                                name         = list_to_binary(Q)}
+           end,
+    lists:foreach(
+      fun ({Key, Expected}) ->
+              Msg = rabbit_basic:message(X#exchange.name, list_to_binary(Key),
+                                         #'P_basic'{}, <<>>),
+              Routed = rabbit_exchange_type_topic:route(
+                         X, #delivery{mandatory = false, immediate = false,
+                                      sender    = self(), message   = Msg}),
+              Wanted = [QRes(Q) || Q <- Expected],
+              true = (lists:usort(Wanted) =:= lists:usort(Routed))
+      end, List).
+
 test_app_management() ->
     %% starting, stopping, status
     ok = control_action(stop_app, []),
@@ -713,7 +858,7 @@ test_log_management_during_startup() ->
     ok = delete_log_handlers([sasl_report_tty_h]),
     ok = case catch control_action(start_app, []) of
              ok -> exit({got_success_but_expected_failure,
-                        log_rotation_tty_no_handlers_test});
+                         log_rotation_tty_no_handlers_test});
              {error, {cannot_log_to_tty, _, _}} -> ok
          end,
 
@@ -738,8 +883,8 @@ test_log_management_during_startup() ->
     ok = add_log_handlers([{error_logger_file_h, MainLog}]),
     ok = case control_action(start_app, []) of
              ok -> exit({got_success_but_expected_failure,
-                        log_rotation_no_write_permission_dir_test});
-            {error, {cannot_log_to_file, _, _}} -> ok
+                         log_rotation_no_write_permission_dir_test});
+             {error, {cannot_log_to_file, _, _}} -> ok
          end,
 
     %% start application with logging to a subdirectory which
@@ -749,9 +894,9 @@ test_log_management_during_startup() ->
     ok = add_log_handlers([{error_logger_file_h, MainLog}]),
     ok = case control_action(start_app, []) of
              ok -> exit({got_success_but_expected_failure,
-                        log_rotatation_parent_dirs_test});
+                         log_rotatation_parent_dirs_test});
              {error, {cannot_log_to_file, _,
-               {error, {cannot_create_parent_dirs, _, eacces}}}} -> ok
+                      {error, {cannot_create_parent_dirs, _, eacces}}}} -> ok
          end,
     ok = set_permissions(TmpDir, 8#00700),
     ok = set_permissions(TmpLog, 8#00600),
@@ -771,22 +916,22 @@ test_log_management_during_startup() ->
     passed.
 
 test_option_parser() ->
-    % command and arguments should just pass through
+    %% command and arguments should just pass through
     ok = check_get_options({["mock_command", "arg1", "arg2"], []},
                            [], ["mock_command", "arg1", "arg2"]),
 
-    % get flags
+    %% get flags
     ok = check_get_options(
            {["mock_command", "arg1"], [{"-f", true}, {"-f2", false}]},
            [{flag, "-f"}, {flag, "-f2"}], ["mock_command", "arg1", "-f"]),
 
-    % get options
+    %% get options
     ok = check_get_options(
            {["mock_command"], [{"-foo", "bar"}, {"-baz", "notbaz"}]},
            [{option, "-foo", "notfoo"}, {option, "-baz", "notbaz"}],
            ["mock_command", "-foo", "bar"]),
 
-    % shuffled and interleaved arguments and options
+    %% shuffled and interleaved arguments and options
     ok = check_get_options(
            {["a1", "a2", "a3"], [{"-o1", "hello"}, {"-o2", "noto2"}, {"-f", true}]},
            [{option, "-o1", "noto1"}, {flag, "-f"}, {option, "-o2", "noto2"}],
@@ -795,7 +940,6 @@ test_option_parser() ->
     passed.
 
 test_cluster_management() ->
-
     %% 'cluster' and 'reset' should only work if the app is stopped
     {error, _} = control_action(cluster, []),
     {error, _} = control_action(reset, []),
@@ -843,13 +987,16 @@ test_cluster_management() ->
     ok = control_action(reset, []),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
+    ok = assert_disc_node(),
     ok = control_action(force_cluster, ["invalid1@invalid",
                                         "invalid2@invalid"]),
+    ok = assert_ram_node(),
 
     %% join a non-existing cluster as a ram node
     ok = control_action(reset, []),
     ok = control_action(force_cluster, ["invalid1@invalid",
                                         "invalid2@invalid"]),
+    ok = assert_ram_node(),
 
     SecondaryNode = rabbit_misc:makenode("hare"),
     case net_adm:ping(SecondaryNode) of
@@ -868,15 +1015,18 @@ test_cluster_management2(SecondaryNode) ->
     %% make a disk node
     ok = control_action(reset, []),
     ok = control_action(cluster, [NodeS]),
+    ok = assert_disc_node(),
     %% make a ram node
     ok = control_action(reset, []),
     ok = control_action(cluster, [SecondaryNodeS]),
+    ok = assert_ram_node(),
 
     %% join cluster as a ram node
     ok = control_action(reset, []),
     ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
+    ok = assert_ram_node(),
 
     %% change cluster config while remaining in same cluster
     ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]),
@@ -888,27 +1038,45 @@ test_cluster_management2(SecondaryNode) ->
                                         "invalid2@invalid"]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
+    ok = assert_ram_node(),
 
-    %% join empty cluster as a ram node
+    %% join empty cluster as a ram node (converts to disc)
     ok = control_action(cluster, []),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
+    ok = assert_disc_node(),
 
-    %% turn ram node into disk node
+    %% make a new ram node
     ok = control_action(reset, []),
+    ok = control_action(force_cluster, [SecondaryNodeS]),
+    ok = control_action(start_app, []),
+    ok = control_action(stop_app, []),
+    ok = assert_ram_node(),
+
+    %% turn ram node into disk node
     ok = control_action(cluster, [SecondaryNodeS, NodeS]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
+    ok = assert_disc_node(),
 
     %% convert a disk node into a ram node
+    ok = assert_disc_node(),
     ok = control_action(force_cluster, ["invalid1@invalid",
                                         "invalid2@invalid"]),
+    ok = assert_ram_node(),
+
+    %% make a new disk node
+    ok = control_action(force_reset, []),
+    ok = control_action(start_app, []),
+    ok = control_action(stop_app, []),
+    ok = assert_disc_node(),
 
     %% turn a disk node into a ram node
     ok = control_action(reset, []),
     ok = control_action(cluster, [SecondaryNodeS]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
+    ok = assert_ram_node(),
 
     %% NB: this will log an inconsistent_database error, which is harmless
     %% Turning cover on / off is OK even if we're not in general using cover,
@@ -934,6 +1102,10 @@ test_cluster_management2(SecondaryNode) ->
     {error, {no_running_cluster_nodes, _, _}} =
         control_action(reset, []),
 
+    %% attempt to change type when no other node is alive
+    {error, {no_running_cluster_nodes, _, _}} =
+        control_action(cluster, [SecondaryNodeS]),
+
     %% leave system clustered, with the secondary node as a ram node
     ok = control_action(force_reset, []),
     ok = control_action(start_app, []),
@@ -962,15 +1134,25 @@ test_user_management() ->
         control_action(list_permissions, [], [{"-p", "/testhost"}]),
     {error, {invalid_regexp, _, _}} =
         control_action(set_permissions, ["guest", "+foo", ".*", ".*"]),
+    {error, {no_such_user, _}} =
+        control_action(set_user_tags, ["foo", "bar"]),
 
     %% user creation
     ok = control_action(add_user, ["foo", "bar"]),
     {error, {user_already_exists, _}} =
         control_action(add_user, ["foo", "bar"]),
     ok = control_action(change_password, ["foo", "baz"]),
-    ok = control_action(set_admin, ["foo"]),
-    ok = control_action(clear_admin, ["foo"]),
-    ok = control_action(list_users, []),
+
+    TestTags = fun (Tags) ->
+                       Args = ["foo" | [atom_to_list(T) || T <- Tags]],
+                       ok = control_action(set_user_tags, Args),
+                       {ok, #internal_user{tags = Tags}} =
+                           rabbit_auth_backend_internal:lookup_user(<<"foo">>),
+                       ok = control_action(list_users, [])
+               end,
+    TestTags([foo, bar, baz]),
+    TestTags([administrator]),
+    TestTags([]),
 
     %% vhost creation
     ok = control_action(add_vhost, ["/testhost"]),
@@ -1014,9 +1196,10 @@ test_user_management() ->
 test_server_status() ->
     %% create a few things so there is some useful information to list
     Writer = spawn(fun () -> receive shutdown -> ok end end),
-    {ok, Ch} = rabbit_channel:start_link(1, self(), Writer,
-                                         user(<<"user">>), <<"/">>, self(),
-                                         fun (_) -> {ok, self()} end),
+    {ok, Ch} = rabbit_channel:start_link(
+                 1, self(), Writer, self(), rabbit_framing_amqp_0_9_1,
+                 user(<<"user">>), <<"/">>, [], self(),
+                 fun (_) -> {ok, self()} end),
     [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>],
                         {new, Queue = #amqqueue{}} <-
                             [rabbit_amqqueue:declare(
@@ -1038,7 +1221,7 @@ test_server_status() ->
     [_|_] = rabbit_binding:list_for_source(
               rabbit_misc:r(<<"/">>, exchange, <<"">>)),
     [_] = rabbit_binding:list_for_destination(
-              rabbit_misc:r(<<"/">>, queue, <<"foo">>)),
+            rabbit_misc:r(<<"/">>, queue, <<"foo">>)),
     [_] = rabbit_binding:list_for_source_and_destination(
             rabbit_misc:r(<<"/">>, exchange, <<"">>),
             rabbit_misc:r(<<"/">>, queue, <<"foo">>)),
@@ -1071,12 +1254,19 @@ test_server_status() ->
 
     passed.
 
-test_spawn(Receiver) ->
+%% Relay every {send_command, Method} to Pid as Method; stop on 'shutdown'.
+test_writer(Pid) ->
+    receive {send_command, Method} -> Pid ! Method, test_writer(Pid);
+            shutdown               -> ok
+    end.
+
+test_spawn() ->
     Me = self(),
-    Writer = spawn(fun () -> Receiver(Me) end),
-    {ok, Ch} = rabbit_channel:start_link(1, Me, Writer,
-                                         user(<<"guest">>), <<"/">>, self(),
-                                         fun (_) -> {ok, self()} end),
+    Writer = spawn(fun () -> test_writer(Me) end),
+    {ok, Ch} = rabbit_channel:start_link(
+                 1, Me, Writer, Me, rabbit_framing_amqp_0_9_1,
+                 user(<<"guest">>), <<"/">>, [], self(),
+                 fun (_) -> {ok, self()} end),
     ok = rabbit_channel:do(Ch, #'channel.open'{}),
     receive #'channel.open_ok'{} -> ok
     after 1000 -> throw(failed_to_receive_channel_open_ok)
@@ -1085,25 +1275,14 @@ test_spawn(Receiver) ->
 
 user(Username) ->
     #user{username     = Username,
-          is_admin     = true,
+          tags         = [administrator],
           auth_backend = rabbit_auth_backend_internal,
           impl         = #internal_user{username = Username,
-                                        is_admin = true}}.
-
-test_statistics_receiver(Pid) ->
-    receive
-        shutdown ->
-            ok;
-        {send_command, Method} ->
-            Pid ! Method,
-            test_statistics_receiver(Pid)
-    end.
+                                        tags     = [administrator]}}.
 
 test_statistics_event_receiver(Pid) ->
     receive
-        Foo ->
-            Pid ! Foo,
-            test_statistics_event_receiver(Pid)
+        Foo -> Pid ! Foo, test_statistics_event_receiver(Pid)
     end.
 
 test_statistics_receive_event(Ch, Matcher) ->
@@ -1120,6 +1299,66 @@ test_statistics_receive_event1(Ch, Matcher) ->
     after 1000 -> throw(failed_to_receive_event)
     end.
 
+%% Confirms test: publish a persistent message that routes to two
+%% durable queues, crash one of those queues, and expect to be sent a
+%% basic.nack and no basic.ack.
+test_confirms() ->
+    {_Writer, Ch} = test_spawn(),
+    %% AMQP shortstr fields are binaries; share one key for bind + publish
+    Key = <<"magic">>,
+    DeclareBindDurableQueue =
+        fun() ->
+                rabbit_channel:do(Ch, #'queue.declare'{durable = true}),
+                receive #'queue.declare_ok'{queue = Q0} ->
+                        rabbit_channel:do(Ch, #'queue.bind'{
+                                            queue = Q0,
+                                            exchange = <<"amq.direct">>,
+                                            routing_key = Key }),
+                        receive #'queue.bind_ok'{} -> Q0
+                        after 1000 -> throw(failed_to_bind_queue)
+                        end
+                after 1000 -> throw(failed_to_declare_queue)
+                end
+        end,
+    %% Declare and bind two queues
+    QName1 = DeclareBindDurableQueue(),
+    QName2 = DeclareBindDurableQueue(),
+    %% Get the first one's pid (we'll crash it later)
+    {ok, Q1} = rabbit_amqqueue:lookup(rabbit_misc:r(<<"/">>, queue, QName1)),
+    QPid1 = Q1#amqqueue.pid,
+    %% Enable confirms
+    rabbit_channel:do(Ch, #'confirm.select'{}),
+    receive #'confirm.select_ok'{} -> ok
+    after 1000 -> throw(failed_to_enable_confirms)
+    end,
+    %% Publish a message
+    rabbit_channel:do(Ch, #'basic.publish'{exchange = <<"amq.direct">>,
+                                           routing_key = Key},
+                      rabbit_basic:build_content(
+                        #'P_basic'{delivery_mode = 2}, <<"">>)),
+    %% Crash the queue
+    QPid1 ! boom,
+    %% Wait for a nack
+    receive
+        #'basic.nack'{} -> ok;
+        #'basic.ack'{}  -> throw(received_ack_instead_of_nack)
+    after 2000 -> throw(did_not_receive_nack)
+    end,
+    %% ... and make sure no ack turns up afterwards
+    receive
+        #'basic.ack'{} -> throw(received_ack_when_none_expected)
+    after 1000 -> ok
+    end,
+    %% Cleanup
+    rabbit_channel:do(Ch, #'queue.delete'{queue = QName2}),
+    receive #'queue.delete_ok'{} -> ok
+    after 1000 -> throw(failed_to_cleanup_queue)
+    end,
+    unlink(Ch),
+    ok = rabbit_channel:shutdown(Ch),
+
+    passed.
+
 test_statistics() ->
     application:set_env(rabbit, collect_statistics, fine),
 
@@ -1127,7 +1366,7 @@ test_statistics() ->
     %% by far the most complex code though.
 
     %% Set up a channel and queue
-    {_Writer, Ch} = test_spawn(fun test_statistics_receiver/1),
+    {_Writer, Ch} = test_spawn(),
     rabbit_channel:do(Ch, #'queue.declare'{}),
     QName = receive #'queue.declare_ok'{queue = Q0} ->
                     Q0
@@ -1200,9 +1439,9 @@ test_delegates_async(SecondaryNode) ->
 make_responder(FMsg) -> make_responder(FMsg, timeout).
 make_responder(FMsg, Throw) ->
     fun () ->
-        receive Msg -> FMsg(Msg)
-        after 1000 -> throw(Throw)
-        end
+            receive Msg -> FMsg(Msg)
+            after 1000 -> throw(Throw)
+            end
     end.
 
 spawn_responders(Node, Responder, Count) ->
@@ -1213,10 +1452,10 @@ await_response(0) ->
 await_response(Count) ->
     receive
         response -> ok,
-        await_response(Count - 1)
+                    await_response(Count - 1)
     after 1000 ->
-        io:format("Async reply not received~n"),
-        throw(timeout)
+            io:format("Async reply not received~n"),
+            throw(timeout)
     end.
 
 must_exit(Fun) ->
@@ -1228,11 +1467,11 @@ must_exit(Fun) ->
     end.
 
 test_delegates_sync(SecondaryNode) ->
-    Sender = fun (Pid) -> gen_server:call(Pid, invoked) end,
+    Sender = fun (Pid) -> gen_server:call(Pid, invoked, infinity) end,
     BadSender = fun (_Pid) -> exit(exception) end,
 
     Responder = make_responder(fun ({'$gen_call', From, invoked}) ->
-                                   gen_server:reply(From, response)
+                                       gen_server:reply(From, response)
                                end),
 
     BadResponder = make_responder(fun ({'$gen_call', From, invoked}) ->
@@ -1244,7 +1483,7 @@ test_delegates_sync(SecondaryNode) ->
 
     must_exit(fun () -> delegate:invoke(spawn(BadResponder), BadSender) end),
     must_exit(fun () ->
-        delegate:invoke(spawn(SecondaryNode, BadResponder), BadSender) end),
+                      delegate:invoke(spawn(SecondaryNode, BadResponder), BadSender) end),
 
     LocalGoodPids = spawn_responders(node(), Responder, 2),
     RemoteGoodPids = spawn_responders(SecondaryNode, Responder, 2),
@@ -1278,7 +1517,52 @@ test_delegates_sync(SecondaryNode) ->
 
     passed.
 
-%---------------------------------------------------------------------
+%% Declare ?CLEANUP_QUEUE_NAME, stop and start rabbit, then passively
+%% re-declare the same queue on the old channel. The test passes when a
+%% channel.close carrying ?NOT_FOUND arrives.
+test_queue_cleanup(_SecondaryNode) ->
+    {_Writer, Ch} = test_spawn(),
+    Declare = #'queue.declare'{queue = ?CLEANUP_QUEUE_NAME},
+    rabbit_channel:do(Ch, Declare),
+    receive #'queue.declare_ok'{queue = ?CLEANUP_QUEUE_NAME} -> ok
+    after 1000 -> throw(failed_to_receive_queue_declare_ok)
+    end,
+    %% restart rabbit while Ch is still in use
+    rabbit:stop(),
+    rabbit:start(),
+    rabbit_channel:do(Ch, Declare#'queue.declare'{passive = true}),
+    receive #'channel.close'{reply_code = ?NOT_FOUND} -> ok
+    after 2000 -> throw(failed_to_receive_channel_exit)
+    end,
+    passed.
+
+test_declare_on_dead_queue(SecondaryNode) ->  % re-declare over a killed queue
+    QueueName = rabbit_misc:r(<<"/">>, queue, ?CLEANUP_QUEUE_NAME),
+    Self = self(),
+    Pid = spawn(SecondaryNode,       % on SecondaryNode: declare the queue ...
+                fun () ->
+                        {new, #amqqueue{name = QueueName, pid = QPid}} =
+                            rabbit_amqqueue:declare(QueueName, false, false, [],
+                                                    none),
+                        exit(QPid, kill),        % ... kill its process ...
+                        Self ! {self(), killed, QPid}  % ... and report back
+                end),
+    receive
+        {Pid, killed, QPid} ->
+            {existing, #amqqueue{name = QueueName,  % 1st re-declare: expects
+                                 pid = QPid}} =     % 'existing' with old pid
+                rabbit_amqqueue:declare(QueueName, false, false, [], none),
+            false = rabbit_misc:is_process_alive(QPid),
+            {new, Q} = rabbit_amqqueue:declare(QueueName, false, false, [],
+                                               none),  % 2nd: expects 'new'
+            true = rabbit_misc:is_process_alive(Q#amqqueue.pid),
+            {ok, 0} = rabbit_amqqueue:delete(Q, false, false),  % tidy up
+            passed
+    after 2000 ->
+            throw(failed_to_create_and_kill_queue)
+    end.
+
+%%---------------------------------------------------------------------
 
 control_action(Command, Args) ->
     control_action(Command, node(), Args, default_options()).
@@ -1341,7 +1625,7 @@ test_logs_working(MainLogFile, SaslLogFile) ->
     ok = rabbit_log:error("foo bar"),
     ok = error_logger:error_report(crash_report, [foo, bar]),
     %% give the error loggers some time to catch up
-    timer:sleep(50),
+    timer:sleep(100),
     [true, true] = non_empty_files([MainLogFile, SaslLogFile]),
     ok.
 
@@ -1360,6 +1644,18 @@ clean_logs(Files, Suffix) ->
      end || File <- Files],
     ok.
 
+%% Exit with not_ram_node when rabbit_mnesia reports a disc node.
+assert_ram_node() ->
+    case rabbit_mnesia:is_disc_node() of false -> ok;
+                                         true  -> exit(not_ram_node)
+    end.
+
+%% Exit with not_disc_node unless rabbit_mnesia reports a disc node.
+assert_disc_node() ->
+    case rabbit_mnesia:is_disc_node() of false -> exit(not_disc_node);
+                                         true  -> ok
+    end.
+
 delete_file(File) ->
     case file:delete(File) of
         ok              -> ok;
@@ -1391,23 +1687,42 @@ test_file_handle_cache() ->
     ok = file_handle_cache:set_limit(5), %% 1 or 2 sockets, 2 msg_stores
     TmpDir = filename:join(rabbit_mnesia:dir(), "tmp"),
     ok = filelib:ensure_dir(filename:join(TmpDir, "nothing")),
+    [Src1, Dst1, Src2, Dst2] = Files =
+        [filename:join(TmpDir, Str) || Str <- ["file1", "file2", "file3", "file4"]],
+    Content = <<"foo">>,
+    CopyFun = fun (Src, Dst) ->
+                      ok = rabbit_misc:write_file(Src, Content),
+                      {ok, SrcHdl} = file_handle_cache:open(Src, [read], []),
+                      {ok, DstHdl} = file_handle_cache:open(Dst, [write], []),
+                      Size = size(Content),
+                      {ok, Size} = file_handle_cache:copy(SrcHdl, DstHdl, Size),
+                      ok = file_handle_cache:delete(SrcHdl),
+                      ok = file_handle_cache:delete(DstHdl)
+              end,
     Pid = spawn(fun () -> {ok, Hdl} = file_handle_cache:open(
-                                        filename:join(TmpDir, "file3"),
+                                        filename:join(TmpDir, "file5"),
                                         [write], []),
-                          receive close -> ok end,
-                          file_handle_cache:delete(Hdl)
+                          receive {next, Pid1} -> Pid1 ! {next, self()} end,
+                          file_handle_cache:delete(Hdl),
+                          %% This will block and never return, so we
+                          %% exercise the fhc tidying up the pending
+                          %% queue on the death of a process.
+                          ok = CopyFun(Src1, Dst1)
                 end),
-    Src = filename:join(TmpDir, "file1"),
-    Dst = filename:join(TmpDir, "file2"),
-    Content = <<"foo">>,
-    ok = file:write_file(Src, Content),
-    {ok, SrcHdl} = file_handle_cache:open(Src, [read], []),
-    {ok, DstHdl} = file_handle_cache:open(Dst, [write], []),
-    Size = size(Content),
-    {ok, Size} = file_handle_cache:copy(SrcHdl, DstHdl, Size),
-    ok = file_handle_cache:delete(SrcHdl),
-    file_handle_cache:delete(DstHdl),
-    Pid ! close,
+    ok = CopyFun(Src1, Dst1),
+    ok = file_handle_cache:set_limit(2),
+    Pid ! {next, self()},
+    receive {next, Pid} -> ok end,
+    timer:sleep(100),
+    Pid1 = spawn(fun () -> CopyFun(Src2, Dst2) end),
+    timer:sleep(100),
+    erlang:monitor(process, Pid),
+    erlang:monitor(process, Pid1),
+    exit(Pid, kill),
+    exit(Pid1, kill),
+    receive {'DOWN', _MRef, process, Pid, _Reason} -> ok end,
+    receive {'DOWN', _MRef1, process, Pid1, _Reason1} -> ok end,
+    [file:delete(File) || File <- Files],
     ok = file_handle_cache:set_limit(Limit),
     passed.
 
@@ -1432,6 +1747,10 @@ test_backing_queue() ->
             passed = test_queue_recover(),
             application:set_env(rabbit, queue_index_max_journal_entries,
                                 MaxJournal, infinity),
+            %% We will have restarted the message store, and thus changed
+            %% the order of the children of rabbit_sup. This will cause
+            %% problems if there are subsequent failures - see bug 24262.
+            ok = restart_app(),
             passed;
         _ ->
             passed
@@ -1442,50 +1761,50 @@ restart_msg_store_empty() ->
     ok = rabbit_variable_queue:start_msg_store(
            undefined, {fun (ok) -> finished end, ok}).
 
-guid_bin(X) ->
+msg_id_bin(X) ->   %% test message id: MD5 digest of term_to_binary(X)
     erlang:md5(term_to_binary(X)).
 
 msg_store_client_init(MsgStore, Ref) ->
     rabbit_msg_store:client_init(MsgStore, Ref, undefined, undefined).
 
-msg_store_contains(Atom, Guids, MSCState) ->
+msg_store_contains(Atom, MsgIds, MSCState) ->   %% Atom: the expected answer
     Atom = lists:foldl(
-             fun (Guid, Atom1) when Atom1 =:= Atom ->
-                     rabbit_msg_store:contains(Guid, MSCState) end,
-             Atom, Guids).
+             fun (MsgId, Atom1) when Atom1 =:= Atom -> %% no clause on mismatch
+                     rabbit_msg_store:contains(MsgId, MSCState) end,
+             Atom, MsgIds).   %% outer match also checks the last answer
 
-msg_store_sync(Guids, MSCState) ->
+msg_store_sync(MsgIds, MSCState) -> %% ok on callback; throw(timeout) after 10s
     Ref = make_ref(),
     Self = self(),
-    ok = rabbit_msg_store:sync(Guids, fun () -> Self ! {sync, Ref} end,
+    ok = rabbit_msg_store:sync(MsgIds, fun () -> Self ! {sync, Ref} end,
                                MSCState),
     receive
         {sync, Ref} -> ok
     after
         10000 ->
-            io:format("Sync from msg_store missing for guids ~p~n", [Guids]),
+            io:format("Sync from msg_store missing for msg_ids ~p~n", [MsgIds]),
             throw(timeout)
     end.
 
-msg_store_read(Guids, MSCState) ->
-    lists:foldl(fun (Guid, MSCStateM) ->
-                        {{ok, Guid}, MSCStateN} = rabbit_msg_store:read(
-                                                    Guid, MSCStateM),
+msg_store_read(MsgIds, MSCState) ->   %% returns the final client state
+    lists:foldl(fun (MsgId, MSCStateM) ->   %% read must yield {ok, MsgId}
+                        {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read(
+                                                     MsgId, MSCStateM),
                         MSCStateN
-                end, MSCState, Guids).
+                end, MSCState, MsgIds).
 
-msg_store_write(Guids, MSCState) ->
-    ok = lists:foldl(
-           fun (Guid, ok) -> rabbit_msg_store:write(Guid, Guid, MSCState) end,
-           ok, Guids).
+msg_store_write(MsgIds, MSCState) ->   %% ok iff every write returned ok
+    ok = lists:foldl(fun (MsgId, ok) ->   %% previous result must be ok
+                             rabbit_msg_store:write(MsgId, MsgId, MSCState)
+                     end, ok, MsgIds).   %% MsgId is passed as both arguments
 
-msg_store_remove(Guids, MSCState) ->
-    rabbit_msg_store:remove(Guids, MSCState).
+msg_store_remove(MsgIds, MSCState) ->   %% delegates to rabbit_msg_store
+    rabbit_msg_store:remove(MsgIds, MSCState).
 
-msg_store_remove(MsgStore, Ref, Guids) ->
+msg_store_remove(MsgStore, Ref, MsgIds) ->   %% remove/2 run via a store client
     with_msg_store_client(MsgStore, Ref,
                           fun (MSCStateM) ->
-                                  ok = msg_store_remove(Guids, MSCStateM),
+                                  ok = msg_store_remove(MsgIds, MSCStateM),
                                   MSCStateM
                           end).
 
@@ -1495,140 +1814,138 @@ with_msg_store_client(MsgStore, Ref, Fun) ->
 
 foreach_with_msg_store_client(MsgStore, Ref, Fun, L) ->
     rabbit_msg_store:client_terminate(
-      lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MSCState) end,
+      lists:foldl(fun (MsgId, MSCState) -> Fun(MsgId, MSCState) end, %% thread state
                   msg_store_client_init(MsgStore, Ref), L)).
 
 test_msg_store() ->
     restart_msg_store_empty(),
     Self = self(),
-    Guids = [guid_bin(M) || M <- lists:seq(1,100)],
-    {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids),
+    MsgIds = [msg_id_bin(M) || M <- lists:seq(1,100)],
+    {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds),
     Ref = rabbit_guid:guid(),
     MSCState = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref),
     %% check we don't contain any of the msgs we're about to publish
-    false = msg_store_contains(false, Guids, MSCState),
+    false = msg_store_contains(false, MsgIds, MSCState),
     %% publish the first half
-    ok = msg_store_write(Guids1stHalf, MSCState),
+    ok = msg_store_write(MsgIds1stHalf, MSCState),
     %% sync on the first half
-    ok = msg_store_sync(Guids1stHalf, MSCState),
+    ok = msg_store_sync(MsgIds1stHalf, MSCState),
     %% publish the second half
-    ok = msg_store_write(Guids2ndHalf, MSCState),
+    ok = msg_store_write(MsgIds2ndHalf, MSCState),
     %% sync on the first half again - the msg_store will be dirty, but
     %% we won't need the fsync
-    ok = msg_store_sync(Guids1stHalf, MSCState),
+    ok = msg_store_sync(MsgIds1stHalf, MSCState),
     %% check they're all in there
-    true = msg_store_contains(true, Guids, MSCState),
+    true = msg_store_contains(true, MsgIds, MSCState),
     %% publish the latter half twice so we hit the caching and ref count code
-    ok = msg_store_write(Guids2ndHalf, MSCState),
+    ok = msg_store_write(MsgIds2ndHalf, MSCState),
     %% check they're still all in there
-    true = msg_store_contains(true, Guids, MSCState),
+    true = msg_store_contains(true, MsgIds, MSCState),
     %% sync on the 2nd half, but do lots of individual syncs to try
     %% and cause coalescing to happen
     ok = lists:foldl(
-           fun (Guid, ok) -> rabbit_msg_store:sync(
-                               [Guid], fun () -> Self ! {sync, Guid} end,
-                               MSCState)
-           end, ok, Guids2ndHalf),
+           fun (MsgId, ok) -> rabbit_msg_store:sync(
+                                [MsgId], fun () -> Self ! {sync, MsgId} end,
+                                MSCState)
+           end, ok, MsgIds2ndHalf),
     lists:foldl(
-      fun(Guid, ok) ->
+      fun(MsgId, ok) ->
               receive
-                  {sync, Guid} -> ok
+                  {sync, MsgId} -> ok
               after
                   10000 ->
-                      io:format("Sync from msg_store missing (guid: ~p)~n",
-                                [Guid]),
+                      io:format("Sync from msg_store missing (msg_id: ~p)~n",
+                                [MsgId]),
                       throw(timeout)
               end
-      end, ok, Guids2ndHalf),
+      end, ok, MsgIds2ndHalf),
     %% it's very likely we're not dirty here, so the 1st half sync
     %% should hit a different code path
-    ok = msg_store_sync(Guids1stHalf, MSCState),
+    ok = msg_store_sync(MsgIds1stHalf, MSCState),
     %% read them all
-    MSCState1 = msg_store_read(Guids, MSCState),
+    MSCState1 = msg_store_read(MsgIds, MSCState),
     %% read them all again - this will hit the cache, not disk
-    MSCState2 = msg_store_read(Guids, MSCState1),
+    MSCState2 = msg_store_read(MsgIds, MSCState1),
     %% remove them all
-    ok = rabbit_msg_store:remove(Guids, MSCState2),
+    ok = rabbit_msg_store:remove(MsgIds, MSCState2),
     %% check first half doesn't exist
-    false = msg_store_contains(false, Guids1stHalf, MSCState2),
+    false = msg_store_contains(false, MsgIds1stHalf, MSCState2),
     %% check second half does exist
-    true = msg_store_contains(true, Guids2ndHalf, MSCState2),
+    true = msg_store_contains(true, MsgIds2ndHalf, MSCState2),
     %% read the second half again
-    MSCState3 = msg_store_read(Guids2ndHalf, MSCState2),
-    %% release the second half, just for fun (aka code coverage)
-    ok = rabbit_msg_store:release(Guids2ndHalf, MSCState3),
+    MSCState3 = msg_store_read(MsgIds2ndHalf, MSCState2),
     %% read the second half again, just for fun (aka code coverage)
-    MSCState4 = msg_store_read(Guids2ndHalf, MSCState3),
+    MSCState4 = msg_store_read(MsgIds2ndHalf, MSCState3),
     ok = rabbit_msg_store:client_terminate(MSCState4),
     %% stop and restart, preserving every other msg in 2nd half
     ok = rabbit_variable_queue:stop_msg_store(),
     ok = rabbit_variable_queue:start_msg_store(
            [], {fun ([]) -> finished;
-                    ([Guid|GuidsTail])
-                      when length(GuidsTail) rem 2 == 0 ->
-                        {Guid, 1, GuidsTail};
-                    ([Guid|GuidsTail]) ->
-                        {Guid, 0, GuidsTail}
-                end, Guids2ndHalf}),
+                    ([MsgId|MsgIdsTail])
+                      when length(MsgIdsTail) rem 2 == 0 ->
+                        {MsgId, 1, MsgIdsTail};
+                    ([MsgId|MsgIdsTail]) ->
+                        {MsgId, 0, MsgIdsTail}
+                end, MsgIds2ndHalf}),
     MSCState5 = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref),
     %% check we have the right msgs left
     lists:foldl(
-      fun (Guid, Bool) ->
-              not(Bool = rabbit_msg_store:contains(Guid, MSCState5))
-      end, false, Guids2ndHalf),
+      fun (MsgId, Bool) ->
+              not(Bool = rabbit_msg_store:contains(MsgId, MSCState5))
+      end, false, MsgIds2ndHalf),
     ok = rabbit_msg_store:client_terminate(MSCState5),
     %% restart empty
     restart_msg_store_empty(),
     MSCState6 = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref),
     %% check we don't contain any of the msgs
-    false = msg_store_contains(false, Guids, MSCState6),
+    false = msg_store_contains(false, MsgIds, MSCState6),
     %% publish the first half again
-    ok = msg_store_write(Guids1stHalf, MSCState6),
+    ok = msg_store_write(MsgIds1stHalf, MSCState6),
     %% this should force some sort of sync internally otherwise misread
     ok = rabbit_msg_store:client_terminate(
-           msg_store_read(Guids1stHalf, MSCState6)),
+           msg_store_read(MsgIds1stHalf, MSCState6)),
     MSCState7 = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref),
-    ok = rabbit_msg_store:remove(Guids1stHalf, MSCState7),
+    ok = rabbit_msg_store:remove(MsgIds1stHalf, MSCState7),
     ok = rabbit_msg_store:client_terminate(MSCState7),
     %% restart empty
-    restart_msg_store_empty(), %% now safe to reuse guids
+    restart_msg_store_empty(), %% now safe to reuse msg_ids
     %% push a lot of msgs in... at least 100 files worth
     {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit),
     PayloadSizeBits = 65536,
     BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)),
-    GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)],
+    MsgIdsBig = [msg_id_bin(X) || X <- lists:seq(1, BigCount)],
     Payload = << 0:PayloadSizeBits >>,
     ok = with_msg_store_client(
            ?PERSISTENT_MSG_STORE, Ref,
            fun (MSCStateM) ->
-                   [ok = rabbit_msg_store:write(Guid, Payload, MSCStateM) ||
-                       Guid <- GuidsBig],
+                   [ok = rabbit_msg_store:write(MsgId, Payload, MSCStateM) ||
+                       MsgId <- MsgIdsBig],
                    MSCStateM
            end),
     %% now read them to ensure we hit the fast client-side reading
     ok = foreach_with_msg_store_client(
            ?PERSISTENT_MSG_STORE, Ref,
-           fun (Guid, MSCStateM) ->
+           fun (MsgId, MSCStateM) ->
                    {{ok, Payload}, MSCStateN} = rabbit_msg_store:read(
-                                                  Guid, MSCStateM),
+                                                  MsgId, MSCStateM),
                    MSCStateN
-           end, GuidsBig),
+           end, MsgIdsBig),
     %% .., then 3s by 1...
     ok = msg_store_remove(?PERSISTENT_MSG_STORE, Ref,
-                          [guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]),
+                          [msg_id_bin(X) || X <- lists:seq(BigCount, 1, -3)]),
     %% .., then remove 3s by 2, from the young end first. This hits
     %% GC (under 50% good data left, but no empty files. Must GC).
     ok = msg_store_remove(?PERSISTENT_MSG_STORE, Ref,
-                          [guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]),
+                          [msg_id_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]),
     %% .., then remove 3s by 3, from the young end first. This hits
     %% GC...
     ok = msg_store_remove(?PERSISTENT_MSG_STORE, Ref,
-                          [guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]),
+                          [msg_id_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]),
     %% ensure empty
     ok = with_msg_store_client(
            ?PERSISTENT_MSG_STORE, Ref,
            fun (MSCStateM) ->
-                   false = msg_store_contains(false, GuidsBig, MSCStateM),
+                   false = msg_store_contains(false, MsgIdsBig, MSCStateM),
                    MSCStateM
            end),
     %% restart empty
@@ -1648,8 +1965,8 @@ init_test_queue() ->
     PersistentClient = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef),
     Res = rabbit_queue_index:recover(
             TestQueue, Terms, false,
-            fun (Guid) ->
-                    rabbit_msg_store:contains(Guid, PersistentClient)
+            fun (MsgId) ->
+                    rabbit_msg_store:contains(MsgId, PersistentClient)
             end,
             fun nop/1),
     ok = rabbit_msg_store:client_delete_and_terminate(PersistentClient),
@@ -1673,6 +1990,10 @@ with_empty_test_queue(Fun) ->
     {0, Qi} = init_test_queue(),
     rabbit_queue_index:delete_and_terminate(Fun(Qi)).
 
+restart_app() ->   %% bounce rabbit; result is that of rabbit:start()
+    rabbit:stop(),  %% return value is not checked
+    rabbit:start().
+
 queue_index_publish(SeqIds, Persistent, Qi) ->
     Ref = rabbit_guid:guid(),
     MsgStore = case Persistent of
@@ -1680,25 +2001,25 @@ queue_index_publish(SeqIds, Persistent, Qi) ->
                    false -> ?TRANSIENT_MSG_STORE
                end,
     MSCState = msg_store_client_init(MsgStore, Ref),
-    {A, B = [{_SeqId, LastGuidWritten} | _]} =
+    {A, B = [{_SeqId, LastMsgIdWritten} | _]} =
         lists:foldl(
-          fun (SeqId, {QiN, SeqIdsGuidsAcc}) ->
-                  Guid = rabbit_guid:guid(),
+          fun (SeqId, {QiN, SeqIdsMsgIdsAcc}) ->
+                  MsgId = rabbit_guid:guid(),
                   QiM = rabbit_queue_index:publish(
-                          Guid, SeqId, #message_properties{}, Persistent, QiN),
-                  ok = rabbit_msg_store:write(Guid, Guid, MSCState),
-                  {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc]}
+                          MsgId, SeqId, #message_properties{}, Persistent, QiN),
+                  ok = rabbit_msg_store:write(MsgId, MsgId, MSCState),
+                  {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]}
           end, {Qi, []}, SeqIds),
     %% do this just to force all of the publishes through to the msg_store:
-    true = rabbit_msg_store:contains(LastGuidWritten, MSCState),
+    true = rabbit_msg_store:contains(LastMsgIdWritten, MSCState),
     ok = rabbit_msg_store:client_delete_and_terminate(MSCState),
     {A, B}.
 
 verify_read_with_published(_Delivered, _Persistent, [], _) ->
     ok;
 verify_read_with_published(Delivered, Persistent,
-                           [{Guid, SeqId, _Props, Persistent, Delivered}|Read],
-                           [{SeqId, Guid}|Published]) ->
+                           [{MsgId, SeqId, _Props, Persistent, Delivered}|Read],
+                           [{SeqId, MsgId}|Published]) -> %% heads must agree
     verify_read_with_published(Delivered, Persistent, Read, Published);
 verify_read_with_published(_Delivered, _Persistent, _Read, _Published) ->
     ko.
@@ -1706,10 +2027,10 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) ->
 test_queue_index_props() ->
     with_empty_test_queue(
       fun(Qi0) ->
-              Guid = rabbit_guid:guid(),
+              MsgId = rabbit_guid:guid(),
               Props = #message_properties{expiry=12345},
-              Qi1 = rabbit_queue_index:publish(Guid, 1, Props, true, Qi0),
-              {[{Guid, 1, Props, _, _}], Qi2} =
+              Qi1 = rabbit_queue_index:publish(MsgId, 1, Props, true, Qi0),
+              {[{MsgId, 1, Props, _, _}], Qi2} =
                   rabbit_queue_index:read(1, 2, Qi1),
               Qi2
       end),
@@ -1731,19 +2052,19 @@ test_queue_index() ->
     with_empty_test_queue(
       fun (Qi0) ->
               {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0),
-              {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1),
+              {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1),
               {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2),
               {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3),
               ok = verify_read_with_published(false, false, ReadA,
-                                              lists:reverse(SeqIdsGuidsA)),
+                                              lists:reverse(SeqIdsMsgIdsA)),
               %% should get length back as 0, as all the msgs were transient
               {0, Qi6} = restart_test_queue(Qi4),
               {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6),
-              {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7),
+              {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7),
               {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8),
               {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9),
               ok = verify_read_with_published(false, true, ReadB,
-                                              lists:reverse(SeqIdsGuidsB)),
+                                              lists:reverse(SeqIdsMsgIdsB)),
               %% should get length back as MostOfASegment
               LenB = length(SeqIdsB),
               {LenB, Qi12} = restart_test_queue(Qi10),
@@ -1751,7 +2072,7 @@ test_queue_index() ->
               Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13),
               {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14),
               ok = verify_read_with_published(true, true, ReadC,
-                                              lists:reverse(SeqIdsGuidsB)),
+                                              lists:reverse(SeqIdsMsgIdsB)),
               Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15),
               Qi17 = rabbit_queue_index:flush(Qi16),
               %% Everything will have gone now because #pubs == #acks
@@ -1767,12 +2088,12 @@ test_queue_index() ->
     %% a) partial pub+del+ack, then move to new segment
     with_empty_test_queue(
       fun (Qi0) ->
-              {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC,
+              {Qi1, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC,
                                                          false, Qi0),
               Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1),
               Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2),
               Qi4 = rabbit_queue_index:flush(Qi3),
-              {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize],
+              {Qi5, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize],
                                                           false, Qi4),
               Qi5
       end),
@@ -1780,10 +2101,10 @@ test_queue_index() ->
     %% b) partial pub+del, then move to new segment, then ack all in old segment
     with_empty_test_queue(
       fun (Qi0) ->
-              {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC,
+              {Qi1, _SeqIdsMsgIdsC2} = queue_index_publish(SeqIdsC,
                                                           false, Qi0),
               Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1),
-              {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize],
+              {Qi3, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize],
                                                           false, Qi2),
               Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3),
               rabbit_queue_index:flush(Qi4)
@@ -1792,8 +2113,8 @@ test_queue_index() ->
     %% c) just fill up several segments of all pubs, then +dels, then +acks
     with_empty_test_queue(
       fun (Qi0) ->
-              {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD,
-                                                          false, Qi0),
+              {Qi1, _SeqIdsMsgIdsD} = queue_index_publish(SeqIdsD,
+                                                         false, Qi0),
               Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1),
               Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2),
               rabbit_queue_index:flush(Qi3)
@@ -1826,12 +2147,12 @@ test_queue_index() ->
     %% exercise journal_minus_segment, not segment_plus_journal.
     with_empty_test_queue(
       fun (Qi0) ->
-              {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7],
+              {Qi1, _SeqIdsMsgIdsE} = queue_index_publish([0,1,2,4,5,7],
                                                          true, Qi0),
               Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1),
               Qi3 = rabbit_queue_index:ack([0], Qi2),
               {5, Qi4} = restart_test_queue(Qi3),
-              {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4),
+              {Qi5, _SeqIdsMsgIdsF} = queue_index_publish([3,6,8], true, Qi4),
               Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5),
               Qi7 = rabbit_queue_index:ack([1,2,3], Qi6),
               {5, Qi8} = restart_test_queue(Qi7),
@@ -1843,9 +2164,16 @@ test_queue_index() ->
 
     passed.
 
+variable_queue_init(Q, Recover) ->   %% init a variable queue with nop callbacks
+    Nop2 = fun nop/2,   %% the same fun fills both arity-2 callback slots
+    rabbit_variable_queue:init(Q, Recover, Nop2, Nop2, fun nop/1).
+
 variable_queue_publish(IsPersistent, Count, VQ) ->
+    variable_queue_publish(IsPersistent, Count, fun (_N, P) -> P end, VQ).
+
+variable_queue_publish(IsPersistent, Count, PropFun, VQ) ->
     lists:foldl(
-      fun (_N, VQN) ->
+      fun (N, VQN) ->
               rabbit_variable_queue:publish(
                 rabbit_basic:message(
                   rabbit_misc:r(<<>>, exchange, <<>>),
@@ -1853,7 +2181,7 @@ variable_queue_publish(IsPersistent, Count, VQ) ->
                                                        true  -> 2;
                                                        false -> 1
                                                    end}, <<>>),
-                #message_properties{}, VQN)
+                PropFun(N, #message_properties{}), self(), VQN)
       end, VQ, lists:seq(1, Count)).
 
 variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) ->
@@ -1871,18 +2199,44 @@ assert_prop(List, Prop, Value) ->
 assert_props(List, PropVals) ->
     [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals].
 
+test_amqqueue(Durable) ->   %% pseudo queue record with 'durable' overridden
+    PseudoQ = rabbit_amqqueue:pseudo_queue(test_queue(), self()),
+    PseudoQ#amqqueue { durable = Durable }.
+
 with_fresh_variable_queue(Fun) ->
     ok = empty_test_queue(),
-    VQ = rabbit_variable_queue:init(test_queue(), true, false,
-                                    fun nop/2, fun nop/1),
+    VQ = variable_queue_init(test_amqqueue(true), false), %% durable, Recover=false
     S0 = rabbit_variable_queue:status(VQ),
     assert_props(S0, [{q1, 0}, {q2, 0},
                       {delta, {delta, undefined, 0, undefined}},
                       {q3, 0}, {q4, 0},
                       {len, 0}]),
-    _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)),
+    _ = rabbit_variable_queue:delete_and_terminate(shutdown, Fun(VQ)),
     passed.
 
+publish_and_confirm(QPid, Payload, Count) ->   %% deliver Count msgs, await confirms
+    Props = #'P_basic'{delivery_mode = 2},   %% shared by every message
+    SeqNos = lists:seq(1, Count),
+    [begin
+         M = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>),
+                                  <<>>, Props, Payload),
+         D = #delivery{mandatory = false, immediate = false,
+                       sender = self(), message = M, msg_seq_no = SeqNo},
+         true = rabbit_amqqueue:deliver(QPid, D)   %% each delivery must succeed
+     end || SeqNo <- SeqNos],
+    wait_for_confirms(gb_sets:from_list(SeqNos)).
+
+wait_for_confirms(Unconfirmed) ->   %% loop until the set has been emptied
+    case gb_sets:is_empty(Unconfirmed) of
+        false ->
+            receive {'$gen_cast', {confirm, Confirmed, _}} ->
+                    Acked = gb_sets:from_list(Confirmed),
+                    wait_for_confirms(gb_sets:difference(Unconfirmed, Acked))
+            after 5000 -> exit(timeout_waiting_for_confirm)   %% 5s of silence
+            end;
+        true -> ok
+    end.
+
 test_variable_queue() ->
     [passed = with_fresh_variable_queue(F) ||
         F <- [fun test_variable_queue_dynamic_duration_change/1,
@@ -1890,6 +2244,7 @@ test_variable_queue() ->
               fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1,
               fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1,
               fun test_dropwhile/1,
+              fun test_dropwhile_varying_ram_duration/1,
               fun test_variable_queue_ack_limiting/1]],
     passed.
 
@@ -1926,14 +2281,9 @@ test_dropwhile(VQ0) ->
     Count = 10,
 
     %% add messages with sequential expiry
-    VQ1 = lists:foldl(
-            fun (N, VQN) ->
-                    rabbit_variable_queue:publish(
-                      rabbit_basic:message(
-                        rabbit_misc:r(<<>>, exchange, <<>>),
-                        <<>>, #'P_basic'{}, <<>>),
-                      #message_properties{expiry = N}, VQN)
-            end, VQ0, lists:seq(1, Count)),
+    VQ1 = variable_queue_publish(
+            false, Count,
+            fun (N, Props) -> Props#message_properties{expiry = N} end, VQ0),
 
     %% drop the first 5 messages
     VQ2 = rabbit_variable_queue:dropwhile(
@@ -1953,6 +2303,14 @@ test_dropwhile(VQ0) ->
 
     VQ4.
 
+test_dropwhile_varying_ram_duration(VQ0) ->   %% dropwhile at target 0, then infinity
+    Never = fun(_) -> false end,   %% predicate that is false for every message
+    VQa = rabbit_variable_queue:set_ram_duration_target(
+            0, variable_queue_publish(false, 1, VQ0)),
+    VQb = rabbit_variable_queue:set_ram_duration_target(
+            infinity, rabbit_variable_queue:dropwhile(Never, VQa)),
+    rabbit_variable_queue:dropwhile(Never, variable_queue_publish(false, 1, VQb)).
+
 test_variable_queue_dynamic_duration_change(VQ0) ->
     SegmentSize = rabbit_queue_index:next_segment_boundary(0),
 
@@ -1976,7 +2334,7 @@ test_variable_queue_dynamic_duration_change(VQ0) ->
 
     %% drain
     {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7),
-    VQ9 = rabbit_variable_queue:ack(AckTags, VQ8),
+    {_Guids, VQ9} = rabbit_variable_queue:ack(AckTags, VQ8),
     {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9),
 
     VQ10.
@@ -1986,7 +2344,7 @@ publish_fetch_and_ack(0, _Len, VQ0) ->
 publish_fetch_and_ack(N, Len, VQ0) ->
     VQ1 = variable_queue_publish(false, 1, VQ0),
     {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1),
-    VQ3 = rabbit_variable_queue:ack([AckTag], VQ2),
+    {_Guids, VQ3} = rabbit_variable_queue:ack([AckTag], VQ2),
     publish_fetch_and_ack(N-1, Len, VQ3).
 
 test_variable_queue_partial_segments_delta_thing(VQ0) ->
@@ -2020,7 +2378,7 @@ test_variable_queue_partial_segments_delta_thing(VQ0) ->
              {len, HalfSegment + 1}]),
     {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false,
                                            HalfSegment + 1, VQ7),
-    VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8),
+    {_Guids, VQ9} = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8),
     %% should be empty now
     {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9),
     VQ10.
@@ -2033,10 +2391,10 @@ check_variable_queue_status(VQ0, Props) ->
     VQ1.
 
 variable_queue_wait_for_shuffling_end(VQ) ->
-    case rabbit_variable_queue:needs_idle_timeout(VQ) of
-        true  -> variable_queue_wait_for_shuffling_end(
-                  rabbit_variable_queue:idle_timeout(VQ));
-        false -> VQ
+    case rabbit_variable_queue:needs_timeout(VQ) of
+        false -> VQ;   %% no timeout needed: return the queue as is
+        _     -> variable_queue_wait_for_shuffling_end(   %% any other answer
+                   rabbit_variable_queue:timeout(VQ))      %% run timeout, retry
     end.
 
 test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) ->
@@ -2048,9 +2406,8 @@ test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) ->
                                             Count + Count, VQ3),
     {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false,
                                             Count, VQ4),
-    _VQ6 = rabbit_variable_queue:terminate(VQ5),
-    VQ7 = rabbit_variable_queue:init(test_queue(), true, true,
-                                     fun nop/2, fun nop/1),
+    _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5),
+    VQ7 = variable_queue_init(test_amqqueue(true), true),
     {{_Msg1, true, _AckTag1, Count1}, VQ8} =
         rabbit_variable_queue:fetch(true, VQ7),
     VQ9 = variable_queue_publish(false, 1, VQ8),
@@ -2063,34 +2420,27 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) ->
     VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0),
     VQ2 = variable_queue_publish(false, 4, VQ1),
     {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2),
-    VQ4 = rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3),
-    VQ5 = rabbit_variable_queue:idle_timeout(VQ4),
-    _VQ6 = rabbit_variable_queue:terminate(VQ5),
-    VQ7 = rabbit_variable_queue:init(test_queue(), true, true,
-                                     fun nop/2, fun nop/1),
+    {_Guids, VQ4} =
+        rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3),
+    VQ5 = rabbit_variable_queue:timeout(VQ4),
+    _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5),
+    VQ7 = variable_queue_init(test_amqqueue(true), true),
     {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7),
     VQ8.
 
 test_queue_recover() ->
     Count = 2 * rabbit_queue_index:next_segment_boundary(0),
-    TxID = rabbit_guid:guid(),
-    {new, #amqqueue { pid = QPid, name = QName }} =
+    {new, #amqqueue { pid = QPid, name = QName } = Q} =
         rabbit_amqqueue:declare(test_queue(), true, false, [], none),
-    [begin
-         Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>),
-                                    <<>>, #'P_basic'{delivery_mode = 2}, <<>>),
-         Delivery = #delivery{mandatory = false, immediate = false, txn = TxID,
-                              sender = self(), message = Msg},
-         true = rabbit_amqqueue:deliver(QPid, Delivery)
-     end || _ <- lists:seq(1, Count)],
-    rabbit_amqqueue:commit_all([QPid], TxID, self()),
+    publish_and_confirm(QPid, <<>>, Count),
+
     exit(QPid, kill),
     MRef = erlang:monitor(process, QPid),
     receive {'DOWN', MRef, process, QPid, _Info} -> ok
     after 10000 -> exit(timeout_waiting_for_queue_death)
     end,
     rabbit_amqqueue:stop(),
-    ok = rabbit_amqqueue:start(),
+    rabbit_amqqueue:start(),
     rabbit_amqqueue:with_or_die(
       QName,
       fun (Q1 = #amqqueue { pid = QPid1 }) ->
@@ -2098,11 +2448,10 @@ test_queue_recover() ->
               {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} =
                   rabbit_amqqueue:basic_get(Q1, self(), false),
               exit(QPid1, shutdown),
-              VQ1 = rabbit_variable_queue:init(QName, true, true,
-                                               fun nop/2, fun nop/1),
+              VQ1 = variable_queue_init(Q, true),
               {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} =
                   rabbit_variable_queue:fetch(true, VQ1),
-              _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2),
+              _VQ3 = rabbit_variable_queue:delete_and_terminate(shutdown, VQ2),
               rabbit_amqqueue:internal_delete(QName)
       end),
     passed.
@@ -2111,18 +2460,10 @@ test_variable_queue_delete_msg_store_files_callback() ->
     ok = restart_msg_store_empty(),
     {new, #amqqueue { pid = QPid, name = QName } = Q} =
         rabbit_amqqueue:declare(test_queue(), true, false, [], none),
-    TxID = rabbit_guid:guid(),
     Payload = <<0:8388608>>, %% 1MB
     Count = 30,
-    [begin
-         Msg = rabbit_basic:message(
-                 rabbit_misc:r(<<>>, exchange, <<>>),
-                 <<>>, #'P_basic'{delivery_mode = 2}, Payload),
-         Delivery = #delivery{mandatory = false, immediate = false, txn = TxID,
-                              sender = self(), message = Msg},
-         true = rabbit_amqqueue:deliver(QPid, Delivery)
-     end || _ <- lists:seq(1, Count)],
-    rabbit_amqqueue:commit_all([QPid], TxID, self()),
+    publish_and_confirm(QPid, Payload, Count),
+
     rabbit_amqqueue:set_ram_duration_target(QPid, 0),
 
     CountMinusOne = Count - 1,
@@ -2141,9 +2482,11 @@ test_configurable_server_properties() ->
     BuiltInPropNames = [<<"product">>, <<"version">>, <<"platform">>,
                         <<"copyright">>, <<"information">>],
 
+    Protocol = rabbit_framing_amqp_0_9_1,
+
     %% Verify that the built-in properties are initially present
-    ActualPropNames = [Key ||
-                         {Key, longstr, _} <- rabbit_reader:server_properties()],
+    ActualPropNames = [Key || {Key, longstr, _} <-
+                                  rabbit_reader:server_properties(Protocol)],
     true = lists:all(fun (X) -> lists:member(X, ActualPropNames) end,
                      BuiltInPropNames),
 
@@ -2154,9 +2497,10 @@ test_configurable_server_properties() ->
     ConsProp = fun (X) -> application:set_env(rabbit,
                                               server_properties,
                                               [X | ServerProperties]) end,
-    IsPropPresent = fun (X) -> lists:member(X,
-                                            rabbit_reader:server_properties())
-                    end,
+    IsPropPresent =
+        fun (X) ->
+                lists:member(X, rabbit_reader:server_properties(Protocol))
+        end,
 
     %% Add a wholly new property of the simplified {KeyAtom, StringValue} form
     NewSimplifiedProperty = {NewHareKey, NewHareVal} = {hare, "soup"},
@@ -2179,7 +2523,7 @@ test_configurable_server_properties() ->
     {BinNewVerKey, BinNewVerVal} = {list_to_binary(atom_to_list(NewVerKey)),
                                     list_to_binary(NewVerVal)},
     ConsProp(NewVersion),
-    ClobberedServerProps = rabbit_reader:server_properties(),
+    ClobberedServerProps = rabbit_reader:server_properties(Protocol),
     %% Is the clobbering insert present?
     true = IsPropPresent({BinNewVerKey, longstr, BinNewVerVal}),
     %% Is the clobbering insert the only thing with the clobbering key?
diff --git a/src/rabbit_trace.erl b/src/rabbit_trace.erl
new file mode 100644
index 00000000..7d36856a
--- /dev/null
+++ b/src/rabbit_trace.erl
@@ -0,0 +1,120 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_trace).
+
+-export([init/1, tracing/1, tap_trace_in/2, tap_trace_out/2, start/1, stop/1]).
+
+-include("rabbit.hrl").
+-include("rabbit_framing.hrl").
+
+-define(TRACE_VHOSTS, trace_vhosts).
+-define(XNAME, <<"amq.rabbitmq.trace">>).
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(state() :: rabbit_types:exchange() | 'none').
+
+-spec(init/1 :: (rabbit_types:vhost()) -> state()).
+-spec(tracing/1 :: (rabbit_types:vhost()) -> boolean()).
+-spec(tap_trace_in/2 :: (rabbit_types:basic_message(), state()) -> 'ok').
+-spec(tap_trace_out/2 :: (rabbit_amqqueue:qmsg(), state()) -> 'ok').
+
+-spec(start/1 :: (rabbit_types:vhost()) -> 'ok').
+-spec(stop/1 :: (rabbit_types:vhost()) -> 'ok').
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+init(VHost) ->
+    case tracing(VHost) of
+        false -> none;
+        true  -> {ok, X} = rabbit_exchange:lookup(
+                             rabbit_misc:r(VHost, exchange, ?XNAME)),
+                 X
+    end.
+
+tracing(VHost) ->
+    {ok, VHosts} = application:get_env(rabbit, ?TRACE_VHOSTS),
+    lists:member(VHost, VHosts).
+
+tap_trace_in(Msg = #basic_message{exchange_name = #resource{name = XName}},
+             TraceX) ->
+    maybe_trace(TraceX, Msg, <<"publish">>, XName, []).
+
+tap_trace_out({#resource{name = QName}, _QPid, _QMsgId, Redelivered, Msg},
+              TraceX) ->
+    RedeliveredNum = case Redelivered of true -> 1; false -> 0 end,
+    maybe_trace(TraceX, Msg, <<"deliver">>, QName,
+                [{<<"redelivered">>, signedint, RedeliveredNum}]).
+
+%%----------------------------------------------------------------------------
+
+start(VHost) ->
+    update_config(fun (VHosts) -> [VHost | VHosts -- [VHost]] end).
+
+stop(VHost) ->
+    update_config(fun (VHosts) -> VHosts -- [VHost] end).
+
+update_config(Fun) ->
+    {ok, VHosts0} = application:get_env(rabbit, ?TRACE_VHOSTS),
+    VHosts = Fun(VHosts0),
+    application:set_env(rabbit, ?TRACE_VHOSTS, VHosts),
+    rabbit_channel:refresh_config_all(),
+    ok.
+
+%%----------------------------------------------------------------------------
+
+maybe_trace(none, _Msg, _RKPrefix, _RKSuffix, _Extra) ->
+    ok;
+maybe_trace(#exchange{name = Name}, #basic_message{exchange_name = Name},
+            _RKPrefix, _RKSuffix, _Extra) ->
+    ok;
+maybe_trace(X, Msg = #basic_message{content = #content{
+                                      payload_fragments_rev = PFR}},
+            RKPrefix, RKSuffix, Extra) ->
+    {ok, _, _} = rabbit_basic:publish(
+                   X, <<RKPrefix/binary, ".", RKSuffix/binary>>,
+                   #'P_basic'{headers = msg_to_table(Msg) ++ Extra}, PFR),
+    ok.
+
+msg_to_table(#basic_message{exchange_name = #resource{name = XName},
+                            routing_keys  = RoutingKeys,
+                            content       = Content}) ->
+    #content{properties = Props} =
+        rabbit_binary_parser:ensure_content_decoded(Content),
+    {PropsTable, _Ix} =
+        lists:foldl(fun (K, {L, Ix}) ->
+                            V = element(Ix, Props),
+                            NewL = case V of
+                                       undefined -> L;
+                                       _         -> [{a2b(K), type(V), V} | L]
+                                   end,
+                            {NewL, Ix + 1}
+                    end, {[], 2}, record_info(fields, 'P_basic')),
+    [{<<"exchange_name">>, longstr, XName},
+     {<<"routing_keys">>,  array,   [{longstr, K} || K <- RoutingKeys]},
+     {<<"properties">>,    table,   PropsTable},
+     {<<"node">>,          longstr, a2b(node())}].
+
+a2b(A) -> list_to_binary(atom_to_list(A)).
+
+type(V) when is_list(V)    -> table;
+type(V) when is_integer(V) -> signedint;
+type(_V)                   -> longstr.
diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl
index 3dbe740f..2db960ac 100644
--- a/src/rabbit_types.erl
+++ b/src/rabbit_types.erl
@@ -20,8 +20,8 @@
 
 -ifdef(use_specs).
 
--export_type([txn/0, maybe/1, info/0, infos/0, info_key/0, info_keys/0,
-              message/0, basic_message/0,
+-export_type([maybe/1, info/0, infos/0, info_key/0, info_keys/0,
+              message/0, msg_id/0, basic_message/0,
               delivery/0, content/0, decoded_content/0, undecoded_content/0,
               unencoded_content/0, encoded_content/0, message_properties/0,
               vhost/0, ctag/0, amqp_error/0, r/1, r2/2, r3/3, listener/0,
@@ -42,46 +42,43 @@
 %% TODO: make this more precise by tying specific class_ids to
 %% specific properties
 -type(undecoded_content() ::
-      #content{class_id              :: rabbit_framing:amqp_class_id(),
-               properties            :: 'none',
-               properties_bin        :: binary(),
-               payload_fragments_rev :: [binary()]} |
-      #content{class_id              :: rabbit_framing:amqp_class_id(),
-               properties            :: rabbit_framing:amqp_property_record(),
-               properties_bin        :: 'none',
-               payload_fragments_rev :: [binary()]}).
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: 'none',
+                 properties_bin        :: binary(),
+                 payload_fragments_rev :: [binary()]} |
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: rabbit_framing:amqp_property_record(),
+                 properties_bin        :: 'none',
+                 payload_fragments_rev :: [binary()]}).
 -type(unencoded_content() :: undecoded_content()).
 -type(decoded_content() ::
-      #content{class_id              :: rabbit_framing:amqp_class_id(),
-               properties            :: rabbit_framing:amqp_property_record(),
-               properties_bin        :: maybe(binary()),
-               payload_fragments_rev :: [binary()]}).
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: rabbit_framing:amqp_property_record(),
+                 properties_bin        :: maybe(binary()),
+                 payload_fragments_rev :: [binary()]}).
 -type(encoded_content() ::
-      #content{class_id       :: rabbit_framing:amqp_class_id(),
-               properties     :: maybe(rabbit_framing:amqp_property_record()),
-               properties_bin        :: binary(),
-               payload_fragments_rev :: [binary()]}).
+        #content{class_id       :: rabbit_framing:amqp_class_id(),
+                 properties     :: maybe(rabbit_framing:amqp_property_record()),
+                 properties_bin        :: binary(),
+                 payload_fragments_rev :: [binary()]}).
 -type(content() :: undecoded_content() | decoded_content()).
+-type(msg_id() :: rabbit_guid:guid()).
 -type(basic_message() ::
-      #basic_message{exchange_name  :: rabbit_exchange:name(),
-                     routing_key    :: rabbit_router:routing_key(),
-                     content        :: content(),
-                     guid           :: rabbit_guid:guid(),
-                     is_persistent  :: boolean()}).
+        #basic_message{exchange_name  :: rabbit_exchange:name(),
+                       routing_keys   :: [rabbit_router:routing_key()],
+                       content        :: content(),
+                     id             :: msg_id(),
+                       is_persistent  :: boolean()}).
 -type(message() :: basic_message()).
 -type(delivery() ::
-      #delivery{mandatory :: boolean(),
-                immediate :: boolean(),
-                txn       :: maybe(txn()),
-                sender    :: pid(),
-                message   :: message()}).
+        #delivery{mandatory :: boolean(),
+                  immediate :: boolean(),
+                  sender    :: pid(),
+                  message   :: message()}).
 -type(message_properties() ::
         #message_properties{expiry :: pos_integer() | 'undefined',
                             needs_confirming :: boolean()}).
 
-%% this is really an abstract type, but dialyzer does not support them
--type(txn() :: rabbit_guid:guid()).
-
 -type(info_key() :: atom()).
 -type(info_keys() :: [info_key()]).
 
@@ -89,9 +86,9 @@
 -type(infos() :: [info()]).
 
 -type(amqp_error() ::
-      #amqp_error{name        :: rabbit_framing:amqp_exception(),
-                  explanation :: string(),
-                  method      :: rabbit_framing:amqp_method_name()}).
+        #amqp_error{name        :: rabbit_framing:amqp_exception(),
+                    explanation :: string(),
+                    method      :: rabbit_framing:amqp_method_name()}).
 
 -type(r(Kind) ::
         r2(vhost(), Kind)).
@@ -103,34 +100,36 @@
                   name         :: Name}).
 
 -type(listener() ::
-      #listener{node     :: node(),
-                protocol :: atom(),
-                host     :: rabbit_networking:hostname(),
-                port     :: rabbit_networking:ip_port()}).
+        #listener{node     :: node(),
+                  protocol :: atom(),
+                  host     :: rabbit_networking:hostname(),
+                  port     :: rabbit_networking:ip_port()}).
 
 -type(binding_source() :: rabbit_exchange:name()).
 -type(binding_destination() :: rabbit_amqqueue:name() | rabbit_exchange:name()).
 
 -type(binding() ::
-      #binding{source      :: rabbit_exchange:name(),
-               destination :: binding_destination(),
-               key         :: rabbit_binding:key(),
-               args        :: rabbit_framing:amqp_table()}).
+        #binding{source      :: rabbit_exchange:name(),
+                 destination :: binding_destination(),
+                 key         :: rabbit_binding:key(),
+                 args        :: rabbit_framing:amqp_table()}).
 
 -type(amqqueue() ::
-      #amqqueue{name            :: rabbit_amqqueue:name(),
-                durable         :: boolean(),
-                auto_delete     :: boolean(),
-                exclusive_owner :: rabbit_types:maybe(pid()),
-                arguments       :: rabbit_framing:amqp_table(),
-                pid             :: rabbit_types:maybe(pid())}).
+        #amqqueue{name            :: rabbit_amqqueue:name(),
+                  durable         :: boolean(),
+                  auto_delete     :: boolean(),
+                  exclusive_owner :: rabbit_types:maybe(pid()),
+                  arguments       :: rabbit_framing:amqp_table(),
+                  pid             :: rabbit_types:maybe(pid()),
+                  slave_pids      :: [pid()],
+                  mirror_nodes    :: [node()] | 'undefined' | 'all'}).
 
 -type(exchange() ::
-      #exchange{name        :: rabbit_exchange:name(),
-                type        :: rabbit_exchange:type(),
-                durable     :: boolean(),
-                auto_delete :: boolean(),
-                arguments   :: rabbit_framing:amqp_table()}).
+        #exchange{name        :: rabbit_exchange:name(),
+                  type        :: rabbit_exchange:type(),
+                  durable     :: boolean(),
+                  auto_delete :: boolean(),
+                  arguments   :: rabbit_framing:amqp_table()}).
 
 -type(connection() :: pid()).
 
@@ -138,14 +137,14 @@
 
 -type(user() ::
         #user{username     :: username(),
-              is_admin     :: boolean(),
+              tags         :: [atom()],
               auth_backend :: atom(),
               impl         :: any()}).
 
 -type(internal_user() ::
         #internal_user{username      :: username(),
                        password_hash :: password_hash(),
-                       is_admin      :: boolean()}).
+                       tags          :: [atom()]}).
 
 -type(username() :: binary()).
 -type(password() :: binary()).
diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl
index b0a71523..9739f6b7 100644
--- a/src/rabbit_upgrade.erl
+++ b/src/rabbit_upgrade.erl
@@ -16,7 +16,7 @@
 
 -module(rabbit_upgrade).
 
--export([maybe_upgrade/0, read_version/0, write_version/0, desired_version/0]).
+-export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0]).
 
 -include("rabbit.hrl").
 
@@ -27,142 +27,262 @@
 
 -ifdef(use_specs).
 
--type(step() :: atom()).
--type(version() :: [step()]).
-
--spec(maybe_upgrade/0 :: () -> 'ok' | 'version_not_available').
--spec(read_version/0 :: () -> rabbit_types:ok_or_error2(version(), any())).
--spec(write_version/0 :: () -> 'ok').
--spec(desired_version/0 :: () -> version()).
+-spec(maybe_upgrade_mnesia/0 :: () -> 'ok').
+-spec(maybe_upgrade_local/0 :: () -> 'ok' | 'version_not_available').
 
 -endif.
 
 %% -------------------------------------------------------------------
 
-%% Try to upgrade the schema. If no information on the existing schema
-%% could be found, do nothing. rabbit_mnesia:check_schema_integrity()
-%% will catch the problem.
-maybe_upgrade() ->
-    case read_version() of
-        {ok, CurrentHeads} ->
-            with_upgrade_graph(
-              fun (G) ->
-                      case unknown_heads(CurrentHeads, G) of
-                          []      -> case upgrades_to_apply(CurrentHeads, G) of
-                                         []       -> ok;
-                                         Upgrades -> apply_upgrades(Upgrades)
-                                     end;
-                          Unknown -> throw({error,
-                                            {future_upgrades_found, Unknown}})
-                      end
-              end);
-        {error, enoent} ->
-            version_not_available
+%% The upgrade logic is quite involved, due to the existence of
+%% clusters.
+%%
+%% Firstly, we have two different types of upgrades to do: Mnesia and
+%% everything else. Mnesia upgrades must only be done by one node in
+%% the cluster (we treat a non-clustered node as a single-node
+%% cluster). This is the primary upgrader. The other upgrades need to
+%% be done by all nodes.
+%%
+%% The primary upgrader has to start first (and do its Mnesia
+%% upgrades). Secondary upgraders need to reset their Mnesia database
+%% and then rejoin the cluster. They can't do the Mnesia upgrades as
+%% well and then merge databases since the cookie for each table will
+%% end up different and the merge will fail.
+%%
+%% This in turn means that we need to determine whether we are the
+%% primary or secondary upgrader *before* Mnesia comes up. If we
+%% didn't then the secondary upgrader would try to start Mnesia, and
+%% either hang waiting for a node which is not yet up, or fail since
+%% its schema differs from the other nodes in the cluster.
+%%
+%% Also, the primary upgrader needs to start Mnesia to do its
+%% upgrades, but needs to forcibly load tables rather than wait for
+%% them (in case it was not the last node to shut down, in which case
+%% it would wait forever).
+%%
+%% This in turn means that maybe_upgrade_mnesia/0 has to be patched
+%% into the boot process by prelaunch before the mnesia application is
+%% started. By the time Mnesia is started the upgrades have happened
+%% (on the primary), or Mnesia has been reset (on the secondary) and
+%% rabbit_mnesia:init_db/3 can then make the node rejoin the cluster
+%% in the normal way.
+%%
+%% The non-mnesia upgrades are then triggered by
+%% rabbit_mnesia:init_db/3. Of course, it's possible for a given
+%% upgrade process to only require Mnesia upgrades, or only require
+%% non-Mnesia upgrades. In the latter case no Mnesia resets and
+%% reclusterings occur.
+%%
+%% The primary upgrader needs to be a disc node. Ideally we would like
+%% it to be the last disc node to shut down (since otherwise there's a
+%% risk of data loss). On each node we therefore record the disc nodes
+%% that were still running when we shut down. A disc node that knows
+%% other nodes were up when it shut down, or a ram node, will refuse
+%% to be the primary upgrader, and will thus not start when upgrades
+%% are needed.
+%%
+%% However, this is racy if several nodes are shut down at once. Since
+%% rabbit records the running nodes, and shuts down before mnesia, the
+%% race manifests as all disc nodes thinking they are not the primary
+%% upgrader. Therefore the user can remove the record of the last disc
+%% node to shut down to get things going again. This may lose any
+%% mnesia changes that happened after the node chosen as the primary
+%% upgrader was shut down.
+
+%% -------------------------------------------------------------------
+
+ensure_backup_taken() ->
+    case filelib:is_file(lock_filename()) of
+        false -> case filelib:is_dir(backup_dir()) of
+                     false -> ok = take_backup();
+                     _     -> ok
+                 end;
+        true  -> throw({error, previous_upgrade_failed})
     end.
 
-read_version() ->
-    case rabbit_misc:read_term_file(schema_filename()) of
-        {ok, [Heads]}    -> {ok, Heads};
-        {error, _} = Err -> Err
+take_backup() ->
+    BackupDir = backup_dir(),
+    case rabbit_mnesia:copy_db(BackupDir) of
+        ok         -> info("upgrades: Mnesia dir backed up to ~p~n",
+                           [BackupDir]);
+        {error, E} -> throw({could_not_back_up_mnesia_dir, E})
     end.
 
-write_version() ->
-    ok = rabbit_misc:write_term_file(schema_filename(), [desired_version()]),
-    ok.
+ensure_backup_removed() ->
+    case filelib:is_dir(backup_dir()) of
+        true -> ok = remove_backup();
+        _    -> ok
+    end.
 
-desired_version() ->
-    with_upgrade_graph(fun (G) -> heads(G) end).
+remove_backup() ->
+    ok = rabbit_misc:recursive_delete([backup_dir()]),
+    info("upgrades: Mnesia backup removed~n", []).
 
-%% -------------------------------------------------------------------
+maybe_upgrade_mnesia() ->
+    AllNodes = rabbit_mnesia:all_clustered_nodes(),
+    case rabbit_version:upgrades_required(mnesia) of
+        {error, version_not_available} ->
+            case AllNodes of
+                [_] -> ok;
+                _   -> die("Cluster upgrade needed but upgrading from "
+                           "< 2.1.1.~nUnfortunately you will need to "
+                           "rebuild the cluster.", [])
+            end;
+        {error, _} = Err ->
+            throw(Err);
+        {ok, []} ->
+            ok;
+        {ok, Upgrades} ->
+            ensure_backup_taken(),
+            ok = case upgrade_mode(AllNodes) of
+                     primary   -> primary_upgrade(Upgrades, AllNodes);
+                     secondary -> secondary_upgrade(AllNodes)
+                 end
+    end.
 
-with_upgrade_graph(Fun) ->
-    case rabbit_misc:build_acyclic_graph(
-           fun vertices/2, fun edges/2,
-           rabbit_misc:all_module_attributes(rabbit_upgrade)) of
-        {ok, G} -> try
-                       Fun(G)
-                   after
-                       true = digraph:delete(G)
-                   end;
-        {error, {vertex, duplicate, StepName}} ->
-            throw({error, {duplicate_upgrade_step, StepName}});
-        {error, {edge, {bad_vertex, StepName}, _From, _To}} ->
-            throw({error, {dependency_on_unknown_upgrade_step, StepName}});
-        {error, {edge, {bad_edge, StepNames}, _From, _To}} ->
-            throw({error, {cycle_in_upgrade_steps, StepNames}})
+upgrade_mode(AllNodes) ->
+    case nodes_running(AllNodes) of
+        [] ->
+            AfterUs = rabbit_mnesia:read_previously_running_nodes(),
+            case {is_disc_node_legacy(), AfterUs} of
+                {true, []}  ->
+                    primary;
+                {true, _}  ->
+                    Filename = rabbit_mnesia:running_nodes_filename(),
+                    die("Cluster upgrade needed but other disc nodes shut "
+                        "down after this one.~nPlease first start the last "
+                        "disc node to shut down.~n~nNote: if several disc "
+                        "nodes were shut down simultaneously they may "
+                        "all~nshow this message. In which case, remove "
+                        "the lock file on one of them and~nstart that node. "
+                        "The lock file on this node is:~n~n ~s ", [Filename]);
+                {false, _} ->
+                    die("Cluster upgrade needed but this is a ram node.~n"
+                        "Please first start the last disc node to shut down.",
+                        [])
+            end;
+        [Another|_] ->
+            MyVersion = rabbit_version:desired_for_scope(mnesia),
+            ErrFun = fun (ClusterVersion) ->
+                             %% The other node(s) are running an
+                             %% unexpected version.
+                             die("Cluster upgrade needed but other nodes are "
+                                 "running ~p~nand I want ~p",
+                                 [ClusterVersion, MyVersion])
+                     end,
+            case rpc:call(Another, rabbit_version, desired_for_scope,
+                          [mnesia]) of
+                {badrpc, {'EXIT', {undef, _}}} -> ErrFun(unknown_old_version);
+                {badrpc, Reason}               -> ErrFun({unknown, Reason});
+                CV                             -> case rabbit_version:matches(
+                                                         MyVersion, CV) of
+                                                      true  -> secondary;
+                                                      false -> ErrFun(CV)
+                                                  end
+            end
     end.
 
-vertices(Module, Steps) ->
-    [{StepName, {Module, StepName}} || {StepName, _Reqs} <- Steps].
+die(Msg, Args) ->
+    %% We don't throw or exit here since that gets thrown
+    %% straight out into do_boot, generating an erl_crash.dump
+    %% and displaying any error message in a confusing way.
+    error_logger:error_msg(Msg, Args),
+    io:format("~n~n****~n~n" ++ Msg ++ "~n~n****~n~n~n", Args),
+    error_logger:logfile(close),
+    halt(1).
 
-edges(_Module, Steps) ->
-    [{Require, StepName} || {StepName, Requires} <- Steps, Require <- Requires].
+primary_upgrade(Upgrades, Nodes) ->
+    Others = Nodes -- [node()],
+    ok = apply_upgrades(
+           mnesia,
+           Upgrades,
+           fun () ->
+                   force_tables(),
+                   case Others of
+                       [] -> ok;
+                       _  -> info("mnesia upgrades: Breaking cluster~n", []),
+                             [{atomic, ok} = mnesia:del_table_copy(schema, Node)
+                              || Node <- Others]
+                   end
+           end),
+    ok.
 
+force_tables() ->
+    [mnesia:force_load_table(T) || T <- rabbit_mnesia:table_names()].
 
-unknown_heads(Heads, G) ->
-    [H || H <- Heads, digraph:vertex(G, H) =:= false].
+secondary_upgrade(AllNodes) ->
+    %% must do this before we wipe out schema
+    IsDiscNode = is_disc_node_legacy(),
+    rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+                          cannot_delete_schema),
+    %% Note that we cluster with all nodes, rather than all disc nodes
+    %% (as we can't know all disc nodes at this point). This is safe as
+    %% we're not writing the cluster config, just setting up Mnesia.
+    ClusterNodes = case IsDiscNode of
+                       true  -> AllNodes;
+                       false -> AllNodes -- [node()]
+                   end,
+    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    ok = rabbit_mnesia:init_db(ClusterNodes, true, fun () -> ok end),
+    ok = rabbit_version:record_desired_for_scope(mnesia),
+    ok.
 
-upgrades_to_apply(Heads, G) ->
-    %% Take all the vertices which can reach the known heads. That's
-    %% everything we've already applied. Subtract that from all
-    %% vertices: that's what we have to apply.
-    Unsorted = sets:to_list(
-                sets:subtract(
-                  sets:from_list(digraph:vertices(G)),
-                  sets:from_list(digraph_utils:reaching(Heads, G)))),
-    %% Form a subgraph from that list and find a topological ordering
-    %% so we can invoke them in order.
-    [element(2, digraph:vertex(G, StepName)) ||
-        StepName <- digraph_utils:topsort(digraph_utils:subgraph(G, Unsorted))].
+nodes_running(Nodes) ->
+    [N || N <- Nodes, node_running(N)].
 
-heads(G) ->
-    lists:sort([V || V <- digraph:vertices(G), digraph:out_degree(G, V) =:= 0]).
+node_running(Node) ->
+    case rpc:call(Node, application, which_applications, []) of
+        {badrpc, _} -> false;
+        Apps        -> lists:keysearch(rabbit, 1, Apps) =/= false
+    end.
 
 %% -------------------------------------------------------------------
 
-apply_upgrades(Upgrades) ->
-    LockFile = lock_filename(dir()),
-    case rabbit_misc:lock_file(LockFile) of
-        ok ->
-            BackupDir = dir() ++ "-upgrade-backup",
-            info("Upgrades: ~w to apply~n", [length(Upgrades)]),
-            case rabbit_mnesia:copy_db(BackupDir) of
-                ok ->
-                    %% We need to make the backup after creating the
-                    %% lock file so that it protects us from trying to
-                    %% overwrite the backup. Unfortunately this means
-                    %% the lock file exists in the backup too, which
-                    %% is not intuitive. Remove it.
-                    ok = file:delete(lock_filename(BackupDir)),
-                    info("Upgrades: Mnesia dir backed up to ~p~n", [BackupDir]),
-                    [apply_upgrade(Upgrade) || Upgrade <- Upgrades],
-                    info("Upgrades: All upgrades applied successfully~n", []),
-                    ok = write_version(),
-                    ok = rabbit_misc:recursive_delete([BackupDir]),
-                    info("Upgrades: Mnesia backup removed~n", []),
-                    ok = file:delete(LockFile);
-                {error, E} ->
-                    %% If we can't backup, the upgrade hasn't started
-                    %% hence we don't need the lockfile since the real
-                    %% mnesia dir is the good one.
-                    ok = file:delete(LockFile),
-                    throw({could_not_back_up_mnesia_dir, E})
-            end;
-        {error, eexist} ->
-            throw({error, previous_upgrade_failed})
+maybe_upgrade_local() ->
+    case rabbit_version:upgrades_required(local) of
+        {error, version_not_available} -> version_not_available;
+        {error, _} = Err               -> throw(Err);
+        {ok, []}                       -> ensure_backup_removed(),
+                                          ok;
+        {ok, Upgrades}                 -> mnesia:stop(),
+                                          ensure_backup_taken(),
+                                          ok = apply_upgrades(local, Upgrades,
+                                                              fun () -> ok end),
+                                          ensure_backup_removed(),
+                                          ok
     end.
 
-apply_upgrade({M, F}) ->
-    info("Upgrades: Applying ~w:~w~n", [M, F]),
+%% -------------------------------------------------------------------
+
+apply_upgrades(Scope, Upgrades, Fun) ->
+    ok = rabbit_misc:lock_file(lock_filename()),
+    info("~s upgrades: ~w to apply~n", [Scope, length(Upgrades)]),
+    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
+    Fun(),
+    [apply_upgrade(Scope, Upgrade) || Upgrade <- Upgrades],
+    info("~s upgrades: All upgrades applied successfully~n", [Scope]),
+    ok = rabbit_version:record_desired_for_scope(Scope),
+    ok = file:delete(lock_filename()).
+
+apply_upgrade(Scope, {M, F}) ->
+    info("~s upgrades: Applying ~w:~w~n", [Scope, M, F]),
     ok = apply(M, F, []).
 
 %% -------------------------------------------------------------------
 
 dir() -> rabbit_mnesia:dir().
 
-schema_filename() -> filename:join(dir(), ?VERSION_FILENAME).
-
+lock_filename() -> lock_filename(dir()).
 lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME).
+backup_dir() -> dir() ++ "-upgrade-backup".
+
+is_disc_node_legacy() ->
+    %% This is pretty ugly but we can't start Mnesia and ask it (will
+    %% hang), we can't look at the config file (may not include us
+    %% even if we're a disc node).  We also can't use
+    %% rabbit_mnesia:is_disc_node/0 because that will give false
+    %% positives on Rabbit up to 2.5.1.
+    filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")).
 
 %% NB: we cannot use rabbit_log here since it may not have been
 %% started yet
diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl
index 68b88b3e..8d26866b 100644
--- a/src/rabbit_upgrade_functions.erl
+++ b/src/rabbit_upgrade_functions.erl
@@ -16,25 +16,42 @@
 
 -module(rabbit_upgrade_functions).
 
--include("rabbit.hrl").
+%% If you are tempted to add include("rabbit.hrl"). here, don't. Using record
+%% defs here leads to pain later.
 
 -compile([export_all]).
 
--rabbit_upgrade({remove_user_scope,  []}).
--rabbit_upgrade({hash_passwords,     []}).
--rabbit_upgrade({add_ip_to_listener, []}).
--rabbit_upgrade({internal_exchanges, []}).
--rabbit_upgrade({user_to_internal_user, [hash_passwords]}).
+-rabbit_upgrade({remove_user_scope,     mnesia, []}).
+-rabbit_upgrade({hash_passwords,        mnesia, []}).
+-rabbit_upgrade({add_ip_to_listener,    mnesia, []}).
+-rabbit_upgrade({internal_exchanges,    mnesia, []}).
+-rabbit_upgrade({user_to_internal_user, mnesia, [hash_passwords]}).
+-rabbit_upgrade({topic_trie,            mnesia, []}).
+-rabbit_upgrade({semi_durable_route,    mnesia, []}).
+-rabbit_upgrade({exchange_event_serial, mnesia, []}).
+-rabbit_upgrade({trace_exchanges,       mnesia, [internal_exchanges]}).
+-rabbit_upgrade({user_admin_to_tags,    mnesia, [user_to_internal_user]}).
+-rabbit_upgrade({ha_mirrors,            mnesia, []}).
+-rabbit_upgrade({gm,                    mnesia, []}).
+-rabbit_upgrade({exchange_scratch,      mnesia, [trace_exchanges]}).
 
 %% -------------------------------------------------------------------
 
 -ifdef(use_specs).
 
--spec(remove_user_scope/0  :: () -> 'ok').
--spec(hash_passwords/0     :: () -> 'ok').
--spec(add_ip_to_listener/0 :: () -> 'ok').
--spec(internal_exchanges/0 :: () -> 'ok').
+-spec(remove_user_scope/0     :: () -> 'ok').
+-spec(hash_passwords/0        :: () -> 'ok').
+-spec(add_ip_to_listener/0    :: () -> 'ok').
+-spec(internal_exchanges/0    :: () -> 'ok').
 -spec(user_to_internal_user/0 :: () -> 'ok').
+-spec(topic_trie/0            :: () -> 'ok').
+-spec(semi_durable_route/0    :: () -> 'ok').
+-spec(exchange_event_serial/0 :: () -> 'ok').
+-spec(trace_exchanges/0       :: () -> 'ok').
+-spec(user_admin_to_tags/0    :: () -> 'ok').
+-spec(ha_mirrors/0            :: () -> 'ok').
+-spec(gm/0                    :: () -> 'ok').
+-spec(exchange_scratch/0      :: () -> 'ok').
 
 -endif.
 
@@ -47,7 +64,7 @@
 %% point.
 
 remove_user_scope() ->
-    mnesia(
+    transform(
       rabbit_user_permission,
       fun ({user_permission, UV, {permission, _Scope, Conf, Write, Read}}) ->
               {user_permission, UV, {permission, Conf, Write, Read}}
@@ -55,7 +72,7 @@ remove_user_scope() ->
       [user_vhost, permission]).
 
 hash_passwords() ->
-    mnesia(
+    transform(
       rabbit_user,
       fun ({user, Username, Password, IsAdmin}) ->
               Hash = rabbit_auth_backend_internal:hash_password(Password),
@@ -64,7 +81,7 @@ hash_passwords() ->
       [username, password_hash, is_admin]).
 
 add_ip_to_listener() ->
-    mnesia(
+    transform(
       rabbit_listener,
       fun ({listener, Node, Protocol, Host, Port}) ->
               {listener, Node, Protocol, Host, {0,0,0,0}, Port}
@@ -77,27 +94,104 @@ internal_exchanges() ->
         fun ({exchange, Name, Type, Durable, AutoDelete, Args}) ->
                 {exchange, Name, Type, Durable, AutoDelete, false, Args}
         end,
-    [ ok = mnesia(T,
-                  AddInternalFun,
-                  [name, type, durable, auto_delete, internal, arguments])
+    [ ok = transform(T,
+                     AddInternalFun,
+                     [name, type, durable, auto_delete, internal, arguments])
       || T <- Tables ],
     ok.
 
 user_to_internal_user() ->
-    mnesia(
+    transform(
       rabbit_user,
       fun({user, Username, PasswordHash, IsAdmin}) ->
               {internal_user, Username, PasswordHash, IsAdmin}
       end,
       [username, password_hash, is_admin], internal_user).
 
+topic_trie() ->
+    create(rabbit_topic_trie_edge, [{record_name, topic_trie_edge},
+                                    {attributes, [trie_edge, node_id]},
+                                    {type, ordered_set}]),
+    create(rabbit_topic_trie_binding, [{record_name, topic_trie_binding},
+                                       {attributes, [trie_binding, value]},
+                                       {type, ordered_set}]).
+
+semi_durable_route() ->
+    create(rabbit_semi_durable_route, [{record_name, route},
+                                       {attributes, [binding, value]}]).
+
+exchange_event_serial() ->
+    create(rabbit_exchange_serial, [{record_name, exchange_serial},
+                                    {attributes, [name, next]}]).
+
+trace_exchanges() ->
+    [declare_exchange(
+       rabbit_misc:r(VHost, exchange, <<"amq.rabbitmq.trace">>), topic) ||
+        VHost <- rabbit_vhost:list()],
+    ok.
+
+user_admin_to_tags() ->
+    transform(
+      rabbit_user,
+      fun({internal_user, Username, PasswordHash, true}) ->
+              {internal_user, Username, PasswordHash, [administrator]};
+         ({internal_user, Username, PasswordHash, false}) ->
+              {internal_user, Username, PasswordHash, [management]}
+      end,
+      [username, password_hash, tags], internal_user).
+
+ha_mirrors() ->
+    Tables = [rabbit_queue, rabbit_durable_queue],
+    AddMirrorPidsFun =
+        fun ({amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid}) ->
+                {amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid,
+                 [], undefined}
+        end,
+    [ ok = transform(T,
+                     AddMirrorPidsFun,
+                     [name, durable, auto_delete, exclusive_owner, arguments,
+                      pid, slave_pids, mirror_nodes])
+      || T <- Tables ],
+    ok.
+
+gm() ->
+    create(gm_group, [{record_name, gm_group},
+                      {attributes, [name, version, members]}]).
+
+exchange_scratch() ->
+    ok = exchange_scratch(rabbit_exchange),
+    ok = exchange_scratch(rabbit_durable_exchange).
+
+exchange_scratch(Table) ->
+    transform(
+      Table,
+      fun ({exchange, Name, Type, Dur, AutoDel, Int, Args}) ->
+              {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined}
+      end,
+      [name, type, durable, auto_delete, internal, arguments, scratch]).
+
 %%--------------------------------------------------------------------
 
-mnesia(TableName, Fun, FieldList) ->
+transform(TableName, Fun, FieldList) ->
+    rabbit_mnesia:wait_for_tables([TableName]),
     {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList),
     ok.
 
-mnesia(TableName, Fun, FieldList, NewRecordName) ->
+transform(TableName, Fun, FieldList, NewRecordName) ->
+    rabbit_mnesia:wait_for_tables([TableName]),
     {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList,
                                           NewRecordName),
     ok.
+
+create(Tab, TabDef) ->
+    {atomic, ok} = mnesia:create_table(Tab, TabDef),
+    ok.
+
+%% Dumb replacement for rabbit_exchange:declare that does not require
+%% the exchange type registry or worker pool to be running by dint of
+%% not validating anything and assuming the exchange type does not
+%% require serialisation.
+%% NB: this assumes the pre-exchange-scratch-space format
+declare_exchange(XName, Type) ->
+    X = {exchange, XName, Type, true, false, false, []},
+    ok = mnesia:dirty_write(rabbit_durable_exchange, X).
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 7142d560..ea72de66 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -16,13 +16,13 @@
 
 -module(rabbit_variable_queue).
 
--export([init/3, terminate/1, delete_and_terminate/1,
-         purge/1, publish/3, publish_delivered/4, fetch/2, ack/2,
-         tx_publish/4, tx_ack/3, tx_rollback/2, tx_commit/4,
-         requeue/3, len/1, is_empty/1, dropwhile/2,
+-export([init/3, terminate/2, delete_and_terminate/2,
+         purge/1, publish/4, publish_delivered/5, drain_confirmed/1,
+         dropwhile/2, fetch/2, ack/2, requeue/3, len/1, is_empty/1,
          set_ram_duration_target/2, ram_duration/1,
-         needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1,
-         status/1]).
+         needs_timeout/1, timeout/1, handle_pre_hibernate/1,
+         status/1, invoke/3, is_duplicate/2, discard/3,
+         multiple_routing_keys/0]).
 
 -export([start/1, stop/0]).
 
@@ -145,18 +145,21 @@
 %% any one time. This further smooths the effects of changes to the
 %% target_ram_count and ensures the queue remains responsive
 %% even when there is a large amount of IO work to do. The
-%% idle_timeout callback is utilised to ensure that conversions are
+%% timeout callback is utilised to ensure that conversions are
 %% done as promptly as possible whilst ensuring the queue remains
 %% responsive.
 %%
 %% In the queue we keep track of both messages that are pending
-%% delivery and messages that are pending acks. This ensures that
-%% purging (deleting the former) and deletion (deleting the former and
-%% the latter) are both cheap and do require any scanning through qi
-%% segments.
+%% delivery and messages that are pending acks. In the event of a
+%% queue purge, we only need to load qi segments if the queue has
+%% elements in deltas (i.e. it came under significant memory
+%% pressure). In the event of a queue deletion, in addition to the
+%% preceding, by keeping track of pending acks in RAM, we do not need
+%% to search through qi segments looking for messages that are yet to
+%% be acknowledged.
 %%
 %% Pending acks are recorded in memory either as the tuple {SeqId,
-%% Guid, MsgProps} (tuple-form) or as the message itself (message-
+%% MsgId, MsgProps} (tuple-form) or as the message itself (message-
 %% form). Acks for persistent messages are always stored in the tuple-
 %% form. Acks for transient messages are also stored in tuple-form if
 %% the message has been sent to disk as part of the memory reduction
@@ -234,10 +237,11 @@
           ram_ack_index,
           index_state,
           msg_store_clients,
-          on_sync,
           durable,
           transient_threshold,
 
+          async_callback,
+
           len,
           persistent_count,
 
@@ -252,6 +256,7 @@
           msgs_on_disk,
           msg_indices_on_disk,
           unconfirmed,
+          confirmed,
           ack_out_counter,
           ack_in_counter,
           ack_rates
@@ -261,24 +266,20 @@
 
 -record(msg_status,
         { seq_id,
-          guid,
+          msg_id,
           msg,
           is_persistent,
           is_delivered,
           msg_on_disk,
           index_on_disk,
           msg_props
-         }).
+        }).
 
 -record(delta,
         { start_seq_id, %% start_seq_id is inclusive
           count,
           end_seq_id    %% end_seq_id is exclusive
-         }).
-
--record(tx, { pending_messages, pending_acks }).
-
--record(sync, { acks_persistent, acks_all, pubs, funs }).
+        }).
 
 %% When we discover, on publish, that we should write some indices to
 %% disk for some betas, the IO_BATCH_SIZE sets the number of betas
@@ -294,6 +295,8 @@
 
 %%----------------------------------------------------------------------------
 
+-rabbit_upgrade({multiple_routing_keys, local, []}).
+
 -ifdef(use_specs).
 
 -type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}).
@@ -310,12 +313,6 @@
                           count        :: non_neg_integer(),
                           end_seq_id   :: non_neg_integer() }).
 
--type(sync() :: #sync { acks_persistent :: [[seq_id()]],
-                        acks_all        :: [[seq_id()]],
-                        pubs            :: [{message_properties_transformer(),
-                                             [rabbit_types:basic_message()]}],
-                        funs            :: [fun (() -> any())] }).
-
 -type(state() :: #vqstate {
              q1                    :: queue(),
              q2                    :: bpqueue:bpqueue(),
@@ -328,13 +325,14 @@
              index_state           :: any(),
              msg_store_clients     :: 'undefined' | {{any(), binary()},
                                                     {any(), binary()}},
-             on_sync               :: sync(),
              durable               :: boolean(),
+             transient_threshold   :: non_neg_integer(),
+
+             async_callback        :: async_callback(),
 
              len                   :: non_neg_integer(),
              persistent_count      :: non_neg_integer(),
 
-             transient_threshold   :: non_neg_integer(),
              target_ram_count      :: non_neg_integer() | 'infinity',
              ram_msg_count         :: non_neg_integer(),
              ram_msg_count_prev    :: non_neg_integer(),
@@ -345,12 +343,15 @@
              msgs_on_disk          :: gb_set(),
              msg_indices_on_disk   :: gb_set(),
              unconfirmed           :: gb_set(),
+             confirmed             :: gb_set(),
              ack_out_counter       :: non_neg_integer(),
              ack_in_counter        :: non_neg_integer(),
              ack_rates             :: rates() }).
 
 -include("rabbit_backing_queue_spec.hrl").
 
+-spec(multiple_routing_keys/0 :: () -> 'ok').
+
 -endif.
 
 -define(BLANK_DELTA, #delta { start_seq_id = undefined,
@@ -360,11 +361,6 @@
                                          count        = 0,
                                          end_seq_id   = Z }).
 
--define(BLANK_SYNC, #sync { acks_persistent = [],
-                            acks_all        = [],
-                            pubs            = [],
-                            funs            = [] }).
-
 %%----------------------------------------------------------------------------
 %% Public API
 %%----------------------------------------------------------------------------
@@ -393,25 +389,26 @@ stop_msg_store() ->
     ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE),
     ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE).
 
-init(QueueName, IsDurable, Recover) ->
-    Self = self(),
-    init(QueueName, IsDurable, Recover,
-         fun (Guids, ActionTaken) ->
-                 msgs_written_to_disk(Self, Guids, ActionTaken)
+init(Queue, Recover, AsyncCallback) ->
+    init(Queue, Recover, AsyncCallback,
+         fun (MsgIds, ActionTaken) ->
+                 msgs_written_to_disk(AsyncCallback, MsgIds, ActionTaken)
          end,
-         fun (Guids) -> msg_indices_written_to_disk(Self, Guids) end).
+         fun (MsgIds) -> msg_indices_written_to_disk(AsyncCallback, MsgIds) end).
 
-init(QueueName, IsDurable, false, MsgOnDiskFun, MsgIdxOnDiskFun) ->
+init(#amqqueue { name = QueueName, durable = IsDurable }, false,
+     AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun) ->
     IndexState = rabbit_queue_index:init(QueueName, MsgIdxOnDiskFun),
-    init(IsDurable, IndexState, 0, [],
+    init(IsDurable, IndexState, 0, [], AsyncCallback,
          case IsDurable of
              true  -> msg_store_client_init(?PERSISTENT_MSG_STORE,
-                                            MsgOnDiskFun);
+                                            MsgOnDiskFun, AsyncCallback);
              false -> undefined
          end,
-         msg_store_client_init(?TRANSIENT_MSG_STORE, undefined));
+         msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, AsyncCallback));
 
-init(QueueName, true, true, MsgOnDiskFun, MsgIdxOnDiskFun) ->
+init(#amqqueue { name = QueueName, durable = true }, true,
+     AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun) ->
     Terms = rabbit_queue_index:shutdown_terms(QueueName),
     {PRef, TRef, Terms1} =
         case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of
@@ -421,25 +418,25 @@ init(QueueName, true, true, MsgOnDiskFun, MsgIdxOnDiskFun) ->
             _  -> {rabbit_guid:guid(), rabbit_guid:guid(), []}
         end,
     PersistentClient = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef,
-                                             MsgOnDiskFun),
+                                             MsgOnDiskFun, AsyncCallback),
     TransientClient  = msg_store_client_init(?TRANSIENT_MSG_STORE, TRef,
-                                             undefined),
+                                             undefined, AsyncCallback),
     {DeltaCount, IndexState} =
         rabbit_queue_index:recover(
           QueueName, Terms1,
           rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE),
-          fun (Guid) ->
-                  rabbit_msg_store:contains(Guid, PersistentClient)
+          fun (MsgId) ->
+                  rabbit_msg_store:contains(MsgId, PersistentClient)
           end,
           MsgIdxOnDiskFun),
-    init(true, IndexState, DeltaCount, Terms1,
+    init(true, IndexState, DeltaCount, Terms1, AsyncCallback,
          PersistentClient, TransientClient).
 
-terminate(State) ->
+terminate(_Reason, State) ->
     State1 = #vqstate { persistent_count  = PCount,
                         index_state       = IndexState,
                         msg_store_clients = {MSCStateP, MSCStateT} } =
-        remove_pending_ack(true, tx_commit_index(State)),
+        remove_pending_ack(true, State),
     PRef = case MSCStateP of
                undefined -> undefined;
                _         -> ok = rabbit_msg_store:client_terminate(MSCStateP),
@@ -456,7 +453,7 @@ terminate(State) ->
 
 %% the only difference between purge and delete is that delete also
 %% needs to delete everything that's been delivered and not ack'd.
-delete_and_terminate(State) ->
+delete_and_terminate(_Reason, State) ->
     %% TODO: there is no need to interact with qi at all - which we do
     %% as part of 'purge' and 'remove_pending_ack', other than
     %% deleting it.
@@ -501,32 +498,37 @@ purge(State = #vqstate { q4                = Q4,
                               ram_index_count   = 0,
                               persistent_count  = PCount1 })}.
 
-publish(Msg, MsgProps, State) ->
+publish(Msg, MsgProps, _ChPid, State) ->
     {_SeqId, State1} = publish(Msg, MsgProps, false, false, State),
     a(reduce_memory_use(State1)).
 
-publish_delivered(false, #basic_message { guid = Guid },
-                  _MsgProps, State = #vqstate { len = 0 }) ->
-    blind_confirm(self(), gb_sets:singleton(Guid)),
+publish_delivered(false, #basic_message { id = MsgId },
+                  #message_properties { needs_confirming = NeedsConfirming },
+                  _ChPid, State = #vqstate { async_callback = Callback,
+                                             len = 0 }) ->
+    case NeedsConfirming of
+        true  -> blind_confirm(Callback, gb_sets:singleton(MsgId));
+        false -> ok
+    end,
     {undefined, a(State)};
 publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent,
-                                               guid = Guid },
+                                               id = MsgId },
                   MsgProps = #message_properties {
                     needs_confirming = NeedsConfirming },
-                  State = #vqstate { len              = 0,
-                                     next_seq_id      = SeqId,
-                                     out_counter      = OutCount,
-                                     in_counter       = InCount,
-                                     persistent_count = PCount,
-                                     durable          = IsDurable,
-                                     unconfirmed      = UC }) ->
+                  _ChPid, State = #vqstate { len              = 0,
+                                             next_seq_id      = SeqId,
+                                             out_counter      = OutCount,
+                                             in_counter       = InCount,
+                                             persistent_count = PCount,
+                                             durable          = IsDurable,
+                                             unconfirmed      = UC }) ->
     IsPersistent1 = IsDurable andalso IsPersistent,
     MsgStatus = (msg_status(IsPersistent1, SeqId, Msg, MsgProps))
         #msg_status { is_delivered = true },
     {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
     State2 = record_pending_ack(m(MsgStatus1), State1),
     PCount1 = PCount + one_if(IsPersistent1),
-    UC1 = gb_sets_maybe_insert(NeedsConfirming, Guid, UC),
+    UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
     {SeqId, a(reduce_memory_use(
                 State2 #vqstate { next_seq_id      = SeqId    + 1,
                                   out_counter      = OutCount + 1,
@@ -534,186 +536,61 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent,
                                   persistent_count = PCount1,
                                   unconfirmed      = UC1 }))}.
 
-dropwhile(Pred, State) ->
-    {_OkOrEmpty, State1} = dropwhile1(Pred, State),
-    State1.
+drain_confirmed(State = #vqstate { confirmed = C }) ->
+    {gb_sets:to_list(C), State #vqstate { confirmed = gb_sets:new() }}.
 
-dropwhile1(Pred, State) ->
-    internal_queue_out(
-      fun(MsgStatus = #msg_status { msg_props = MsgProps }, State1) ->
-              case Pred(MsgProps) of
-                  true ->
-                      {_, State2} = internal_fetch(false, MsgStatus, State1),
-                      dropwhile1(Pred, State2);
-                  false ->
-                      %% message needs to go back into Q4 (or maybe go
-                      %% in for the first time if it was loaded from
-                      %% Q3). Also the msg contents might not be in
-                      %% RAM, so read them in now
-                      {MsgStatus1, State2 = #vqstate { q4 = Q4 }} =
-                          read_msg(MsgStatus, State1),
-                      {ok, State2 #vqstate {q4 = queue:in_r(MsgStatus1, Q4) }}
-              end
-      end, State).
+dropwhile(Pred, State) ->
+    case queue_out(State) of
+        {empty, State1} ->
+            a(State1);
+        {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} ->
+            case Pred(MsgProps) of
+                true ->  {_, State2} = internal_fetch(false, MsgStatus, State1),
+                         dropwhile(Pred, State2);
+                false -> a(in_r(MsgStatus, State1))
+            end
+    end.
 
 fetch(AckRequired, State) ->
-    internal_queue_out(
-      fun(MsgStatus, State1) ->
-              %% it's possible that the message wasn't read from disk
-              %% at this point, so read it in.
-              {MsgStatus1, State2} = read_msg(MsgStatus, State1),
-              internal_fetch(AckRequired, MsgStatus1, State2)
-      end, State).
-
-internal_queue_out(Fun, State = #vqstate { q4 = Q4 }) ->
-    case queue:out(Q4) of
-        {empty, _Q4} ->
-            case fetch_from_q3(State) of
-                {empty, State1} = Result      -> a(State1), Result;
-                {loaded, {MsgStatus, State1}} -> Fun(MsgStatus, State1)
-            end;
-        {{value, MsgStatus}, Q4a} ->
-            Fun(MsgStatus, State #vqstate { q4 = Q4a })
+    case queue_out(State) of
+        {empty, State1} ->
+            {empty, a(State1)};
+        {{value, MsgStatus}, State1} ->
+            %% it is possible that the message wasn't read from disk
+            %% at this point, so read it in.
+            {MsgStatus1, State2} = read_msg(MsgStatus, State1),
+            {Res, State3} = internal_fetch(AckRequired, MsgStatus1, State2),
+            {Res, a(State3)}
     end.
 
-read_msg(MsgStatus = #msg_status { msg           = undefined,
-                                   guid          = Guid,
-                                   is_persistent = IsPersistent },
-         State = #vqstate { ram_msg_count     = RamMsgCount,
-                            msg_store_clients = MSCState}) ->
-    {{ok, Msg = #basic_message {}}, MSCState1} =
-        msg_store_read(MSCState, IsPersistent, Guid),
-    {MsgStatus #msg_status { msg = Msg },
-     State #vqstate { ram_msg_count     = RamMsgCount + 1,
-                      msg_store_clients = MSCState1 }};
-read_msg(MsgStatus, State) ->
-    {MsgStatus, State}.
-
-internal_fetch(AckRequired, MsgStatus = #msg_status {
-                              seq_id        = SeqId,
-                              guid          = Guid,
-                              msg           = Msg,
-                              is_persistent = IsPersistent,
-                              is_delivered  = IsDelivered,
-                              msg_on_disk   = MsgOnDisk,
-                              index_on_disk = IndexOnDisk },
-               State = #vqstate {ram_msg_count     = RamMsgCount,
-                                 out_counter       = OutCount,
-                                 index_state       = IndexState,
-                                 msg_store_clients = MSCState,
-                                 len               = Len,
-                                 persistent_count  = PCount }) ->
-    %% 1. Mark it delivered if necessary
-    IndexState1 = maybe_write_delivered(
-                    IndexOnDisk andalso not IsDelivered,
-                    SeqId, IndexState),
-
-    %% 2. Remove from msg_store and queue index, if necessary
-    Rem = fun () ->
-                  ok = msg_store_remove(MSCState, IsPersistent, [Guid])
-          end,
-    Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end,
-    IndexState2 =
-        case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of
-            {false, true, false,     _} -> Rem(), IndexState1;
-            {false, true,  true,     _} -> Rem(), Ack();
-            { true, true,  true, false} -> Ack();
-            _                           -> IndexState1
-        end,
-
-    %% 3. If an ack is required, add something sensible to PA
-    {AckTag, State1} = case AckRequired of
-                        true  -> StateN = record_pending_ack(
-                                            MsgStatus #msg_status {
-                                              is_delivered = true }, State),
-                                 {SeqId, StateN};
-                        false -> {undefined, State}
-                    end,
-
-    PCount1 = PCount - one_if(IsPersistent andalso not AckRequired),
-    Len1 = Len - 1,
-    RamMsgCount1 = RamMsgCount - one_if(Msg =/= undefined),
-
-    {{Msg, IsDelivered, AckTag, Len1},
-     a(State1 #vqstate { ram_msg_count    = RamMsgCount1,
-                         out_counter      = OutCount + 1,
-                         index_state      = IndexState2,
-                         len              = Len1,
-                         persistent_count = PCount1 })}.
-
 ack(AckTags, State) ->
-    a(ack(fun msg_store_remove/3,
-          fun (_, State0) -> State0 end,
-          AckTags, State)).
-
-tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, MsgProps,
-           State = #vqstate { durable           = IsDurable,
-                              msg_store_clients = MSCState }) ->
-    Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn),
-    store_tx(Txn, Tx #tx { pending_messages = [{Msg, MsgProps} | Pubs] }),
-    case IsPersistent andalso IsDurable of
-        true  -> MsgStatus = msg_status(true, undefined, Msg, MsgProps),
-                 #msg_status { msg_on_disk = true } =
-                     maybe_write_msg_to_disk(false, MsgStatus, MSCState);
-        false -> ok
-    end,
-    a(State).
-
-tx_ack(Txn, AckTags, State) ->
-    Tx = #tx { pending_acks = Acks } = lookup_tx(Txn),
-    store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }),
-    State.
-
-tx_rollback(Txn, State = #vqstate { durable           = IsDurable,
-                                    msg_store_clients = MSCState }) ->
-    #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
-    erase_tx(Txn),
-    ok = case IsDurable of
-             true  -> msg_store_remove(MSCState, true, persistent_guids(Pubs));
-             false -> ok
-         end,
-    {lists:append(AckTags), a(State)}.
-
-tx_commit(Txn, Fun, MsgPropsFun,
-          State = #vqstate { durable           = IsDurable,
-                             msg_store_clients = MSCState }) ->
-    #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
-    erase_tx(Txn),
-    AckTags1 = lists:append(AckTags),
-    PersistentGuids = persistent_guids(Pubs),
-    HasPersistentPubs = PersistentGuids =/= [],
-    {AckTags1,
-     a(case IsDurable andalso HasPersistentPubs of
-           true  -> ok = msg_store_sync(
-                           MSCState, true, PersistentGuids,
-                           msg_store_callback(PersistentGuids, Pubs, AckTags1,
-                                              Fun, MsgPropsFun)),
-                    State;
-           false -> tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags1,
-                                             Fun, MsgPropsFun, State)
-       end)}.
+    {MsgIds, State1} = ack(fun msg_store_remove/3,
+                           fun (_, State0) -> State0 end,
+                           AckTags, State),
+    {MsgIds, a(State1)}.
 
 requeue(AckTags, MsgPropsFun, State) ->
     MsgPropsFun1 = fun (MsgProps) ->
                            (MsgPropsFun(MsgProps)) #message_properties {
                              needs_confirming = false }
                    end,
-    a(reduce_memory_use(
-        ack(fun msg_store_release/3,
+    {MsgIds, State1} =
+        ack(fun (_, _, _) -> ok end,
             fun (#msg_status { msg = Msg, msg_props = MsgProps }, State1) ->
                     {_SeqId, State2} = publish(Msg, MsgPropsFun1(MsgProps),
                                                true, false, State1),
                     State2;
-                ({IsPersistent, Guid, MsgProps}, State1) ->
+                ({IsPersistent, MsgId, MsgProps}, State1) ->
                     #vqstate { msg_store_clients = MSCState } = State1,
                     {{ok, Msg = #basic_message{}}, MSCState1} =
-                        msg_store_read(MSCState, IsPersistent, Guid),
+                        msg_store_read(MSCState, IsPersistent, MsgId),
                     State2 = State1 #vqstate { msg_store_clients = MSCState1 },
                     {_SeqId, State3} = publish(Msg, MsgPropsFun1(MsgProps),
                                                true, true, State2),
                     State3
             end,
-            AckTags, State))).
+            AckTags, State),
+    {MsgIds, a(reduce_memory_use(State1))}.
 
 len(#vqstate { len = Len }) -> Len.
 
@@ -768,8 +645,8 @@ ram_duration(State = #vqstate {
     RamAckCount = gb_trees:size(RamAckIndex),
 
     Duration = %% msgs+acks / (msgs+acks/sec) == sec
-        case AvgEgressRate == 0 andalso AvgIngressRate == 0 andalso
-             AvgAckEgressRate == 0 andalso AvgAckIngressRate == 0 of
+        case (AvgEgressRate == 0 andalso AvgIngressRate == 0 andalso
+              AvgAckEgressRate == 0 andalso AvgAckIngressRate == 0) of
             true  -> infinity;
             false -> (RamMsgCountPrev + RamMsgCount +
                           RamAckCount + RamAckCountPrev) /
@@ -797,22 +674,22 @@ ram_duration(State = #vqstate {
                  ram_msg_count_prev = RamMsgCount,
                  ram_ack_count_prev = RamAckCount }}.
 
-needs_idle_timeout(State = #vqstate { on_sync = OnSync }) ->
-    case {OnSync, needs_index_sync(State)} of
-        {?BLANK_SYNC, false} ->
-            {Res, _State} = reduce_memory_use(
-                              fun (_Quota, State1) -> {0, State1} end,
-                              fun (_Quota, State1) -> State1 end,
-                              fun (State1)         -> State1 end,
-                              fun (_Quota, State1) -> {0, State1} end,
-                              State),
-            Res;
-        _ ->
-            true
+needs_timeout(State) ->
+    case needs_index_sync(State) of
+        false -> case reduce_memory_use(
+                        fun (_Quota, State1) -> {0, State1} end,
+                        fun (_Quota, State1) -> State1 end,
+                        fun (State1)         -> State1 end,
+                        fun (_Quota, State1) -> {0, State1} end,
+                        State) of
+                     {true,  _State} -> idle;
+                     {false, _State} -> false
+                 end;
+        true  -> timed
     end.
 
-idle_timeout(State) ->
-    a(reduce_memory_use(confirm_commit_index(tx_commit_index(State)))).
+timeout(State) ->
+    a(reduce_memory_use(confirm_commit_index(State))).
 
 handle_pre_hibernate(State = #vqstate { index_state = IndexState }) ->
     State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }.
@@ -822,7 +699,6 @@ status(#vqstate {
           len              = Len,
           pending_ack      = PA,
           ram_ack_index    = RAI,
-          on_sync          = #sync { funs = From },
           target_ram_count = TargetRamCount,
           ram_msg_count    = RamMsgCount,
           ram_index_count  = RamIndexCount,
@@ -839,7 +715,6 @@ status(#vqstate {
       {q4                  , queue:len(Q4)},
       {len                 , Len},
       {pending_acks        , dict:size(PA)},
-      {outstanding_txns    , length(From)},
       {target_ram_count    , TargetRamCount},
       {ram_msg_count       , RamMsgCount},
       {ram_ack_count       , gb_trees:size(RAI)},
@@ -851,6 +726,12 @@ status(#vqstate {
       {avg_ack_ingress_rate, AvgAckIngressRate},
       {avg_ack_egress_rate , AvgAckEgressRate} ].
 
+invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). %% other modules: no match
+%% Always reports "not a duplicate" and hands the state back unchanged.
+is_duplicate(_Msg, State) -> {false, State}.
+%% Ignores the message and channel pid; the state is returned as-is.
+discard(_Msg, _ChPid, State) -> State.
+
 %%----------------------------------------------------------------------------
 %% Minor helpers
 %%----------------------------------------------------------------------------
@@ -896,12 +777,12 @@ cons_if(true,   E, L) -> [E | L];
 cons_if(false, _E, L) -> L.
 
 gb_sets_maybe_insert(false, _Val, Set) -> Set;
-%% when requeueing, we re-add a guid to the unconfirmed set
+%% when requeueing, we re-add a msg_id to the unconfirmed set
 gb_sets_maybe_insert(true,  Val,  Set) -> gb_sets:add(Val, Set).
 
-msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid },
+msg_status(IsPersistent, SeqId, Msg = #basic_message { id = MsgId },
            MsgProps) ->
-    #msg_status { seq_id = SeqId, guid = Guid, msg = Msg,
+    #msg_status { seq_id = SeqId, msg_id = MsgId, msg = Msg, %% disk flags off
                   is_persistent = IsPersistent, is_delivered = false,
                   msg_on_disk = false, index_on_disk = false,
                   msg_props = MsgProps }.
@@ -920,38 +801,28 @@ with_immutable_msg_store_state(MSCState, IsPersistent, Fun) ->
                                            end),
     Res.
 
-msg_store_client_init(MsgStore, MsgOnDiskFun) ->
-    msg_store_client_init(MsgStore, rabbit_guid:guid(), MsgOnDiskFun).
+msg_store_client_init(MsgStore, MsgOnDiskFun, Callback) -> %% Ref: fresh guid
+    msg_store_client_init(MsgStore, rabbit_guid:guid(), MsgOnDiskFun, Callback).
 
-msg_store_client_init(MsgStore, Ref, MsgOnDiskFun) ->
-    rabbit_msg_store:client_init(
-      MsgStore, Ref, MsgOnDiskFun,
-      msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE)).
+msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback) ->
+    CloseFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE),
+    Notify   = fun () -> Callback(?MODULE, CloseFun) end, %% 4th client_init arg
+    rabbit_msg_store:client_init(MsgStore, Ref, MsgOnDiskFun, Notify).
 
-msg_store_write(MSCState, IsPersistent, Guid, Msg) ->
+msg_store_write(MSCState, IsPersistent, MsgId, Msg) -> %% stores Msg as MsgId
     with_immutable_msg_store_state(
       MSCState, IsPersistent,
-      fun (MSCState1) -> rabbit_msg_store:write(Guid, Msg, MSCState1) end).
+      fun (Client) -> rabbit_msg_store:write(MsgId, Msg, Client) end).
 
-msg_store_read(MSCState, IsPersistent, Guid) ->
+msg_store_read(MSCState, IsPersistent, MsgId) -> %% looks MsgId up in a store
     with_msg_store_state(
       MSCState, IsPersistent,
-      fun (MSCState1) -> rabbit_msg_store:read(Guid, MSCState1) end).
-
-msg_store_remove(MSCState, IsPersistent, Guids) ->
-    with_immutable_msg_store_state(
-      MSCState, IsPersistent,
-      fun (MCSState1) -> rabbit_msg_store:remove(Guids, MCSState1) end).
-
-msg_store_release(MSCState, IsPersistent, Guids) ->
-    with_immutable_msg_store_state(
-      MSCState, IsPersistent,
-      fun (MCSState1) -> rabbit_msg_store:release(Guids, MCSState1) end).
+      fun (Client) -> rabbit_msg_store:read(MsgId, Client) end).
 
-msg_store_sync(MSCState, IsPersistent, Guids, Callback) ->
+msg_store_remove(MSCState, IsPersistent, MsgIds) -> %% MsgIds is a list
     with_immutable_msg_store_state(
       MSCState, IsPersistent,
-      fun (MSCState1) -> rabbit_msg_store:sync(Guids, Callback, MSCState1) end).
+      fun (Client) -> rabbit_msg_store:remove(MsgIds, Client) end).
 
 msg_store_close_fds(MSCState, IsPersistent) ->
     with_msg_store_state(
@@ -959,15 +830,9 @@ msg_store_close_fds(MSCState, IsPersistent) ->
       fun (MSCState1) -> rabbit_msg_store:close_all_indicated(MSCState1) end).
 
 msg_store_close_fds_fun(IsPersistent) ->
-    Self = self(),
-    fun () ->
-            rabbit_amqqueue:maybe_run_queue_via_backing_queue_async(
-              Self,
-              fun (State = #vqstate { msg_store_clients = MSCState }) ->
-                      {ok, MSCState1} =
-                          msg_store_close_fds(MSCState, IsPersistent),
-                      {[], State #vqstate { msg_store_clients = MSCState1 }}
-              end)
+    fun (?MODULE, VQS = #vqstate { msg_store_clients = Clients }) ->
+            {ok, Clients1} = msg_store_close_fds(Clients, IsPersistent),
+            VQS #vqstate { msg_store_clients = Clients1 } %% state -> state
     end.
 
 maybe_write_delivered(false, _SeqId, IndexState) ->
@@ -975,31 +840,17 @@ maybe_write_delivered(false, _SeqId, IndexState) ->
 maybe_write_delivered(true, SeqId, IndexState) ->
     rabbit_queue_index:deliver([SeqId], IndexState).
 
-lookup_tx(Txn) -> case get({txn, Txn}) of
-                      undefined -> #tx { pending_messages = [],
-                                         pending_acks     = [] };
-                      V         -> V
-                  end.
-
-store_tx(Txn, Tx) -> put({txn, Txn}, Tx).
-
-erase_tx(Txn) -> erase({txn, Txn}).
-
-persistent_guids(Pubs) ->
-    [Guid || {#basic_message { guid          = Guid,
-                               is_persistent = true }, _MsgProps} <- Pubs].
-
 betas_from_index_entries(List, TransientThreshold, IndexState) ->
     {Filtered, Delivers, Acks} =
         lists:foldr(
-          fun ({Guid, SeqId, MsgProps, IsPersistent, IsDelivered},
+          fun ({MsgId, SeqId, MsgProps, IsPersistent, IsDelivered},
                {Filtered1, Delivers1, Acks1}) ->
                   case SeqId < TransientThreshold andalso not IsPersistent of
                       true  -> {Filtered1,
                                 cons_if(not IsDelivered, SeqId, Delivers1),
                                 [SeqId | Acks1]};
                       false -> {[m(#msg_status { msg           = undefined,
-                                                 guid          = Guid,
+                                                 msg_id        = MsgId,
                                                  seq_id        = SeqId,
                                                  is_persistent = IsPersistent,
                                                  is_delivered  = IsDelivered,
@@ -1052,7 +903,7 @@ update_rate(Now, Then, Count, {OThen, OCount}) ->
 %% Internal major helpers for Public API
 %%----------------------------------------------------------------------------
 
-init(IsDurable, IndexState, DeltaCount, Terms,
+init(IsDurable, IndexState, DeltaCount, Terms, AsyncCallback,
      PersistentClient, TransientClient) ->
     {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState),
 
@@ -1075,10 +926,11 @@ init(IsDurable, IndexState, DeltaCount, Terms,
       ram_ack_index       = gb_trees:empty(),
       index_state         = IndexState1,
       msg_store_clients   = {PersistentClient, TransientClient},
-      on_sync             = ?BLANK_SYNC,
       durable             = IsDurable,
       transient_threshold = NextSeqId,
 
+      async_callback      = AsyncCallback,
+
       len                 = DeltaCount1,
       persistent_count    = DeltaCount1,
 
@@ -1093,6 +945,7 @@ init(IsDurable, IndexState, DeltaCount, Terms,
       msgs_on_disk        = gb_sets:new(),
       msg_indices_on_disk = gb_sets:new(),
       unconfirmed         = gb_sets:new(),
+      confirmed           = gb_sets:new(),
       ack_out_counter     = 0,
       ack_in_counter      = 0,
       ack_rates           = blank_rate(Now, 0) },
@@ -1105,89 +958,94 @@ blank_rate(Timestamp, IngressLength) ->
              avg_ingress = 0.0,
              timestamp   = Timestamp }.
 
-msg_store_callback(PersistentGuids, Pubs, AckTags, Fun, MsgPropsFun) ->
-    Self = self(),
-    F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue(
-                    Self, fun (StateN) -> {[], tx_commit_post_msg_store(
-                                                 true, Pubs, AckTags,
-                                                 Fun, MsgPropsFun, StateN)}
-                          end)
-        end,
-    fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler(
-                                     fun () -> remove_persistent_messages(
-                                                 PersistentGuids)
-                                     end, F)
-                    end)
+in_r(MS = #msg_status { msg = undefined, index_on_disk = OnDisk }, %% no body
+     State = #vqstate { q3 = Q3, q4 = Q4, ram_index_count = RIC }) ->
+    case queue:is_empty(Q4) of
+        false -> %% q4 non-empty: read the body, then queue:in_r onto q4
+            {MS1, State1 = #vqstate { q4 = Q4a }} = read_msg(MS, State),
+            State1 #vqstate { q4 = queue:in_r(MS1, Q4a) };
+        true  -> %% q4 empty: queued on q3 via bpqueue:in_r, body not read
+            State #vqstate { q3 = bpqueue:in_r(OnDisk, MS, Q3),
+                             ram_index_count = RIC + one_if(not OnDisk) }
+    end;
+in_r(MS, State = #vqstate { q4 = Q4 }) -> %% msg present: front of q4
+    State #vqstate { q4 = queue:in_r(MS, Q4) }.
+
+queue_out(State = #vqstate { q4 = Q4 }) -> %% q4 first, q3 as the fallback
+    case queue:out(Q4) of
+        {{value, _} = Hit, Rest} ->
+            {Hit, State #vqstate { q4 = Rest }};
+        {empty, _} ->
+            case fetch_from_q3(State) of
+                {loaded, {MS, State1}} -> {{value, MS}, State1};
+                {empty, _} = Miss      -> Miss
+            end
     end.
 
-remove_persistent_messages(Guids) ->
-    PersistentClient = msg_store_client_init(?PERSISTENT_MSG_STORE, undefined),
-    ok = rabbit_msg_store:remove(Guids, PersistentClient),
-    rabbit_msg_store:client_delete_and_terminate(PersistentClient).
-
-tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, MsgPropsFun,
-                         State = #vqstate {
-                           on_sync     = OnSync = #sync {
-                                           acks_persistent = SPAcks,
-                                           acks_all        = SAcks,
-                                           pubs            = SPubs,
-                                           funs            = SFuns },
-                           pending_ack = PA,
-                           durable     = IsDurable }) ->
-    PersistentAcks =
-        case IsDurable of
-            true  -> [AckTag || AckTag <- AckTags,
-                                case dict:fetch(AckTag, PA) of
-                                    #msg_status {} ->
-                                        false;
-                                    {IsPersistent, _Guid, _MsgProps} ->
-                                        IsPersistent
-                                end];
-            false -> []
+%% Ensures MsgStatus carries its message body; returns {MsgStatus1, State1}.
+read_msg(MS = #msg_status { msg = undefined, msg_id = MsgId,
+                            is_persistent = IsPersistent },
+         State = #vqstate { ram_msg_count = RamCount,
+                            msg_store_clients = Clients }) ->
+    %% body missing: fetch it from the store; anything but {ok, Msg} crashes
+    {{ok, Body = #basic_message {}}, Clients1} =
+        msg_store_read(Clients, IsPersistent, MsgId),
+    {MS #msg_status { msg = Body },
+     State #vqstate { ram_msg_count     = RamCount + 1,
+                      msg_store_clients = Clients1 }};
+read_msg(MS, State) -> {MS, State}. %% body already present: nothing to do
+
+internal_fetch(AckRequired, MsgStatus = #msg_status {
+                              seq_id        = SeqId,
+                              msg_id        = MsgId,
+                              msg           = Msg,
+                              is_persistent = IsPersistent,
+                              is_delivered  = IsDelivered,
+                              msg_on_disk   = MsgOnDisk,
+                              index_on_disk = IndexOnDisk },
+               State = #vqstate {ram_msg_count     = RamMsgCount,
+                                 out_counter       = OutCount,
+                                 index_state       = IndexState,
+                                 msg_store_clients = MSCState,
+                                 len               = Len,
+                                 persistent_count  = PCount }) ->
+    %% 1. Flag as delivered in the index, if indexed and not yet delivered
+    IndexState1 = maybe_write_delivered(
+                    IndexOnDisk andalso not IsDelivered,
+                    SeqId, IndexState),
+
+    %% 2. Drop from msg_store and/or queue index; see the case table below
+    Rem = fun () ->
+                  ok = msg_store_remove(MSCState, IsPersistent, [MsgId])
+          end,
+    Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end,
+    IndexState2 =
+        case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of
+            {false, true, false,     _} -> Rem(), IndexState1; %% msg only
+            {false, true,  true,     _} -> Rem(), Ack(); %% msg and index
+            { true, true,  true, false} -> Ack(); %% transient: un-index now
+            _                           -> IndexState1 %% nothing to undo
         end,
-    case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of
-        true  -> State #vqstate {
-                   on_sync = #sync {
-                     acks_persistent = [PersistentAcks | SPAcks],
-                     acks_all        = [AckTags | SAcks],
-                     pubs            = [{MsgPropsFun, Pubs} | SPubs],
-                     funs            = [Fun | SFuns] }};
-        false -> State1 = tx_commit_index(
-                            State #vqstate {
-                              on_sync = #sync {
-                                acks_persistent = [],
-                                acks_all        = [AckTags],
-                                pubs            = [{MsgPropsFun, Pubs}],
-                                funs            = [Fun] } }),
-                 State1 #vqstate { on_sync = OnSync }
-    end.
 
-tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) ->
-    State;
-tx_commit_index(State = #vqstate { on_sync = #sync {
-                                     acks_persistent = SPAcks,
-                                     acks_all        = SAcks,
-                                     pubs            = SPubs,
-                                     funs            = SFuns },
-                                   durable = IsDurable }) ->
-    PAcks = lists:append(SPAcks),
-    Acks  = lists:append(SAcks),
-    Pubs  = [{Msg, Fun(MsgProps)} || {Fun, PubsN}    <- lists:reverse(SPubs),
-                                     {Msg, MsgProps} <- lists:reverse(PubsN)],
-    {SeqIds, State1 = #vqstate { index_state = IndexState }} =
-        lists:foldl(
-          fun ({Msg = #basic_message { is_persistent = IsPersistent },
-                MsgProps},
-               {SeqIdsAcc, State2}) ->
-                  IsPersistent1 = IsDurable andalso IsPersistent,
-                  {SeqId, State3} =
-                      publish(Msg, MsgProps, false, IsPersistent1, State2),
-                  {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3}
-          end, {PAcks, ack(Acks, State)}, Pubs),
-    IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState),
-    [ Fun() || Fun <- lists:reverse(SFuns) ],
-    reduce_memory_use(
-      State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }).
+    %% 3. If an ack is required, record it as pending; SeqId is the ack tag
+    {AckTag, State1} = case AckRequired of
+                           true  -> StateN = record_pending_ack(
+                                               MsgStatus #msg_status {
+                                                 is_delivered = true }, State),
+                                    {SeqId, StateN};
+                           false -> {undefined, State}
+                       end,
+    %% A persistent msg still awaiting its ack keeps counting in PCount.
+    PCount1 = PCount - one_if(IsPersistent andalso not AckRequired),
+    Len1 = Len - 1,
+    RamMsgCount1 = RamMsgCount - one_if(Msg =/= undefined), %% body was loaded
+
+    {{Msg, IsDelivered, AckTag, Len1}, %% IsDelivered: value before this fetch
+     State1 #vqstate { ram_msg_count    = RamMsgCount1,
+                       out_counter      = OutCount + 1,
+                       index_state      = IndexState2,
+                       len              = Len1,
+                       persistent_count = PCount1 }}.
 
 purge_betas_and_deltas(LensByStore,
                        State = #vqstate { q3                = Q3,
@@ -1206,38 +1064,38 @@ purge_betas_and_deltas(LensByStore,
     end.
 
 remove_queue_entries(Fold, Q, LensByStore, IndexState, MSCState) ->
-    {GuidsByStore, Delivers, Acks} =
+    {MsgIdsByStore, Delivers, Acks} = %% gathered by remove_queue_entries1/2
         Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q),
-    ok = orddict:fold(fun (IsPersistent, Guids, ok) ->
-                              msg_store_remove(MSCState, IsPersistent, Guids)
-                      end, ok, GuidsByStore),
-    {sum_guids_by_store_to_len(LensByStore, GuidsByStore),
+    ok = orddict:fold(fun (IsPersistent, MsgIds, ok) -> %% one call per key
+                              msg_store_remove(MSCState, IsPersistent, MsgIds)
+                      end, ok, MsgIdsByStore),
+    {sum_msg_ids_by_store_to_len(LensByStore, MsgIdsByStore),
      rabbit_queue_index:ack(Acks,
                             rabbit_queue_index:deliver(Delivers, IndexState))}.
 
 remove_queue_entries1(
-  #msg_status { guid = Guid, seq_id = SeqId,
+  #msg_status { msg_id = MsgId, seq_id = SeqId,
                 is_delivered = IsDelivered, msg_on_disk = MsgOnDisk,
                 index_on_disk = IndexOnDisk, is_persistent = IsPersistent },
-  {GuidsByStore, Delivers, Acks}) ->
+  {MsgIdsByStore, Delivers, Acks}) -> %% fold accumulator, rebuilt below
     {case MsgOnDisk of
-         true  -> rabbit_misc:orddict_cons(IsPersistent, Guid, GuidsByStore);
-         false -> GuidsByStore
+         true  -> rabbit_misc:orddict_cons(IsPersistent, MsgId, MsgIdsByStore);
+         false -> MsgIdsByStore %% msg not on disk: no id to collect
      end,
      cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
      cons_if(IndexOnDisk, SeqId, Acks)}.
 
-sum_guids_by_store_to_len(LensByStore, GuidsByStore) ->
+sum_msg_ids_by_store_to_len(LensByStore, MsgIdsByStore) -> %% adds list lengths
     orddict:fold(
-      fun (IsPersistent, Guids, LensByStore1) ->
-              orddict:update_counter(IsPersistent, length(Guids), LensByStore1)
-      end, LensByStore, GuidsByStore).
+      fun (IsPersistent, Ids, Acc) ->
+              orddict:update_counter(IsPersistent, length(Ids), Acc)
+      end, LensByStore, MsgIdsByStore).
 
 %%----------------------------------------------------------------------------
 %% Internal gubbins for publishing
 %%----------------------------------------------------------------------------
 
-publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid },
+publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
         MsgProps = #message_properties { needs_confirming = NeedsConfirming },
         IsDelivered, MsgOnDisk,
         State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
@@ -1257,7 +1115,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid },
                  true  -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) }
              end,
     PCount1 = PCount + one_if(IsPersistent1),
-    UC1 = gb_sets_maybe_insert(NeedsConfirming, Guid, UC),
+    UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
     {SeqId, State2 #vqstate { next_seq_id      = SeqId   + 1,
                               len              = Len     + 1,
                               in_counter       = InCount + 1,
@@ -1269,14 +1127,14 @@ maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status {
                                   msg_on_disk = true }, _MSCState) ->
     MsgStatus;
 maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
-                                 msg = Msg, guid = Guid,
+                                 msg = Msg, msg_id = MsgId,
                                  is_persistent = IsPersistent }, MSCState)
   when Force orelse IsPersistent ->
     Msg1 = Msg #basic_message {
              %% don't persist any recoverable decoded properties
              content = rabbit_binary_parser:clear_decoded_content(
                          Msg #basic_message.content)},
-    ok = msg_store_write(MSCState, IsPersistent, Guid, Msg1),
+    ok = msg_store_write(MSCState, IsPersistent, MsgId, Msg1),
     MsgStatus #msg_status { msg_on_disk = true };
 maybe_write_msg_to_disk(_Force, MsgStatus, _MSCState) ->
     MsgStatus.
@@ -1286,7 +1144,7 @@ maybe_write_index_to_disk(_Force, MsgStatus = #msg_status {
     true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION
     {MsgStatus, IndexState};
 maybe_write_index_to_disk(Force, MsgStatus = #msg_status {
-                                   guid          = Guid,
+                                   msg_id        = MsgId,
                                    seq_id        = SeqId,
                                    is_persistent = IsPersistent,
                                    is_delivered  = IsDelivered,
@@ -1294,7 +1152,7 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status {
   when Force orelse IsPersistent ->
     true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION
     IndexState1 = rabbit_queue_index:publish(
-                    Guid, SeqId, MsgProps, IsPersistent, IndexState),
+                    MsgId, SeqId, MsgProps, IsPersistent, IndexState),
     {MsgStatus #msg_status { index_on_disk = true },
      maybe_write_delivered(IsDelivered, SeqId, IndexState1)};
 maybe_write_index_to_disk(_Force, MsgStatus, IndexState) ->
@@ -1313,7 +1171,7 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus,
 %%----------------------------------------------------------------------------
 
 record_pending_ack(#msg_status { seq_id        = SeqId,
-                                 guid          = Guid,
+                                 msg_id        = MsgId,
                                  is_persistent = IsPersistent,
                                  msg_on_disk   = MsgOnDisk,
                                  msg_props     = MsgProps } = MsgStatus,
@@ -1322,8 +1180,8 @@ record_pending_ack(#msg_status { seq_id        = SeqId,
                                       ack_in_counter  = AckInCount}) ->
     {AckEntry, RAI1} =
         case MsgOnDisk of
-            true  -> {{IsPersistent, Guid, MsgProps}, RAI};
-            false -> {MsgStatus, gb_trees:insert(SeqId, Guid, RAI)}
+            true  -> {{IsPersistent, MsgId, MsgProps}, RAI};
+            false -> {MsgStatus, gb_trees:insert(SeqId, MsgId, RAI)}
         end,
     PA1 = dict:store(SeqId, AckEntry, PA),
     State #vqstate { pending_ack    = PA1,
@@ -1334,28 +1192,28 @@ remove_pending_ack(KeepPersistent,
                    State = #vqstate { pending_ack       = PA,
                                       index_state       = IndexState,
                                       msg_store_clients = MSCState }) ->
-    {PersistentSeqIds, GuidsByStore} =
+    {PersistentSeqIds, MsgIdsByStore, _AllMsgIds} =
         dict:fold(fun accumulate_ack/3, accumulate_ack_init(), PA),
     State1 = State #vqstate { pending_ack   = dict:new(),
                               ram_ack_index = gb_trees:empty() },
     case KeepPersistent of
-        true  -> case orddict:find(false, GuidsByStore) of
-                     error       -> State1;
-                     {ok, Guids} -> ok = msg_store_remove(MSCState, false,
-                                                          Guids),
+        true  -> case orddict:find(false, MsgIdsByStore) of
+                     error        -> State1;
+                     {ok, MsgIds} -> ok = msg_store_remove(MSCState, false,
+                                                           MsgIds),
                                     State1
                  end;
         false -> IndexState1 =
                      rabbit_queue_index:ack(PersistentSeqIds, IndexState),
-                 [ok = msg_store_remove(MSCState, IsPersistent, Guids)
-                  || {IsPersistent, Guids} <- orddict:to_list(GuidsByStore)],
+                 [ok = msg_store_remove(MSCState, IsPersistent, MsgIds)
+                  || {IsPersistent, MsgIds} <- orddict:to_list(MsgIdsByStore)],
                  State1 #vqstate { index_state = IndexState1 }
     end.
 
 ack(_MsgStoreFun, _Fun, [], State) ->
-    State;
+    {[], State};
 ack(MsgStoreFun, Fun, AckTags, State) ->
-    {{PersistentSeqIds, GuidsByStore},
+    {{PersistentSeqIds, MsgIdsByStore, AllMsgIds},
      State1 = #vqstate { index_state       = IndexState,
                          msg_store_clients = MSCState,
                          persistent_count  = PCount,
@@ -1371,25 +1229,28 @@ ack(MsgStoreFun, Fun, AckTags, State) ->
                                        gb_trees:delete_any(SeqId, RAI)})}
           end, {accumulate_ack_init(), State}, AckTags),
     IndexState1 = rabbit_queue_index:ack(PersistentSeqIds, IndexState),
-    [ok = MsgStoreFun(MSCState, IsPersistent, Guids)
-     || {IsPersistent, Guids} <- orddict:to_list(GuidsByStore)],
-    PCount1 = PCount - find_persistent_count(sum_guids_by_store_to_len(
-                                               orddict:new(), GuidsByStore)),
-    State1 #vqstate { index_state      = IndexState1,
-                      persistent_count = PCount1,
-                      ack_out_counter  = AckOutCount + length(AckTags) }.
+    [ok = MsgStoreFun(MSCState, IsPersistent, MsgIds)
+     || {IsPersistent, MsgIds} <- orddict:to_list(MsgIdsByStore)],
+    PCount1 = PCount - find_persistent_count(sum_msg_ids_by_store_to_len(
+                                               orddict:new(), MsgIdsByStore)),
+    {lists:reverse(AllMsgIds),
+     State1 #vqstate { index_state      = IndexState1,
+                       persistent_count = PCount1,
+                       ack_out_counter  = AckOutCount + length(AckTags) }}.
 
-accumulate_ack_init() -> {[], orddict:new()}.
+accumulate_ack_init() -> {[], orddict:new(), []}. %% {SeqIds, ByStore, AllIds}
 
 accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS
                                      msg_on_disk   = false,
-                                     index_on_disk = false },
-              {PersistentSeqIdsAcc, GuidsByStore}) ->
-    {PersistentSeqIdsAcc, GuidsByStore};
-accumulate_ack(SeqId, {IsPersistent, Guid, _MsgProps},
-               {PersistentSeqIdsAcc, GuidsByStore}) ->
+                                     index_on_disk = false,
+                                     msg_id        = MsgId }, %% full record
+               {PersistentSeqIdsAcc, MsgIdsByStore, AllMsgIds}) ->
+    {PersistentSeqIdsAcc, MsgIdsByStore, [MsgId | AllMsgIds]}; %% id only
+accumulate_ack(SeqId, {IsPersistent, MsgId, _MsgProps}, %% compact entry
+               {PersistentSeqIdsAcc, MsgIdsByStore, AllMsgIds}) ->
     {cons_if(IsPersistent, SeqId, PersistentSeqIdsAcc),
-     rabbit_misc:orddict_cons(IsPersistent, Guid, GuidsByStore)}.
+     rabbit_misc:orddict_cons(IsPersistent, MsgId, MsgIdsByStore),
+     [MsgId | AllMsgIds]}. %% newest first
 
 find_persistent_count(LensByStore) ->
     case orddict:find(true, LensByStore) of
@@ -1408,12 +1269,14 @@ confirm_commit_index(State = #vqstate { index_state = IndexState }) ->
         false -> State
     end.
 
-remove_confirms(GuidSet, State = #vqstate { msgs_on_disk        = MOD,
-                                            msg_indices_on_disk = MIOD,
-                                            unconfirmed         = UC }) ->
-    State #vqstate { msgs_on_disk        = gb_sets:difference(MOD,  GuidSet),
-                     msg_indices_on_disk = gb_sets:difference(MIOD, GuidSet),
-                     unconfirmed         = gb_sets:difference(UC,   GuidSet) }.
+record_confirms(MsgIdSet, State = #vqstate { msgs_on_disk        = MOD,
+                                             msg_indices_on_disk = MIOD,
+                                             unconfirmed         = UC,
+                                             confirmed           = C }) ->
+    Drop = fun (Set) -> gb_sets:difference(Set, MsgIdSet) end,
+    State #vqstate { msgs_on_disk        = Drop(MOD), %% ids leave all three
+                     msg_indices_on_disk = Drop(MIOD), unconfirmed = Drop(UC),
+                     confirmed           = gb_sets:union(C, MsgIdSet) }.
 
 needs_index_sync(#vqstate { msg_indices_on_disk = MIOD,
                             unconfirmed = UC }) ->
@@ -1430,38 +1293,35 @@ needs_index_sync(#vqstate { msg_indices_on_disk = MIOD,
     %% subtraction.
     not (gb_sets:is_empty(UC) orelse gb_sets:is_subset(UC, MIOD)).
 
-msgs_confirmed(GuidSet, State) ->
-    {gb_sets:to_list(GuidSet), remove_confirms(GuidSet, State)}.
-
-blind_confirm(QPid, GuidSet) ->
-    rabbit_amqqueue:maybe_run_queue_via_backing_queue_async(
-      QPid, fun (State) -> msgs_confirmed(GuidSet, State) end).
-
-msgs_written_to_disk(QPid, GuidSet, removed) ->
-    blind_confirm(QPid, GuidSet);
-msgs_written_to_disk(QPid, GuidSet, written) ->
-    rabbit_amqqueue:maybe_run_queue_via_backing_queue_async(
-      QPid, fun (State = #vqstate { msgs_on_disk        = MOD,
-                                    msg_indices_on_disk = MIOD,
-                                    unconfirmed         = UC }) ->
-                    msgs_confirmed(gb_sets:intersection(GuidSet, MIOD),
-                                   State #vqstate {
-                                     msgs_on_disk =
-                                         gb_sets:intersection(
-                                           gb_sets:union(MOD, GuidSet), UC) })
-            end).
-
-msg_indices_written_to_disk(QPid, GuidSet) ->
-    rabbit_amqqueue:maybe_run_queue_via_backing_queue_async(
-      QPid, fun (State = #vqstate { msgs_on_disk        = MOD,
-                                    msg_indices_on_disk = MIOD,
-                                    unconfirmed         = UC }) ->
-                    msgs_confirmed(gb_sets:intersection(GuidSet, MOD),
-                                   State #vqstate {
-                                     msg_indices_on_disk =
-                                         gb_sets:intersection(
-                                           gb_sets:union(MIOD, GuidSet), UC) })
-            end).
+blind_confirm(Callback, MsgIdSet) -> %% records MsgIdSet as confirmed as-is
+    Record = fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end,
+    Callback(?MODULE, Record).
+
+msgs_written_to_disk(Callback, MsgIdSet, removed) -> %% confirm right away
+    blind_confirm(Callback, MsgIdSet);
+msgs_written_to_disk(Callback, MsgIdSet, written) ->
+    Update =
+        fun (?MODULE, State = #vqstate { msgs_on_disk        = MOD,
+                                         msg_indices_on_disk = MIOD,
+                                         unconfirmed         = UC }) ->
+                Pending = gb_sets:intersection(UC, MsgIdSet), %% still awaited
+                MOD1    = gb_sets:union(MOD, Pending),
+                record_confirms(gb_sets:intersection(MsgIdSet, MIOD),
+                                State #vqstate { msgs_on_disk = MOD1 })
+        end,
+    Callback(?MODULE, Update).
+
+msg_indices_written_to_disk(Callback, MsgIdSet) ->
+    Update =
+        fun (?MODULE, State = #vqstate { msgs_on_disk        = MOD,
+                                         msg_indices_on_disk = MIOD,
+                                         unconfirmed         = UC }) ->
+                Pending = gb_sets:intersection(UC, MsgIdSet), %% still awaited
+                MIOD1   = gb_sets:union(MIOD, Pending),
+                record_confirms(gb_sets:intersection(MsgIdSet, MOD),
+                                State #vqstate { msg_indices_on_disk = MIOD1 })
+        end,
+    Callback(?MODULE, Update).
 
 %%----------------------------------------------------------------------------
 %% Phase changes
@@ -1538,17 +1398,16 @@ limit_ram_acks(Quota, State = #vqstate { pending_ack   = PA,
         true ->
             {Quota, State};
         false ->
-            {SeqId, Guid, RAI1} = gb_trees:take_largest(RAI),
+            {SeqId, MsgId, RAI1} = gb_trees:take_largest(RAI),
             MsgStatus = #msg_status {
-              guid          = Guid, %% ASSERTION
+              msg_id        = MsgId, %% ASSERTION
               is_persistent = false, %% ASSERTION
               msg_props     = MsgProps } = dict:fetch(SeqId, PA),
             {_, State1} = maybe_write_to_disk(true, false, MsgStatus, State),
+            PA1 = dict:store(SeqId, {false, MsgId, MsgProps}, PA),
             limit_ram_acks(Quota - 1,
-                           State1 #vqstate {
-                             pending_ack   =
-                                 dict:store(SeqId, {false, Guid, MsgProps}, PA),
-                             ram_ack_index = RAI1 })
+                           State1 #vqstate { pending_ack   = PA1,
+                                             ram_ack_index = RAI1 })
     end.
 
 
@@ -1801,3 +1660,27 @@ push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) ->
             push_betas_to_deltas(
               Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1)
     end.
+
+%%----------------------------------------------------------------------------
+%% Upgrading
+%%----------------------------------------------------------------------------
+
+%% Turn the single routing key of every stored basic_message into a list.
+multiple_routing_keys() ->
+    Wrap = fun ({basic_message, XName, RoutingKey, Content, MsgId, Persist}) ->
+                   {ok, {basic_message, XName, [RoutingKey], Content,
+                         MsgId, Persist}};
+               (_Other) ->
+                   {error, corrupt_message}
+           end,
+    transform_storage(Wrap),
+    ok.
+
+%% Applies TransformFun to both stores. Assumes message store is not running
+transform_storage(TransformFun) ->
+    transform_store(?PERSISTENT_MSG_STORE, TransformFun),
+    transform_store(?TRANSIENT_MSG_STORE, TransformFun).
+
+transform_store(Store, TransformFun) -> % both calls act on the mnesia dir
+    rabbit_msg_store:force_recovery(rabbit_mnesia:dir(), Store),
+    rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun).
diff --git a/src/rabbit_version.erl b/src/rabbit_version.erl
new file mode 100644
index 00000000..400abc10
--- /dev/null
+++ b/src/rabbit_version.erl
@@ -0,0 +1,172 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2011 VMware, Inc.  All rights reserved.
+%%
+
+-module(rabbit_version).
+
+-export([recorded/0, matches/2, desired/0, desired_for_scope/1,
+         record_desired/0, record_desired_for_scope/1,
+         upgrades_required/1]).
+
+%% -------------------------------------------------------------------
+-ifdef(use_specs).
+
+-export_type([scope/0, step/0]).
+
+-type(scope() :: atom()).
+-type(scope_version() :: [atom()]).
+-type(step() :: {atom(), atom()}).
+
+-type(version() :: [atom()]).
+
+-spec(recorded/0 :: () -> rabbit_types:ok_or_error2(version(), any())).
+-spec(matches/2 :: ([A], [A]) -> boolean()).
+-spec(desired/0 :: () -> version()).
+-spec(desired_for_scope/1 :: (scope()) -> scope_version()).
+-spec(record_desired/0 :: () -> 'ok').
+-spec(record_desired_for_scope/1 ::
+        (scope()) -> rabbit_types:ok_or_error(any())).
+-spec(upgrades_required/1 ::
+        (scope()) -> rabbit_types:ok_or_error2([step()], any())).
+
+-endif.
+%% -------------------------------------------------------------------
+
+-define(VERSION_FILENAME, "schema_version").
+-define(SCOPES, [mnesia, local]).
+
+%% -------------------------------------------------------------------
+%% Reads the version file: {ok, V} for a single term V, or the read error.
+recorded() -> case rabbit_misc:read_term_file(schema_filename()) of
+                  {ok, [V]}        -> {ok, V};
+                  {error, _} = Err -> Err
+              end.
+%% Writes [V] to the version file, asserting that the write returns ok.
+record(V) -> ok = rabbit_misc:write_term_file(schema_filename(), [V]).
+
+%% {ok, Names}: the recorded names filed under Scope ([] when none are).
+recorded_for_scope(Scope) ->
+    case recorded() of
+        {ok, Version} ->
+            case lists:keysearch(Scope, 1, categorise_by_scope(Version)) of
+                {value, {Scope, ScopeVersion}} -> {ok, ScopeVersion};
+                false                          -> {ok, []}
+            end;
+        {error, _} = Failure -> Failure
+    end.
+
+%% Replace Scope's entry in the recorded version with ScopeVersion.
+record_for_scope(Scope, ScopeVersion) ->
+    case recorded() of
+        {ok, Version} ->
+            ByScope = lists:keystore(Scope, 1, categorise_by_scope(Version),
+                                     {Scope, ScopeVersion}),
+            ok = record([Name || {_Scope, Names} <- ByScope, Name <- Names]);
+        {error, _} = Failure -> Failure
+    end.
+
+%% -------------------------------------------------------------------
+
+%% True when both lists hold the same elements (order/duplicates ignored).
+matches(VerA, VerB) -> lists:usort(VerA) =:= lists:usort(VerB).
+
+%% -------------------------------------------------------------------
+%% The desired names of every scope in ?SCOPES, concatenated in that order.
+desired() -> [Name || Scope <- ?SCOPES, Name <- desired_for_scope(Scope)].
+%% The heads (see heads/1) of the upgrade graph built for Scope.
+desired_for_scope(Scope) -> with_upgrade_graph(fun heads/1, Scope).
+%% Records desired() as the whole version.
+record_desired() -> record(desired()).
+%% Records desired_for_scope(Scope) as the recorded version of Scope only.
+record_desired_for_scope(Scope) ->
+    record_for_scope(Scope, desired_for_scope(Scope)).
+
+%% Steps still to apply for Scope, computed from its recorded heads.
+upgrades_required(Scope) ->
+    case recorded_for_scope(Scope) of
+        {ok, Heads} ->
+            Plan = fun (G) ->
+                           case unknown_heads(Heads, G) of
+                               []  -> {ok, upgrades_to_apply(Heads, G)};
+                               Bad -> {error, {future_upgrades_found, Bad}}
+                           end
+                   end,
+            with_upgrade_graph(Plan, Scope);
+        {error, enoent} -> {error, version_not_available}
+    end.
+
+%% -------------------------------------------------------------------
+
+%% Build the digraph of the rabbit_upgrade steps declared for Scope, apply
+%% Fun to it and return the result; the graph is always deleted afterwards.
+%% A graph that cannot be built is reported by throwing {error, Reason}.
+with_upgrade_graph(Fun, Scope) ->
+    VFun  = fun (Module, Steps) -> vertices(Module, Steps, Scope) end,
+    EFun  = fun (Module, Steps) -> edges(Module, Steps, Scope) end,
+    Attrs = rabbit_misc:all_module_attributes(rabbit_upgrade),
+    case rabbit_misc:build_acyclic_graph(VFun, EFun, Attrs) of
+        {ok, G} ->
+            try Fun(G) after true = digraph:delete(G) end;
+        {error, {vertex, duplicate, StepName}} ->
+            throw({error, {duplicate_upgrade_step, StepName}});
+        {error, {edge, {bad_vertex, StepName}, _From, _To}} ->
+            throw({error, {dependency_on_unknown_upgrade_step, StepName}});
+        {error, {edge, {bad_edge, StepNames}, _From, _To}} ->
+            throw({error, {cycle_in_upgrade_steps, StepNames}})
+    end.
+
+%% One {Name, {Module, Name}} vertex per step of Module declared for Scope.
+vertices(Module, Steps, Scope) ->
+    [{Name, {Module, Name}} || {Name, S, _Reqs} <- Steps, S == Scope].
+
+%% One {Requirement, Name} edge for each requirement of each step that is
+%% declared for Scope; the module argument is not used.
+edges(_Module, Steps, Scope) ->
+    [{Req, Name} || {Name, S, Reqs} <- Steps, Req <- Reqs, S == Scope].
+%% The members of Heads that are not vertices of G.
+unknown_heads(Heads, G) -> [H || H <- Heads, digraph:vertex(G, H) =:= false].
+
+upgrades_to_apply(Heads, G) ->
+    %% Whatever can reach the known heads has already been applied; the
+    %% remaining vertices are the steps still to apply.
+    Unsorted = sets:to_list(
+                 sets:subtract(
+                   sets:from_list(digraph:vertices(G)),
+                   sets:from_list(digraph_utils:reaching(Heads, G)))),
+    %% Topologically sort the pending subgraph so steps run in dependency
+    %% order. subgraph/2 builds a new digraph: delete it once sorted.
+    SubG = digraph_utils:subgraph(G, Unsorted),
+    try [element(2, digraph:vertex(G, S)) || S <- digraph_utils:topsort(SubG)]
+    after true = digraph:delete(SubG) end.
+
+heads(G) -> % the sorted vertices of G that have no outgoing edges
+    lists:sort([V || V <- digraph:vertices(G), digraph:out_degree(G, V) =:= 0]).
+
+%% -------------------------------------------------------------------
+%% Pair each name in Version with the scope a rabbit_upgrade attribute
+%% declares for it, then collect the pairs per scope into an orddict.
+categorise_by_scope(Version) when is_list(Version) ->
+    Attrs = rabbit_misc:all_module_attributes(rabbit_upgrade),
+    Known = [{Scope, Name} || {_Module, Steps} <- Attrs,
+                              {Name, Scope, _Requires} <- Steps,
+                              lists:member(Name, Version)],
+    Cons = fun ({Scope, Name}, Acc) ->
+                   rabbit_misc:orddict_cons(Scope, Name, Acc)
+           end,
+    orddict:to_list(lists:foldl(Cons, orddict:new(), Known)).
+%% The directory holding the version file: the mnesia directory.
+dir() -> rabbit_mnesia:dir().
+%% Full path of the version file (?VERSION_FILENAME inside dir()).
+schema_filename() -> filename:join(dir(), ?VERSION_FILENAME).
diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl
index efebef06..08d6c99a 100644
--- a/src/rabbit_vhost.erl
+++ b/src/rabbit_vhost.erl
@@ -21,6 +21,7 @@
 %%----------------------------------------------------------------------------
 
 -export([add/1, delete/1, exists/1, list/0, with/2]).
+-export([info/1, info/2, info_all/0, info_all/1]).
 
 -ifdef(use_specs).
 
@@ -30,10 +31,18 @@
 -spec(list/0 :: () -> [rabbit_types:vhost()]).
 -spec(with/2 :: (rabbit_types:vhost(), rabbit_misc:thunk(A)) -> A).
 
+-spec(info/1 :: (rabbit_types:vhost()) -> rabbit_types:infos()).
+-spec(info/2 :: (rabbit_types:vhost(), rabbit_types:info_keys())
+                -> rabbit_types:infos()).
+-spec(info_all/0 :: () -> [rabbit_types:infos()]).
+-spec(info_all/1 :: (rabbit_types:info_keys()) -> [rabbit_types:infos()]).
+
 -endif.
 
 %%----------------------------------------------------------------------------
 
+-define(INFO_KEYS, [name, tracing]).
+
 add(VHostPath) ->
     R = rabbit_misc:execute_mnesia_transaction(
           fun () ->
@@ -48,15 +57,16 @@ add(VHostPath) ->
                   ok;
               (ok, false) ->
                   [rabbit_exchange:declare(
-                      rabbit_misc:r(VHostPath, exchange, Name),
-                      Type, true, false, false, []) ||
-                         {Name,Type} <-
-                             [{<<"">>,            direct},
-                              {<<"amq.direct">>,  direct},
-                              {<<"amq.topic">>,   topic},
-                              {<<"amq.match">>,   headers}, %% per 0-9-1 pdf
-                              {<<"amq.headers">>, headers}, %% per 0-9-1 xml
-                              {<<"amq.fanout">>,  fanout}]],
+                     rabbit_misc:r(VHostPath, exchange, Name),
+                     Type, true, false, false, []) ||
+                      {Name,Type} <-
+                          [{<<"">>,                   direct},
+                           {<<"amq.direct">>,         direct},
+                           {<<"amq.topic">>,          topic},
+                           {<<"amq.match">>,          headers}, %% per 0-9-1 pdf
+                           {<<"amq.headers">>,        headers}, %% per 0-9-1 xml
+                           {<<"amq.fanout">>,         fanout},
+                           {<<"amq.rabbitmq.trace">>, topic}]],
                   ok
           end),
     rabbit_log:info("Added vhost ~p~n", [VHostPath]),
@@ -81,9 +91,9 @@ delete(VHostPath) ->
 
 internal_delete(VHostPath) ->
     lists:foreach(
-      fun ({Username, _, _, _}) ->
-              ok = rabbit_auth_backend_internal:clear_permissions(Username,
-                                                                  VHostPath)
+      fun (Info) ->
+              ok = rabbit_auth_backend_internal:clear_permissions(
+                     proplists:get_value(user, Info), VHostPath)
       end,
       rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)),
     ok = mnesia:delete({rabbit_vhost, VHostPath}),
@@ -104,3 +114,17 @@ with(VHostPath, Thunk) ->
                     Thunk()
             end
     end.
+
+%%----------------------------------------------------------------------------
+%% The {Item, Value} pair, in order, for each item requested of vhost X.
+infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items].
+%% Value of one info item; an unrecognised item throws {bad_argument, Item}.
+i(name,    VHost) -> VHost;
+i(tracing, VHost) -> rabbit_trace:tracing(VHost);
+i(Item, _)        -> throw({bad_argument, Item}).
+%% Info for one vhost: every item in ?INFO_KEYS, or only the given Items.
+info(VHost)        -> infos(?INFO_KEYS, VHost).
+info(VHost, Items) -> infos(Items, VHost).
+%% As info/1 and info/2, but one result per vhost returned by list().
+info_all()      -> info_all(?INFO_KEYS).
+info_all(Items) -> [info(VHost, Items) || VHost <- list()].
diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl
index eba86a55..ac3434d2 100644
--- a/src/rabbit_writer.erl
+++ b/src/rabbit_writer.erl
@@ -28,7 +28,7 @@
 
 -define(HIBERNATE_AFTER, 5000).
 
-%%----------------------------------------------------------------------------
+%%---------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
@@ -69,7 +69,7 @@
 
 -endif.
 
-%%----------------------------------------------------------------------------
+%%---------------------------------------------------------------------------
 
 start(Sock, Channel, FrameMax, Protocol, ReaderPid) ->
     {ok,
@@ -133,7 +133,7 @@ handle_message({inet_reply, _, Status}, _State) ->
 handle_message(Message, _State) ->
     exit({writer, message_not_understood, Message}).
 
-%---------------------------------------------------------------------------
+%%---------------------------------------------------------------------------
 
 send_command(W, MethodRecord) ->
     W ! {send_command, MethodRecord},
@@ -157,13 +157,13 @@ send_command_and_notify(W, Q, ChPid, MethodRecord, Content) ->
     W ! {send_command_and_notify, Q, ChPid, MethodRecord, Content},
     ok.
 
-%---------------------------------------------------------------------------
+%%---------------------------------------------------------------------------
 
 call(Pid, Msg) ->
     {ok, Res} = gen:call(Pid, '$gen_call', Msg, infinity),
     Res.
 
-%---------------------------------------------------------------------------
+%%---------------------------------------------------------------------------
 
 assemble_frame(Channel, MethodRecord, Protocol) ->
     ?LOGMESSAGE(out, Channel, MethodRecord, none),
diff --git a/src/supervisor2.erl b/src/supervisor2.erl
index d1537f26..405949ef 100644
--- a/src/supervisor2.erl
+++ b/src/supervisor2.erl
@@ -38,6 +38,9 @@
 %%    child is a supervisor and it exits normally (i.e. with reason of
 %%    'shutdown') then the child's parent also exits normally.
 %%
+%% 5) normal and {shutdown, _} exit reasons are treated the same
+%%    (i.e. both are regarded as normal exits)
+%%
 %% All modifications are (C) 2010-2011 VMware, Inc.
 %%
 %% %CopyrightBegin%
@@ -539,17 +542,12 @@ do_restart({RestartType, Delay}, Reason, Child, State) ->
 do_restart(permanent, Reason, Child, State) ->
     report_error(child_terminated, Reason, Child, State#state.name),
     restart(Child, State);
-do_restart(intrinsic, normal, Child, State) ->
-    {shutdown, state_del_child(Child, State)};
-do_restart(intrinsic, shutdown, Child = #child{child_type = supervisor},
-           State) ->
-    {shutdown, state_del_child(Child, State)};
-do_restart(_, normal, Child, State) ->
-    NState = state_del_child(Child, State),
-    {ok, NState};
-do_restart(_, shutdown, Child, State) ->
-    NState = state_del_child(Child, State),
-    {ok, NState};
+do_restart(Type, normal, Child, State) ->
+    del_child_and_maybe_shutdown(Type, Child, State);
+do_restart(Type, {shutdown, _}, Child, State) ->
+    del_child_and_maybe_shutdown(Type, Child, State);
+do_restart(Type, shutdown, Child = #child{child_type = supervisor}, State) ->
+    del_child_and_maybe_shutdown(Type, Child, State);
 do_restart(Type, Reason, Child, State) when Type =:= transient orelse
                                             Type =:= intrinsic ->
     report_error(child_terminated, Reason, Child, State#state.name),
@@ -559,6 +557,11 @@ do_restart(temporary, Reason, Child, State) ->
     NState = state_del_child(Child, State),
     {ok, NState}.
 
+%% Drop Child from State; the result is tagged shutdown for an intrinsic
+%% child and ok for any other restart type.
+del_child_and_maybe_shutdown(Type, Child, State) -> NState =
+    state_del_child(Child, State), {case Type of intrinsic -> shutdown; _ -> ok end, NState}.
+
 restart(Child, State) ->
     case add_restart(State) of
 	{ok, NState} ->
diff --git a/src/test_sup.erl b/src/test_sup.erl
index b4df1fd0..84c4121c 100644
--- a/src/test_sup.erl
+++ b/src/test_sup.erl
@@ -33,10 +33,10 @@ test_supervisor_delayed_restart() ->
 test_supervisor_delayed_restart(SupPid) ->
     ok = ping_child(SupPid),
     ok = exit_child(SupPid),
-    timer:sleep(10),
+    timer:sleep(100),
     ok = ping_child(SupPid),
     ok = exit_child(SupPid),
-    timer:sleep(10),
+    timer:sleep(100),
     timeout = ping_child(SupPid),
     timer:sleep(1010),
     ok = ping_child(SupPid),
@@ -45,8 +45,8 @@ test_supervisor_delayed_restart(SupPid) ->
 with_sup(RestartStrategy, Fun) ->
     {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]),
     Res = Fun(SupPid),
+    unlink(SupPid),
     exit(SupPid, shutdown),
-    rabbit_misc:unlink_and_capture_exit(SupPid),
     Res.
 
 init([RestartStrategy]) ->
diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl
index 44e1e4b5..fb2fa267 100644
--- a/src/vm_memory_monitor.erl
+++ b/src/vm_memory_monitor.erl
@@ -175,10 +175,10 @@ internal_update(State = #state { memory_limit = MemLimit,
     case {Alarmed, NewAlarmed} of
         {false, true} ->
             emit_update_info(set, MemUsed, MemLimit),
-            alarm_handler:set_alarm({vm_memory_high_watermark, []});
+            alarm_handler:set_alarm({{vm_memory_high_watermark, node()}, []});
         {true, false} ->
             emit_update_info(clear, MemUsed, MemLimit),
-            alarm_handler:clear_alarm(vm_memory_high_watermark);
+            alarm_handler:clear_alarm({vm_memory_high_watermark, node()});
         _ ->
             ok
     end,
@@ -239,10 +239,13 @@ get_total_memory({unix,darwin}) ->
     PageSize * (Inactive + Active + Free + Wired);
 
 get_total_memory({unix,freebsd}) ->
-    PageSize  = freebsd_sysctl("vm.stats.vm.v_page_size"),
-    PageCount = freebsd_sysctl("vm.stats.vm.v_page_count"),
+    PageSize  = sysctl("vm.stats.vm.v_page_size"),
+    PageCount = sysctl("vm.stats.vm.v_page_count"),
     PageCount * PageSize;
 
+get_total_memory({unix,openbsd}) ->
+    sysctl("hw.usermem");
+
 get_total_memory({win32,_OSname}) ->
     %% Due to the Erlang print format bug, on Windows boxes the memory
     %% size is broken. For example Windows 7 64 bit with 4Gigs of RAM
@@ -342,7 +345,7 @@ parse_line_aix(Line) ->
          false -> list_to_integer(Value)
      end}.
 
-freebsd_sysctl(Def) ->
+sysctl(Def) ->
     list_to_integer(cmd("/sbin/sysctl -n " ++ Def) -- "\n").
 
 %% file:read_file does not work on files in /proc as it seems to get