author     Emile Joubert <emile@rabbitmq.com>    2012-10-25 13:00:09 +0100
committer  Emile Joubert <emile@rabbitmq.com>    2012-10-25 13:00:09 +0100
commit     c419fcc1d934827226df2493a9f100e2f8d72933
tree       b0cc0604e0434f3a845fd7132505055dbb0c5e80
parent     ccd409f9522378fed49bf9a56c474734e8a85b7d
parent     fa60848094653e1764565e7e5582c0bbd794638c
download   rabbitmq-server-c419fcc1d934827226df2493a9f100e2f8d72933.tar.gz
Merged bug25053 into default
85 files changed, 5968 insertions, 2902 deletions
@@ -1,5 +1,8 @@ This package, the RabbitMQ server is licensed under the MPL. For the MPL, please see LICENSE-MPL-RabbitMQ. +The files `mochijson2.erl' and `mochinum.erl' are (c) 2007 Mochi Media, Inc and +licensed under a MIT license, see LICENSE-MIT-Mochi. + If you have any questions regarding licensing, please contact us at info@rabbitmq.com. diff --git a/LICENSE-MIT-Mochi b/LICENSE-MIT-Mochi new file mode 100644 index 00000000..c85b65a4 --- /dev/null +++ b/LICENSE-MIT-Mochi @@ -0,0 +1,9 @@ +This is the MIT license. + +Copyright (c) 2007 Mochi Media, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -103,7 +103,7 @@ endif all: $(TARGETS) -.PHONY: plugins +.PHONY: plugins check-xref ifneq "$(PLUGINS_SRC_DIR)" "" plugins: [ -d "$(PLUGINS_SRC_DIR)/rabbitmq-server" ] || ln -s "$(CURDIR)" "$(PLUGINS_SRC_DIR)/rabbitmq-server" @@ -111,9 +111,19 @@ plugins: PLUGINS_SRC_DIR="" $(MAKE) -C "$(PLUGINS_SRC_DIR)" plugins-dist PLUGINS_DIST_DIR="$(CURDIR)/$(PLUGINS_DIR)" VERSION=$(VERSION) echo "Put your EZs here and use rabbitmq-plugins to enable them." > $(PLUGINS_DIR)/README rm -f $(PLUGINS_DIR)/rabbit_common*.ez + +# add -q to remove printout of warnings.... +check-xref: $(BEAM_TARGETS) $(PLUGINS_DIR) + rm -rf lib + ./check_xref $(PLUGINS_DIR) -q + else plugins: # Not building plugins + +check-xref: + $(info xref checks are disabled) + endif $(DEPS_FILE): $(SOURCES) $(INCLUDES) @@ -137,7 +147,7 @@ $(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_c dialyze: $(BEAM_TARGETS) $(BASIC_PLT) dialyzer --plt $(BASIC_PLT) --no_native --fullpath \ - -Wrace_conditions $(BEAM_TARGETS) + $(BEAM_TARGETS) # rabbit.plt is used by rabbitmq-erlang-client's dialyze make target create-plt: $(RABBIT_PLT) @@ -217,11 +227,11 @@ stop-rabbit-on-node: all echo "rabbit:stop()." | $(ERL_CALL) set-resource-alarm: all - echo "alarm_handler:set_alarm({{resource_limit, $(SOURCE), node()}, []})." | \ + echo "rabbit_alarm:set_alarm({{resource_limit, $(SOURCE), node()}, []})." | \ $(ERL_CALL) clear-resource-alarm: all - echo "alarm_handler:clear_alarm({resource_limit, $(SOURCE), node()})." | \ + echo "rabbit_alarm:clear_alarm({resource_limit, $(SOURCE), node()})." | \ $(ERL_CALL) stop-node: @@ -1 +1 @@ -Please see http://www.rabbitmq.com/build-server.html for build instructions. +Please see http://www.rabbitmq.com/build-server.html for build instructions.
\ No newline at end of file diff --git a/check_xref b/check_xref new file mode 100755 index 00000000..8f65f3b1 --- /dev/null +++ b/check_xref @@ -0,0 +1,291 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +-mode(compile). + +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2012 VMware, Inc. All rights reserved. +%% + +main(["-h"]) -> + io:format("usage: check_xref PluginDirectory (options)~n" + "options:~n" + " -q - quiet mode (only prints errors)~n" + " -X - disables all filters~n"); +main([PluginsDir|Argv]) -> + put({?MODULE, quiet}, lists:member("-q", Argv)), + put({?MODULE, no_filters}, lists:member("-X", Argv)), + + {ok, Cwd} = file:get_cwd(), + code:add_pathz(filename:join(Cwd, "ebin")), + LibDir = filename:join(Cwd, "lib"), + case filelib:is_dir(LibDir) of + false -> ok; + true -> os:cmd("rm -rf " ++ LibDir) + end, + Rc = try + check(Cwd, PluginsDir, LibDir, checks()) + catch + _:Err -> + io:format(user, "failed: ~p~n", [Err]), + 1 + end, + shutdown(Rc, LibDir). + +shutdown(Rc, LibDir) -> + os:cmd("rm -rf " ++ LibDir), + erlang:halt(Rc). + +check(Cwd, PluginsDir, LibDir, Checks) -> + {ok, Plugins} = file:list_dir(PluginsDir), + ok = file:make_dir(LibDir), + [begin + Source = filename:join(PluginsDir, Plugin), + Target = filename:join(LibDir, Plugin), + IsExternal = external_dependency(Plugin), + AppN = case IsExternal of + true -> filename:join(LibDir, unmangle_name(Plugin)); + false -> filename:join( + LibDir, filename:basename(Plugin, ".ez")) + end, + + report(info, "mkdir -p ~s~n", [Target]), + filelib:ensure_dir(Target), + + report(info, "cp ~s ~s~n", [Source, Target]), + {ok, _} = file:copy(Source, Target), + + report(info, "unzip -d ~s ~s~n", [LibDir, Target]), + {ok, _} = zip:unzip(Target, [{cwd, LibDir}]), + + UnpackDir = filename:join(LibDir, filename:basename(Target, ".ez")), + report(info, "mv ~s ~s~n", [UnpackDir, AppN]), + ok = file:rename(UnpackDir, AppN), + + code:add_patha(filename:join(AppN, "ebin")), + case IsExternal of + true -> App = list_to_atom(hd(string:tokens(filename:basename(AppN), + "-"))), + report(info, "loading ~p~n", [App]), + application:load(App), + store_third_party(App); + _ -> ok + end + end || Plugin <- Plugins, + lists:suffix(".ez", Plugin)], + + RabbitAppEbin = filename:join([LibDir, "rabbit", "ebin"]), + filelib:ensure_dir(filename:join(RabbitAppEbin, "foo")), + {ok, Beams} = file:list_dir("ebin"), + [{ok, _} = file:copy(filename:join("ebin", Beam), + filename:join(RabbitAppEbin, Beam)) || Beam <- Beams], + xref:start(?MODULE), + xref:set_default(?MODULE, [{verbose, false}, {warnings, false}]), + xref:set_library_path(?MODULE, code:get_path()), + xref:add_release(?MODULE, Cwd, {name, rabbit}), + store_unresolved_calls(), + Results = lists:flatten([perform_analysis(Q) || Q <- Checks]), + report(Results). 
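[Editor's note: the check/4 function above is a thin driver around OTP's xref server. A minimal standalone sketch of the same workflow, assuming compiled beams in ./ebin — the module name, server name and directory here are illustrative, not part of the script:

%% Minimal sketch of the xref workflow used by check_xref above.
%% Assumes ./ebin holds compiled .beam files; 'demo' is an arbitrary
%% xref server name, not anything the script defines.
-module(xref_demo).
-export([run/0]).

run() ->
    {ok, _Pid} = xref:start(demo),
    ok = xref:set_default(demo, [{verbose, false}, {warnings, false}]),
    ok = xref:set_library_path(demo, code:get_path()),
    {ok, _Modules} = xref:add_directory(demo, "ebin"),
    %% A free-form query, in the same language as checks/0 above:
    %% local functions that are never used.
    {ok, UnusedLocals} = xref:q(demo, "(Lin) (L - LU)"),
    xref:stop(demo),
    UnusedLocals.
]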
+ +%% +%% Analysis +%% + +perform_analysis({Query, Description, Severity}) -> + perform_analysis({Query, Description, Severity, fun(_) -> false end}); +perform_analysis({Query, Description, Severity, Filter}) -> + report_progress("Checking whether any code ~s " + "(~s)~n", [Description, Query]), + case analyse(Query) of + {ok, Analysis} -> + [filter(Result, Filter) || + Result <- process_analysis(Query, Description, + Severity, Analysis)]; + {error, Module, Reason} -> + {analysis_error, {Module, Reason}} + end. + +partition(Results) -> + lists:partition(fun({{_, L}, _}) -> L =:= error end, Results). + +analyse(Query) when is_atom(Query) -> + xref:analyse(?MODULE, Query, [{verbose, false}]); +analyse(Query) when is_list(Query) -> + xref:q(?MODULE, Query). + +process_analysis(Query, Tag, Severity, Analysis) when is_atom(Query) -> + [{{Tag, Severity}, MFA} || MFA <- Analysis]; +process_analysis(Query, Tag, Severity, Analysis) when is_list(Query) -> + [{{Tag, Severity}, Result} || Result <- Analysis]. + +checks() -> + [{"(XXL)(Lin) ((XC - UC) || (XU - X - B))", + "has call to undefined function(s)", + error, filters()}, + {"(Lin) (L - LU)", "has unused local function(s)", + error, filters()}, + {"(Lin) (LU * (X - XU))", + "has exported function(s) only used locally", + warning, filters()}, + {"(Lin) (DF * (XU + LU))", "used deprecated function(s)", + warning, filters()}]. +% {"(Lin) (X - XU)", "possibly unused export", +% warning, fun filter_unused/1}]. + +%% +%% noise filters (can be disabled with -X) - strip uninteresting analyses +%% + +filter(Result, Filter) -> + case Filter(Result) of + false -> Result; + true -> [] %% NB: this gets flattened out later on.... + end. + +filters() -> + case get({?MODULE, no_filters}) of + true -> fun(_) -> false end; + _ -> filter_chain([fun is_unresolved_call/1, fun is_callback/1, + fun is_unused/1, fun is_irrelevant/1]) + end. + +filter_chain(FnChain) -> + fun(AnalysisResult) -> + lists:foldl(fun(F, false) -> F(cleanup(AnalysisResult)); + (_F, true) -> true + end, false, FnChain) + end. + +cleanup({{_, _},{{{{_,_,_}=MFA1,_},{{_,_,_}=MFA2,_}},_}}) -> {MFA1, MFA2}; +cleanup({{_, _},{{{_,_,_}=MFA1,_},{{_,_,_}=MFA2,_}}}) -> {MFA1, MFA2}; +cleanup({{_, _},{{_,_,_}=MFA1,{_,_,_}=MFA2},_}) -> {MFA1, MFA2}; +cleanup({{_, _},{{_,_,_}=MFA1,{_,_,_}=MFA2}}) -> {MFA1, MFA2}; +cleanup({{_, _}, {_,_,_}=MFA}) -> MFA; +cleanup({{_, _}, {{_,_,_}=MFA,_}}) -> MFA; +cleanup({{_,_,_}=MFA, {_,_,_}}) -> MFA; +cleanup({{_,_,_}=MFA, {_,_,_},_}) -> MFA; +cleanup(Other) -> Other. + +is_irrelevant({{M,_,_}, {_,_,_}}) -> + is_irrelevant(M); +is_irrelevant({M,_,_}) -> + is_irrelevant(M); +is_irrelevant(Mod) when is_atom(Mod) -> + lists:member(Mod, get({?MODULE, third_party})). + +is_unused({{_,_,_}=MFA, {_,_,_}}) -> + is_unused(MFA); +is_unused({M,_F,_A}) -> + lists:suffix("_tests", atom_to_list(M)); +is_unused(_) -> + false. + +is_unresolved_call({_, F, A}) -> + UC = get({?MODULE, unresolved_calls}), + sets:is_element({'$M_EXPR', F, A}, UC); +is_unresolved_call(_) -> + false. + +%% TODO: cache this.... +is_callback({M,_,_}=MFA) -> + Attributes = M:module_info(attributes), + Behaviours = proplists:append_values(behaviour, Attributes), + {_, Callbacks} = lists:foldl(fun acc_behaviours/2, {M, []}, Behaviours), + lists:member(MFA, Callbacks); +is_callback(_) -> + false. + +acc_behaviours(B, {M, CB}=Acc) -> + case catch(B:behaviour_info(callbacks)) of + [{_,_} | _] = Callbacks -> + {M, CB ++ [{M, F, A} || {F,A} <- Callbacks]}; + _ -> + Acc + end. 
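[Editor's note: the noise filters compose via filter_chain/1 — each predicate returns true to suppress a result, and the fold stops calling further predicates once one fires. A self-contained toy version of that composition (module and predicates are illustrative; the cleanup/1 normalisation step is elided):

%% Toy replica of the filter_chain/1 composition above: a list of
%% predicates is folded into one predicate that short-circuits on
%% the first match.
-module(chain_demo).
-export([run/0]).

filter_chain(FnChain) ->
    fun(Result) ->
            lists:foldl(fun(F, false) -> F(Result);
                           (_F, true) -> true
                        end, false, FnChain)
    end.

run() ->
    IsTestMod   = fun({M, _F, _A}) -> lists:suffix("_tests", atom_to_list(M)) end,
    IsHighArity = fun({_M, _F, A}) -> A > 5 end,
    Suppress    = filter_chain([IsTestMod, IsHighArity]),
    false = Suppress({mymod, go, 0}),       %% kept: no predicate fires
    true  = Suppress({mymod_tests, go, 0}), %% dropped by IsTestMod
    true  = Suppress({mymod, go, 9}),       %% dropped by IsHighArity
    ok.
]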
+ +%% +%% reporting/output +%% + +report(Results) -> + [report_failures(F) || F <- Results], + {Errors, Warnings} = partition(Results), + report(info, "Completed: ~p errors, ~p warnings~n", + [length(Errors), length(Warnings)]), + case length(Errors) > 0 of + true -> 1; + false -> 0 + end. + +report_failures({analysis_error, {Mod, Reason}}) -> + report(error, "~s:0 Analysis Error: ~p~n", [source_file(Mod), Reason]); +report_failures({{Tag, Level}, {{{{M,_,_},L},{{M2,F2,A2},_}},_}}) -> + report(Level, "~s:~w ~s ~p:~p/~p~n", + [source_file(M), L, Tag, M2, F2, A2]); +report_failures({{Tag, Level}, {{M,F,A},L}}) -> + report(Level, "~s:~w ~s ~p:~p/~p~n", [source_file(M), L, Tag, M, F, A]); +report_failures({{Tag, Level}, {M,F,A}}) -> + report(Level, "~s:unknown ~s ~p:~p/~p~n", [source_file(M), Tag, M, F, A]); +report_failures(Term) -> + report(error, "Ignoring ~p~n", [Term]), + ok. + +report_progress(Fmt, Args) -> + report(info, Fmt, Args). + +report(Level, Fmt, Args) -> + case {get({?MODULE, quiet}), Level} of + {true, error} -> do_report(lookup_prefix(Level), Fmt, Args); + {false, _} -> do_report(lookup_prefix(Level), Fmt, Args); + _ -> ok + end. + +do_report(Prefix, Fmt, Args) -> + io:format(Prefix ++ Fmt, Args). + +lookup_prefix(error) -> "ERROR: "; +lookup_prefix(warning) -> "WARNING: "; +lookup_prefix(info) -> "INFO: ". + +source_file(M) -> + proplists:get_value(source, M:module_info(compile)). + +%% +%% setup/code-path/file-system ops +%% + +store_third_party(App) -> + {ok, AppConfig} = application:get_all_key(App), + case get({?MODULE, third_party}) of + undefined -> + put({?MODULE, third_party}, + proplists:get_value(modules, AppConfig)); + Modules -> + put({?MODULE, third_party}, + proplists:get_value(modules, AppConfig) ++ Modules) + end. + +%% TODO: this ought not to be maintained in such a fashion +external_dependency(Path) -> + lists:any(fun(P) -> lists:prefix(P, Path) end, + ["mochiweb", "webmachine", "rfc4627", "eldap"]). + +unmangle_name(Path) -> + [Name, Vsn | _] = re:split(Path, "-", [{return, list}]), + string:join([Name, Vsn], "-"). + +store_unresolved_calls() -> + {ok, UCFull} = analyse("UC"), + UC = [MFA || {_, {_,_,_} = MFA} <- UCFull], + put({?MODULE, unresolved_calls}, sets:from_list(UC)). diff --git a/docs/rabbitmq-server.1.xml b/docs/rabbitmq-server.1.xml index ca63927c..32ae842c 100644 --- a/docs/rabbitmq-server.1.xml +++ b/docs/rabbitmq-server.1.xml @@ -109,7 +109,8 @@ Defaults to 5672. <term>-detached</term> <listitem> <para> - start the server process in the background + Start the server process in the background. Note that this will + cause the pid not to be written to the pid file. 
</para> <para role="example-prefix">For example:</para> <screen role="example">rabbitmq-server -detached</screen> diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index 2d25edee..3082fe14 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -288,105 +288,161 @@ <title>Cluster management</title> <variablelist> - <varlistentry id="cluster"> - <term><cmdsynopsis><command>cluster</command> <arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <varlistentry id="join_cluster"> + <term><cmdsynopsis><command>join_cluster</command> <arg choice="req"><replaceable>clusternode</replaceable></arg><arg choice="opt"><replaceable>--ram</replaceable></arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> <term>clusternode</term> - <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem> + <listitem><para>Node to cluster with.</para></listitem> + </varlistentry> + <varlistentry> + <term><cmdsynopsis><arg choice="opt">--ram</arg></cmdsynopsis></term> + <listitem> + <para> + If provided, the node will join the cluster as a RAM node. + </para> + </listitem> </varlistentry> </variablelist> <para> - Instruct the node to become member of a cluster with the - specified nodes. To cluster with currently offline nodes, - use <link linkend="force_cluster"><command>force_cluster</command></link>. + Instruct the node to become a member of the cluster that the + specified node is in. Before clustering, the node is reset, so be + careful when using this command. For this command to succeed the + RabbitMQ application must have been stopped, e.g. with <link + linkend="stop_app"><command>stop_app</command></link>. </para> <para> - Cluster nodes can be of two types: disk or ram. Disk nodes - replicate data in ram and on disk, thus providing - redundancy in the event of node failure and recovery from - global events such as power failure across all nodes. Ram - nodes replicate data in ram only and are mainly used for - scalability. A cluster must always have at least one disk node. + Cluster nodes can be of two types: disc or RAM. Disc nodes + replicate data in RAM and on disc, thus providing redundancy in + the event of node failure and recovery from global events such + as power failure across all nodes. RAM nodes replicate data in + RAM only (with the exception of queue contents, which can reside + on disc if the queue is persistent or too big to fit in memory) + and are mainly used for scalability. RAM nodes are more + performant only when managing resources (e.g. adding/removing + queues, exchanges, or bindings). A cluster must always have at + least one disc node, and usually should have more than one. </para> <para> - If the current node is to become a disk node it needs to - appear in the cluster node list. Otherwise it becomes a - ram node. If the node list is empty or only contains the - current node then the node becomes a standalone, - i.e. non-clustered, (disk) node. + The node will be a disc node by default. If you wish to + create a RAM node, provide the <command>--ram</command> flag. </para> <para> After executing the <command>join_cluster</command> command, whenever - the RabbitMQ application is started on the current node it - will attempt to connect to the specified nodes, thus - becoming an active node in the cluster comprising those - nodes (and possibly others). + the RabbitMQ application is started on the current node it will + attempt to connect to the nodes that were in the cluster when the + node went down.
+ the RabbitMQ application is started on the current node it will + attempt to connect to the nodes that were in the cluster when the + node went down. </para> <para> - The list of nodes does not have to contain all the - cluster's nodes; a subset is sufficient. Also, clustering - generally succeeds as long as at least one of the - specified nodes is active. Hence adjustments to the list - are only necessary if the cluster configuration is to be - altered radically. + To leave a cluster, <command>reset</command> the node. You can + also remove nodes remotely with the + <command>forget_cluster_node</command> command. </para> <para> - For this command to succeed the RabbitMQ application must - have been stopped, e.g. with <link linkend="stop_app"><command>stop_app</command></link>. Furthermore, - turning a standalone node into a clustered node requires - the node be <link linkend="reset"><command>reset</command></link> first, - in order to avoid accidental destruction of data with the - <command>cluster</command> command. + For more details see the <ulink + url="http://www.rabbitmq.com/clustering.html">clustering + guide</ulink>. </para> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl join_cluster hare@elena --ram</screen> + <para role="example"> + This command instructs the RabbitMQ node to join the cluster that + <command>hare@elena</command> is part of, as a ram node. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term><cmdsynopsis><command>cluster_status</command></cmdsynopsis></term> + <listitem> <para> - For more details see the <ulink url="http://www.rabbitmq.com/clustering.html">clustering guide</ulink>. + Displays all the nodes in the cluster grouped by node type, + together with the currently running nodes. </para> <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl cluster rabbit@tanto hare@elena</screen> + <screen role="example">rabbitmqctl cluster_status</screen> <para role="example"> - This command instructs the RabbitMQ node to join the - cluster with nodes <command>rabbit@tanto</command> and - <command>hare@elena</command>. If the node is one of these then - it becomes a disk node, otherwise a ram node. + This command displays the nodes in the cluster. </para> </listitem> </varlistentry> - <varlistentry id="force_cluster"> - <term><cmdsynopsis><command>force_cluster</command> <arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <varlistentry> + <term><cmdsynopsis><command>change_cluster_node_type</command> <arg choice="req">disc | ram</arg></cmdsynopsis> + </term> + <listitem> + <para> + Changes the type of the cluster node. The node must be stopped for + this operation to succeed, and when turning a node into a RAM node + the node must not be the only disc node in the cluster. + </para> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl change_cluster_node_type disc</screen> + <para role="example"> + This command will turn a RAM node into a disc node. 
+ </para> + </listitem> + </varlistentry> + <varlistentry> + <term><cmdsynopsis><command>forget_cluster_node</command> <arg choice="opt">--offline</arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> - <term>clusternode</term> - <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem> + <term><cmdsynopsis><arg choice="opt">--offline</arg></cmdsynopsis></term> + <listitem> + <para> + Enables node removal from an offline node. This is only + useful in the situation where all the nodes are offline and + the last node to go down cannot be brought online, thus + preventing the whole cluster from starting. It should not be + used in any other circumstances since it can lead to + inconsistencies. + </para> + </listitem> </varlistentry> </variablelist> <para> - Instruct the node to become member of a cluster with the - specified nodes. This will succeed even if the specified nodes - are offline. For a more detailed description, see - <link linkend="cluster"><command>cluster</command>.</link> + Removes a cluster node remotely. The node that is being removed + must be offline, while the node we are removing from must be + online, except when using the <command>--offline</command> flag. </para> - <para> - Note that this variant of the cluster command just - ignores the current status of the specified nodes. - Clustering may still fail for a variety of other - reasons. + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl -n hare@mcnulty forget_cluster_node rabbit@stringer</screen> + <para role="example"> + This command will remove the node + <command>rabbit@stringer</command> from the node + <command>hare@mcnulty</command>. </para> </listitem> </varlistentry> <varlistentry> - <term><cmdsynopsis><command>cluster_status</command></cmdsynopsis></term> + <term><cmdsynopsis><command>update_cluster_nodes</command> <arg choice="req">clusternode</arg></cmdsynopsis> + </term> <listitem> + <variablelist> + <varlistentry> + <term>clusternode</term> + <listitem> + <para> + The node to consult for up to date information. + </para> + </listitem> + </varlistentry> + </variablelist> <para> - Displays all the nodes in the cluster grouped by node type, - together with the currently running nodes. + Instructs an already clustered node to contact + <command>clusternode</command> to cluster when waking up. This is + different from <command>join_cluster</command> since it does not + join any cluster - it checks that the node is already in a cluster + with <command>clusternode</command>. </para> - <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl cluster_status</screen> - <para role="example"> - This command displays the nodes in the cluster. + <para> + The need for this command is motivated by the fact that clusters + can change while a node is offline. Consider the situation in + which node A and B are clustered. A goes down, C clusters with B, + and then B leaves the cluster. When A wakes up, it'll try to + contact B, but this will fail since B is not in the cluster + anymore. <command>update_cluster_nodes -n A C</command> will solve + this situation. </para> </listitem> </varlistentry> @@ -581,7 +637,7 @@ </para> <para> Deleting a virtual host deletes all its exchanges, - queues, user mappings and associated permissions. + queues, bindings, user permissions, parameters and policies. 
</para> <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl delete_vhost test</screen> @@ -750,15 +806,16 @@ Certain features of RabbitMQ (such as the federation plugin) are controlled by dynamic, cluster-wide <emphasis>parameters</emphasis>. Each parameter - consists of a component name, a key and a value. The - component name and key are strings, and the value is an - Erlang term. Parameters can be set, cleared and listed. In - general you should refer to the documentation for the feature - in question to see how to set parameters. + consists of a component name, a name and a value, and is + associated with a virtual host. The component name and name are + strings, and the value is an Erlang term. Parameters can be + set, cleared and listed. In general you should refer to the + documentation for the feature in question to see how to set + parameters. </para> <variablelist> <varlistentry> - <term><cmdsynopsis><command>set_parameter</command> <arg choice="req"><replaceable>component_name</replaceable></arg> <arg choice="req"><replaceable>key</replaceable></arg> <arg choice="req"><replaceable>value</replaceable></arg></cmdsynopsis></term> + <term><cmdsynopsis><command>set_parameter</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="req"><replaceable>component_name</replaceable></arg> <arg choice="req"><replaceable>name</replaceable></arg> <arg choice="req"><replaceable>value</replaceable></arg></cmdsynopsis></term> <listitem> <para> Sets a parameter. @@ -772,29 +829,29 @@ </para></listitem> </varlistentry> <varlistentry> - <term>key</term> + <term>name</term> <listitem><para> - The key for which the parameter is being set. + The name of the parameter being set. </para></listitem> </varlistentry> <varlistentry> <term>value</term> <listitem><para> - The value for the parameter, as an - Erlang term. In most shells you are very likely to + The value for the parameter, as a + JSON term. In most shells you are very likely to need to quote this. </para></listitem> </varlistentry> </variablelist> <para role="example-prefix">For example:</para> - <screen role="example">rabbitmqctl set_parameter federation local_username '<<"guest">>'</screen> + <screen role="example">rabbitmqctl set_parameter federation local_username '"guest"'</screen> <para role="example"> - This command sets the parameter <command>local_username</command> for the <command>federation</command> component to the Erlang term <command><<"guest">></command>. + This command sets the parameter <command>local_username</command> for the <command>federation</command> component in the default virtual host to the JSON term <command>"guest"</command>. </para> </listitem> </varlistentry> <varlistentry> - <term><cmdsynopsis><command>clear_parameter</command> <arg choice="req"><replaceable>component_name</replaceable></arg> <arg choice="req"><replaceable>key</replaceable></arg></cmdsynopsis></term> + <term><cmdsynopsis><command>clear_parameter</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="req"><replaceable>component_name</replaceable></arg> <arg choice="req"><replaceable>key</replaceable></arg></cmdsynopsis></term> <listitem> <para> Clears a parameter. @@ -808,29 +865,116 @@ </para></listitem> </varlistentry> <varlistentry> - <term>key</term> + <term>name</term> <listitem><para> - The key for which the parameter is being cleared. + The name of the parameter being cleared. 
</para></listitem> </varlistentry> </variablelist> <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl clear_parameter federation local_username</screen> <para role="example"> - This command clears the parameter <command>local_username</command> for the <command>federation</command> component. + This command clears the parameter <command>local_username</command> for the <command>federation</command> component in the default virtual host. </para> </listitem> </varlistentry> <varlistentry> - <term><cmdsynopsis><command>list_parameters</command></cmdsynopsis></term> + <term><cmdsynopsis><command>list_parameters</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg></cmdsynopsis></term> <listitem> <para> - Lists all parameters. + Lists all parameters for a virtual host. </para> <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl list_parameters</screen> <para role="example"> - This command lists all parameters. + This command lists all parameters in the default virtual host. + </para> + </listitem> + </varlistentry> + </variablelist> + </refsect2> + + <refsect2> + <title>Policy Management</title> + <para> + Policies are used to control and modify the behaviour of queues + and exchanges on a cluster-wide basis. Policies apply within a + given vhost, and consist of a name, pattern, definition and an + optional priority. Policies can be set, cleared and listed. + </para> + <variablelist> + <varlistentry> + <term><cmdsynopsis><command>set_policy</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="req"><replaceable>name</replaceable></arg> <arg choice="req"><replaceable>pattern</replaceable></arg> <arg choice="req"><replaceable>definition</replaceable></arg> <arg choice="opt"><replaceable>priority</replaceable></arg> </cmdsynopsis></term> + <listitem> + <para> + Sets a policy. + </para> + <variablelist> + <varlistentry> + <term>name</term> + <listitem><para> + The name of the policy. + </para></listitem> + </varlistentry> + <varlistentry> + <term>pattern</term> + <listitem><para> + The regular expression which, when it matches a given resource, causes the policy to apply. + </para></listitem> + </varlistentry> + <varlistentry> + <term>definition</term> + <listitem><para> + The definition of the policy, as a + JSON term. In most shells you are very likely to + need to quote this. + </para></listitem> + </varlistentry> + <varlistentry> + <term>priority</term> + <listitem><para> + The priority of the policy as an integer, defaulting to 0. Higher numbers indicate greater precedence. + </para></listitem> + </varlistentry> + </variablelist> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl set_policy federate-me "^amq." '{"federation-upstream-set":"all"}'</screen> + <para role="example"> + This command sets the policy <command>federate-me</command> in the default virtual host so that built-in exchanges are federated. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term><cmdsynopsis><command>clear_policy</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg> <arg choice="req"><replaceable>name</replaceable></arg></cmdsynopsis></term> + <listitem> + <para> + Clears a policy. + </para> + <variablelist> + <varlistentry> + <term>name</term> + <listitem><para> + The name of the policy being cleared.
+ </para></listitem> + </varlistentry> + </variablelist> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl clear_policy federate-me</screen> + <para role="example"> + This command clears the <command>federate-me</command> policy in the default virtual host. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term><cmdsynopsis><command>list_policies</command> <arg choice="opt">-p <replaceable>vhostpath</replaceable></arg></cmdsynopsis></term> + <listitem> + <para> + Lists all policies for a virtual host. + </para> + <para role="example-prefix">For example:</para> + <screen role="example">rabbitmqctl list_policies</screen> + <para role="example"> + This command lists all policies in the default virtual host. </para> </listitem> </varlistentry> @@ -929,6 +1073,27 @@ <listitem><para>Number of consumers.</para></listitem> </varlistentry> <varlistentry> + <term>active_consumers</term> + <listitem> + <para> + Number of active consumers. An active consumer is + one which could immediately receive any messages + sent to the queue - i.e. it is not limited by its + prefetch count, TCP congestion, flow control, or + because it has issued channel.flow. At least one + of messages_ready and active_consumers must always + be zero. + </para> + <para> + Note that this value is an instantaneous snapshot + - when consumers are restricted by their prefetch + count they may only appear to be active for small + fractions of a second until more messages are sent + out. + </para> + </listitem> + </varlistentry> + <varlistentry> <term>memory</term> <listitem><para>Bytes of memory consumed by the Erlang process associated with the queue, including stack, heap and internal structures.</para></listitem> diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index 523b54ce..9b1ff8bd 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -25,6 +25,7 @@ %% 0 ("no limit") would make a better default, but that %% breaks the QPid Java client {frame_max, 131072}, + {heartbeat, 600}, {msg_store_file_size_limit, 16777216}, {queue_index_max_journal_entries, 262144}, {default_user, <<"guest">>}, @@ -32,7 +33,7 @@ {default_user_tags, [administrator]}, {default_vhost, <<"/">>}, {default_permissions, [<<".*">>, <<".*">>, <<".*">>]}, - {cluster_nodes, []}, + {cluster_nodes, {[], disc}}, {server_properties, []}, {collect_statistics, none}, {collect_statistics_interval, 5000}, diff --git a/include/rabbit.hrl b/include/rabbit.hrl index e8b4a623..3db2b68a 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -47,7 +47,8 @@ -record(exchange_serial, {name, next}). -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, - arguments, pid, slave_pids, mirror_nodes, policy}). + arguments, pid, slave_pids, sync_slave_pids, policy, + gm_pids}). %% mnesia doesn't like unary records, so we add a dummy 'value' field -record(route, {binding, value = const}). @@ -72,12 +73,13 @@ is_persistent}). -record(ssl_socket, {tcp, ssl}). --record(delivery, {mandatory, immediate, sender, message, msg_seq_no}). +-record(delivery, {mandatory, sender, message, msg_seq_no}). -record(amqp_error, {name, explanation = "", method = none}). -record(event, {type, props, timestamp}). --record(message_properties, {expiry, needs_confirming = false}). +-record(message_properties, {expiry, needs_confirming = false, + delivered = false}). 
-record(plugin, {name, %% atom() version, %% string() diff --git a/packaging/RPMS/Fedora/Makefile b/packaging/RPMS/Fedora/Makefile index 03e513f8..4f5f1327 100644 --- a/packaging/RPMS/Fedora/Makefile +++ b/packaging/RPMS/Fedora/Makefile @@ -13,13 +13,17 @@ RPM_OS=fedora endif ifeq "$(RPM_OS)" "suse" +FUNCTION_LIBRARY= REQUIRES=/sbin/chkconfig /sbin/service OS_DEFINES=--define '_initrddir /etc/init.d' --define 'dist .suse' -START_PROG=setsid +SPEC_DEFINES=--define 'group_tag Productivity/Networking/Other' +START_PROG=startproc else +FUNCTION_LIBRARY=\# Source function library.\n. /etc/init.d/functions REQUIRES=chkconfig initscripts OS_DEFINES=--define '_initrddir /etc/rc.d/init.d' -START_PROG=runuser rabbitmq --session-command +SPEC_DEFINES=--define 'group_tag Development/Libraries' +START_PROG=daemon endif rpms: clean server @@ -35,6 +39,7 @@ prepare: cp rabbitmq-server.init SOURCES/rabbitmq-server.init sed -i \ -e 's|^START_PROG=.*$$|START_PROG="$(START_PROG)"|' \ + -e 's|^@FUNCTION_LIBRARY@|$(FUNCTION_LIBRARY)|' \ SOURCES/rabbitmq-server.init ifeq "$(RPM_OS)" "fedora" # Fedora says that only vital services should have Default-Start @@ -47,7 +52,7 @@ endif cp rabbitmq-server.logrotate SOURCES/rabbitmq-server.logrotate server: prepare - rpmbuild -ba --nodeps SPECS/rabbitmq-server.spec $(DEFINES) $(OS_DEFINES) + rpmbuild -ba --nodeps SPECS/rabbitmq-server.spec $(DEFINES) $(OS_DEFINES) $(SPEC_DEFINES) clean: rm -rf SOURCES SPECS RPMS SRPMS BUILD tmp diff --git a/packaging/RPMS/Fedora/rabbitmq-server.init b/packaging/RPMS/Fedora/rabbitmq-server.init index 2d2680e3..3e48147b 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.init +++ b/packaging/RPMS/Fedora/rabbitmq-server.init @@ -10,12 +10,14 @@ # Provides: rabbitmq-server # Required-Start: $remote_fs $network # Required-Stop: $remote_fs $network -# Default-Start: 3 4 5 +# Default-Start: 3 5 # Default-Stop: 0 1 2 6 # Description: RabbitMQ broker # Short-Description: Enable AMQP service provided by RabbitMQ broker ### END INIT INFO +@FUNCTION_LIBRARY@ + PATH=/sbin:/usr/sbin:/bin:/usr/bin NAME=rabbitmq-server DAEMON=/usr/sbin/${NAME} diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index a6899005..d73c5634 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -3,8 +3,8 @@ Name: rabbitmq-server Version: %%VERSION%% Release: 1%{?dist} -License: MPLv1.1 -Group: Development/Libraries +License: MPLv1.1 and MIT and ASL 2.0 and BSD +Group: %{group_tag} Source: http://www.rabbitmq.com/releases/rabbitmq-server/v%{version}/%{name}-%{version}.tar.gz Source1: rabbitmq-server.init Source2: rabbitmq-script-wrapper @@ -31,8 +31,10 @@ scalable implementation of an AMQP broker. 
%define _rabbit_server_ocf %{_builddir}/`basename %{S:4}` %define _plugins_state_dir %{_localstatedir}/lib/rabbitmq/plugins + %define _maindir %{buildroot}%{_rabbit_erllibdir} + %prep %setup -q @@ -110,8 +112,8 @@ done %files -f ../%{name}.files %defattr(-,root,root,-) -%attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/lib/rabbitmq -%attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/log/rabbitmq +%attr(0755, rabbitmq, rabbitmq) %dir %{_localstatedir}/lib/rabbitmq +%attr(0755, rabbitmq, rabbitmq) %dir %{_localstatedir}/log/rabbitmq %dir %{_sysconfdir}/rabbitmq %{_initrddir}/rabbitmq-server %config(noreplace) %{_sysconfdir}/logrotate.d/rabbitmq-server diff --git a/packaging/debs/Debian/debian/control b/packaging/debs/Debian/debian/control index 943ed48f..d4526d87 100644 --- a/packaging/debs/Debian/debian/control +++ b/packaging/debs/Debian/debian/control @@ -5,12 +5,12 @@ Maintainer: RabbitMQ Team <packaging@rabbitmq.com> Uploaders: Emile Joubert <emile@rabbitmq.com> DM-Upload-Allowed: yes Build-Depends: cdbs, debhelper (>= 5), erlang-dev, python-simplejson, xmlto, xsltproc, erlang-nox (>= 1:12.b.3), erlang-src (>= 1:12.b.3), unzip, zip -Standards-Version: 3.8.0 +Standards-Version: 3.9.2 Package: rabbitmq-server Architecture: all Depends: erlang-nox (>= 1:12.b.3), adduser, logrotate, ${misc:Depends} -Description: An AMQP server written in Erlang +Description: AMQP server written in Erlang RabbitMQ is an implementation of AMQP, the emerging standard for high performance enterprise messaging. The RabbitMQ server is a robust and scalable implementation of an AMQP broker. diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in index e461e49e..82c1fb0c 100644 --- a/packaging/macports/Portfile.in +++ b/packaging/macports/Portfile.in @@ -59,7 +59,7 @@ set mandest ${destroot}${prefix}/share/man use_configure no -use_parallel_build yes +use_parallel_build no build.env-append HOME=${workpath} diff --git a/packaging/windows-exe/rabbitmq_nsi.in b/packaging/windows-exe/rabbitmq_nsi.in index 91510991..f5257040 100644 --- a/packaging/windows-exe/rabbitmq_nsi.in +++ b/packaging/windows-exe/rabbitmq_nsi.in @@ -101,7 +101,9 @@ Section "RabbitMQ Service" RabbitService ExpandEnvStrings $0 %COMSPEC% ExecWait '"$0" /C "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" install' ExecWait '"$0" /C "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" start' - CopyFiles "$WINDIR\.erlang.cookie" "$PROFILE\.erlang.cookie" + ReadEnvStr $1 "HOMEDRIVE" + ReadEnvStr $2 "HOMEPATH" + CopyFiles "$WINDIR\.erlang.cookie" "$1$2\.erlang.cookie" SectionEnd ;-------------------------------- @@ -234,4 +236,4 @@ Function findErlang System::Call 'Kernel32::SetEnvironmentVariableA(t, t) i("ERLANG_HOME", "$0").r0' ${EndIf} -FunctionEnd
\ No newline at end of file +FunctionEnd diff --git a/scripts/rabbitmq-plugins.bat b/scripts/rabbitmq-plugins.bat index c67a0263..341f871a 100755 --- a/scripts/rabbitmq-plugins.bat +++ b/scripts/rabbitmq-plugins.bat @@ -47,7 +47,7 @@ if "!RABBITMQ_PLUGINS_DIR!"=="" ( set RABBITMQ_PLUGINS_DIR=!TDP0!..\plugins
)
-"!ERLANG_HOME!\bin\erl.exe" -pa "!TDP0!..\ebin" -noinput -hidden -sname rabbitmq-plugins!RANDOM! -s rabbit_plugins_main -enabled_plugins_file "!RABBITMQ_ENABLED_PLUGINS_FILE!" -plugins_dist_dir "!RABBITMQ_PLUGINS_DIR:\=/!" -extra !STAR!
+"!ERLANG_HOME!\bin\erl.exe" -pa "!TDP0!..\ebin" -noinput -hidden -sname rabbitmq-plugins!RANDOM!!TIME:~9! -s rabbit_plugins_main -enabled_plugins_file "!RABBITMQ_ENABLED_PLUGINS_FILE!" -plugins_dist_dir "!RABBITMQ_PLUGINS_DIR:\=/!" -extra !STAR!
endlocal
endlocal
diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 34915b3d..e1686627 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -65,9 +65,20 @@ case "$(uname -s)" in CYGWIN*) # we make no attempt to record the cygwin pid; rabbitmqctl wait # will not be able to make sense of it anyway ;; - *) mkdir -p $(dirname ${RABBITMQ_PID_FILE}); - echo $$ > ${RABBITMQ_PID_FILE} - ;; + *) # When -detached is passed, we don't write the pid, since it'd be the + # wrong one + detached="" + for opt in "$@"; do + if [ "$opt" = "-detached" ]; then + detached="true" + fi + done + if [ $detached ]; then + echo "Warning: PID file not written; -detached was passed." 1>&2 + else + mkdir -p $(dirname ${RABBITMQ_PID_FILE}); + echo $$ > ${RABBITMQ_PID_FILE} + fi esac RABBITMQ_EBIN_ROOT="${RABBITMQ_HOME}/ebin" diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat index 167f272e..3aea4c07 100755 --- a/scripts/rabbitmq-server.bat +++ b/scripts/rabbitmq-server.bat @@ -96,7 +96,7 @@ set RABBITMQ_EBIN_ROOT=!TDP0!..\ebin -pa "!RABBITMQ_EBIN_ROOT!" ^
-noinput -hidden ^
-s rabbit_prelaunch ^
- -sname rabbitmqprelaunch!RANDOM! ^
+ -sname rabbitmqprelaunch!RANDOM!!TIME:~9! ^
-extra "!RABBITMQ_NODENAME!"
if ERRORLEVEL 1 (
diff --git a/scripts/rabbitmqctl.bat b/scripts/rabbitmqctl.bat index 9f549f1e..d8b1eaf1 100755 --- a/scripts/rabbitmqctl.bat +++ b/scripts/rabbitmqctl.bat @@ -43,7 +43,7 @@ if not exist "!ERLANG_HOME!\bin\erl.exe" ( exit /B
)
-"!ERLANG_HOME!\bin\erl.exe" -pa "!TDP0!..\ebin" -noinput -hidden !RABBITMQ_CTL_ERL_ARGS! -sname rabbitmqctl!RANDOM! -s rabbit_control_main -nodename !RABBITMQ_NODENAME! -extra !STAR!
+"!ERLANG_HOME!\bin\erl.exe" -pa "!TDP0!..\ebin" -noinput -hidden !RABBITMQ_CTL_ERL_ARGS! -sname rabbitmqctl!RANDOM!!TIME:~9! -s rabbit_control_main -nodename !RABBITMQ_NODENAME! -extra !STAR!
endlocal
endlocal
diff --git a/src/app_utils.erl b/src/app_utils.erl index 4bef83a5..fdf6ed41 100644 --- a/src/app_utils.erl +++ b/src/app_utils.erl @@ -15,17 +15,21 @@ %% -module(app_utils). --export([load_applications/1, start_applications/1, - stop_applications/1, app_dependency_order/2, +-export([load_applications/1, start_applications/1, start_applications/2, + stop_applications/1, stop_applications/2, app_dependency_order/2, wait_for_applications/1]). -ifdef(use_specs). --spec load_applications([atom()]) -> 'ok'. --spec start_applications([atom()]) -> 'ok'. --spec stop_applications([atom()]) -> 'ok'. --spec wait_for_applications([atom()]) -> 'ok'. --spec app_dependency_order([atom()], boolean()) -> [digraph:vertex()]. +-type error_handler() :: fun((atom(), any()) -> 'ok'). + +-spec load_applications([atom()]) -> 'ok'. +-spec start_applications([atom()]) -> 'ok'. +-spec stop_applications([atom()]) -> 'ok'. +-spec start_applications([atom()], error_handler()) -> 'ok'. +-spec stop_applications([atom()], error_handler()) -> 'ok'. +-spec wait_for_applications([atom()]) -> 'ok'. +-spec app_dependency_order([atom()], boolean()) -> [digraph:vertex()]. -endif. @@ -37,21 +41,34 @@ load_applications(Apps) -> ok. start_applications(Apps) -> + start_applications( + Apps, fun (App, Reason) -> + throw({error, {cannot_start_application, App, Reason}}) + end). + +stop_applications(Apps) -> + stop_applications( + Apps, fun (App, Reason) -> + throw({error, {cannot_stop_application, App, Reason}}) + end). + +start_applications(Apps, ErrorHandler) -> manage_applications(fun lists:foldl/3, fun application:start/1, fun application:stop/1, already_started, - cannot_start_application, + ErrorHandler, Apps). -stop_applications(Apps) -> +stop_applications(Apps, ErrorHandler) -> manage_applications(fun lists:foldr/3, fun application:stop/1, fun application:start/1, not_started, - cannot_stop_application, + ErrorHandler, Apps). + wait_for_applications(Apps) -> [wait_for_application(App) || App <- Apps], ok. @@ -107,14 +124,14 @@ app_dependencies(App) -> {ok, Lst} -> Lst end. -manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) -> +manage_applications(Iterate, Do, Undo, SkipError, ErrorHandler, Apps) -> Iterate(fun (App, Acc) -> case Do(App) of ok -> [App | Acc]; {error, {SkipError, _}} -> Acc; {error, Reason} -> lists:foreach(Undo, Acc), - throw({error, {ErrorTag, App, Reason}}) + ErrorHandler(App, Reason) end end, [], Apps), ok. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index f3b4dbaf..3260d369 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -120,12 +120,12 @@ %% do not need to worry about their handles being closed by the server %% - reopening them when necessary is handled transparently. %% -%% The server also supports obtain, release and transfer. obtain/0 +%% The server also supports obtain, release and transfer. obtain/{0,1} %% blocks until a file descriptor is available, at which point the -%% requesting process is considered to 'own' one more -%% descriptor. release/0 is the inverse operation and releases a -%% previously obtained descriptor. transfer/1 transfers ownership of a -%% file descriptor between processes. It is non-blocking. Obtain has a +%% requesting process is considered to 'own' more descriptor(s). +%% release/{0,1} is the inverse operation and releases previously obtained +%% descriptor(s). transfer/{1,2} transfers ownership of file descriptor(s) +%% between processes. It is non-blocking. 
Obtain has a %% lower limit, set by the ?OBTAIN_LIMIT/1 macro. File handles can use %% the entire limit, but will be evicted by obtain calls up to the %% point at which no more obtain calls can be satisfied by the obtains @@ -136,8 +136,8 @@ %% as sockets can do so in such a way that the overall number of open %% file descriptors is managed. %% -%% The callers of register_callback/3, obtain/0, and the argument of -%% transfer/1 are monitored, reducing the count of handles in use +%% The callers of register_callback/3, obtain, and the argument of +%% transfer are monitored, reducing the count of handles in use %% appropriately when the processes terminate. -behaviour(gen_server2). @@ -146,12 +146,13 @@ -export([open/3, close/1, read/2, append/2, needs_sync/1, sync/1, position/2, truncate/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). --export([obtain/0, release/0, transfer/1, set_limit/1, get_limit/0, info_keys/0, +-export([obtain/0, obtain/1, release/0, release/1, transfer/1, transfer/2, + set_limit/1, get_limit/0, info_keys/0, info/0, info/1]). -export([ulimit/0]). --export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, prioritise_cast/2]). +-export([start_link/0, start_link/2, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3, prioritise_cast/2]). -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). @@ -195,7 +196,9 @@ obtain_count, obtain_pending, clients, - timer_ref + timer_ref, + alarm_set, + alarm_clear }). -record(cstate, @@ -249,8 +252,11 @@ -spec(clear/1 :: (ref()) -> ok_or_error()). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(obtain/0 :: () -> 'ok'). +-spec(obtain/1 :: (non_neg_integer()) -> 'ok'). -spec(release/0 :: () -> 'ok'). +-spec(release/1 :: (non_neg_integer()) -> 'ok'). -spec(transfer/1 :: (pid()) -> 'ok'). +-spec(transfer/2 :: (pid(), non_neg_integer()) -> 'ok'). -spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). -spec(get_limit/0 :: () -> non_neg_integer()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). @@ -268,7 +274,11 @@ %%---------------------------------------------------------------------------- start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). + start_link(fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). + +start_link(AlarmSet, AlarmClear) -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [AlarmSet, AlarmClear], + [{timeout, infinity}]). register_callback(M, F, A) when is_atom(M) andalso is_atom(F) andalso is_list(A) -> @@ -374,11 +384,11 @@ sync(Ref) -> end). needs_sync(Ref) -> - with_handles( - [Ref], - fun ([#handle { is_dirty = false, write_buffer = [] }]) -> false; - ([_Handle]) -> true - end). + %% This must *not* use with_handles/2; see bug 25052 + case get({Ref, fhc_handle}) of + #handle { is_dirty = false, write_buffer = [] } -> false; + #handle {} -> true + end. position(Ref, NewOffset) -> with_flushed_handles( @@ -479,18 +489,22 @@ set_maximum_since_use(MaximumAge) -> true -> ok end. -obtain() -> +obtain() -> obtain(1). +release() -> release(1). +transfer(Pid) -> transfer(Pid, 1). + +obtain(Count) when Count > 0 -> %% If the FHC isn't running, obtains succeed immediately. case whereis(?SERVER) of undefined -> ok; - _ -> gen_server2:call(?SERVER, {obtain, self()}, infinity) + _ -> gen_server2:call(?SERVER, {obtain, Count, self()}, infinity) end. 
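[Editor's note: since obtain/1 and release/1 now take a count, a caller needing several descriptors can reserve them in one round trip. A sketch of such a caller — the bracketing helper with_fds/2 is hypothetical; only the obtain/release calls come from the API above:

%% Reserve N descriptors, run Fun, and always hand the descriptors
%% back, even if Fun throws. with_fds/2 itself is illustrative;
%% file_handle_cache:obtain/1 and release/1 are the new API above.
with_fds(N, Fun) ->
    ok = file_handle_cache:obtain(N),
    try
        Fun()
    after
        ok = file_handle_cache:release(N)
    end.
]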
-release() -> - gen_server2:cast(?SERVER, {release, self()}). +release(Count) when Count > 0 -> + gen_server2:cast(?SERVER, {release, Count, self()}). -transfer(Pid) -> - gen_server2:cast(?SERVER, {transfer, self(), Pid}). +transfer(Pid, Count) when Count > 0 -> + gen_server2:cast(?SERVER, {transfer, Count, self(), Pid}). set_limit(Limit) -> gen_server2:call(?SERVER, {set_limit, Limit}, infinity). @@ -806,7 +820,7 @@ i(Item, _) -> throw({bad_argument, Item}). %% gen_server2 callbacks %%---------------------------------------------------------------------------- -init([]) -> +init([AlarmSet, AlarmClear]) -> Limit = case application:get_env(file_handles_high_watermark) of {ok, Watermark} when (is_integer(Watermark) andalso Watermark > 0) -> @@ -830,11 +844,13 @@ init([]) -> obtain_count = 0, obtain_pending = pending_new(), clients = Clients, - timer_ref = undefined }}. + timer_ref = undefined, + alarm_set = AlarmSet, + alarm_clear = AlarmClear }}. prioritise_cast(Msg, _State) -> case Msg of - {release, _} -> 5; + {release, _, _} -> 5; _ -> 0 end. @@ -867,11 +883,12 @@ handle_call({open, Pid, Requested, EldestUnusedSince}, From, false -> {noreply, run_pending_item(Item, State)} end; -handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, - obtain_pending = Pending, - clients = Clients }) -> +handle_call({obtain, N, Pid}, From, State = #fhc_state { + obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) -> ok = track_client(Pid, Clients), - Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + Item = #pending { kind = obtain, pid = Pid, requested = N, from = From }, Enqueue = fun () -> true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), @@ -882,7 +899,7 @@ handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, case obtain_limit_reached(State) of true -> Enqueue(); false -> case needs_reduce(State #fhc_state { - obtain_count = Count + 1 }) of + obtain_count = Count + N }) of true -> reduce(Enqueue()); false -> adjust_alarm( State, run_pending_item(Item, State)) @@ -917,9 +934,9 @@ handle_cast({update, Pid, EldestUnusedSince}, %% storm of messages {noreply, State}; -handle_cast({release, Pid}, State) -> +handle_cast({release, N, Pid}, State) -> {noreply, adjust_alarm(State, process_pending( - update_counts(obtain, Pid, -1, State)))}; + update_counts(obtain, Pid, -N, State)))}; handle_cast({close, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, clients = Clients }) -> @@ -931,11 +948,11 @@ handle_cast({close, Pid, EldestUnusedSince}, {noreply, adjust_alarm(State, process_pending( update_counts(open, Pid, -1, State)))}; -handle_cast({transfer, FromPid, ToPid}, State) -> +handle_cast({transfer, N, FromPid, ToPid}, State) -> ok = track_client(ToPid, State#fhc_state.clients), {noreply, process_pending( - update_counts(obtain, ToPid, +1, - update_counts(obtain, FromPid, -1, State)))}. + update_counts(obtain, ToPid, +N, + update_counts(obtain, FromPid, -N, State)))}. handle_info(check_counts, State) -> {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; @@ -1026,10 +1043,11 @@ obtain_limit_reached(#fhc_state { obtain_limit = Limit, obtain_count = Count}) -> Limit =/= infinity andalso Count >= Limit. 
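[Editor's note: start_link/2 makes the alarm sink pluggable — the default start_link/0 still wires in alarm_handler, but a caller can now inject other funs. Routing file_descriptor_limit alarms through rabbit_alarm is a guess at the intended use, based on the rabbit_alarm calls in the Makefile hunk above:

%% Hypothetical supervisor child start: alarms raised when the obtain
%% limit is reached go to rabbit_alarm instead of alarm_handler.
{ok, _Pid} = file_handle_cache:start_link(fun rabbit_alarm:set_alarm/1,
                                          fun rabbit_alarm:clear_alarm/1).
]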
-adjust_alarm(OldState, NewState) -> +adjust_alarm(OldState = #fhc_state { alarm_set = AlarmSet, + alarm_clear = AlarmClear }, NewState) -> case {obtain_limit_reached(OldState), obtain_limit_reached(NewState)} of - {false, true} -> alarm_handler:set_alarm({file_descriptor_limit, []}); - {true, false} -> alarm_handler:clear_alarm(file_descriptor_limit); + {false, true} -> AlarmSet({file_descriptor_limit, []}); + {true, false} -> AlarmClear(file_descriptor_limit); _ -> ok end, NewState. diff --git a/src/gatherer.erl b/src/gatherer.erl index 98b36038..29d2d713 100644 --- a/src/gatherer.erl +++ b/src/gatherer.erl @@ -18,7 +18,7 @@ -behaviour(gen_server2). --export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). +-export([start_link/0, stop/1, fork/1, finish/1, in/2, sync_in/2, out/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -32,6 +32,7 @@ -spec(fork/1 :: (pid()) -> 'ok'). -spec(finish/1 :: (pid()) -> 'ok'). -spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(sync_in/2 :: (pid(), any()) -> 'ok'). -spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). -endif. @@ -62,6 +63,9 @@ finish(Pid) -> in(Pid, Value) -> gen_server2:cast(Pid, {in, Value}). +sync_in(Pid, Value) -> + gen_server2:call(Pid, {in, Value}, infinity). + out(Pid) -> gen_server2:call(Pid, out, infinity). @@ -78,19 +82,22 @@ handle_call(stop, _From, State) -> handle_call(fork, _From, State = #gstate { forks = Forks }) -> {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; +handle_call({in, Value}, From, State) -> + {noreply, in(Value, From, State), hibernate}; + handle_call(out, From, State = #gstate { forks = Forks, values = Values, blocked = Blocked }) -> case queue:out(Values) of + {empty, _} when Forks == 0 -> + {reply, empty, State, hibernate}; {empty, _} -> - case Forks of - 0 -> {reply, empty, State, hibernate}; - _ -> {noreply, - State #gstate { blocked = queue:in(From, Blocked) }, - hibernate} - end; - {{value, _Value} = V, NewValues} -> - {reply, V, State #gstate { values = NewValues }, hibernate} + {noreply, State #gstate { blocked = queue:in(From, Blocked) }, + hibernate}; + {{value, {PendingIn, Value}}, NewValues} -> + reply(PendingIn, ok), + {reply, {value, Value}, State #gstate { values = NewValues }, + hibernate} end; handle_call(Msg, _From, State) -> @@ -107,15 +114,8 @@ handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, hibernate}; -handle_cast({in, Value}, State = #gstate { values = Values, - blocked = Blocked }) -> - {noreply, case queue:out(Blocked) of - {empty, _} -> - State #gstate { values = queue:in(Value, Values) }; - {{value, From}, NewBlocked} -> - gen_server2:reply(From, {value, Value}), - State #gstate { blocked = NewBlocked } - end, hibernate}; +handle_cast({in, Value}, State) -> + {noreply, in(Value, undefined, State), hibernate}; handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}. @@ -128,3 +128,18 @@ code_change(_OldVsn, State, _Extra) -> terminate(_Reason, State) -> State. + +%%---------------------------------------------------------------------------- + +in(Value, From, State = #gstate { values = Values, blocked = Blocked }) -> + case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in({From, Value}, Values) }; + {{value, PendingOut}, NewBlocked} -> + reply(From, ok), + gen_server2:reply(PendingOut, {value, Value}), + State #gstate { blocked = NewBlocked } + end. 
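[Editor's note: sync_in/2 is the blocking counterpart of in/2 — the producer's call returns only once a consumer has taken the value via out/1 (the stored {From, Value} pair is replied to with ok in the in/3 clause above). A toy round trip; the process structure is illustrative:

%% Toy use of the new blocking producer API. The producer's
%% sync_in/2 returns only after out/1 has consumed the value.
demo() ->
    {ok, G} = gatherer:start_link(),
    ok = gatherer:fork(G),
    Self = self(),
    spawn_link(fun() ->
                   ok = gatherer:sync_in(G, hello), %% blocks until consumed
                   Self ! produced
               end),
    {value, hello} = gatherer:out(G),
    receive produced -> ok end,
    ok = gatherer:finish(G),
    gatherer:stop(G).
]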
+ +reply(undefined, _Reply) -> ok; +reply(From, Reply) -> gen_server2:reply(From, Reply). @@ -77,9 +77,13 @@ %% confirmed_broadcast/2 directly from the callback module otherwise %% you will deadlock the entire group. %% -%% group_members/1 -%% Provide the Pid. Returns a list of the current group members. +%% info/1 +%% Provide the Pid. Returns a proplist with various facts, including +%% the group name and the current group members. %% +%% forget_group/1 +%% Provide the group name. Removes its mnesia record. Makes no attempt +%% to ensure the group is empty. %% %% Implementation Overview %% ----------------------- @@ -372,8 +376,8 @@ -behaviour(gen_server2). --export([create_tables/0, start_link/3, leave/1, broadcast/2, - confirmed_broadcast/2, group_members/1]). +-export([create_tables/0, start_link/4, leave/1, broadcast/2, + confirmed_broadcast/2, info/1, forget_group/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, prioritise_info/2]). @@ -404,7 +408,8 @@ callback_args, confirms, broadcast_buffer, - broadcast_timer + broadcast_timer, + txn_executor }). -record(gm_group, { name, version, members }). @@ -424,14 +429,16 @@ -export_type([group_name/0]). -type(group_name() :: any()). +-type(txn_fun() :: fun((fun(() -> any())) -> any())). -spec(create_tables/0 :: () -> 'ok' | {'aborted', any()}). --spec(start_link/3 :: (group_name(), atom(), any()) -> +-spec(start_link/4 :: (group_name(), atom(), any(), txn_fun()) -> rabbit_types:ok_pid_or_error()). -spec(leave/1 :: (pid()) -> 'ok'). -spec(broadcast/2 :: (pid(), any()) -> 'ok'). -spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok'). --spec(group_members/1 :: (pid()) -> [pid()]). +-spec(info/1 :: (pid()) -> rabbit_types:infos()). +-spec(forget_group/1 :: (group_name()) -> 'ok'). %% The joined, members_changed and handle_msg callbacks can all return %% any of the following terms: @@ -502,8 +509,8 @@ table_definitions() -> {Name, Attributes} = ?TABLE, [{Name, [?TABLE_MATCH | Attributes]}]. -start_link(GroupName, Module, Args) -> - gen_server2:start_link(?MODULE, [GroupName, Module, Args], []). +start_link(GroupName, Module, Args, TxnFun) -> + gen_server2:start_link(?MODULE, [GroupName, Module, Args, TxnFun], []). leave(Server) -> gen_server2:cast(Server, leave). @@ -514,11 +521,17 @@ broadcast(Server, Msg) -> confirmed_broadcast(Server, Msg) -> gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity). -group_members(Server) -> - gen_server2:call(Server, group_members, infinity). +info(Server) -> + gen_server2:call(Server, info, infinity). +forget_group(GroupName) -> + {atomic, ok} = mnesia:sync_transaction( + fun () -> + mnesia:delete({?GROUP_TABLE, GroupName}) + end), + ok. -init([GroupName, Module, Args]) -> +init([GroupName, Module, Args, TxnFun]) -> {MegaSecs, Secs, MicroSecs} = now(), random:seed(MegaSecs, Secs, MicroSecs), Self = make_member(GroupName), @@ -534,7 +547,8 @@ init([GroupName, Module, Args]) -> callback_args = Args, confirms = queue:new(), broadcast_buffer = [], - broadcast_timer = undefined }, hibernate, + broadcast_timer = undefined, + txn_executor = TxnFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
@@ -553,12 +567,16 @@ handle_call({confirmed_broadcast, Msg}, _From, handle_call({confirmed_broadcast, Msg}, From, State) -> internal_broadcast(Msg, From, State); -handle_call(group_members, _From, +handle_call(info, _From, State = #state { members_state = undefined }) -> reply(not_joined, State); -handle_call(group_members, _From, State = #state { view = View }) -> - reply(get_pids(alive_view_members(View)), State); +handle_call(info, _From, State = #state { group_name = GroupName, + module = Module, + view = View }) -> + reply([{group_name, GroupName}, + {module, Module}, + {group_members, get_pids(alive_view_members(View))}], State); handle_call({add_on_right, _NewMember}, _From, State = #state { members_state = undefined }) -> @@ -570,7 +588,8 @@ handle_call({add_on_right, NewMember}, _From, view = View, members_state = MembersState, module = Module, - callback_args = Args }) -> + callback_args = Args, + txn_executor = TxnFun }) -> {MembersState1, Group} = record_new_member_in_group( GroupName, Self, NewMember, @@ -581,7 +600,7 @@ handle_call({add_on_right, NewMember}, _From, {catchup, Self, prepare_members_state(MembersState1)}), MembersState1 - end), + end, TxnFun), View2 = group_to_view(Group), State1 = check_neighbours(State #state { view = View2, members_state = MembersState1 }), @@ -627,8 +646,9 @@ handle_cast(join, State = #state { self = Self, group_name = GroupName, members_state = undefined, module = Module, - callback_args = Args }) -> - View = join_group(Self, GroupName), + callback_args = Args, + txn_executor = TxnFun }) -> + View = join_group(Self, GroupName, TxnFun), MembersState = case alive_view_members(View) of [Self] -> blank_member_state(); @@ -655,7 +675,8 @@ handle_info({'DOWN', MRef, process, _Pid, Reason}, view = View, module = Module, callback_args = Args, - confirms = Confirms }) -> + confirms = Confirms, + txn_executor = TxnFun }) -> Member = case {Left, Right} of {{Member1, MRef}, _} -> Member1; {_, {Member1, MRef}} -> Member1; @@ -668,7 +689,8 @@ handle_info({'DOWN', MRef, process, _Pid, Reason}, noreply(State); _ -> View1 = - group_to_view(record_dead_member_in_group(Member, GroupName)), + group_to_view(record_dead_member_in_group(Member, + GroupName, TxnFun)), {Result, State2} = case alive_view_members(View1) of [Self] -> @@ -970,14 +992,15 @@ ensure_alive_suffix1(MembersQ) -> %% View modification %% --------------------------------------------------------------------------- -join_group(Self, GroupName) -> - join_group(Self, GroupName, read_group(GroupName)). +join_group(Self, GroupName, TxnFun) -> + join_group(Self, GroupName, read_group(GroupName), TxnFun). 
-join_group(Self, GroupName, {error, not_found}) -> - join_group(Self, GroupName, prune_or_create_group(Self, GroupName)); -join_group(Self, _GroupName, #gm_group { members = [Self] } = Group) -> +join_group(Self, GroupName, {error, not_found}, TxnFun) -> + join_group(Self, GroupName, + prune_or_create_group(Self, GroupName, TxnFun), TxnFun); +join_group(Self, _GroupName, #gm_group { members = [Self] } = Group, _TxnFun) -> group_to_view(Group); -join_group(Self, GroupName, #gm_group { members = Members } = Group) -> +join_group(Self, GroupName, #gm_group { members = Members } = Group, TxnFun) -> case lists:member(Self, Members) of true -> group_to_view(Group); @@ -985,20 +1008,22 @@ join_group(Self, GroupName, #gm_group { members = Members } = Group) -> case lists:filter(fun is_member_alive/1, Members) of [] -> join_group(Self, GroupName, - prune_or_create_group(Self, GroupName)); + prune_or_create_group(Self, GroupName, TxnFun)); Alive -> Left = lists:nth(random:uniform(length(Alive)), Alive), Handler = fun () -> join_group( Self, GroupName, - record_dead_member_in_group(Left, GroupName)) + record_dead_member_in_group( + Left, GroupName, TxnFun), + TxnFun) end, try case gen_server2:call( get_pid(Left), {add_on_right, Self}, infinity) of {ok, Group1} -> group_to_view(Group1); - not_ready -> join_group(Self, GroupName) + not_ready -> join_group(Self, GroupName, TxnFun) end catch exit:{R, _} @@ -1017,29 +1042,29 @@ read_group(GroupName) -> [Group] -> Group end. -prune_or_create_group(Self, GroupName) -> - {atomic, Group} = - mnesia:sync_transaction( - fun () -> GroupNew = #gm_group { name = GroupName, - members = [Self], - version = ?VERSION_START }, - case mnesia:read({?GROUP_TABLE, GroupName}) of - [] -> - mnesia:write(GroupNew), - GroupNew; - [Group1 = #gm_group { members = Members }] -> - case lists:any(fun is_member_alive/1, Members) of - true -> Group1; - false -> mnesia:write(GroupNew), - GroupNew - end - end - end), +prune_or_create_group(Self, GroupName, TxnFun) -> + Group = TxnFun( + fun () -> + GroupNew = #gm_group { name = GroupName, + members = [Self], + version = ?VERSION_START }, + case mnesia:read({?GROUP_TABLE, GroupName}) of + [] -> + mnesia:write(GroupNew), + GroupNew; + [Group1 = #gm_group { members = Members }] -> + case lists:any(fun is_member_alive/1, Members) of + true -> Group1; + false -> mnesia:write(GroupNew), + GroupNew + end + end + end), Group. -record_dead_member_in_group(Member, GroupName) -> - {atomic, Group} = - mnesia:sync_transaction( +record_dead_member_in_group(Member, GroupName, TxnFun) -> + Group = + TxnFun( fun () -> [Group1 = #gm_group { members = Members, version = Ver }] = mnesia:read({?GROUP_TABLE, GroupName}), case lists:splitwith( @@ -1056,9 +1081,9 @@ record_dead_member_in_group(Member, GroupName) -> end), Group. -record_new_member_in_group(GroupName, Left, NewMember, Fun) -> - {atomic, {Result, Group}} = - mnesia:sync_transaction( +record_new_member_in_group(GroupName, Left, NewMember, Fun, TxnFun) -> + {Result, Group} = + TxnFun( fun () -> [#gm_group { members = Members, version = Ver } = Group1] = mnesia:read({?GROUP_TABLE, GroupName}), @@ -1073,10 +1098,10 @@ record_new_member_in_group(GroupName, Left, NewMember, Fun) -> end), {Result, Group}. 
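%% The txn_executor threaded through these functions must match the
%% txn_fun() type above: it takes a fun to run transactionally and
%% returns that fun's result, already unwrapped. A sketch of one
%% conforming executor, equivalent to the mnesia:sync_transaction calls
%% it replaces here:
%%
%%   TxnFun = fun (F) ->
%%                {atomic, Result} = mnesia:sync_transaction(F),
%%                Result
%%            end.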
-erase_members_in_group(Members, GroupName) -> +erase_members_in_group(Members, GroupName, TxnFun) -> DeadMembers = [{dead, Id} || Id <- Members], - {atomic, Group} = - mnesia:sync_transaction( + Group = + TxnFun( fun () -> [Group1 = #gm_group { members = [_|_] = Members1, version = Ver }] = @@ -1097,7 +1122,8 @@ maybe_erase_aliases(State = #state { self = Self, view = View0, members_state = MembersState, module = Module, - callback_args = Args }, View) -> + callback_args = Args, + txn_executor = TxnFun }, View) -> #view_member { aliases = Aliases } = fetch_view_member(Self, View), {Erasable, MembersState1} = ?SETS:fold( @@ -1114,7 +1140,7 @@ maybe_erase_aliases(State = #state { self = Self, case Erasable of [] -> {ok, State1 #state { view = View }}; _ -> View1 = group_to_view( - erase_members_in_group(Erasable, GroupName)), + erase_members_in_group(Erasable, GroupName, TxnFun)), {callback_view_changed(Args, Module, View0, View1), check_neighbours(State1 #state { view = View1 })} end. diff --git a/src/gm_soak_test.erl b/src/gm_soak_test.erl index 57217541..5fbfc223 100644 --- a/src/gm_soak_test.erl +++ b/src/gm_soak_test.erl @@ -105,7 +105,9 @@ spawn_member() -> random:seed(MegaSecs, Secs, MicroSecs), %% start up delay of no more than 10 seconds timer:sleep(random:uniform(10000)), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []), + {ok, Pid} = gm:start_link( + ?MODULE, ?MODULE, [], + fun rabbit_misc:execute_mnesia_transaction/1), Start = random:uniform(10000), send_loop(Pid, Start, Start + random:uniform(10000)), gm:leave(Pid), diff --git a/src/gm_speed_test.erl b/src/gm_speed_test.erl index dad75bd4..84d4ab2f 100644 --- a/src/gm_speed_test.erl +++ b/src/gm_speed_test.erl @@ -44,7 +44,8 @@ terminate(Owner, _Reason) -> %% other wile_e_coyote(Time, WriteUnit) -> - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), receive joined -> ok end, timer:sleep(1000), %% wait for all to join timer:send_after(Time, stop), diff --git a/src/gm_tests.erl b/src/gm_tests.erl index 0a2d4204..a9c0ba90 100644 --- a/src/gm_tests.erl +++ b/src/gm_tests.erl @@ -76,7 +76,9 @@ test_confirmed_broadcast() -> test_member_death() -> with_two_members( fun (Pid, Pid2) -> - {ok, Pid3} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid3} = gm:start_link( + ?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid3, [Pid, Pid2, Pid3], timeout_joining_gm_group_3), passed = receive_birth(Pid, Pid3, timeout_waiting_for_birth_3_1), @@ -128,10 +130,12 @@ test_broadcast_fun(Fun) -> with_two_members(Fun) -> ok = gm:create_tables(), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1), - {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2), passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2), diff --git a/src/mirrored_supervisor.erl b/src/mirrored_supervisor.erl index 4fc488b8..24c3ebd0 100644 --- a/src/mirrored_supervisor.erl +++ b/src/mirrored_supervisor.erl @@ -174,7 +174,7 @@ -spec start_internal(Group, ChildSpecs) -> Result when Group :: group_name(), ChildSpecs :: [supervisor2:child_spec()], - Result :: 
supervisor2:startlink_ret(). + Result :: {'ok', pid()} | {'error', term()}. -spec create_tables() -> Result when Result :: 'ok'. diff --git a/src/mochijson2.erl b/src/mochijson2.erl new file mode 100644 index 00000000..bddb52cc --- /dev/null +++ b/src/mochijson2.erl @@ -0,0 +1,893 @@ +%% This file is a copy of `mochijson2.erl' from mochiweb, revision +%% d541e9a0f36c00dcadc2e589f20e47fbf46fc76f. For the license, see +%% `LICENSE-MIT-Mochi'. + +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works +%% with binaries as strings, arrays as lists (without an {array, _}) +%% wrapper and it only knows how to decode UTF-8 (and ASCII). +%% +%% JSON terms are decoded as follows (javascript -> erlang): +%% <ul> +%% <li>{"key": "value"} -> +%% {struct, [{<<"key">>, <<"value">>}]}</li> +%% <li>["array", 123, 12.34, true, false, null] -> +%% [<<"array">>, 123, 12.34, true, false, null] +%% </li> +%% </ul> +%% <ul> +%% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li> +%% <li>Objects decode to {struct, PropList}</li> +%% <li>Numbers decode to integer or float</li> +%% <li>true, false, null decode to their respective terms.</li> +%% </ul> +%% The encoder will accept the same format that the decoder will produce, +%% but will also allow additional cases for leniency: +%% <ul> +%% <li>atoms other than true, false, null will be considered UTF-8 +%% strings (even as a proplist key) +%% </li> +%% <li>{json, IoList} will insert IoList directly into the output +%% with no validation +%% </li> +%% <li>{array, Array} will be encoded as Array +%% (legacy mochijson style) +%% </li> +%% <li>A non-empty raw proplist will be encoded as an object as long +%% as the first pair does not have an atom key of json, struct, +%% or array +%% </li> +%% </ul> + +-module(mochijson2). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1, decode/2]). + +%% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, + column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, + column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, + column=1, + line=1+S#decoder.line}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). + +%% @type json_string() = atom | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = [json_term()] +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_eep18_object() = {[{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() | json_eep18_object() | json_iolist() + +-record(encoder, {handler=null, + utf8=false}). + +-record(decoder, {object_hook=null, + offset=0, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +%% @type encoder_option() = handler_option() | utf8_option() +%% @type utf8_option() = boolean(). 
Emit unicode as utf8 (default - false) +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. +decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist(), [{format, proplist | eep18 | struct}]) -> json_term() +%% @doc Decode the given iolist to Erlang terms using the given object format +%% for decoding, where proplist returns JSON objects as [{binary(), json_term()}] +%% proplists, eep18 returns JSON objects as {[binary(), json_term()]}, and struct +%% returns them as-is. +decode(S, Options) -> + json_decode(S, parse_decoder_options(Options, #decoder{})). + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}); +parse_encoder_options([{utf8, Switch} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{utf8=Switch}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}); +parse_decoder_options([{format, Format} | Rest], State) + when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist -> + parse_decoder_options(Rest, State#decoder{object_hook=Format}). + +json_encode(true, _State) -> + <<"true">>; +json_encode(false, _State) -> + <<"false">>; +json_encode(null, _State) -> + <<"null">>; +json_encode(I, _State) when is_integer(I) -> + integer_to_list(I); +json_encode(F, _State) when is_float(F) -> + mochinum:digits(F); +json_encode(S, State) when is_binary(S); is_atom(S) -> + json_encode_string(S, State); +json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso + K =/= array andalso + K =/= json) -> + json_encode_proplist(Props, State); +json_encode({struct, Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({}, State) -> + json_encode_proplist([], State); +json_encode(Array, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({array, Array}, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({json, IoList}, _State) -> + IoList; +json_encode(Bad, #encoder{handler=null}) -> + exit({json_encode, {bad_term, Bad}}); +json_encode(Bad, State=#encoder{handler=Handler}) -> + json_encode(Handler(Bad), State). + +json_encode_array([], _State) -> + <<"[]">>; +json_encode_array(L, State) -> + F = fun (O, Acc) -> + [$,, json_encode(O, State) | Acc] + end, + [$, | Acc1] = lists:foldl(F, "[", L), + lists:reverse([$\] | Acc1]). + +json_encode_proplist([], _State) -> + <<"{}">>; +json_encode_proplist(Props, State) -> + F = fun ({K, V}, Acc) -> + KS = json_encode_string(K, State), + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = lists:foldl(F, "{", Props), + lists:reverse([$\} | Acc1]). 
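%% A small encoding sketch covering the clauses above; the expected
%% outputs match the test vectors at the end of this file.
%%
%%   <<"{\"foo\":\"bar\"}">> =
%%       iolist_to_binary(mochijson2:encode({struct, [{foo, <<"bar">>}]})),
%%   <<"[1,\"a\",null]">> =
%%       iolist_to_binary(mochijson2:encode([1, <<"a">>, null])).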
+ +json_encode_string(A, State) when is_atom(A) -> + L = atom_to_list(A), + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q]) + end; +json_encode_string(B, State) when is_binary(B) -> + case json_bin_is_safe(B) of + true -> + [?Q, B, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q]) + end; +json_encode_string(I, _State) when is_integer(I) -> + [?Q, integer_to_list(I), ?Q]; +json_encode_string(L, State) when is_list(L) -> + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(L, State, [?Q]) + end. + +json_string_is_safe([]) -> + true; +json_string_is_safe([C | Rest]) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + json_string_is_safe(Rest); + _ -> + false + end. + +json_bin_is_safe(<<>>) -> + true; +json_bin_is_safe(<<C, Rest/binary>>) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f -> + false; + C when C < 16#7f -> + json_bin_is_safe(Rest) + end. + +json_encode_string_unicode([], _State, Acc) -> + lists:reverse([$\" | Acc]); +json_encode_string_unicode([C | Cs], State, Acc) -> + Acc1 = case C of + ?Q -> + [?Q, $\\ | Acc]; + %% Escaping solidus is only useful when trying to protect + %% against "</script>" injection attacks which are only + %% possible when JSON is inserted into a HTML document + %% in-line. mochijson2 does not protect you from this, so + %% if you do insert directly into HTML then you need to + %% uncomment the following case or escape the output of encode. + %% + %% $/ -> + %% [$/, $\\ | Acc]; + %% + $\\ -> + [$\\, $\\ | Acc]; + $\b -> + [$b, $\\ | Acc]; + $\f -> + [$f, $\\ | Acc]; + $\n -> + [$n, $\\ | Acc]; + $\r -> + [$r, $\\ | Acc]; + $\t -> + [$t, $\\ | Acc]; + C when C >= 0, C < $\s -> + [unihex(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> + [xmerl_ucs:to_utf8(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> + [unihex(C) | Acc]; + C when C < 16#7f -> + [C | Acc]; + _ -> + exit({json_encode, {bad_char, C}}) + end, + json_encode_string_unicode(Cs, State, Acc1). + +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +unihex(C) when C < 16#10000 -> + <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, + Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], + [$\\, $u | Digits]; +unihex(C) when C =< 16#10FFFF -> + N = C - 16#10000, + S1 = 16#d800 bor ((N bsr 10) band 16#3ff), + S2 = 16#dc00 bor (N band 16#3ff), + [unihex(S1), unihex(S2)]. + +json_decode(L, S) when is_list(L) -> + json_decode(iolist_to_binary(L), S); +json_decode(B, S) -> + {Res, S1} = decode1(B, S), + {eof, _} = tokenize(B, S1#decoder{state=trim}), + Res. + +decode1(B, S=#decoder{state=null}) -> + case tokenize(B, S#decoder{state=any}) of + {{const, C}, S1} -> + {C, S1}; + {start_array, S1} -> + decode_array(B, S1); + {start_object, S1} -> + decode_object(B, S1) + end. + +make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct -> + V; +make_object({struct, P}, #decoder{object_hook=eep18}) -> + {P}; +make_object({struct, P}, #decoder{object_hook=proplist}) -> + P; +make_object(V, #decoder{object_hook=Hook}) -> + Hook(V). 
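%% How the format option plays out through make_object/2 above,
%% mirroring the format_test_ cases at the end of this file:
%%
%%   {struct, [{<<"k">>, <<"v">>}]} = mochijson2:decode(<<"{\"k\":\"v\"}">>),
%%   {[{<<"k">>, <<"v">>}]} =
%%       mochijson2:decode(<<"{\"k\":\"v\"}">>, [{format, eep18}]),
%%   [{<<"k">>, <<"v">>}] =
%%       mochijson2:decode(<<"{\"k\":\"v\"}">>, [{format, proplist}]).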
+ +decode_object(B, S) -> + decode_object(B, S#decoder{state=key}, []). + +decode_object(B, S=#decoder{state=key}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {{const, K}, S1} -> + {colon, S2} = tokenize(B, S1), + {V, S3} = decode1(B, S2#decoder{state=null}), + decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) + end; +decode_object(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {comma, S1} -> + decode_object(B, S1#decoder{state=key}, Acc) + end. + +decode_array(B, S) -> + decode_array(B, S#decoder{state=any}, []). + +decode_array(B, S=#decoder{state=any}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {start_array, S1} -> + {Array, S2} = decode_array(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {start_object, S1} -> + {Array, S2} = decode_object(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {{const, Const}, S1} -> + decode_array(B, S1#decoder{state=comma}, [Const | Acc]) + end; +decode_array(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {comma, S1} -> + decode_array(B, S1#decoder{state=any}, Acc) + end. + +tokenize_string(B, S=#decoder{offset=O}) -> + case tokenize_string_fast(B, O) of + {escape, O1} -> + Length = O1 - O, + S1 = ?ADV_COL(S, Length), + <<_:O/binary, Head:Length/binary, _/binary>> = B, + tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); + O1 -> + Length = O1 - O, + <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, + {{const, String}, ?ADV_COL(S, Length + 1)} + end. + +tokenize_string_fast(B, O) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + O; + <<_:O/binary, $\\, _/binary>> -> + {escape, O}; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string_fast(B, 4 + O); + _ -> + throw(invalid_utf8) + end. 
+ +tokenize_string(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; + <<_:O/binary, "\\\"", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); + <<_:O/binary, "\\\\", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); + <<_:O/binary, "\\/", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); + <<_:O/binary, "\\b", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); + <<_:O/binary, "\\f", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); + <<_:O/binary, "\\n", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); + <<_:O/binary, "\\r", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); + <<_:O/binary, "\\t", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); + <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> + C = erlang:list_to_integer([C3, C2, C1, C0], 16), + if C > 16#D7FF, C < 16#DC00 -> + %% coalesce UTF-16 surrogate pair + <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, + D = erlang:list_to_integer([D3,D2,D1,D0], 16), + [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer, + D:16/big-unsigned-integer>>), + Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc), + tokenize_string(B, ?ADV_COL(S, 12), Acc1); + true -> + Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), + tokenize_string(B, ?ADV_COL(S, 6), Acc1) + end; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) + end. + +tokenize_number(B, S) -> + case tokenize_number(B, sign, S, []) of + {{int, Int}, S1} -> + {{const, list_to_integer(Int)}, S1}; + {{float, Float}, S1} -> + {{const, list_to_float(Float)}, S1} + end. + +tokenize_number(B, sign, S=#decoder{offset=O}, []) -> + case B of + <<_:O/binary, $-, _/binary>> -> + tokenize_number(B, int, ?INC_COL(S), [$-]); + _ -> + tokenize_number(B, int, S, []) + end; +tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $0, _/binary>> -> + tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); + <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, frac, S, Acc) + end; +tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> + tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. 
| Acc]); + _ -> + {{int, lists:reverse(Acc)}, S} + end; +tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end; +tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> + tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, eint, S, Acc) + end; +tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end. + +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize(B, ?INC_CHAR(S, C)); + <<_:O/binary, "{", _/binary>> -> + {start_object, ?INC_COL(S)}; + <<_:O/binary, "}", _/binary>> -> + {end_object, ?INC_COL(S)}; + <<_:O/binary, "[", _/binary>> -> + {start_array, ?INC_COL(S)}; + <<_:O/binary, "]", _/binary>> -> + {end_array, ?INC_COL(S)}; + <<_:O/binary, ",", _/binary>> -> + {comma, ?INC_COL(S)}; + <<_:O/binary, ":", _/binary>> -> + {colon, ?INC_COL(S)}; + <<_:O/binary, "null", _/binary>> -> + {{const, null}, ?ADV_COL(S, 4)}; + <<_:O/binary, "true", _/binary>> -> + {{const, true}, ?ADV_COL(S, 4)}; + <<_:O/binary, "false", _/binary>> -> + {{const, false}, ?ADV_COL(S, 5)}; + <<_:O/binary, "\"", _/binary>> -> + tokenize_string(B, ?INC_COL(S)); + <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) + orelse C =:= $- -> + tokenize_number(B, S); + <<_:O/binary>> -> + trim = S#decoder.state, + {eof, S} + end. +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +%% testing constructs borrowed from the Yaws JSON implementation. + +%% Create an object from a list of Key/Value pairs. + +obj_new() -> + {struct, []}. + +is_obj({struct, Props}) -> + F = fun ({K, _}) when is_binary(K) -> true end, + lists:all(F, Props). + +obj_from_list(Props) -> + Obj = {struct, Props}, + ?assert(is_obj(Obj)), + Obj. + +%% Test for equivalence of Erlang terms. +%% Due to arbitrary order of construction, equivalent objects might +%% compare unequal as erlang terms, so we need to carefully recurse +%% through aggregates (tuples and objects). + +equiv({struct, Props1}, {struct, Props2}) -> + equiv_object(Props1, Props2); +equiv(L1, L2) when is_list(L1), is_list(L2) -> + equiv_list(L1, L2); +equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; +equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; +equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. + +%% Object representation and traversal order is unknown. +%% Use the sledgehammer and sort property lists. + +equiv_object(Props1, Props2) -> + L1 = lists:keysort(1, Props1), + L2 = lists:keysort(1, Props2), + Pairs = lists:zip(L1, L2), + true = lists:all(fun({{K1, V1}, {K2, V2}}) -> + equiv(K1, K2) and equiv(V1, V2) + end, Pairs). + +%% Recursively compare tuple elements for equivalence. + +equiv_list([], []) -> + true; +equiv_list([V1 | L1], [V2 | L2]) -> + equiv(V1, V2) andalso equiv_list(L1, L2). 
+ +decode_test() -> + [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). + +e2j_vec_test() -> + test_one(e2j_test_vec(utf8), 1). + +test_one([], _N) -> + %% io:format("~p tests passed~n", [N-1]), + ok; +test_one([{E, J} | Rest], N) -> + %% io:format("[~p] ~p ~p~n", [N, E, J]), + true = equiv(E, decode(J)), + true = equiv(E, decode(encode(E))), + test_one(Rest, 1+N). + +e2j_test_vec(utf8) -> + [ + {1, "1"}, + {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes + {-1, "-1"}, + {-3.1416, "-3.14160"}, + {12.0e10, "1.20000e+11"}, + {1.234E+10, "1.23400e+10"}, + {-1.234E-10, "-1.23400e-10"}, + {10.0, "1.0e+01"}, + {123.456, "1.23456E+2"}, + {10.0, "1e1"}, + {<<"foo">>, "\"foo\""}, + {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, + {<<"">>, "\"\""}, + {<<"\n\n\n">>, "\"\\n\\n\\n\""}, + {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, + {obj_new(), "{}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), + "{\"foo\":\"bar\",\"baz\":123}"}, + {[], "[]"}, + {[[]], "[[]]"}, + {[1, <<"foo">>], "[1,\"foo\"]"}, + + %% json array in a json object + {obj_from_list([{<<"foo">>, [123]}]), + "{\"foo\":[123]}"}, + + %% json object in a json object + {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), + "{\"foo\":{\"bar\":true}}"}, + + %% fold evaluation order + {obj_from_list([{<<"foo">>, []}, + {<<"bar">>, obj_from_list([{<<"baz">>, true}])}, + {<<"alice">>, <<"bob">>}]), + "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, + + %% json object in a json array + {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], + "[-123,\"foo\",{\"bar\":[]},null]"} + ]. + +%% test utf8 encoding +encoder_utf8_test() -> + %% safe conversion case (default) + [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = + encode(<<1,"\321\202\320\265\321\201\321\202">>), + + %% raw utf8 output (optional) + Enc = mochijson2:encoder([{utf8, true}]), + [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = + Enc(<<1,"\321\202\320\265\321\201\321\202">>). + +input_validation_test() -> + Good = [ + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius + ], + lists:foreach(fun({CodePoint, UTF8}) -> + Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), + Expect = decode(UTF8) + end, Good), + + Bad = [ + %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + <<?Q, 16#80, ?Q>>, + %% missing continuations, last byte in each should be 80-BF + <<?Q, 16#C2, 16#7F, ?Q>>, + <<?Q, 16#E0, 16#80,16#7F, ?Q>>, + <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, + %% we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>, + %% escape characters trigger a different code path + <<?Q, $\\, $\n, 16#80, ?Q>> + ], + lists:foreach( + fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end, + %% could be {ucs,{bad_utf8_character_code}} or + %% {json_encode,{bad_char,_}} + {'EXIT', _} = (catch encode(X)) + end, Bad). + +inline_json_test() -> + ?assertEqual(<<"\"iodata iodata\"">>, + iolist_to_binary( + encode({json, [<<"\"iodata">>, " iodata\""]}))), + ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, + decode( + encode({struct, + [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), + ok. 
+ +big_unicode_test() -> + UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(UTF8Seq))), + ?assertEqual( + UTF8Seq, + decode(iolist_to_binary(encode(UTF8Seq)))), + ok. + +custom_decoder_test() -> + ?assertEqual( + {struct, [{<<"key">>, <<"value">>}]}, + (decoder([]))("{\"key\": \"value\"}")), + F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, + ?assertEqual( + win, + (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), + ok. + +atom_test() -> + %% JSON native atoms + [begin + ?assertEqual(A, decode(atom_to_list(A))), + ?assertEqual(iolist_to_binary(atom_to_list(A)), + iolist_to_binary(encode(A))) + end || A <- [true, false, null]], + %% Atom to string + ?assertEqual( + <<"\"foo\"">>, + iolist_to_binary(encode(foo))), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))), + ok. + +key_encode_test() -> + %% Some forms are accepted as keys that would not be strings in other + %% cases + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{foo, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{"foo", 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{foo, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{<<"foo">>, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{"foo", 1}]))), + ?assertEqual( + <<"{\"\\ud834\\udd20\":1}">>, + iolist_to_binary( + encode({struct, [{[16#0001d120], 1}]}))), + ?assertEqual( + <<"{\"1\":1}">>, + iolist_to_binary(encode({struct, [{1, 1}]}))), + ok. + +unsafe_chars_test() -> + Chars = "\"\\\b\f\n\r\t", + [begin + ?assertEqual(false, json_string_is_safe([C])), + ?assertEqual(false, json_bin_is_safe(<<C>>)), + ?assertEqual(<<C>>, decode(encode(<<C>>))) + end || C <- Chars], + ?assertEqual( + false, + json_string_is_safe([16#0001d120])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))), + ?assertEqual( + [16#0001d120], + xmerl_ucs:from_utf8( + binary_to_list( + decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))), + ?assertEqual( + false, + json_string_is_safe([16#110000])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))), + %% solidus can be escaped but isn't unsafe by default + ?assertEqual( + <<"/">>, + decode(<<"\"\\/\"">>)), + ok. + +int_test() -> + ?assertEqual(0, decode("0")), + ?assertEqual(1, decode("1")), + ?assertEqual(11, decode("11")), + ok. + +large_int_test() -> + ?assertEqual(<<"-2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(-2147483649214748364921474836492147483649))), + ?assertEqual(<<"2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(2147483649214748364921474836492147483649))), + ok. + +float_test() -> + ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), + ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), + ok. + +handler_test() -> + ?assertEqual( + {'EXIT',{json_encode,{bad_term,{x,y}}}}, + catch encode({x,y})), + F = fun ({x,y}) -> [] end, + ?assertEqual( + <<"[]">>, + iolist_to_binary((encoder([{handler, F}]))({x, y}))), + ok. 
+ +encode_empty_test_() -> + [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))} + || {A, B} <- [{"eep18 {}", {}}, + {"eep18 {[]}", {[]}}, + {"{struct, []}", {struct, []}}]]. + +encode_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))} + || F <- [struct, eep18, proplist]]. + +format_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(A, decode(JSON, [{format, F}]))} + || {F, A} <- [{struct, {struct, P}}, + {eep18, {P}}, + {proplist, P}]]. + +-endif. diff --git a/src/mochinum.erl b/src/mochinum.erl new file mode 100644 index 00000000..4ea7a22a --- /dev/null +++ b/src/mochinum.erl @@ -0,0 +1,358 @@ +%% This file is a copy of `mochinum.erl' from mochiweb, revision +%% d541e9a0f36c00dcadc2e589f20e47fbf46fc76f. For the license, see +%% `LICENSE-MIT-Mochi'. + +%% @copyright 2007 Mochi Media, Inc. +%% @author Bob Ippolito <bob@mochimedia.com> + +%% @doc Useful numeric algorithms for floats that cover some deficiencies +%% in the math module. More interesting is digits/1, which implements +%% the algorithm from: +%% http://www.cs.indiana.edu/~burger/fp/index.html +%% See also "Printing Floating-Point Numbers Quickly and Accurately" +%% in Proceedings of the SIGPLAN '96 Conference on Programming Language +%% Design and Implementation. + +-module(mochinum). +-author("Bob Ippolito <bob@mochimedia.com>"). +-export([digits/1, frexp/1, int_pow/2, int_ceil/1]). + +%% IEEE 754 Float exponent bias +-define(FLOAT_BIAS, 1022). +-define(MIN_EXP, -1074). +-define(BIG_POW, 4503599627370496). + +%% External API + +%% @spec digits(number()) -> string() +%% @doc Returns a string that accurately represents the given integer or float +%% using a conservative amount of digits. Great for generating +%% human-readable output, or compact ASCII serializations for floats. +digits(N) when is_integer(N) -> + integer_to_list(N); +digits(0.0) -> + "0.0"; +digits(Float) -> + {Frac1, Exp1} = frexp_int(Float), + [Place0 | Digits0] = digits1(Float, Exp1, Frac1), + {Place, Digits} = transform_digits(Place0, Digits0), + R = insert_decimal(Place, Digits), + case Float < 0 of + true -> + [$- | R]; + _ -> + R + end. + +%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()} +%% @doc Return the fractional and exponent part of an IEEE 754 double, +%% equivalent to the libc function of the same name. +%% F = Frac * pow(2, Exp). +frexp(F) -> + frexp1(unpack(F)). + +%% @spec int_pow(X::integer(), N::integer()) -> Y::integer() +%% @doc Moderately efficient way to exponentiate integers. +%% int_pow(10, 2) = 100. +int_pow(_X, 0) -> + 1; +int_pow(X, N) when N > 0 -> + int_pow(X, N, 1). + +%% @spec int_ceil(F::float()) -> integer() +%% @doc Return the ceiling of F as an integer. The ceiling is defined as +%% F when F == trunc(F); +%% trunc(F) when F < 0; +%% trunc(F) + 1 when F > 0. +int_ceil(X) -> + T = trunc(X), + case (X - T) of + Pos when Pos > 0 -> T + 1; + _ -> T + end. + + +%% Internal API + +int_pow(X, N, R) when N < 2 -> + R * X; +int_pow(X, N, R) -> + int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). + +insert_decimal(0, S) -> + "0." ++ S; +insert_decimal(Place, S) when Place > 0 -> + L = length(S), + case Place - L of + 0 -> + S ++ ".0"; + N when N < 0 -> + {S0, S1} = lists:split(L + N, S), + S0 ++ "." 
++ S1; + N when N < 6 -> + %% More places than digits + S ++ lists:duplicate(N, $0) ++ ".0"; + _ -> + insert_decimal_exp(Place, S) + end; +insert_decimal(Place, S) when Place > -6 -> + "0." ++ lists:duplicate(abs(Place), $0) ++ S; +insert_decimal(Place, S) -> + insert_decimal_exp(Place, S). + +insert_decimal_exp(Place, S) -> + [C | S0] = S, + S1 = case S0 of + [] -> + "0"; + _ -> + S0 + end, + Exp = case Place < 0 of + true -> + "e-"; + false -> + "e+" + end, + [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)). + + +digits1(Float, Exp, Frac) -> + Round = ((Frac band 1) =:= 0), + case Exp >= 0 of + true -> + BExp = 1 bsl Exp, + case (Frac =/= ?BIG_POW) of + true -> + scale((Frac * BExp * 2), 2, BExp, BExp, + Round, Round, Float); + false -> + scale((Frac * BExp * 4), 4, (BExp * 2), BExp, + Round, Round, Float) + end; + false -> + case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of + true -> + scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, + Round, Round, Float); + false -> + scale((Frac * 4), 1 bsl (2 - Exp), 2, 1, + Round, Round, Float) + end + end. + +scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> + Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), + %% Note that the scheme implementation uses a 326 element look-up table + %% for int_pow(10, N) where we do not. + case Est >= 0 of + true -> + fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, + LowOk, HighOk); + false -> + Scale = int_pow(10, -Est), + fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, + LowOk, HighOk) + end. + +fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> + TooLow = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TooLow of + true -> + [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)]; + false -> + [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)] + end. + +generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> + D = R0 div S, + R = R0 rem S, + TC1 = case LowOk of + true -> + R =< MMinus; + false -> + R < MMinus + end, + TC2 = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TC1 of + false -> + case TC2 of + false -> + [D | generate(R * 10, S, MPlus * 10, MMinus * 10, + LowOk, HighOk)]; + true -> + [D + 1] + end; + true -> + case TC2 of + false -> + [D]; + true -> + case R * 2 < S of + true -> + [D]; + false -> + [D + 1] + end + end + end. + +unpack(Float) -> + <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>, + {Sign, Exp, Frac}. + +frexp1({_Sign, 0, 0}) -> + {0.0, 0}; +frexp1({Sign, 0, Frac}) -> + Exp = log2floor(Frac), + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, (Frac-1):52>>, + {Frac1, -(?FLOAT_BIAS) - 52 + Exp}; +frexp1({Sign, Exp, Frac}) -> + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>, + {Frac1, Exp - ?FLOAT_BIAS}. + +log2floor(Int) -> + log2floor(Int, 0). + +log2floor(0, N) -> + N; +log2floor(Int, N) -> + log2floor(Int bsr 1, 1 + N). + + +transform_digits(Place, [0 | Rest]) -> + transform_digits(Place, Rest); +transform_digits(Place, Digits) -> + {Place, [$0 + D || D <- Digits]}. + + +frexp_int(F) -> + case unpack(F) of + {_Sign, 0, Frac} -> + {Frac, ?MIN_EXP}; + {_Sign, Exp, Frac} -> + {Frac + (1 bsl 52), Exp - 53 - ?FLOAT_BIAS} + end. + +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +int_ceil_test() -> + ?assertEqual(1, int_ceil(0.0001)), + ?assertEqual(0, int_ceil(0.0)), + ?assertEqual(1, int_ceil(0.99)), + ?assertEqual(1, int_ceil(1.0)), + ?assertEqual(-1, int_ceil(-1.5)), + ?assertEqual(-2, int_ceil(-2.0)), + ok. 
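%% A few concrete inputs and outputs for digits/1 and frexp/1, drawn
%% from digits_test and frexp_test below:
%%
%%   "0.1"    = mochinum:digits(0.1),
%%   "1.0e+6" = mochinum:digits(1000000.0),
%%   {0.5, 1} = mochinum:frexp(1.0).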
+ +int_pow_test() -> + ?assertEqual(1, int_pow(1, 1)), + ?assertEqual(1, int_pow(1, 0)), + ?assertEqual(1, int_pow(10, 0)), + ?assertEqual(10, int_pow(10, 1)), + ?assertEqual(100, int_pow(10, 2)), + ?assertEqual(1000, int_pow(10, 3)), + ok. + +digits_test() -> + ?assertEqual("0", + digits(0)), + ?assertEqual("0.0", + digits(0.0)), + ?assertEqual("1.0", + digits(1.0)), + ?assertEqual("-1.0", + digits(-1.0)), + ?assertEqual("0.1", + digits(0.1)), + ?assertEqual("0.01", + digits(0.01)), + ?assertEqual("0.001", + digits(0.001)), + ?assertEqual("1.0e+6", + digits(1000000.0)), + ?assertEqual("0.5", + digits(0.5)), + ?assertEqual("4503599627370496.0", + digits(4503599627370496.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 =:= 5.0e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual("5.0e-324", + digits(SmallDenorm)), + ?assertEqual(SmallDenorm, + list_to_float(digits(SmallDenorm))), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual("2.225073858507201e-308", + digits(BigDenorm)), + ?assertEqual(BigDenorm, + list_to_float(digits(BigDenorm))), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual("2.2250738585072014e-308", + digits(SmallNorm)), + ?assertEqual(SmallNorm, + list_to_float(digits(SmallNorm))), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual("1.7976931348623157e+308", + digits(LargeNorm)), + ?assertEqual(LargeNorm, + list_to_float(digits(LargeNorm))), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual("5.0e-324", + digits(math:pow(2, -1074))), + ok. + +frexp_test() -> + %% zero + ?assertEqual({0.0, 0}, frexp(0.0)), + %% one + ?assertEqual({0.5, 1}, frexp(1.0)), + %% negative one + ?assertEqual({-0.5, 1}, frexp(-1.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual({0.5, -1073}, frexp(SmallDenorm)), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999978, -1022}, + frexp(BigDenorm)), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual({0.5, -1021}, frexp(SmallNorm)), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999989, 1024}, + frexp(LargeNorm)), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual( + {0.5, -1073}, + frexp(math:pow(2, -1074))), + ok. + +-endif. diff --git a/src/pmon.erl b/src/pmon.erl index 45786577..1aeebb72 100644 --- a/src/pmon.erl +++ b/src/pmon.erl @@ -27,37 +27,39 @@ -opaque(?MODULE() :: dict()). +-type(item() :: pid() | {atom(), node()}). + -spec(new/0 :: () -> ?MODULE()). --spec(monitor/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(monitor_all/2 :: ([pid()], ?MODULE()) -> ?MODULE()). --spec(demonitor/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(is_monitored/2 :: (pid(), ?MODULE()) -> boolean()). --spec(erase/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(monitored/1 :: (?MODULE()) -> [pid()]). +-spec(monitor/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(monitor_all/2 :: ([item()], ?MODULE()) -> ?MODULE()). +-spec(demonitor/2 :: (item(), ?MODULE()) -> ?MODULE()). 
+-spec(is_monitored/2 :: (item(), ?MODULE()) -> boolean()). +-spec(erase/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(monitored/1 :: (?MODULE()) -> [item()]). -spec(is_empty/1 :: (?MODULE()) -> boolean()). -endif. new() -> dict:new(). -monitor(Pid, M) -> - case dict:is_key(Pid, M) of +monitor(Item, M) -> + case dict:is_key(Item, M) of true -> M; - false -> dict:store(Pid, erlang:monitor(process, Pid), M) + false -> dict:store(Item, erlang:monitor(process, Item), M) end. -monitor_all(Pids, M) -> lists:foldl(fun monitor/2, M, Pids). +monitor_all(Items, M) -> lists:foldl(fun monitor/2, M, Items). -demonitor(Pid, M) -> - case dict:find(Pid, M) of +demonitor(Item, M) -> + case dict:find(Item, M) of {ok, MRef} -> erlang:demonitor(MRef), - dict:erase(Pid, M); + dict:erase(Item, M); error -> M end. -is_monitored(Pid, M) -> dict:is_key(Pid, M). +is_monitored(Item, M) -> dict:is_key(Item, M). -erase(Pid, M) -> dict:erase(Pid, M). +erase(Item, M) -> dict:erase(Item, M). monitored(M) -> dict:fetch_keys(M). diff --git a/src/rabbit.erl b/src/rabbit.erl index fda489fe..69f77824 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -20,7 +20,8 @@ -export([start/0, boot/0, stop/0, stop_and_halt/0, await_startup/0, status/0, is_running/0, - is_running/1, environment/0, rotate_logs/1, force_event_refresh/0]). + is_running/1, environment/0, rotate_logs/1, force_event_refresh/0, + start_fhc/0]). -export([start/2, stop/1]). @@ -53,8 +54,7 @@ -rabbit_boot_step({file_handle_cache, [{description, "file handle cache server"}, - {mfa, {rabbit_sup, start_restartable_child, - [file_handle_cache]}}, + {mfa, {rabbit, start_fhc, []}}, {requires, pre_boot}, {enables, worker_pool}]}). @@ -176,7 +176,7 @@ -rabbit_boot_step({notify_cluster, [{description, "notify cluster nodes"}, - {mfa, {rabbit_node_monitor, notify_cluster, []}}, + {mfa, {rabbit_node_monitor, notify_node_up, []}}, {requires, networking}]}). %%--------------------------------------------------------------------------- @@ -301,7 +301,10 @@ start() -> %% mnesia after just restarting the app ok = ensure_application_loaded(), ok = ensure_working_log_handlers(), - ok = app_utils:start_applications(app_startup_order()), + rabbit_node_monitor:prepare_cluster_status_files(), + rabbit_mnesia:check_cluster_consistency(), + ok = app_utils:start_applications( + app_startup_order(), fun handle_app_error/2), ok = print_plugin_info(rabbit_plugins:active()) end). @@ -310,26 +313,43 @@ boot() -> ok = ensure_application_loaded(), maybe_hipe_compile(), ok = ensure_working_log_handlers(), + rabbit_node_monitor:prepare_cluster_status_files(), ok = rabbit_upgrade:maybe_upgrade_mnesia(), + %% It's important that the consistency check happens after + %% the upgrade, since if we are a secondary node the + %% primary node will have forgotten us + rabbit_mnesia:check_cluster_consistency(), Plugins = rabbit_plugins:setup(), ToBeLoaded = Plugins ++ ?APPS, ok = app_utils:load_applications(ToBeLoaded), StartupApps = app_utils:app_dependency_order(ToBeLoaded, false), - ok = app_utils:start_applications(StartupApps), + ok = app_utils:start_applications( + StartupApps, fun handle_app_error/2), ok = print_plugin_info(Plugins) end). +handle_app_error(App, {bad_return, {_MFA, {'EXIT', {Reason, _}}}}) -> + throw({could_not_start, App, Reason}); + +handle_app_error(App, Reason) -> + throw({could_not_start, App, Reason}). 
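%% handle_app_error/2 is the failure callback handed to
%% app_utils:start_applications/2 above; a sketch (with a hypothetical
%% application list) of how a failed start surfaces, matching the catch
%% clause in start_it/1 below:
%%
%%   try
%%       ok = app_utils:start_applications([rabbit], fun handle_app_error/2)
%%   catch
%%       throw:{could_not_start, _App, _Reason} = Err ->
%%           boot_error(Err, not_available)
%%   end.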
+ start_it(StartFun) -> try StartFun() + catch + throw:{could_not_start, _App, _Reason}=Err -> + boot_error(Err, not_available); + _:Reason -> + boot_error(Reason, erlang:get_stacktrace()) after %% give the error loggers some time to catch up timer:sleep(100) end. stop() -> - rabbit_log:info("Stopping Rabbit~n"), + rabbit_log:info("Stopping RabbitMQ~n"), ok = app_utils:stop_applications(app_shutdown_order()). stop_and_halt() -> @@ -349,7 +369,7 @@ status() -> {running_applications, application:which_applications(infinity)}, {os, os:type()}, {erlang_version, erlang:system_info(system_version)}, - {memory, erlang:memory()}], + {memory, rabbit_vm:memory()}], S2 = rabbit_misc:filter_exit_map( fun ({Key, {M, F, A}}) -> {Key, erlang:apply(M, F, A)} end, [{vm_memory_high_watermark, {vm_memory_monitor, @@ -397,6 +417,9 @@ rotate_logs(BinarySuffix) -> start(normal, []) -> case erts_version_check() of ok -> + {ok, Vsn} = application:get_key(rabbit, vsn), + error_logger:info_msg("Starting RabbitMQ ~s on Erlang ~s~n", + [Vsn, erlang:system_info(otp_release)]), {ok, SupPid} = rabbit_sup:start_link(), true = register(rabbit, self()), print_banner(), @@ -408,12 +431,11 @@ start(normal, []) -> end. stop(_State) -> - ok = rabbit_mnesia:record_running_nodes(), terminated_ok = error_logger:delete_report_handler(rabbit_error_logger), ok = rabbit_alarm:stop(), ok = case rabbit_mnesia:is_clustered() of true -> rabbit_amqqueue:on_node_down(node()); - false -> rabbit_mnesia:empty_ram_only_tables() + false -> rabbit_table:clear_ram_only_tables() end, ok. @@ -444,7 +466,7 @@ run_boot_step({StepName, Attributes}) -> [try apply(M,F,A) catch - _:Reason -> boot_step_error(Reason, erlang:get_stacktrace()) + _:Reason -> boot_error(Reason, erlang:get_stacktrace()) end || {M,F,A} <- MFAs], io:format("done~n"), ok @@ -483,14 +505,17 @@ sort_boot_steps(UnsortedSteps) -> {mfa, {M,F,A}} <- Attributes, not erlang:function_exported(M, F, length(A))] of [] -> SortedSteps; - MissingFunctions -> boot_error( + MissingFunctions -> basic_boot_error( + {missing_functions, MissingFunctions}, "Boot step functions not exported: ~p~n", [MissingFunctions]) end; {error, {vertex, duplicate, StepName}} -> - boot_error("Duplicate boot step name: ~w~n", [StepName]); + basic_boot_error({duplicate_boot_step, StepName}, + "Duplicate boot step name: ~w~n", [StepName]); {error, {edge, Reason, From, To}} -> - boot_error( + basic_boot_error( + {invalid_boot_step_dependency, From, To}, "Could not add boot step dependency of ~w on ~w:~n~s", [To, From, case Reason of @@ -504,30 +529,38 @@ sort_boot_steps(UnsortedSteps) -> end]) end. -boot_step_error({error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> +boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> + AllNodes = rabbit_mnesia:cluster_nodes(all), {Err, Nodes} = - case rabbit_mnesia:read_previously_running_nodes() of + case AllNodes -- [node()] of [] -> {"Timeout contacting cluster nodes. Since RabbitMQ was" " shut down forcefully~nit cannot determine which nodes" - " are timing out. 
Details on all nodes will~nfollow.~n", - rabbit_mnesia:all_clustered_nodes() -- [node()]}; + " are timing out.~n", []}; Ns -> {rabbit_misc:format( "Timeout contacting cluster nodes: ~p.~n", [Ns]), Ns} end, - boot_error(Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); - -boot_step_error(Reason, Stacktrace) -> - boot_error("Error description:~n ~p~n~n" - "Log files (may contain more information):~n ~s~n ~s~n~n" - "Stack trace:~n ~p~n~n", - [Reason, log_location(kernel), log_location(sasl), Stacktrace]). + basic_boot_error(Term, + Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); +boot_error(Reason, Stacktrace) -> + Fmt = "Error description:~n ~p~n~n" ++ + "Log files (may contain more information):~n ~s~n ~s~n~n", + Args = [Reason, log_location(kernel), log_location(sasl)], + boot_error(Reason, Fmt, Args, Stacktrace). + +boot_error(Reason, Fmt, Args, Stacktrace) -> + case Stacktrace of + not_available -> basic_boot_error(Reason, Fmt, Args); + _ -> basic_boot_error(Reason, Fmt ++ + "Stack trace:~n ~p~n~n", + Args ++ [Stacktrace]) + end. -boot_error(Format, Args) -> +basic_boot_error(Reason, Format, Args) -> io:format("~n~nBOOT FAILED~n===========~n~n" ++ Format, Args), - error_logger:error_msg(Format, Args), + rabbit_misc:local_info_msg(Format, Args), timer:sleep(1000), - exit({?MODULE, failure_during_boot}). + exit({?MODULE, failure_during_boot, Reason}). %%--------------------------------------------------------------------------- %% boot step functions @@ -540,7 +573,7 @@ recover() -> rabbit_binding:recover(rabbit_exchange:recover(), rabbit_amqqueue:start()). maybe_insert_default_data() -> - case rabbit_mnesia:is_db_empty() of + case rabbit_table:is_empty() of true -> insert_default_data(); false -> ok end. @@ -730,3 +763,10 @@ config_files() -> [File] <- Files]; error -> [] end. + +%% We don't want this in fhc since it references rabbit stuff. And we can't put +%% this in the bootstep directly. +start_fhc() -> + rabbit_sup:start_restartable_child( + file_handle_cache, + [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]). diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index d16d90a4..e6625b2b 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -18,22 +18,28 @@ -behaviour(gen_event). --export([start/0, stop/0, register/2, on_node_up/1, on_node_down/1]). +-export([start_link/0, start/0, stop/0, register/2, set_alarm/1, + clear_alarm/1, get_alarms/0, on_node_up/1, on_node_down/1]). -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2, code_change/3]). -export([remote_conserve_resources/3]). %% Internal use only --record(alarms, {alertees, alarmed_nodes}). +-define(SERVER, ?MODULE). + +-record(alarms, {alertees, alarmed_nodes, alarms}). %%---------------------------------------------------------------------------- -ifdef(use_specs). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(register/2 :: (pid(), rabbit_types:mfargs()) -> boolean()). +-spec(set_alarm/1 :: (any()) -> 'ok'). +-spec(clear_alarm/1 :: (any()) -> 'ok'). -spec(on_node_up/1 :: (node()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). @@ -41,59 +47,70 @@ %%---------------------------------------------------------------------------- +start_link() -> + gen_event:start_link({local, ?SERVER}). 
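%% A minimal sketch of the reworked alarm API: alarms are now handled by
%% this module under its own registered event manager rather than by the
%% OTP alarm_handler, using the alarm terms matched further down
%% (assuming no other alarms are currently set):
%%
%%   ok = rabbit_alarm:set_alarm({{resource_limit, memory, node()}, []}),
%%   [{{resource_limit, memory, _}, []}] = rabbit_alarm:get_alarms(),
%%   ok = rabbit_alarm:clear_alarm({resource_limit, memory, node()}).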
+ start() -> - ok = alarm_handler:add_alarm_handler(?MODULE, []), + ok = rabbit_sup:start_restartable_child(?MODULE), + ok = gen_event:add_handler(?SERVER, ?MODULE, []), {ok, MemoryWatermark} = application:get_env(vm_memory_high_watermark), - rabbit_sup:start_restartable_child(vm_memory_monitor, [MemoryWatermark]), - + rabbit_sup:start_restartable_child( + vm_memory_monitor, [MemoryWatermark, fun rabbit_alarm:set_alarm/1, + fun rabbit_alarm:clear_alarm/1]), {ok, DiskLimit} = application:get_env(disk_free_limit), rabbit_sup:start_restartable_child(rabbit_disk_monitor, [DiskLimit]), ok. -stop() -> - ok = alarm_handler:delete_alarm_handler(?MODULE). +stop() -> ok. register(Pid, HighMemMFA) -> - gen_event:call(alarm_handler, ?MODULE, - {register, Pid, HighMemMFA}, + gen_event:call(?SERVER, ?MODULE, {register, Pid, HighMemMFA}, infinity). -on_node_up(Node) -> gen_event:notify(alarm_handler, {node_up, Node}). +set_alarm(Alarm) -> gen_event:notify(?SERVER, {set_alarm, Alarm}). +clear_alarm(Alarm) -> gen_event:notify(?SERVER, {clear_alarm, Alarm}). + +get_alarms() -> gen_event:call(?SERVER, ?MODULE, get_alarms, infinity). -on_node_down(Node) -> gen_event:notify(alarm_handler, {node_down, Node}). +on_node_up(Node) -> gen_event:notify(?SERVER, {node_up, Node}). +on_node_down(Node) -> gen_event:notify(?SERVER, {node_down, Node}). -%% Can't use alarm_handler:{set,clear}_alarm because that doesn't -%% permit notifying a remote node. remote_conserve_resources(Pid, Source, true) -> - gen_event:notify({alarm_handler, node(Pid)}, + gen_event:notify({?SERVER, node(Pid)}, {set_alarm, {{resource_limit, Source, node()}, []}}); remote_conserve_resources(Pid, Source, false) -> - gen_event:notify({alarm_handler, node(Pid)}, + gen_event:notify({?SERVER, node(Pid)}, {clear_alarm, {resource_limit, Source, node()}}). + %%---------------------------------------------------------------------------- init([]) -> {ok, #alarms{alertees = dict:new(), - alarmed_nodes = dict:new()}}. + alarmed_nodes = dict:new(), + alarms = []}}. handle_call({register, Pid, HighMemMFA}, State) -> {ok, 0 < dict:size(State#alarms.alarmed_nodes), internal_register(Pid, HighMemMFA, State)}; +handle_call(get_alarms, State = #alarms{alarms = Alarms}) -> + {ok, Alarms, State}; + handle_call(_Request, State) -> {ok, not_understood, State}. -handle_event({set_alarm, {{resource_limit, Source, Node}, []}}, State) -> - {ok, maybe_alert(fun dict:append/3, Node, Source, State)}; +handle_event({set_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + handle_set_alarm(Alarm, State#alarms{alarms = [Alarm|Alarms]}); -handle_event({clear_alarm, {resource_limit, Source, Node}}, State) -> - {ok, maybe_alert(fun dict_unappend/3, Node, Source, State)}; +handle_event({clear_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + handle_clear_alarm(Alarm, State#alarms{alarms = lists:keydelete(Alarm, 1, + Alarms)}); handle_event({node_up, Node}, State) -> %% Must do this via notify and not call to avoid possible deadlock. ok = gen_event:notify( - {alarm_handler, Node}, + {?SERVER, Node}, {register, self(), {?MODULE, remote_conserve_resources, []}}), {ok, State}; @@ -186,3 +203,25 @@ internal_register(Pid, {M, F, A} = HighMemMFA, end, NewAlertees = dict:store(Pid, HighMemMFA, Alertees), State#alarms{alertees = NewAlertees}. 
+ +handle_set_alarm({{resource_limit, Source, Node}, []}, State) -> + rabbit_log:warning("~s resource limit alarm set on node ~p~n", + [Source, Node]), + {ok, maybe_alert(fun dict:append/3, Node, Source, State)}; +handle_set_alarm({file_descriptor_limit, []}, State) -> + rabbit_log:warning("file descriptor limit alarm set~n"), + {ok, State}; +handle_set_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' set~n", [Alarm]), + {ok, State}. + +handle_clear_alarm({resource_limit, Source, Node}, State) -> + rabbit_log:warning("~s resource limit alarm cleared on node ~p~n", + [Source, Node]), + {ok, maybe_alert(fun dict_unappend/3, Node, Source, State)}; +handle_clear_alarm(file_descriptor_limit, State) -> + rabbit_log:warning("file descriptor limit alarm cleared~n"), + {ok, State}; +handle_clear_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' cleared~n", [Alarm]), + {ok, State}. diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index afbaea65..6ad85b24 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -22,14 +22,14 @@ check_exclusive_access/2, with_exclusive_access_or_die/3, stat/1, deliver/2, deliver_flow/2, requeue/3, ack/3, reject/4]). -export([list/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). --export([force_event_refresh/0]). +-export([force_event_refresh/0, wake_up/1]). -export([consumers/1, consumers_all/1, consumer_info_keys/0]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). -export([notify_sent/2, notify_sent_queue_down/1, unblock/2, flush_all/2]). -export([notify_down_all/2, limit_all/3]). -export([on_node_down/1]). -export([update/2, store_queue/1, policy_changed/2]). - +-export([start_mirroring/1, stop_mirroring/1]). %% internal -export([internal_declare/2, internal_delete/2, run_backing_queue/3, @@ -40,6 +40,8 @@ -define(INTEGER_ARG_TYPES, [byte, short, signedint, long]). +-define(MAX_EXPIRY_TIMER, 4294967295). + -define(MORE_CONSUMER_CREDIT_AFTER, 50). -define(FAILOVER_WAIT_MILLIS, 100). @@ -58,7 +60,7 @@ -type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). --type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). +-type(routing_result() :: 'routed' | 'unroutable'). -type(queue_or_not_found() :: rabbit_types:amqqueue() | 'not_found'). -spec(start/0 :: () -> [name()]). @@ -102,6 +104,7 @@ -spec(info_all/2 :: (rabbit_types:vhost(), rabbit_types:info_keys()) -> [rabbit_types:infos()]). -spec(force_event_refresh/0 :: () -> 'ok'). +-spec(wake_up/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(consumers/1 :: (rabbit_types:amqqueue()) -> [{pid(), rabbit_types:ctag(), boolean()}]). @@ -162,6 +165,8 @@ -spec(store_queue/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(policy_changed/2 :: (rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok'). +-spec(start_mirroring/1 :: (pid()) -> 'ok'). +-spec(stop_mirroring/1 :: (pid()) -> 'ok'). -endif. 
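%% Annotation (illustration, not part of the patch): the declare/5 rework
%% in the next hunk is the heart of this branch - per-queue x-ha-policy
%% arguments give way to centrally managed policies. rabbit_policy:set/1
%% stamps the new #amqqueue record and
%% rabbit_mirror_queue_misc:suggested_queue_nodes/1 picks where it runs.
%% Under the new scheme mirroring is configured roughly like this (the
%% set_policy syntax is shown for orientation only and may differ in
%% detail):
%%
%%   rabbitmqctl set_policy ha-all "^ha\." '{"ha-mode":"all"}'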
@@ -207,18 +212,20 @@ recover_durable_queues(DurableQueues) -> declare(QueueName, Durable, AutoDelete, Args, Owner) -> ok = check_declare_arguments(QueueName, Args), - {Node, MNodes} = determine_queue_nodes(Args), - Q = start_queue_process(Node, #amqqueue{name = QueueName, - durable = Durable, - auto_delete = AutoDelete, - arguments = Args, - exclusive_owner = Owner, - pid = none, - slave_pids = [], - mirror_nodes = MNodes}), - case gen_server2:call(Q#amqqueue.pid, {init, false}, infinity) of + Q0 = rabbit_policy:set(#amqqueue{name = QueueName, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, + exclusive_owner = Owner, + pid = none, + slave_pids = [], + sync_slave_pids = [], + gm_pids = []}), + {Node, _MNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q0), + Q1 = start_queue_process(Node, Q0), + case gen_server2:call(Q1#amqqueue.pid, {init, false}, infinity) of not_found -> rabbit_misc:not_found(QueueName); - Q1 -> Q1 + Q2 -> Q2 end. internal_declare(Q, true) -> @@ -267,24 +274,8 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -policy_changed(_Q1, _Q2) -> - ok. - -determine_queue_nodes(Args) -> - Policy = rabbit_misc:table_lookup(Args, <<"x-ha-policy">>), - PolicyParams = rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>), - case {Policy, PolicyParams} of - {{_Type, <<"nodes">>}, {array, Nodes}} -> - case [list_to_atom(binary_to_list(Node)) || - {longstr, Node} <- Nodes] of - [Node] -> {Node, undefined}; - [First | Rest] -> {First, [First | Rest]} - end; - {{_Type, <<"all">>}, _} -> - {node(), all}; - _ -> - {node(), undefined} - end. +policy_changed(Q1, Q2) -> + rabbit_mirror_queue_misc:update_mirrors(Q1, Q2). start_queue_process(Node, Q) -> {ok, Pid} = rabbit_amqqueue_sup:start_child(Node, [Q]), @@ -307,10 +298,17 @@ lookup(Name) -> with(Name, F, E) -> case lookup(Name) of - {ok, Q = #amqqueue{slave_pids = []}} -> - rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); - {ok, Q} -> - E1 = fun () -> timer:sleep(25), with(Name, F, E) end, + {ok, Q = #amqqueue{pid = QPid}} -> + %% We check is_process_alive(QPid) in case we receive a + %% nodedown (for example) in F() that has nothing to do + %% with the QPid. + E1 = fun () -> + case rabbit_misc:is_process_alive(QPid) of + true -> E(); + false -> timer:sleep(25), + with(Name, F, E) + end + end, rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); {error, not_found} -> E() @@ -351,13 +349,11 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, RequiredArgs) -> rabbit_misc:assert_args_equivalence( - Args, RequiredArgs, QueueName, - [<<"x-expires">>, <<"x-message-ttl">>, <<"x-ha-policy">>]). + Args, RequiredArgs, QueueName, [<<"x-expires">>, <<"x-message-ttl">>]). 
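%% Annotation (illustration, not part of the patch): the x-expires and
%% x-message-ttl checks below gain a ?MAX_EXPIRY_TIMER upper bound
%% (4294967295 = 2^32 - 1, defined earlier in this patch). That is the
%% largest timeout erlang:send_after/3 accepts; anything bigger makes the
%% timer BIF fail with badarg, so such values are rejected at declare
%% time instead:
%%
%%   erlang:send_after(16#FFFFFFFF, self(), ping),      %% largest legal value
%%   erlang:send_after(16#FFFFFFFF + 1, self(), ping).  %% fails with badarg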
check_declare_arguments(QueueName, Args) -> Checks = [{<<"x-expires">>, fun check_positive_int_arg/2}, {<<"x-message-ttl">>, fun check_non_neg_int_arg/2}, - {<<"x-ha-policy">>, fun check_ha_policy_arg/2}, {<<"x-dead-letter-exchange">>, fun check_string_arg/2}, {<<"x-dead-letter-routing-key">>, fun check_dlxrk_arg/2}], [case rabbit_misc:table_lookup(Args, Key) of @@ -386,16 +382,18 @@ check_int_arg({Type, _}, _) -> check_positive_int_arg({Type, Val}, Args) -> case check_int_arg({Type, Val}, Args) of - ok when Val > 0 -> ok; - ok -> {error, {value_zero_or_less, Val}}; - Error -> Error + ok when Val > ?MAX_EXPIRY_TIMER -> {error, {value_too_big, Val}}; + ok when Val > 0 -> ok; + ok -> {error, {value_zero_or_less, Val}}; + Error -> Error end. check_non_neg_int_arg({Type, Val}, Args) -> case check_int_arg({Type, Val}, Args) of - ok when Val >= 0 -> ok; - ok -> {error, {value_less_than_zero, Val}}; - Error -> Error + ok when Val > ?MAX_EXPIRY_TIMER -> {error, {value_too_big, Val}}; + ok when Val >= 0 -> ok; + ok -> {error, {value_less_than_zero, Val}}; + Error -> Error end. check_dlxrk_arg({longstr, _}, Args) -> @@ -406,29 +404,6 @@ check_dlxrk_arg({longstr, _}, Args) -> check_dlxrk_arg({Type, _}, _Args) -> {error, {unacceptable_type, Type}}. -check_ha_policy_arg({longstr, <<"all">>}, _Args) -> - ok; -check_ha_policy_arg({longstr, <<"nodes">>}, Args) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>) of - undefined -> - {error, {require, 'x-ha-policy-params'}}; - {array, []} -> - {error, {require_non_empty_list_of_nodes_for_ha}}; - {array, Ary} -> - case lists:all(fun ({longstr, _Node}) -> true; - (_ ) -> false - end, Ary) of - true -> ok; - false -> {error, {require_node_list_as_longstrs_for_ha, Ary}} - end; - {Type, _} -> - {error, {ha_nodes_policy_params_not_array_of_longstr, Type}} - end; -check_ha_policy_arg({longstr, Policy}, _Args) -> - {error, {invalid_ha_policy, Policy}}; -check_ha_policy_arg({Type, _}, _Args) -> - {error, {unacceptable_type, Type}}. - list() -> mnesia:dirty_match_object(rabbit_queue, #amqqueue{_ = '_'}). @@ -475,6 +450,8 @@ force_event_refresh(QNames) -> force_event_refresh(Failed) end. +wake_up(#amqqueue{pid = QPid}) -> gen_server2:cast(QPid, wake_up). + consumers(#amqqueue{ pid = QPid }) -> delegate_call(QPid, consumers). @@ -561,7 +538,12 @@ flush_all(QPids, ChPid) -> internal_delete1(QueueName) -> ok = mnesia:delete({rabbit_queue, QueueName}), - ok = mnesia:delete({rabbit_durable_queue, QueueName}), + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:wread({rabbit_durable_queue, QueueName}) of + [] -> ok; + [_] -> ok = mnesia:delete({rabbit_durable_queue, QueueName}) + end, %% we want to execute some things, as decided by rabbit_exchange, %% after the transaction. rabbit_binding:remove_for_destination(QueueName). @@ -591,6 +573,9 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). +start_mirroring(QPid) -> ok = delegate_call(QPid, start_mirroring). +stop_mirroring(QPid) -> ok = delegate_call(QPid, stop_mirroring). 
+ on_node_down(Node) -> rabbit_misc:execute_mnesia_tx_with_tail( fun () -> QsDels = @@ -599,7 +584,7 @@ on_node_down(Node) -> slave_pids = []} <- mnesia:table(rabbit_queue), node(Pid) == Node andalso - not is_process_alive(Pid)])), + not rabbit_misc:is_process_alive(Pid)])), {Qs, Dels} = lists:unzip(QsDels), T = rabbit_binding:process_deletions( lists:foldl(fun rabbit_binding:combine_deletions/2, @@ -625,60 +610,76 @@ pseudo_queue(QueueName, Pid) -> auto_delete = false, arguments = [], pid = Pid, - slave_pids = [], - mirror_nodes = undefined}. + slave_pids = []}. -deliver([], #delivery{mandatory = false, immediate = false}, _Flow) -> +deliver([], #delivery{mandatory = false}, _Flow) -> %% /dev/null optimisation {routed, []}; -deliver(Qs, Delivery = #delivery{mandatory = false, immediate = false}, Flow) -> - %% optimisation: when Mandatory = false and Immediate = false, - %% rabbit_amqqueue:deliver will deliver the message to the queue - %% process asynchronously, and return true, which means all the - %% QPids will always be returned. It is therefore safe to use a - %% fire-and-forget cast here and return the QPids - the semantics - %% is preserved. This scales much better than the non-immediate - %% case below. - QPids = qpids(Qs), +deliver(Qs, Delivery = #delivery{mandatory = false}, Flow) -> + %% optimisation: when Mandatory = false, rabbit_amqqueue:deliver + %% will deliver the message to the queue process asynchronously, + %% and return true, which means all the QPids will always be + %% returned. It is therefore safe to use a fire-and-forget cast + %% here and return the QPids - the semantics is preserved. This + %% scales much better than the case below. + {MPids, SPids} = qpids(Qs), + QPids = MPids ++ SPids, case Flow of flow -> [credit_flow:send(QPid) || QPid <- QPids]; noflow -> ok end, - delegate:invoke_no_result( - QPids, fun (QPid) -> - gen_server2:cast(QPid, {deliver, Delivery, Flow}) - end), + + %% We let slaves know that they were being addressed as slaves at + %% the time - if they receive such a message from the channel + %% after they have become master they should mark the message as + %% 'delivered' since they do not know what the master may have + %% done with it. + MMsg = {deliver, Delivery, false, Flow}, + SMsg = {deliver, Delivery, true, Flow}, + delegate:invoke_no_result(MPids, + fun (QPid) -> gen_server2:cast(QPid, MMsg) end), + delegate:invoke_no_result(SPids, + fun (QPid) -> gen_server2:cast(QPid, SMsg) end), {routed, QPids}; -deliver(Qs, Delivery = #delivery{mandatory = Mandatory, immediate = Immediate}, - _Flow) -> - QPids = qpids(Qs), - {Success, _} = - delegate:invoke( - QPids, fun (QPid) -> - gen_server2:call(QPid, {deliver, Delivery}, infinity) - end), - case {Mandatory, Immediate, - lists:foldl(fun ({QPid, true}, {_, H}) -> {true, [QPid | H]}; - ({_, false}, {_, H}) -> {true, H} - end, {false, []}, Success)} of - {true, _ , {false, []}} -> {unroutable, []}; - {_ , true, {_ , []}} -> {not_delivered, []}; - {_ , _ , {_ , R}} -> {routed, R} +deliver(Qs, Delivery, _Flow) -> + {MPids, SPids} = qpids(Qs), + %% see comment above + MMsg = {deliver, Delivery, false}, + SMsg = {deliver, Delivery, true}, + {MRouted, _} = delegate:invoke( + MPids, fun (QPid) -> + ok = gen_server2:call(QPid, MMsg, infinity) + end), + {SRouted, _} = delegate:invoke( + SPids, fun (QPid) -> + ok = gen_server2:call(QPid, SMsg, infinity) + end), + case MRouted ++ SRouted of + [] -> {unroutable, []}; + R -> {routed, [QPid || {QPid, ok} <- R]} end. 
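%% Annotation (illustration, not part of the patch): deliver/3 above now
%% addresses masters and slaves separately so that slaves can flag
%% channel deliveries as 'delivered'. The reworked qpids/1 that follows
%% supplies the split; for instance, with placeholder pids M1, M2, S1, S2:
%%
%%   qpids([#amqqueue{pid = M1, slave_pids = [S1, S2]},
%%          #amqqueue{pid = M2, slave_pids = []}])
%%     =:= {[M2, M1], [S1, S2]}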
-qpids(Qs) -> lists:append([[QPid | SPids] || - #amqqueue{pid = QPid, slave_pids = SPids} <- Qs]). +qpids(Qs) -> + {MPids, SPids} = lists:foldl(fun (#amqqueue{pid = QPid, slave_pids = SPids}, + {MPidAcc, SPidAcc}) -> + {[QPid | MPidAcc], [SPids | SPidAcc]} + end, {[], []}, Qs), + {MPids, lists:append(SPids)}. safe_delegate_call_ok(F, Pids) -> - case delegate:invoke(Pids, fun (Pid) -> - rabbit_misc:with_exit_handler( - fun () -> ok end, - fun () -> F(Pid) end) - end) of - {_, []} -> ok; - {_, Bad} -> {error, Bad} + {_, Bads} = delegate:invoke(Pids, fun (Pid) -> + rabbit_misc:with_exit_handler( + fun () -> ok end, + fun () -> F(Pid) end) + end), + case lists:filter( + fun ({_Pid, {exit, {R, _}, _}}) -> rabbit_misc:is_abnormal_exit(R); + ({_Pid, _}) -> false + end, Bads) of + [] -> ok; + Bads1 -> {error, Bads1} end. delegate_call(Pid, Msg) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b42d1aea..8d05a78c 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -26,7 +26,7 @@ -export([start_link/1, info_keys/0]). --export([init_with_backing_queue_state/8]). +-export([init_with_backing_queue_state/7]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/3, @@ -47,6 +47,7 @@ msg_id_to_channel, ttl, ttl_timer_ref, + ttl_timer_expiry, senders, publish_seqno, unconfirmed, @@ -75,8 +76,8 @@ -spec(start_link/1 :: (rabbit_types:amqqueue()) -> rabbit_types:ok_pid_or_error()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). --spec(init_with_backing_queue_state/8 :: - (rabbit_types:amqqueue(), atom(), tuple(), any(), [any()], +-spec(init_with_backing_queue_state/7 :: + (rabbit_types:amqqueue(), atom(), tuple(), any(), [rabbit_types:delivery()], pmon:pmon(), dict()) -> #q{}). -endif. @@ -85,14 +86,17 @@ -define(STATISTICS_KEYS, [pid, + policy, exclusive_consumer_pid, exclusive_consumer_tag, messages_ready, messages_unacknowledged, messages, consumers, + active_consumers, memory, slave_pids, + synchronised_slave_pids, backing_queue_status ]). @@ -102,13 +106,11 @@ durable, auto_delete, arguments, - owner_pid, - slave_pids, - synchronised_slave_pids + owner_pid ]). -define(INFO_KEYS, - ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid, slave_pids]). + ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). %%---------------------------------------------------------------------------- @@ -144,7 +146,7 @@ init(Q) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, - RateTRef, AckTags, Deliveries, Senders, MTC) -> + RateTRef, Deliveries, Senders, MTC) -> case Owner of none -> ok; _ -> erlang:monitor(process, Owner) @@ -166,12 +168,10 @@ init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, delayed_stop = undefined, queue_monitors = pmon:new(), msg_id_to_channel = MTC}, - State1 = requeue_and_run(AckTags, process_args( - rabbit_event:init_stats_timer( - State, #q.stats_timer))), - lists:foldl( - fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end, - State1, Deliveries). + State1 = process_args(rabbit_event:init_stats_timer(State, #q.stats_timer)), + lists:foldl(fun (Delivery, StateN) -> + deliver_or_enqueue(Delivery, true, StateN) + end, State1, Deliveries). 
terminate(shutdown = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); @@ -179,7 +179,6 @@ terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); terminate(Reason, State = #q{q = #amqqueue{name = QName}, backing_queue = BQ}) -> - %% FIXME: How do we cancel active subscriptions? terminate_shutdown( fun (BQS) -> BQS1 = BQ:delete_and_terminate(Reason, BQS), @@ -230,8 +229,7 @@ matches(false, Q1, Q2) -> Q1#amqqueue.exclusive_owner =:= Q2#amqqueue.exclusive_owner andalso Q1#amqqueue.arguments =:= Q2#amqqueue.arguments andalso Q1#amqqueue.pid =:= Q2#amqqueue.pid andalso - Q1#amqqueue.slave_pids =:= Q2#amqqueue.slave_pids andalso - Q1#amqqueue.mirror_nodes =:= Q2#amqqueue.mirror_nodes. + Q1#amqqueue.slave_pids =:= Q2#amqqueue.slave_pids. bq_init(BQ, Q, Recover) -> Self = self(), @@ -296,11 +294,11 @@ next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> timed -> {ensure_sync_timer(State1), 0 } end. -backing_queue_module(#amqqueue{arguments = Args}) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of - undefined -> {ok, BQM} = application:get_env(backing_queue_module), - BQM; - _Policy -> rabbit_mirror_queue_master +backing_queue_module(Q) -> + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> {ok, BQM} = application:get_env(backing_queue_module), + BQM; + true -> rabbit_mirror_queue_master end. ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> @@ -499,32 +497,29 @@ confirm_messages(MsgIds, State = #q{msg_id_to_channel = MTC}) -> rabbit_misc:gb_trees_foreach(fun rabbit_misc:confirm_to_sender/2, CMs), State#q{msg_id_to_channel = MTC1}. -should_confirm_message(#delivery{msg_seq_no = undefined}, _State) -> - never; -should_confirm_message(#delivery{sender = SenderPid, +send_or_record_confirm(#delivery{msg_seq_no = undefined}, State) -> + {never, State}; +send_or_record_confirm(#delivery{sender = SenderPid, msg_seq_no = MsgSeqNo, message = #basic_message { is_persistent = true, id = MsgId}}, - #q{q = #amqqueue{durable = true}}) -> - {eventually, SenderPid, MsgSeqNo, MsgId}; -should_confirm_message(#delivery{sender = SenderPid, - msg_seq_no = MsgSeqNo}, - _State) -> - {immediately, SenderPid, MsgSeqNo}. - -needs_confirming({eventually, _, _, _}) -> true; -needs_confirming(_) -> false. - -maybe_record_confirm_message({eventually, SenderPid, MsgSeqNo, MsgId}, - State = #q{msg_id_to_channel = MTC}) -> - State#q{msg_id_to_channel = - gb_trees:insert(MsgId, {SenderPid, MsgSeqNo}, MTC)}; -maybe_record_confirm_message({immediately, SenderPid, MsgSeqNo}, State) -> + State = #q{q = #amqqueue{durable = true}, + msg_id_to_channel = MTC}) -> + MTC1 = gb_trees:insert(MsgId, {SenderPid, MsgSeqNo}, MTC), + {eventually, State#q{msg_id_to_channel = MTC1}}; +send_or_record_confirm(#delivery{sender = SenderPid, + msg_seq_no = MsgSeqNo}, State) -> rabbit_misc:confirm_to_sender(SenderPid, [MsgSeqNo]), - State; -maybe_record_confirm_message(_Confirm, State) -> - State. + {immediately, State}. + +discard(#delivery{sender = SenderPid, message = #basic_message{id = MsgId}}, + State) -> + %% fake an 'eventual' confirm from BQ; noop if not needed + State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = + confirm_messages([MsgId], State), + BQS1 = BQ:discard(MsgId, SenderPid, BQS), + State1#q{backing_queue_state = BQS1}. 
run_message_queue(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = @@ -534,60 +529,50 @@ run_message_queue(State) -> BQ:is_empty(BQS), State1), State2. -attempt_delivery(#delivery{sender = SenderPid, message = Message}, Confirm, +attempt_delivery(Delivery = #delivery{sender = SenderPid, message = Message}, + Props = #message_properties{delivered = Delivered}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> case BQ:is_duplicate(Message, BQS) of {false, BQS1} -> deliver_msgs_to_consumers( - fun (AckRequired, State1 = #q{backing_queue_state = BQS2}) -> - Props = message_properties(Confirm, State1), + fun (true, State1 = #q{backing_queue_state = BQS2}) -> {AckTag, BQS3} = BQ:publish_delivered( - AckRequired, Message, Props, - SenderPid, BQS2), - {{Message, false, AckTag}, true, - State1#q{backing_queue_state = BQS3}} + Message, Props, SenderPid, BQS2), + {{Message, Delivered, AckTag}, + true, State1#q{backing_queue_state = BQS3}}; + (false, State1) -> + {{Message, Delivered, undefined}, + true, discard(Delivery, State1)} end, false, State#q{backing_queue_state = BQS1}); - {Duplicate, BQS1} -> - %% if the message has previously been seen by the BQ then - %% it must have been seen under the same circumstances as - %% now: i.e. if it is now a deliver_immediately then it - %% must have been before. - {case Duplicate of - published -> true; - discarded -> false - end, - State#q{backing_queue_state = BQS1}} + {published, BQS1} -> + {true, State#q{backing_queue_state = BQS1}}; + {discarded, BQS1} -> + {false, State#q{backing_queue_state = BQS1}} end. -deliver_or_enqueue(Delivery = #delivery{message = Message, - msg_seq_no = MsgSeqNo, - sender = SenderPid}, State) -> - Confirm = should_confirm_message(Delivery, State), - case attempt_delivery(Delivery, Confirm, State) of - {true, State1} -> - maybe_record_confirm_message(Confirm, State1); - %% the next two are optimisations - {false, State1 = #q{ttl = 0, dlx = undefined}} when Confirm == never -> - discard_delivery(Delivery, State1); - {false, State1 = #q{ttl = 0, dlx = undefined}} -> - rabbit_misc:confirm_to_sender(SenderPid, [MsgSeqNo]), - discard_delivery(Delivery, State1); - {false, State1} -> - State2 = #q{backing_queue = BQ, backing_queue_state = BQS} = - maybe_record_confirm_message(Confirm, State1), - Props = message_properties(Confirm, State2), +deliver_or_enqueue(Delivery = #delivery{message = Message, sender = SenderPid}, + Delivered, State) -> + {Confirm, State1} = send_or_record_confirm(Delivery, State), + Props = message_properties(Confirm, Delivered, State), + case attempt_delivery(Delivery, Props, State1) of + {true, State2} -> + State2; + %% The next one is an optimisation + {false, State2 = #q{ttl = 0, dlx = undefined}} -> + discard(Delivery, State2); + {false, State2 = #q{backing_queue = BQ, backing_queue_state = BQS}} -> BQS1 = BQ:publish(Message, Props, SenderPid, BQS), - ensure_ttl_timer(State2#q{backing_queue_state = BQS1}) + ensure_ttl_timer(Props#message_properties.expiry, + State2#q{backing_queue_state = BQS1}) end. -requeue_and_run(AckTags, State = #q{backing_queue = BQ}) -> - run_backing_queue(BQ, fun (M, BQS) -> - {_MsgIds, BQS1} = M:requeue(AckTags, BQS), - BQS1 - end, State). +requeue_and_run(AckTags, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + run_message_queue(State#q{backing_queue_state = BQS1}). 
-fetch(AckRequired, State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +fetch(AckRequired, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), {Result, State#q{backing_queue_state = BQS1}}. @@ -681,12 +666,9 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -backing_queue_timeout(State = #q{backing_queue = BQ}) -> - run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). - -run_backing_queue(Mod, Fun, State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - run_message_queue(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}). +backing_queue_timeout(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + State#q{backing_queue_state = BQ:timeout(BQS)}. subtract_acks(ChPid, AckTags, State, Fun) -> case lookup_ch(ChPid) of @@ -698,15 +680,10 @@ subtract_acks(ChPid, AckTags, State, Fun) -> Fun(State) end. -discard_delivery(#delivery{sender = SenderPid, - message = Message}, - State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - State#q{backing_queue_state = BQ:discard(Message, SenderPid, BQS)}. - -message_properties(Confirm, #q{ttl = TTL}) -> +message_properties(Confirm, Delivered, #q{ttl = TTL}) -> #message_properties{expiry = calculate_msg_expiry(TTL), - needs_confirming = needs_confirming(Confirm)}. + needs_confirming = Confirm == eventually, + delivered = Delivered}. calculate_msg_expiry(undefined) -> undefined; calculate_msg_expiry(TTL) -> now_micros() + (TTL * 1000). @@ -717,28 +694,40 @@ drop_expired_messages(State = #q{backing_queue_state = BQS, backing_queue = BQ }) -> Now = now_micros(), DLXFun = dead_letter_fun(expired, State), - ExpirePred = fun (#message_properties{expiry = Expiry}) -> Now > Expiry end, - case DLXFun of - undefined -> {undefined, BQS1} = BQ:dropwhile(ExpirePred, false, BQS), - BQS1; - _ -> {Msgs, BQS1} = BQ:dropwhile(ExpirePred, true, BQS), - lists:foreach( - fun({Msg, AckTag}) -> DLXFun(Msg, AckTag) end, Msgs), - BQS1 - end, - ensure_ttl_timer(State#q{backing_queue_state = BQS1}). - -ensure_ttl_timer(State = #q{backing_queue = BQ, - backing_queue_state = BQS, - ttl = TTL, - ttl_timer_ref = undefined}) - when TTL =/= undefined -> - case BQ:is_empty(BQS) of - true -> State; - false -> TRef = erlang:send_after(TTL, self(), drop_expired), - State#q{ttl_timer_ref = TRef} + ExpirePred = fun (#message_properties{expiry = Exp}) -> Now >= Exp end, + {Props, BQS1} = case DLXFun of + undefined -> {Next, undefined, BQS2} = + BQ:dropwhile(ExpirePred, false, BQS), + {Next, BQS2}; + _ -> {Next, Msgs, BQS2} = + BQ:dropwhile(ExpirePred, true, BQS), + DLXFun(Msgs), + {Next, BQS2} + end, + ensure_ttl_timer(case Props of + undefined -> undefined; + #message_properties{expiry = Exp} -> Exp + end, State#q{backing_queue_state = BQS1}). 
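%% Annotation (illustration, not part of the patch): expiry times are
%% kept in microseconds (now_micros/0) but erlang:send_after/3 counts
%% milliseconds. In the ensure_ttl_timer clauses below,
%% (V + 999) div 1000 is a ceiling division, so the timer never fires
%% before the earliest expiry; e.g. with 1500us remaining:
%%
%%   After = (1500 + 999) div 1000,   %% = 2ms; plain div would give 1ms, too early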
+ +ensure_ttl_timer(undefined, State) -> + State; +ensure_ttl_timer(_Expiry, State = #q{ttl = undefined}) -> + State; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = undefined}) -> + After = (case Expiry - now_micros() of + V when V > 0 -> V + 999; %% always fire later + _ -> 0 + end) div 1000, + TRef = erlang:send_after(After, self(), drop_expired), + State#q{ttl_timer_ref = TRef, ttl_timer_expiry = Expiry}; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = TRef, + ttl_timer_expiry = TExpiry}) + when Expiry + 1000 < TExpiry -> + case erlang:cancel_timer(TRef) of + false -> State; + _ -> ensure_ttl_timer(Expiry, State#q{ttl_timer_ref = undefined}) end; -ensure_ttl_timer(State) -> +ensure_ttl_timer(_Expiry, State) -> State. ack_if_no_dlx(AckTags, State = #q{dlx = undefined, @@ -752,43 +741,23 @@ ack_if_no_dlx(_AckTags, State) -> dead_letter_fun(_Reason, #q{dlx = undefined}) -> undefined; dead_letter_fun(Reason, _State) -> - fun(Msg, AckTag) -> - gen_server2:cast(self(), {dead_letter, {Msg, AckTag}, Reason}) - end. - -dead_letter_publish(Msg, Reason, State = #q{publish_seqno = MsgSeqNo}) -> - DLMsg = #basic_message{exchange_name = XName} = - make_dead_letter_msg(Reason, Msg, State), - case rabbit_exchange:lookup(XName) of - {ok, X} -> - Delivery = rabbit_basic:delivery(false, false, DLMsg, MsgSeqNo), - {Queues, Cycles} = detect_dead_letter_cycles( - DLMsg, rabbit_exchange:route(X, Delivery)), - lists:foreach(fun log_cycle_once/1, Cycles), - QPids = rabbit_amqqueue:lookup(Queues), - {_, DeliveredQPids} = rabbit_amqqueue:deliver(QPids, Delivery), - DeliveredQPids; - {error, not_found} -> - [] - end. - -dead_letter_msg(Msg, AckTag, Reason, State = #q{publish_seqno = MsgSeqNo, - unconfirmed = UC}) -> - QPids = dead_letter_publish(Msg, Reason, State), - State1 = State#q{queue_monitors = pmon:monitor_all( - QPids, State#q.queue_monitors), - publish_seqno = MsgSeqNo + 1}, - case QPids of - [] -> cleanup_after_confirm([AckTag], State1); - _ -> UC1 = dtree:insert(MsgSeqNo, QPids, AckTag, UC), - noreply(State1#q{unconfirmed = UC1}) - end. + fun(Msgs) -> gen_server2:cast(self(), {dead_letter, Msgs, Reason}) end. + +dead_letter_publish(Msg, Reason, X, State = #q{publish_seqno = MsgSeqNo}) -> + DLMsg = make_dead_letter_msg(Reason, Msg, State), + Delivery = rabbit_basic:delivery(false, DLMsg, MsgSeqNo), + {Queues, Cycles} = detect_dead_letter_cycles( + DLMsg, rabbit_exchange:route(X, Delivery)), + lists:foreach(fun log_cycle_once/1, Cycles), + QPids = rabbit_amqqueue:lookup(Queues), + {_, DeliveredQPids} = rabbit_amqqueue:deliver(QPids, Delivery), + DeliveredQPids. handle_queue_down(QPid, Reason, State = #q{queue_monitors = QMons, unconfirmed = UC}) -> case pmon:is_monitored(QPid, QMons) of false -> noreply(State); - true -> case rabbit_misc:is_abnormal_termination(Reason) of + true -> case rabbit_misc:is_abnormal_exit(Reason) of true -> {Lost, _UC1} = dtree:take_all(QPid, UC), QNameS = rabbit_misc:rs(qname(State)), rabbit_log:warning("DLQ ~p for ~s died with " @@ -893,37 +862,7 @@ make_dead_letter_msg(Reason, now_micros() -> timer:now_diff(now(), {0,0,0}). -infos(Items, State) -> - {Prefix, Items1} = - case lists:member(synchronised_slave_pids, Items) of - true -> Prefix1 = slaves_status(State), - case lists:member(slave_pids, Items) of - true -> {Prefix1, Items -- [slave_pids]}; - false -> {proplists:delete(slave_pids, Prefix1), Items} - end; - false -> {[], Items} - end, - Prefix ++ [{Item, i(Item, State)} - || Item <- (Items1 -- [synchronised_slave_pids])]. 
- -slaves_status(#q{q = #amqqueue{name = Name}}) -> - case rabbit_amqqueue:lookup(Name) of - {ok, #amqqueue{mirror_nodes = undefined}} -> - [{slave_pids, ''}, {synchronised_slave_pids, ''}]; - {ok, #amqqueue{slave_pids = SPids}} -> - {Results, _Bad} = - delegate:invoke(SPids, fun rabbit_mirror_queue_slave:info/1), - {SPids1, SSPids} = - lists:foldl( - fun ({Pid, Infos}, {SPidsN, SSPidsN}) -> - {[Pid | SPidsN], - case proplists:get_bool(is_synchronised, Infos) of - true -> [Pid | SSPidsN]; - false -> SSPidsN - end} - end, {[], []}, Results), - [{slave_pids, SPids1}, {synchronised_slave_pids, SSPids}] - end. +infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; @@ -935,6 +874,12 @@ i(owner_pid, #q{q = #amqqueue{exclusive_owner = none}}) -> ''; i(owner_pid, #q{q = #amqqueue{exclusive_owner = ExclusiveOwner}}) -> ExclusiveOwner; +i(policy, #q{q = #amqqueue{name = Name}}) -> + {ok, Q} = rabbit_amqqueue:lookup(Name), + case rabbit_policy:name(Q) of + none -> ''; + Policy -> Policy + end; i(exclusive_consumer_pid, #q{exclusive_consumer = none}) -> ''; i(exclusive_consumer_pid, #q{exclusive_consumer = {ChPid, _ConsumerTag}}) -> @@ -952,13 +897,24 @@ i(messages, State) -> messages_unacknowledged]]); i(consumers, _) -> consumer_count(); +i(active_consumers, _) -> + active_consumer_count(); i(memory, _) -> {memory, M} = process_info(self(), memory), M; i(slave_pids, #q{q = #amqqueue{name = Name}}) -> - case rabbit_amqqueue:lookup(Name) of - {ok, #amqqueue{mirror_nodes = undefined}} -> []; - {ok, #amqqueue{slave_pids = SPids}} -> SPids + {ok, Q = #amqqueue{slave_pids = SPids}} = + rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> SPids + end; +i(synchronised_slave_pids, #q{q = #amqqueue{name = Name}}) -> + {ok, Q = #amqqueue{sync_slave_pids = SSPids}} = + rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> SSPids end; i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:status(BQS); @@ -1063,28 +1019,10 @@ handle_call({info, Items}, _From, State) -> handle_call(consumers, _From, State) -> reply(consumers(State), State); -handle_call({deliver, Delivery = #delivery{immediate = true}}, _From, State) -> - %% FIXME: Is this correct semantics? - %% - %% I'm worried in particular about the case where an exchange has - %% two queues against a particular routing key, and a message is - %% sent in immediate mode through the binding. In non-immediate - %% mode, both queues get the message, saving it for later if - %% there's noone ready to receive it just now. In immediate mode, - %% should both queues still get the message, somehow, or should - %% just all ready-to-consume queues get the message, with unready - %% queues discarding the message? - %% - Confirm = should_confirm_message(Delivery, State), - {Delivered, State1} = attempt_delivery(Delivery, Confirm, State), - reply(Delivered, case Delivered of - true -> maybe_record_confirm_message(Confirm, State1); - false -> discard_delivery(Delivery, State1) - end); - -handle_call({deliver, Delivery = #delivery{mandatory = true}}, From, State) -> - gen_server2:reply(From, true), - noreply(deliver_or_enqueue(Delivery, State)); +handle_call({deliver, Delivery, Delivered}, From, State) -> + %% Synchronous, "mandatory" deliver mode. 
+ gen_server2:reply(From, ok), + noreply(deliver_or_enqueue(Delivery, Delivered, State)); handle_call({notify_down, ChPid}, From, State) -> %% we want to do this synchronously, so that auto_deleted queues @@ -1200,6 +1138,23 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> ChPid, AckTags, State, fun (State1) -> requeue_and_run(AckTags, State1) end)); +handle_call(start_mirroring, _From, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + %% lookup again to get policy for init_with_existing_bq + {ok, Q} = rabbit_amqqueue:lookup(qname(State)), + true = BQ =/= rabbit_mirror_queue_master, %% assertion + BQ1 = rabbit_mirror_queue_master, + BQS1 = BQ1:init_with_existing_bq(Q, BQ, BQS), + reply(ok, State#q{backing_queue = BQ1, + backing_queue_state = BQS1}); + +handle_call(stop_mirroring, _From, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + BQ = rabbit_mirror_queue_master, %% assertion + {BQ1, BQS1} = BQ:stop_mirroring(BQS), + reply(ok, State#q{backing_queue = BQ1, + backing_queue_state = BQS1}); + handle_call(force_event_refresh, _From, State = #q{exclusive_consumer = Exclusive}) -> rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State)), @@ -1224,19 +1179,21 @@ handle_cast({confirm, MsgSeqNos, QPid}, State = #q{unconfirmed = UC}) -> handle_cast(_, State = #q{delayed_stop = DS}) when DS =/= undefined -> noreply(State); -handle_cast({run_backing_queue, Mod, Fun}, State) -> - noreply(run_backing_queue(Mod, Fun, State)); +handle_cast({run_backing_queue, Mod, Fun}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + noreply(run_message_queue( + State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)})); -handle_cast({deliver, Delivery = #delivery{sender = Sender}, Flow}, +handle_cast({deliver, Delivery = #delivery{sender = Sender}, Delivered, Flow}, State = #q{senders = Senders}) -> - %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. + %% Asynchronous, non-"mandatory" deliver mode. Senders1 = case Flow of flow -> credit_flow:ack(Sender), pmon:monitor(Sender, Senders); noflow -> Senders end, State1 = State#q{senders = Senders1}, - noreply(deliver_or_enqueue(Delivery, State1)); + noreply(deliver_or_enqueue(Delivery, Delivered, State1)); handle_cast({ack, AckTags, ChPid}, State) -> noreply(subtract_acks( @@ -1254,7 +1211,12 @@ handle_cast({reject, AckTags, Requeue, ChPid}, State) -> true -> fun (State1) -> requeue_and_run(AckTags, State1) end; false -> fun (State1 = #q{backing_queue = BQ, backing_queue_state = BQS}) -> - Fun = dead_letter_fun(rejected, State1), + Fun = + case dead_letter_fun(rejected, State1) of + undefined -> undefined; + F -> fun(M, A) -> F([{M, A}]) + end + end, BQS1 = BQ:fold(Fun, BQS, AckTags), ack_if_no_dlx( AckTags, @@ -1306,8 +1268,27 @@ handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State); -handle_cast({dead_letter, {Msg, AckTag}, Reason}, State) -> - dead_letter_msg(Msg, AckTag, Reason, State). 
+handle_cast({dead_letter, Msgs, Reason}, State = #q{dlx = XName}) -> + case rabbit_exchange:lookup(XName) of + {ok, X} -> + noreply(lists:foldl( + fun({Msg, AckTag}, State1 = #q{publish_seqno = SeqNo, + unconfirmed = UC, + queue_monitors = QMon}) -> + QPids = dead_letter_publish(Msg, Reason, X, + State1), + UC1 = dtree:insert(SeqNo, QPids, AckTag, UC), + QMons = pmon:monitor_all(QPids, QMon), + State1#q{queue_monitors = QMons, + publish_seqno = SeqNo + 1, + unconfirmed = UC1} + end, State, Msgs)); + {error, not_found} -> + cleanup_after_confirm([AckTag || {_, AckTag} <- Msgs], State) + end; + +handle_cast(wake_up, State) -> + noreply(State). %% We need to not ignore this as we need to remove outstanding %% confirms due to queue death. diff --git a/src/rabbit_auth_backend.erl b/src/rabbit_auth_backend.erl index e89951e7..c9475efd 100644 --- a/src/rabbit_auth_backend.erl +++ b/src/rabbit_auth_backend.erl @@ -20,7 +20,7 @@ %% A description proplist as with auth mechanisms, %% exchanges. Currently unused. --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Check a user can log in, given a username and a proplist of %% authentication information (e.g. [{password, Password}]). diff --git a/src/rabbit_auth_mechanism.erl b/src/rabbit_auth_mechanism.erl index eda6a743..c7d74dc3 100644 --- a/src/rabbit_auth_mechanism.erl +++ b/src/rabbit_auth_mechanism.erl @@ -19,7 +19,7 @@ -ifdef(use_specs). %% A description. --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% If this mechanism is enabled, should it be offered for a given socket? %% (primarily so EXTERNAL can be SSL-only) diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index dc144a0e..af660c60 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -18,17 +18,21 @@ -ifdef(use_specs). +-export_type([async_callback/0]). + %% We can't specify a per-queue ack/state with callback signatures -type(ack() :: any()). -type(state() :: any()). +-type(msg_ids() :: [rabbit_types:msg_id()]). -type(fetch_result(Ack) :: ('empty' | %% Message, IsDelivered, AckTag, Remaining_Len {rabbit_types:basic_message(), boolean(), Ack, non_neg_integer()})). -type(attempt_recovery() :: boolean()). -type(purged_msg_count() :: non_neg_integer()). --type(async_callback() :: fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). +-type(async_callback() :: + fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). -type(duration() :: ('undefined' | 'infinity' | number())). -type(msg_fun() :: fun((rabbit_types:basic_message(), ack()) -> 'ok') | @@ -80,12 +84,16 @@ %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). --callback publish_delivered(true, rabbit_types:basic_message(), - rabbit_types:message_properties(), pid(), state()) - -> {ack(), state()}; - (false, rabbit_types:basic_message(), +-callback publish_delivered(rabbit_types:basic_message(), rabbit_types:message_properties(), pid(), state()) - -> {undefined, state()}. + -> {ack(), state()}. + +%% Called to inform the BQ about messages which have reached the +%% queue, but are not going to be further passed to BQ for some +%% reason. Note that this may be invoked for messages for which +%% BQ:is_duplicate/2 has already returned {'published' | 'discarded', +%% BQS}. +-callback discard(rabbit_types:msg_id(), pid(), state()) -> state(). 
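%% Annotation (sketch, not part of the patch): discard/3, moved up next
%% to publish_delivered/4 and now keyed on msg_id rather than the whole
%% message, can be satisfied by a backing queue that keeps no duplicate
%% state with a plain no-op:
%%
%%   discard(_MsgId, _ChPid, State) -> State.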
%% Return ids of messages which have been confirmed since the last %% invocation of this function (or initialisation). @@ -114,16 +122,18 @@ %% first time the message id appears in the result of %% drain_confirmed. All subsequent appearances of that message id will %% be ignored. --callback drain_confirmed(state()) -> {[rabbit_guid:guid()], state()}. +-callback drain_confirmed(state()) -> {msg_ids(), state()}. %% Drop messages from the head of the queue while the supplied predicate returns %% true. Also accepts a boolean parameter that determines whether the messages %% necessitate an ack or not. If they do, the function returns a list of %% messages with the respective acktags. -callback dropwhile(msg_pred(), true, state()) - -> {[{rabbit_types:basic_message(), ack()}], state()}; + -> {rabbit_types:message_properties() | undefined, + [{rabbit_types:basic_message(), ack()}], state()}; (msg_pred(), false, state()) - -> {undefined, state()}. + -> {rabbit_types:message_properties() | undefined, + undefined, state()}. %% Produce the next message. -callback fetch(true, state()) -> {fetch_result(ack()), state()}; @@ -131,7 +141,7 @@ %% Acktags supplied are for messages which can now be forgotten %% about. Must return 1 msg_id per Ack, in the same order as Acks. --callback ack([ack()], state()) -> {[rabbit_guid:guid()], state()}. +-callback ack([ack()], state()) -> {msg_ids(), state()}. %% Acktags supplied are for messages which should be processed. The %% provided callback function is called with each message. @@ -139,7 +149,7 @@ %% Reinsert messages into the queue which have already been delivered %% and were pending acknowledgement. --callback requeue([ack()], state()) -> {[rabbit_guid:guid()], state()}. +-callback requeue([ack()], state()) -> {msg_ids(), state()}. %% How long is my queue? -callback len(state()) -> non_neg_integer(). @@ -147,6 +157,9 @@ %% Is my queue empty? -callback is_empty(state()) -> boolean(). +%% What's the queue depth, where depth = length + number of pending acks +-callback depth(state()) -> non_neg_integer(). + %% For the next three functions, the assumption is that you're %% monitoring something like the ingress and egress rates of the %% queue. The RAM duration is thus the length of time represented by @@ -191,13 +204,6 @@ -callback is_duplicate(rabbit_types:basic_message(), state()) -> {'false'|'published'|'discarded', state()}. -%% Called to inform the BQ about messages which have reached the -%% queue, but are not going to be further passed to BQ for some -%% reason. Note that this is may be invoked for messages for which -%% BQ:is_duplicate/2 has already returned {'published' | 'discarded', -%% BQS}. --callback discard(rabbit_types:basic_message(), pid(), state()) -> state(). - -else. -export([behaviour_info/1]). 
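%% Annotation (not part of the patch): the -else branch above keeps the
%% behaviour usable on Erlang/OTP releases predating the -callback
%% attribute (OTP R15): when specs are disabled the module falls back to
%% the classic behaviour_info/1 export, whose callback list the next hunk
%% updates to match the new publish_delivered/4, discard/3 and depth/1
%% signatures.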
@@ -205,11 +211,11 @@ behaviour_info(callbacks) -> [{start, 1}, {stop, 0}, {init, 3}, {terminate, 2}, {delete_and_terminate, 2}, {purge, 1}, {publish, 4}, - {publish_delivered, 5}, {drain_confirmed, 1}, {dropwhile, 3}, + {publish_delivered, 4}, {discard, 3}, {drain_confirmed, 1}, {dropwhile, 3}, {fetch, 2}, {ack, 2}, {fold, 3}, {requeue, 2}, {len, 1}, - {is_empty, 1}, {set_ram_duration_target, 2}, {ram_duration, 1}, - {needs_timeout, 1}, {timeout, 1}, {handle_pre_hibernate, 1}, - {status, 1}, {invoke, 3}, {is_duplicate, 2}, {discard, 3}]; + {is_empty, 1}, {depth, 1}, {set_ram_duration_target, 2}, + {ram_duration, 1}, {needs_timeout, 1}, {timeout, 1}, + {handle_pre_hibernate, 1}, {status, 1}, {invoke, 3}, {is_duplicate, 2}] ; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_backing_queue_qc.erl b/src/rabbit_backing_queue_qc.erl index a84800c0..b37fbb29 100644 --- a/src/rabbit_backing_queue_qc.erl +++ b/src/rabbit_backing_queue_qc.erl @@ -119,7 +119,7 @@ qc_publish_multiple(#state{}) -> qc_publish_delivered(#state{bqstate = BQ}) -> {call, ?BQMOD, publish_delivered, - [boolean(), qc_message(), #message_properties{}, self(), BQ]}. + [qc_message(), #message_properties{}, self(), BQ]}. qc_fetch(#state{bqstate = BQ}) -> {call, ?BQMOD, fetch, [boolean(), BQ]}. @@ -199,7 +199,7 @@ next_state(S, _BQ, {call, ?MODULE, publish_multiple, [PublishCount]}) -> next_state(S, Res, {call, ?BQMOD, publish_delivered, - [AckReq, Msg, MsgProps, _Pid, _BQ]}) -> + [Msg, MsgProps, _Pid, _BQ]}) -> #state{confirms = Confirms, acks = Acks, next_seq_id = NextSeq} = S, AckTag = {call, erlang, element, [1, Res]}, BQ1 = {call, erlang, element, [2, Res]}, @@ -213,10 +213,7 @@ next_state(S, Res, true -> gb_sets:add(MsgId, Confirms); _ -> Confirms end, - acks = case AckReq of - true -> [{AckTag, {NextSeq, {MsgProps, Msg}}}|Acks]; - false -> Acks - end + acks = [{AckTag, {NextSeq, {MsgProps, Msg}}}|Acks] }; next_state(S, Res, {call, ?BQMOD, fetch, [AckReq, _BQ]}) -> @@ -268,7 +265,7 @@ next_state(S, Res, {call, ?BQMOD, drain_confirmed, _Args}) -> S#state{bqstate = BQ1}; next_state(S, Res, {call, ?BQMOD, dropwhile, _Args}) -> - BQ = {call, erlang, element, [2, Res]}, + BQ = {call, erlang, element, [3, Res]}, #state{messages = Messages} = S, Msgs1 = drop_messages(Messages), S#state{bqstate = BQ, len = gb_trees:size(Msgs1), messages = Msgs1}; @@ -391,4 +388,13 @@ drop_messages(Messages) -> end end. +-else. + +-export([prop_disabled/0]). + +prop_disabled() -> + exit({compiled_without_proper, + "PropEr was not present during compilation of the test module. " + "Hence all tests are disabled."}). + -endif. diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 734456d3..db2b7e95 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -18,9 +18,9 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/4, publish/6, publish/1, +-export([publish/4, publish/5, publish/1, message/3, message/4, properties/1, append_table_header/3, - extract_headers/1, map_headers/2, delivery/4, header_routes/1]). + extract_headers/1, map_headers/2, delivery/3, header_routes/1]). -export([build_content/2, from_content/1]). %%---------------------------------------------------------------------------- @@ -40,13 +40,13 @@ -spec(publish/4 :: (exchange_input(), rabbit_router:routing_key(), properties_input(), body_input()) -> publish_result()). 
--spec(publish/6 :: - (exchange_input(), rabbit_router:routing_key(), boolean(), boolean(), +-spec(publish/5 :: + (exchange_input(), rabbit_router:routing_key(), boolean(), properties_input(), body_input()) -> publish_result()). -spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). --spec(delivery/4 :: - (boolean(), boolean(), rabbit_types:message(), undefined | integer()) -> +-spec(delivery/3 :: + (boolean(), rabbit_types:message(), undefined | integer()) -> rabbit_types:delivery()). -spec(message/4 :: (rabbit_exchange:name(), rabbit_router:routing_key(), @@ -80,18 +80,16 @@ %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. publish(Exchange, RoutingKeyBin, Properties, Body) -> - publish(Exchange, RoutingKeyBin, false, false, Properties, Body). + publish(Exchange, RoutingKeyBin, false, Properties, Body). %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. -publish(X = #exchange{name = XName}, RKey, Mandatory, Immediate, Props, Body) -> - publish(X, delivery(Mandatory, Immediate, - message(XName, RKey, properties(Props), Body), - undefined)); -publish(XName, RKey, Mandatory, Immediate, Props, Body) -> - publish(delivery(Mandatory, Immediate, - message(XName, RKey, properties(Props), Body), - undefined)). +publish(X = #exchange{name = XName}, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(X, delivery(Mandatory, Message, undefined)); +publish(XName, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(delivery(Mandatory, Message, undefined)). publish(Delivery = #delivery{ message = #basic_message{exchange_name = XName}}) -> @@ -105,8 +103,8 @@ publish(X, Delivery) -> {RoutingRes, DeliveredQPids} = rabbit_amqqueue:deliver(Qs, Delivery), {ok, RoutingRes, DeliveredQPids}. -delivery(Mandatory, Immediate, Message, MsgSeqNo) -> - #delivery{mandatory = Mandatory, immediate = Immediate, sender = self(), +delivery(Mandatory, Message, MsgSeqNo) -> + #delivery{mandatory = Mandatory, sender = self(), message = Message, msg_seq_no = MsgSeqNo}. build_content(Properties, BodyBin) when is_binary(BodyBin) -> diff --git a/src/rabbit_binding.erl b/src/rabbit_binding.erl index f0ea514d..0d23f716 100644 --- a/src/rabbit_binding.erl +++ b/src/rabbit_binding.erl @@ -169,9 +169,9 @@ add(Binding, InnerFun) -> add(Src, Dst, B) -> [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]], - case (not (SrcDurable andalso DstDurable) orelse - mnesia:read({rabbit_durable_route, B}) =:= []) of - true -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, + case (SrcDurable andalso DstDurable andalso + mnesia:read({rabbit_durable_route, B}) =/= []) of + false -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, fun mnesia:write/3), x_callback(transaction, Src, add_binding, B), Serial = rabbit_exchange:serial(Src), @@ -179,7 +179,7 @@ add(Src, Dst, B) -> x_callback(Serial, Src, add_binding, B), ok = rabbit_event:notify(binding_created, info(B)) end; - false -> rabbit_misc:const({error, binding_not_found}) + true -> rabbit_misc:const({error, binding_not_found}) end. remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end). 
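%% Annotation (not part of the patch): the rewritten add/3 guard above is
%% the negation of the old condition with the case branches swapped, not
%% a behaviour change:
%%
%%   not (SrcDurable andalso DstDurable)
%%       orelse mnesia:read({rabbit_durable_route, B}) =:= []
%%
%% is false exactly when
%%
%%   SrcDurable andalso DstDurable
%%       andalso mnesia:read({rabbit_durable_route, B}) =/= []
%%
%% is true, i.e. a binding is only rejected when both ends are durable
%% and the durable route already exists.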
@@ -277,21 +277,15 @@ has_for_source(SrcName) -> remove_for_source(SrcName) -> lock_route_tables(), Match = #route{binding = #binding{source = SrcName, _ = '_'}}, - Routes = lists:usort( - mnesia:match_object(rabbit_route, Match, write) ++ - mnesia:match_object(rabbit_durable_route, Match, write)), - [begin - sync_route(Route, fun mnesia:delete_object/3), - Route#route.binding - end || Route <- Routes]. + remove_routes( + lists:usort(mnesia:match_object(rabbit_route, Match, write) ++ + mnesia:match_object(rabbit_durable_route, Match, write))). -remove_for_destination(Dst) -> - remove_for_destination( - Dst, fun (R) -> sync_route(R, fun mnesia:delete_object/3) end). +remove_for_destination(DstName) -> + remove_for_destination(DstName, fun remove_routes/1). -remove_transient_for_destination(Dst) -> - remove_for_destination( - Dst, fun (R) -> sync_transient_route(R, fun mnesia:delete_object/3) end). +remove_transient_for_destination(DstName) -> + remove_for_destination(DstName, fun remove_transient_routes/1). %%---------------------------------------------------------------------------- @@ -308,6 +302,14 @@ binding_action(Binding = #binding{source = SrcName, Fun(Src, Dst, Binding#binding{args = SortedArgs}) end). +delete_object(Tab, Record, LockKind) -> + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:match_object(Tab, Record, LockKind) of + [] -> ok; + [_] -> mnesia:delete_object(Tab, Record, LockKind) + end. + sync_route(R, Fun) -> sync_route(R, true, true, Fun). sync_route(Route, true, true, Fun) -> @@ -370,16 +372,32 @@ lock_route_tables() -> rabbit_semi_durable_route, rabbit_durable_route]]. -remove_for_destination(DstName, DeleteFun) -> +remove_routes(Routes) -> + %% This partitioning allows us to suppress unnecessary delete + %% operations on disk tables, which require an fsync. + {TransientRoutes, DurableRoutes} = + lists:partition(fun (R) -> mnesia:match_object( + rabbit_durable_route, R, write) == [] end, + Routes), + [ok = sync_transient_route(R, fun mnesia:delete_object/3) || + R <- TransientRoutes], + [ok = sync_route(R, fun mnesia:delete_object/3) || + R <- DurableRoutes], + [R#route.binding || R <- Routes]. + +remove_transient_routes(Routes) -> + [begin + ok = sync_transient_route(R, fun delete_object/3), + R#route.binding + end || R <- Routes]. + +remove_for_destination(DstName, Fun) -> lock_route_tables(), Match = reverse_route( #route{binding = #binding{destination = DstName, _ = '_'}}), - ReverseRoutes = mnesia:match_object(rabbit_reverse_route, Match, write), - Bindings = [begin - Route = reverse_route(ReverseRoute), - ok = DeleteFun(Route), - Route#route.binding - end || ReverseRoute <- ReverseRoutes], + Routes = [reverse_route(R) || R <- mnesia:match_object( + rabbit_reverse_route, Match, write)], + Bindings = Fun(Routes), group_bindings_fold(fun maybe_auto_delete/3, new_deletions(), lists:keysort(#binding.source, Bindings)). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 22c6a223..0d13312b 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -136,7 +136,7 @@ flushed(Pid, QPid) -> gen_server2:cast(Pid, {flushed, QPid}). list() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_channel, list_local, []). 
list_local() -> @@ -267,7 +267,7 @@ handle_cast({method, Method, Content, Flow}, catch exit:Reason = #amqp_error{} -> MethodName = rabbit_misc:method_record_type(Method), - send_exception(Reason#amqp_error{method = MethodName}, State); + handle_exception(Reason#amqp_error{method = MethodName}, State); _:Reason -> {stop, {Reason, erlang:get_stacktrace()}, State} end; @@ -400,24 +400,29 @@ return_ok(State, false, Msg) -> {reply, Msg, State}. ok_msg(true, _Msg) -> undefined; ok_msg(false, Msg) -> Msg. -send_exception(Reason, State = #ch{protocol = Protocol, - channel = Channel, - writer_pid = WriterPid, - reader_pid = ReaderPid, - conn_pid = ConnPid}) -> - {CloseChannel, CloseMethod} = - rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - rabbit_log:error("connection ~p, channel ~p - error:~n~p~n", - [ConnPid, Channel, Reason]), +handle_exception(Reason, State = #ch{protocol = Protocol, + channel = Channel, + writer_pid = WriterPid, + reader_pid = ReaderPid, + conn_pid = ConnPid}) -> %% something bad's happened: notify_queues may not be 'ok' {_Result, State1} = notify_queues(State), - case CloseChannel of - Channel -> ok = rabbit_writer:send_command(WriterPid, CloseMethod), - {noreply, State1}; - _ -> ReaderPid ! {channel_exit, Channel, Reason}, - {stop, normal, State1} + case rabbit_binary_generator:map_exception(Channel, Reason, Protocol) of + {Channel, CloseMethod} -> + rabbit_log:error("connection ~p, channel ~p - soft error:~n~p~n", + [ConnPid, Channel, Reason]), + ok = rabbit_writer:send_command(WriterPid, CloseMethod), + {noreply, State1}; + {0, _} -> + ReaderPid ! {channel_exit, Channel, Reason}, + {stop, normal, State1} end. +precondition_failed(Format) -> precondition_failed(Format, []). + +precondition_failed(Format, Params) -> + rabbit_misc:protocol_error(precondition_failed, Format, Params). + return_queue_declare_ok(#resource{name = ActualName}, NoWait, MessageCount, ConsumerCount, State) -> return_ok(State#ch{most_recently_declared_queue = ActualName}, NoWait, @@ -460,10 +465,14 @@ check_user_id_header(#'P_basic'{user_id = Username}, #ch{user = #user{username = Username}}) -> ok; check_user_id_header(#'P_basic'{user_id = Claimed}, - #ch{user = #user{username = Actual}}) -> - rabbit_misc:protocol_error( - precondition_failed, "user_id property set to '~s' but " - "authenticated user was '~s'", [Claimed, Actual]). + #ch{user = #user{username = Actual, + tags = Tags}}) -> + case lists:member(impersonator, Tags) of + true -> ok; + false -> precondition_failed( + "user_id property set to '~s' but authenticated user was " + "'~s'", [Claimed, Actual]) + end. 
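%% Annotation (illustration, not part of the patch): the
%% check_user_id_header/2 clause above introduces the 'impersonator' user
%% tag - a publisher carrying it may set the user_id property to a name
%% other than its own authenticated user. Assuming the usual
%% tag-management command, and with proxy_user as a placeholder name, it
%% would be granted with:
%%
%%   rabbitmqctl set_user_tags proxy_user impersonator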
check_internal_exchange(#exchange{name = Name, internal = true}) -> rabbit_misc:protocol_error(access_refused, @@ -589,10 +598,12 @@ handle_method(_Method, _, #ch{tx_status = TxStatus}) handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; +handle_method(#'basic.publish'{immediate = true}, _Content, _State) -> + rabbit_misc:protocol_error(not_implemented, "immediate=true", []); + handle_method(#'basic.publish'{exchange = ExchangeNameBin, routing_key = RoutingKey, - mandatory = Mandatory, - immediate = Immediate}, + mandatory = Mandatory}, Content, State = #ch{virtual_host = VHostPath, tx_status = TxStatus, confirm_enabled = ConfirmEnabled, @@ -614,8 +625,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of {ok, Message} -> rabbit_trace:tap_trace_in(Message, TraceState), - Delivery = rabbit_basic:delivery(Mandatory, Immediate, Message, - MsgSeqNo), + Delivery = rabbit_basic:delivery(Mandatory, Message, MsgSeqNo), QNames = rabbit_exchange:route(Exchange, Delivery), {noreply, case TxStatus of @@ -625,8 +635,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, State1#ch{uncommitted_message_q = NewTMQ} end}; {error, Reason} -> - rabbit_misc:protocol_error(precondition_failed, - "invalid message: ~p", [Reason]) + precondition_failed("invalid message: ~p", [Reason]) end; handle_method(#'basic.nack'{delivery_tag = DeliveryTag, @@ -881,8 +890,7 @@ handle_method(#'exchange.delete'{exchange = ExchangeNameBin, {error, not_found} -> rabbit_misc:not_found(ExchangeName); {error, in_use} -> - rabbit_misc:protocol_error( - precondition_failed, "~s in use", [rabbit_misc:rs(ExchangeName)]); + precondition_failed("~s in use", [rabbit_misc:rs(ExchangeName)]); ok -> return_ok(State, NoWait, #'exchange.delete_ok'{}) end; @@ -980,11 +988,9 @@ handle_method(#'queue.delete'{queue = QueueNameBin, QueueName, ConnPid, fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of {error, in_use} -> - rabbit_misc:protocol_error( - precondition_failed, "~s in use", [rabbit_misc:rs(QueueName)]); + precondition_failed("~s in use", [rabbit_misc:rs(QueueName)]); {error, not_empty} -> - rabbit_misc:protocol_error( - precondition_failed, "~s not empty", [rabbit_misc:rs(QueueName)]); + precondition_failed("~s not empty", [rabbit_misc:rs(QueueName)]); {ok, PurgedMessageCount} -> return_ok(State, NoWait, #'queue.delete_ok'{message_count = PurgedMessageCount}) @@ -1019,15 +1025,13 @@ handle_method(#'queue.purge'{queue = QueueNameBin, #'queue.purge_ok'{message_count = PurgedMessageCount}); handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) -> - rabbit_misc:protocol_error( - precondition_failed, "cannot switch from confirm to tx mode", []); + precondition_failed("cannot switch from confirm to tx mode"); handle_method(#'tx.select'{}, _, State) -> {reply, #'tx.select_ok'{}, State#ch{tx_status = in_progress}}; handle_method(#'tx.commit'{}, _, #ch{tx_status = none}) -> - rabbit_misc:protocol_error( - precondition_failed, "channel is not transactional", []); + precondition_failed("channel is not transactional"); handle_method(#'tx.commit'{}, _, State = #ch{uncommitted_message_q = TMQ, @@ -1041,8 +1045,7 @@ handle_method(#'tx.commit'{}, _, {noreply, maybe_complete_tx(new_tx(State1#ch{tx_status = committing}))}; handle_method(#'tx.rollback'{}, _, #ch{tx_status = none}) -> - rabbit_misc:protocol_error( - precondition_failed, "channel is not transactional", []); + precondition_failed("channel is 
not transactional"); handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, uncommitted_acks = TAL, @@ -1052,8 +1055,7 @@ handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, {reply, #'tx.rollback_ok'{}, new_tx(State#ch{unacked_message_q = UAMQ1})}; handle_method(#'confirm.select'{}, _, #ch{tx_status = in_progress}) -> - rabbit_misc:protocol_error( - precondition_failed, "cannot switch from tx to confirm mode", []); + precondition_failed("cannot switch from tx to confirm mode"); handle_method(#'confirm.select'{nowait = NoWait}, _, State) -> return_ok(State#ch{confirm_enabled = true}, @@ -1119,7 +1121,7 @@ monitor_delivering_queue(false, QPid, State = #ch{queue_monitors = QMons, delivering_queues = sets:add_element(QPid, DQ)}. handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed = UC}) -> - case rabbit_misc:is_abnormal_termination(Reason) of + case rabbit_misc:is_abnormal_exit(Reason) of true -> {MXs, UC1} = dtree:take_all(QPid, UC), send_nacks(MXs, State#ch{unconfirmed = UC1}); false -> {MXs, UC1} = dtree:take(QPid, UC), @@ -1263,8 +1265,7 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> QTail, DeliveryTag, Multiple) end; {empty, _} -> - rabbit_misc:protocol_error( - precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) + precondition_failed("unknown delivery tag ~w", [DeliveryTag]) end. ack(Acked, State) -> @@ -1342,20 +1343,16 @@ deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ QPid <- DeliveredQPids]], publish, State2), State2. -process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> +process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> ok = basic_return(Msg, State, no_route), maybe_incr_stats([{Msg#basic_message.exchange_name, 1}], return_unroutable, State), record_confirm(MsgSeqNo, XName, State); -process_routing_result(not_delivered, _, XName, MsgSeqNo, Msg, State) -> - ok = basic_return(Msg, State, no_consumers), - maybe_incr_stats([{XName, 1}], return_not_delivered, State), - record_confirm(MsgSeqNo, XName, State); -process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> +process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> record_confirm(MsgSeqNo, XName, State); -process_routing_result(routed, _, _, undefined, _, State) -> +process_routing_result(routed, _, _, undefined, _, State) -> State; -process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> +process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> State#ch{unconfirmed = dtree:insert(MsgSeqNo, QPids, XName, State#ch.unconfirmed)}. @@ -1423,7 +1420,7 @@ complete_tx(State = #ch{tx_status = committing}) -> ok = rabbit_writer:send_command(State#ch.writer_pid, #'tx.commit_ok'{}), State#ch{tx_status = in_progress}; complete_tx(State = #ch{tx_status = failed}) -> - {noreply, State1} = send_exception( + {noreply, State1} = handle_exception( rabbit_misc:amqp_error( precondition_failed, "partial tx completion", [], 'tx.commit'), diff --git a/src/rabbit_control_main.erl b/src/rabbit_control_main.erl index b23088cc..25f7d758 100644 --- a/src/rabbit_control_main.erl +++ b/src/rabbit_control_main.erl @@ -25,10 +25,14 @@ -define(QUIET_OPT, "-q"). -define(NODE_OPT, "-n"). -define(VHOST_OPT, "-p"). +-define(RAM_OPT, "--ram"). +-define(OFFLINE_OPT, "--offline"). -define(QUIET_DEF, {?QUIET_OPT, flag}). -define(NODE_DEF(Node), {?NODE_OPT, {option, Node}}). -define(VHOST_DEF, {?VHOST_OPT, {option, "/"}}). +-define(RAM_DEF, {?RAM_OPT, flag}). 
+-define(OFFLINE_DEF, {?OFFLINE_OPT, flag}). -define(GLOBAL_DEFS(Node), [?QUIET_DEF, ?NODE_DEF(Node)]). @@ -41,8 +45,10 @@ force_reset, rotate_logs, - cluster, - force_cluster, + {join_cluster, [?RAM_DEF]}, + change_cluster_node_type, + update_cluster_nodes, + {forget_cluster_node, [?OFFLINE_DEF]}, cluster_status, add_user, @@ -60,9 +66,13 @@ {list_permissions, [?VHOST_DEF]}, list_user_permissions, - set_parameter, - clear_parameter, - list_parameters, + {set_parameter, [?VHOST_DEF]}, + {clear_parameter, [?VHOST_DEF]}, + {list_parameters, [?VHOST_DEF]}, + + {set_policy, [?VHOST_DEF]}, + {clear_policy, [?VHOST_DEF]}, + {list_policies, [?VHOST_DEF]}, {list_queues, [?VHOST_DEF]}, {list_exchanges, [?VHOST_DEF]}, @@ -92,7 +102,9 @@ {"Bindings", rabbit_binding, info_all, info_keys}, {"Consumers", rabbit_amqqueue, consumers_all, consumer_info_keys}, {"Permissions", rabbit_auth_backend_internal, list_vhost_permissions, - vhost_perms_info_keys}]). + vhost_perms_info_keys}, + {"Policies", rabbit_policy, list_formatted, info_keys}, + {"Parameters", rabbit_runtime_parameters, list_formatted, info_keys}]). %%---------------------------------------------------------------------------- @@ -190,11 +202,11 @@ print_report(Node, {Descr, Module, InfoFun, KeysFun}, VHostArg) -> print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg). print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg) -> - case Results = rpc_call(Node, Module, InfoFun, VHostArg) of - [_|_] -> InfoItems = rpc_call(Node, Module, KeysFun, []), - display_row([atom_to_list(I) || I <- InfoItems]), - display_info_list(Results, InfoItems); - _ -> ok + case rpc_call(Node, Module, InfoFun, VHostArg) of + [_|_] = Results -> InfoItems = rpc_call(Node, Module, KeysFun, []), + display_row([atom_to_list(I) || I <- InfoItems]), + display_info_list(Results, InfoItems); + _ -> ok end, io:nl(). 
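For orientation, a sketch of how the reworked clustering commands declared above (and implemented further down) are invoked; the node names rabbit@A and rabbit@B are hypothetical:

    rabbitmqctl -n rabbit@B join_cluster rabbit@A --ram
    rabbitmqctl -n rabbit@B change_cluster_node_type disc
    rabbitmqctl -n rabbit@B update_cluster_nodes rabbit@A
    rabbitmqctl -n rabbit@A forget_cluster_node rabbit@B --offline

The old cluster/force_cluster commands took the full list of cluster nodes; each replacement names a single remote node and derives the rest.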
@@ -239,17 +251,34 @@ action(force_reset, Node, [], _Opts, Inform) -> Inform("Forcefully resetting node ~p", [Node]), call(Node, {rabbit_mnesia, force_reset, []}); -action(cluster, Node, ClusterNodeSs, _Opts, Inform) -> - ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), - Inform("Clustering node ~p with ~p", - [Node, ClusterNodes]), - rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); - -action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) -> - ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), - Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", - [Node, ClusterNodes]), - rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); +action(join_cluster, Node, [ClusterNodeS], Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + NodeType = case proplists:get_bool(?RAM_OPT, Opts) of + true -> ram; + false -> disc + end, + Inform("Clustering node ~p with ~p", [Node, ClusterNode]), + rpc_call(Node, rabbit_mnesia, join_cluster, [ClusterNode, NodeType]); + +action(change_cluster_node_type, Node, ["ram"], _Opts, Inform) -> + Inform("Turning ~p into a ram node", [Node]), + rpc_call(Node, rabbit_mnesia, change_cluster_node_type, [ram]); +action(change_cluster_node_type, Node, [Type], _Opts, Inform) + when Type =:= "disc" orelse Type =:= "disk" -> + Inform("Turning ~p into a disc node", [Node]), + rpc_call(Node, rabbit_mnesia, change_cluster_node_type, [disc]); + +action(update_cluster_nodes, Node, [ClusterNodeS], _Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + Inform("Updating cluster nodes for ~p from ~p", [Node, ClusterNode]), + rpc_call(Node, rabbit_mnesia, update_cluster_nodes, [ClusterNode]); + +action(forget_cluster_node, Node, [ClusterNodeS], Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + RemoveWhenOffline = proplists:get_bool(?OFFLINE_OPT, Opts), + Inform("Removing node ~p from cluster", [ClusterNode]), + rpc_call(Node, rabbit_mnesia, forget_cluster_node, + [ClusterNode, RemoveWhenOffline]); action(wait, Node, [PidFile], _Opts, Inform) -> Inform("Waiting for ~p", [Node]), @@ -414,50 +443,76 @@ action(list_permissions, Node, [], Opts, Inform) -> list_vhost_permissions, [VHost]}), rabbit_auth_backend_internal:vhost_perms_info_keys()); -action(set_parameter, Node, [Component, Key, Value], _Opts, Inform) -> +action(set_parameter, Node, [Component, Key, Value], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Setting runtime parameter ~p for component ~p to ~p", [Key, Component, Value]), rpc_call(Node, rabbit_runtime_parameters, parse_set, - [list_to_binary(Component), list_to_binary(Key), Value]); + [VHostArg, list_to_binary(Component), list_to_binary(Key), Value]); -action(clear_parameter, Node, [Component, Key], _Opts, Inform) -> +action(clear_parameter, Node, [Component, Key], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Clearing runtime parameter ~p for component ~p", [Key, Component]), - rpc_call(Node, rabbit_runtime_parameters, clear, [list_to_binary(Component), + rpc_call(Node, rabbit_runtime_parameters, clear, [VHostArg, + list_to_binary(Component), list_to_binary(Key)]); -action(list_parameters, Node, Args = [], _Opts, Inform) -> +action(list_parameters, Node, [], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Listing runtime parameters", []), display_info_list( - rpc_call(Node, rabbit_runtime_parameters, list_formatted, Args), + rpc_call(Node, 
rabbit_runtime_parameters, list_formatted, [VHostArg]), rabbit_runtime_parameters:info_keys()); +action(set_policy, Node, [Key, Pattern, Defn | Prio], Opts, Inform) + when Prio == [] orelse length(Prio) == 1 -> + Msg = "Setting policy ~p for pattern ~p to ~p", + {InformMsg, Prio1} = case Prio of [] -> {Msg, undefined}; + [P] -> {Msg ++ " with priority ~s", P} + end, + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform(InformMsg, [Key, Pattern, Defn] ++ Prio), + rpc_call(Node, rabbit_policy, parse_set, + [VHostArg, list_to_binary(Key), Pattern, Defn, Prio1]); + +action(clear_policy, Node, [Key], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform("Clearing policy ~p", [Key]), + rpc_call(Node, rabbit_policy, delete, [VHostArg, list_to_binary(Key)]); + +action(list_policies, Node, [], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform("Listing policies", []), + display_info_list(rpc_call(Node, rabbit_policy, list_formatted, [VHostArg]), + rabbit_policy:info_keys()); + action(report, Node, _Args, _Opts, Inform) -> - io:format("Reporting server status on ~p~n~n", [erlang:universaltime()]), + Inform("Reporting server status on ~p~n~n", [erlang:universaltime()]), [begin ok = action(Action, N, [], [], Inform), io:nl() end || - N <- unsafe_rpc(Node, rabbit_mnesia, running_clustered_nodes, []), + N <- unsafe_rpc(Node, rabbit_mnesia, cluster_nodes, [running]), Action <- [status, cluster_status, environment]], VHosts = unsafe_rpc(Node, rabbit_vhost, list, []), [print_report(Node, Q) || Q <- ?GLOBAL_QUERIES], [print_report(Node, Q, [V]) || Q <- ?VHOST_QUERIES, V <- VHosts], - io:format("End of server status report~n"), ok; action(eval, Node, [Expr], _Opts, _Inform) -> case erl_scan:string(Expr) of {ok, Scanned, _} -> case erl_parse:parse_exprs(Scanned) of - {ok, Parsed} -> - {value, Value, _} = unsafe_rpc( - Node, erl_eval, exprs, [Parsed, []]), - io:format("~p~n", [Value]), - ok; - {error, E} -> - {error_string, format_parse_error(E)} + {ok, Parsed} -> {value, Value, _} = + unsafe_rpc( + Node, erl_eval, exprs, [Parsed, []]), + io:format("~p~n", [Value]), + ok; + {error, E} -> {error_string, format_parse_error(E)} end; {error, E, _} -> {error_string, format_parse_error(E)} end. +format_parse_error({_Line, Mod, Err}) -> lists:flatten(Mod:format_error(Err)). + %%---------------------------------------------------------------------------- wait_for_application(Node, PidFile, Application, Inform) -> @@ -544,9 +599,6 @@ exit_loop(Port) -> {Port, _} -> exit_loop(Port) end. -format_parse_error({_Line, Mod, Err}) -> - lists:flatten(Mod:format_error(Err)). - %%---------------------------------------------------------------------------- default_if_empty(List, Default) when is_list(List) -> diff --git a/src/rabbit_direct.erl b/src/rabbit_direct.erl index c07ad832..689e5d83 100644 --- a/src/rabbit_direct.erl +++ b/src/rabbit_direct.erl @@ -31,8 +31,9 @@ -spec(force_event_refresh/0 :: () -> 'ok'). -spec(list/0 :: () -> [pid()]). -spec(list_local/0 :: () -> [pid()]). --spec(connect/5 :: (rabbit_types:username(), rabbit_types:vhost(), - rabbit_types:protocol(), pid(), +-spec(connect/5 :: ((rabbit_types:username() | rabbit_types:user() | + {rabbit_types:username(), rabbit_types:password()}), + rabbit_types:vhost(), rabbit_types:protocol(), pid(), rabbit_event:event_props()) -> {'ok', {rabbit_types:user(), rabbit_framing:amqp_table()}}). 
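The widened connect/5 spec above admits three credential shapes; a hedged summary of how each is dispatched by the connect clauses later in this diff (values hypothetical):

    %% rabbit_direct:connect(#user{} = U, VHost, Protocol, Pid, Infos)
    %%   -> already authenticated; only vhost access is checked
    %% rabbit_direct:connect({<<"guest">>, <<"guest">>}, VHost, Protocol, Pid, Infos)
    %%   -> rabbit_access_control:check_user_pass_login/2
    %% rabbit_direct:connect(<<"guest">>, VHost, Protocol, Pid, Infos)
    %%   -> rabbit_access_control:check_user_login/2 with an empty password list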
@@ -40,7 +41,6 @@ (rabbit_channel:channel_number(), pid(), pid(), string(), rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), pid()) -> {'ok', pid()}). - -spec(disconnect/2 :: (pid(), rabbit_event:event_props()) -> 'ok'). -endif. @@ -60,32 +60,40 @@ list_local() -> pg_local:get_members(rabbit_direct). list() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_direct, list_local, []). %%---------------------------------------------------------------------------- +connect(User = #user{}, VHost, Protocol, Pid, Infos) -> + try rabbit_access_control:check_vhost_access(User, VHost) of + ok -> ok = pg_local:join(rabbit_direct, Pid), + rabbit_event:notify(connection_created, Infos), + {ok, {User, rabbit_reader:server_properties(Protocol)}} + catch + exit:#amqp_error{name = access_refused} -> + {error, access_refused} + end; + +connect({Username, Password}, VHost, Protocol, Pid, Infos) -> + connect0(check_user_pass_login, Username, Password, VHost, Protocol, Pid, + Infos); + connect(Username, VHost, Protocol, Pid, Infos) -> + connect0(check_user_login, Username, [], VHost, Protocol, Pid, Infos). + +connect0(FunctionName, U, P, VHost, Protocol, Pid, Infos) -> case rabbit:is_running() of true -> - case rabbit_access_control:check_user_login(Username, []) of - {ok, User} -> - try rabbit_access_control:check_vhost_access(User, VHost) of - ok -> ok = pg_local:join(rabbit_direct, Pid), - rabbit_event:notify(connection_created, Infos), - {ok, {User, - rabbit_reader:server_properties(Protocol)}} - catch - exit:#amqp_error{name = access_refused} -> - {error, access_refused} - end; - {refused, _Msg, _Args} -> - {error, auth_failure} + case rabbit_access_control:FunctionName(U, P) of + {ok, User} -> connect(User, VHost, Protocol, Pid, Infos); + {refused, _M, _A} -> {error, auth_failure} end; false -> {error, broker_not_found_on_node} end. + start_channel(Number, ClientChannelPid, ConnPid, ConnName, Protocol, User, VHost, Capabilities, Collector) -> {ok, _, {ChannelPid, _}} = diff --git a/src/rabbit_disk_monitor.erl b/src/rabbit_disk_monitor.erl index 58375abb..6330d555 100644 --- a/src/rabbit_disk_monitor.erl +++ b/src/rabbit_disk_monitor.erl @@ -137,7 +137,7 @@ dir() -> rabbit_mnesia:dir(). set_disk_limits(State, Limit) -> State1 = State#state { limit = Limit }, rabbit_log:info("Disk free limit set to ~pMB~n", - [trunc(interpret_limit(Limit) / 1048576)]), + [trunc(interpret_limit(Limit) / 1000000)]), internal_update(State1). internal_update(State = #state { limit = Limit, @@ -148,11 +148,11 @@ internal_update(State = #state { limit = Limit, NewAlarmed = CurrentFreeBytes < LimitBytes, case {Alarmed, NewAlarmed} of {false, true} -> - emit_update_info("exceeded", CurrentFreeBytes, LimitBytes), - alarm_handler:set_alarm({{resource_limit, disk, node()}, []}); + emit_update_info("insufficient", CurrentFreeBytes, LimitBytes), + rabbit_alarm:set_alarm({{resource_limit, disk, node()}, []}); {true, false} -> - emit_update_info("below limit", CurrentFreeBytes, LimitBytes), - alarm_handler:clear_alarm({resource_limit, disk, node()}); + emit_update_info("sufficient", CurrentFreeBytes, LimitBytes), + rabbit_alarm:clear_alarm({resource_limit, disk, node()}); _ -> ok end, @@ -187,10 +187,10 @@ interpret_limit({mem_relative, R}) -> interpret_limit(L) -> L. 
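A worked example of the divisor change in set_disk_limits/2 above: limits are now logged in decimal megabytes (10^6 bytes) rather than mebibytes (2^20). Assuming {mem_relative, 1.0} scales the machine's total memory (its clause body lies outside this hunk) on a host with 4,000,000,000 bytes of RAM:

    trunc(4000000000 / 1000000)  %% = 4000, logged as "4000MB"
    trunc(4000000000 / 1048576)  %% = 3814, the old MiB figure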
-emit_update_info(State, CurrentFree, Limit) -> +emit_update_info(StateStr, CurrentFree, Limit) -> rabbit_log:info( - "Disk free space limit now ~s. Free bytes:~p Limit:~p~n", - [State, CurrentFree, Limit]). + "Disk free space ~s. Free bytes:~p Limit:~p~n", + [StateStr, CurrentFree, Limit]). start_timer(Timeout) -> {ok, TRef} = timer:send_interval(Timeout, update), diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index f1672f4e..a9af2d8a 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -81,7 +81,7 @@ publish1(RoutingKey, Format, Data, LogExch) -> %% second resolution, not millisecond. Timestamp = rabbit_misc:now_ms() div 1000, {ok, _RoutingRes, _DeliveredQPids} = - rabbit_basic:publish(LogExch, RoutingKey, false, false, + rabbit_basic:publish(LogExch, RoutingKey, #'P_basic'{content_type = <<"text/plain">>, timestamp = Timestamp}, list_to_binary(io_lib:format(Format, Data))), diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index 57c571f1..a205b23d 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -298,7 +298,10 @@ i(durable, #exchange{durable = Durable}) -> Durable; i(auto_delete, #exchange{auto_delete = AutoDelete}) -> AutoDelete; i(internal, #exchange{internal = Internal}) -> Internal; i(arguments, #exchange{arguments = Arguments}) -> Arguments; -i(policy, X) -> rabbit_policy:name(X); +i(policy, X) -> case rabbit_policy:name(X) of + none -> ''; + Policy -> Policy + end; i(Item, _) -> throw({bad_argument, Item}). info(X = #exchange{}) -> infos(?INFO_KEYS, X). @@ -402,7 +405,12 @@ conditional_delete(X = #exchange{name = XName}) -> end. unconditional_delete(X = #exchange{name = XName}) -> - ok = mnesia:delete({rabbit_durable_exchange, XName}), + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:wread({rabbit_durable_exchange, XName}) of + [] -> ok; + [_] -> ok = mnesia:delete({rabbit_durable_exchange, XName}) + end, ok = mnesia:delete({rabbit_exchange, XName}), ok = mnesia:delete({rabbit_exchange_serial, XName}), Bindings = rabbit_binding:remove_for_source(XName), diff --git a/src/rabbit_exchange_decorator.erl b/src/rabbit_exchange_decorator.erl index b40ceda9..08819427 100644 --- a/src/rabbit_exchange_decorator.erl +++ b/src/rabbit_exchange_decorator.erl @@ -31,7 +31,7 @@ -type(tx() :: 'transaction' | 'none'). -type(serial() :: pos_integer() | tx()). --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Should Rabbit ensure that all binding events that are %% delivered to an individual exchange can be serialised? (they diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl index e6470b72..c5583ffd 100644 --- a/src/rabbit_exchange_type.erl +++ b/src/rabbit_exchange_type.erl @@ -21,7 +21,7 @@ -type(tx() :: 'transaction' | 'none'). -type(serial() :: pos_integer() | tx()). --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Should Rabbit ensure that all binding events that are %% delivered to an individual exchange can be serialised? (they @@ -54,13 +54,13 @@ %% called when comparing exchanges for equivalence - should return ok or %% exit with #amqp_error{} --callback assert_args_equivalence (rabbit_types:exchange(), - rabbit_framing:amqp_table()) -> +-callback assert_args_equivalence(rabbit_types:exchange(), + rabbit_framing:amqp_table()) -> 'ok' | rabbit_types:connection_exit(). %% called when the policy attached to this exchange changes. 
--callback policy_changed ( - serial(), rabbit_types:exchange(), rabbit_types:exchange()) -> 'ok'. +-callback policy_changed(serial(), rabbit_types:exchange(), + rabbit_types:exchange()) -> 'ok'. -else. diff --git a/src/rabbit_file.erl b/src/rabbit_file.erl index a95f8f26..26f74796 100644 --- a/src/rabbit_file.erl +++ b/src/rabbit_file.erl @@ -105,9 +105,9 @@ with_fhc_handle(Fun) -> with_fhc_handle(1, Fun). with_fhc_handle(N, Fun) -> - [ ok = file_handle_cache:obtain() || _ <- lists:seq(1, N)], + ok = file_handle_cache:obtain(N), try Fun() - after [ ok = file_handle_cache:release() || _ <- lists:seq(1, N)] + after ok = file_handle_cache:release(N) end. read_term_file(File) -> diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index ba0cb04f..cedbbdb3 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -144,11 +144,7 @@ gen_secure() -> %% employs base64url encoding, which is safer in more contexts than %% plain base64. string(G, Prefix) -> - Prefix ++ "-" ++ lists:foldl(fun ($\+, Acc) -> [$\- | Acc]; - ($\/, Acc) -> [$\_ | Acc]; - ($\=, Acc) -> Acc; - (Chr, Acc) -> [Chr | Acc] - end, [], base64:encode_to_string(G)). + Prefix ++ "-" ++ rabbit_misc:base64url(G). binary(G, Prefix) -> list_to_binary(string(G, Prefix)). diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl index 80b4e768..05aad8c9 100644 --- a/src/rabbit_heartbeat.erl +++ b/src/rabbit_heartbeat.erl @@ -59,21 +59,15 @@ start_heartbeat_sender(Sock, TimeoutSec, SendFun) -> %% the 'div 2' is there so that we don't end up waiting for nearly %% 2 * TimeoutSec before sending a heartbeat in the boundary case %% where the last message was sent just after a heartbeat. - heartbeater( - {Sock, TimeoutSec * 1000 div 2, send_oct, 0, - fun () -> - SendFun(), - continue - end}). + heartbeater({Sock, TimeoutSec * 1000 div 2, send_oct, 0, + fun () -> SendFun(), continue end}). start_heartbeat_receiver(Sock, TimeoutSec, ReceiveFun) -> %% we check for incoming data every interval, and time out after %% two checks with no change. As a result we will time out between %% 2 and 3 intervals after the last data has been received. - heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, fun () -> - ReceiveFun(), - stop - end}). + heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, + fun () -> ReceiveFun(), stop end}). start_heartbeat_fun(SupPid) -> fun (Sock, SendTimeoutSec, SendFun, ReceiveTimeoutSec, ReceiveFun) -> @@ -88,17 +82,11 @@ start_heartbeat_fun(SupPid) -> {Sender, Receiver} end. -pause_monitor({_Sender, none}) -> - ok; -pause_monitor({_Sender, Receiver}) -> - Receiver ! pause, - ok. +pause_monitor({_Sender, none}) -> ok; +pause_monitor({_Sender, Receiver}) -> Receiver ! pause, ok. -resume_monitor({_Sender, none}) -> - ok; -resume_monitor({_Sender, Receiver}) -> - Receiver ! resume, - ok. +resume_monitor({_Sender, none}) -> ok; +resume_monitor({_Sender, Receiver}) -> Receiver ! resume, ok. %%---------------------------------------------------------------------------- start_heartbeater(0, _SupPid, _Sock, _TimeoutFun, _Name, _Callback) -> @@ -106,8 +94,7 @@ start_heartbeater(0, _SupPid, _Sock, _TimeoutFun, _Name, _Callback) -> start_heartbeater(TimeoutSec, SupPid, Sock, TimeoutFun, Name, Callback) -> supervisor2:start_child( SupPid, {Name, - {rabbit_heartbeat, Callback, - [Sock, TimeoutSec, TimeoutFun]}, + {rabbit_heartbeat, Callback, [Sock, TimeoutSec, TimeoutFun]}, transient, ?MAX_WAIT, worker, [rabbit_heartbeat]}). 
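rabbit_guid:string/2 above now delegates to rabbit_misc:base64url/1; a minimal sketch of the same character mapping, modulo ordering (the removed fold built its result in reverse, which is harmless for GUIDs):

    base64url(Bin) ->
        [case C of $+ -> $-; $/ -> $_; _ -> C end
         || C <- base64:encode_to_string(Bin), C =/= $=].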
heartbeater(Params) -> @@ -117,15 +104,11 @@ heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params, {StatVal, SameCount}) -> Recurse = fun (V) -> heartbeater(Params, V) end, receive - pause -> - receive - resume -> - Recurse({0, 0}); - Other -> - exit({unexpected_message, Other}) - end; - Other -> - exit({unexpected_message, Other}) + pause -> receive + resume -> Recurse({0, 0}); + Other -> exit({unexpected_message, Other}) + end; + Other -> exit({unexpected_message, Other}) after TimeoutMillisec -> case rabbit_net:getstat(Sock, [StatName]) of {ok, [{StatName, NewStatVal}]} -> diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index a6b4eeb0..8dfa89d3 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -40,18 +40,20 @@ -spec(log/3 :: (category(), level(), string()) -> 'ok'). -spec(log/4 :: (category(), level(), string(), [any()]) -> 'ok'). --spec(info/1 :: (string()) -> 'ok'). --spec(info/2 :: (string(), [any()]) -> 'ok'). + +-spec(info/1 :: (string()) -> 'ok'). +-spec(info/2 :: (string(), [any()]) -> 'ok'). -spec(warning/1 :: (string()) -> 'ok'). -spec(warning/2 :: (string(), [any()]) -> 'ok'). --spec(error/1 :: (string()) -> 'ok'). --spec(error/2 :: (string(), [any()]) -> 'ok'). +-spec(error/1 :: (string()) -> 'ok'). +-spec(error/2 :: (string(), [any()]) -> 'ok'). -endif. %%---------------------------------------------------------------------------- start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + log(Category, Level, Fmt) -> log(Category, Level, Fmt, []). log(Category, Level, Fmt, Args) when is_list(Args) -> diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 3e058793..e1a21cf7 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -33,16 +33,14 @@ gm, monitors, death_fun, - length_fun + depth_fun }). --define(ONE_SECOND, 1000). - -ifdef(use_specs). -spec(start_link/4 :: (rabbit_types:amqqueue(), pid() | 'undefined', rabbit_mirror_queue_master:death_fun(), - rabbit_mirror_queue_master:length_fun()) -> + rabbit_mirror_queue_master:depth_fun()) -> rabbit_types:ok_pid_or_error()). -spec(get_gm/1 :: (pid()) -> pid()). -spec(ensure_monitoring/2 :: (pid(), [pid()]) -> 'ok'). @@ -103,19 +101,25 @@ %% channel during a publish, only some of the mirrors may receive that %% publish. As a result of this problem, the messages broadcast over %% the gm contain published content, and thus slaves can operate -%% successfully on messages that they only receive via the gm. The key -%% purpose of also sending messages directly from the channels to the -%% slaves is that without this, in the event of the death of the -%% master, messages could be lost until a suitable slave is promoted. +%% successfully on messages that they only receive via the gm. +%% +%% The key purpose of also sending messages directly from the channels +%% to the slaves is that without this, in the event of the death of +%% the master, messages could be lost until a suitable slave is +%% promoted. However, that is not the only reason. A slave cannot send +%% confirms for a message until it has seen it from the +%% channel. Otherwise, it might send a confirm to a channel for a +%% message that it might *never* receive from that channel. This can +%% happen because new slaves join the gm ring (and thus receive +%% messages from the master) before inserting themselves in the +%% queue's mnesia record (which is what channels look at for routing). 
+%% As it turns out, channels will simply ignore such bogus confirms, +%% but relying on that would introduce a dangerously tight coupling. %% -%% However, that is not the only reason. For example, if confirms are -%% in use, then there is no guarantee that every slave will see the -%% delivery with the same msg_seq_no. As a result, the slaves have to -%% wait until they've seen both the publish via gm, and the publish -%% via the channel before they have enough information to be able to -%% perform the publish to their own bq, and subsequently issue the -%% confirm, if necessary. Either form of publish can arrive first, and -%% a slave can be upgraded to the master at any point during this +%% Hence the slaves have to wait until they've seen both the publish +%% via gm, and the publish via the channel before they issue the +%% confirm. Either form of publish can arrive first, and a slave can +%% be upgraded to the master at any point during this %% process. Confirms continue to be issued correctly, however. %% %% Because the slave is a full process, it impersonates parts of the @@ -134,25 +138,31 @@ %% gm should be processed as normal, but fetches which are for %% messages the slave has never seen should be ignored. Similarly, %% acks for messages the slave never fetched should be -%% ignored. Eventually, as the master is consumed from, the messages -%% at the head of the queue which were there before the slave joined -%% will disappear, and the slave will become fully synced with the -%% state of the master. The detection of the sync-status of a slave is -%% done entirely based on length: if the slave and the master both -%% agree on the length of the queue after the fetch of the head of the -%% queue (or a 'set_length' results in a slave having to drop some -%% messages from the head of its queue), then the queues must be in -%% sync. The only other possibility is that the slave's queue is -%% shorter, and thus the fetch should be ignored. In case slaves are -%% joined to an empty queue which only goes on to receive publishes, -%% they start by asking the master to broadcast its length. This is -%% enough for slaves to always be able to work out when their head -%% does not differ from the master (and is much simpler and cheaper -%% than getting the master to hang on to the guid of the msg at the -%% head of its queue). When a slave is promoted to a master, it -%% unilaterally broadcasts its length, in order to solve the problem -%% of length requests from new slaves being unanswered by a dead -%% master. +%% ignored. Similarly, we don't republish rejected messages that we +%% haven't seen. Eventually, as the master is consumed from, the +%% messages at the head of the queue which were there before the slave +%% joined will disappear, and the slave will become fully synced with +%% the state of the master. +%% +%% The detection of the sync-status is based on the depth of the BQs, +%% where the depth is defined as the sum of the length of the BQ (as +%% per BQ:len) and the messages pending an acknowledgement. When the +%% depth of the slave is equal to the master's, then the slave is +%% synchronised. We only store the difference between the two for +%% simplicity. Comparing the length is not enough since we need to +%% take into account rejected messages which will make it back into +%% the master queue but can't go back in the slave, since we don't +%% want "holes" in the slave queue. 
Note that the depth, and the +%% length likewise, must always be shorter on the slave - we assert +%% that in various places. In case slaves are joined to an empty queue +%% which only goes on to receive publishes, they start by asking the +%% master to broadcast its depth. This is enough for slaves to always +%% be able to work out when their head does not differ from the master +%% (and is much simpler and cheaper than getting the master to hang on +%% to the guid of the msg at the head of its queue). When a slave is +%% promoted to a master, it unilaterally broadcasts its depth, in +%% order to solve the problem of depth requests from new slaves being +%% unanswered by a dead master. %% %% Obviously, due to the async nature of communication across gm, the %% slaves can fall behind. This does not matter from a sync pov: if @@ -293,15 +303,15 @@ %% if they have no mirrored content at all. This is not surprising: to %% achieve anything more sophisticated would require the master and %% recovering slave to be able to check to see whether they agree on -%% the last seen state of the queue: checking length alone is not +%% the last seen state of the queue: checking depth alone is not %% sufficient in this case. %% %% For more documentation see the comments in bug 23554. %% %%---------------------------------------------------------------------------- -start_link(Queue, GM, DeathFun, LengthFun) -> - gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, LengthFun], []). +start_link(Queue, GM, DeathFun, DepthFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, DepthFun], []). get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). @@ -313,10 +323,12 @@ ensure_monitoring(CPid, Pids) -> %% gen_server %% --------------------------------------------------------------------------- -init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> +init([#amqqueue { name = QueueName } = Q, GM, DeathFun, DepthFun]) -> GM1 = case GM of undefined -> - {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), + {ok, GM2} = gm:start_link( + QueueName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), receive {joined, GM2, _Members} -> ok end, @@ -325,12 +337,11 @@ init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> true = link(GM), GM end, - ensure_gm_heartbeat(), {ok, #state { q = Q, gm = GM1, monitors = pmon:new(), death_fun = DeathFun, - length_fun = LengthFun }, + depth_fun = DepthFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -340,7 +351,7 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName, pid = MPid } }) when node(MPid) =:= node() -> - case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, MPid, Deaths) of {ok, MPid, DeadPids} -> rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName, DeadPids), @@ -349,8 +360,8 @@ handle_cast({gm_deaths, Deaths}, {stop, normal, State} end; -handle_cast(request_length, State = #state { length_fun = LengthFun }) -> - ok = LengthFun(), +handle_cast(request_depth, State = #state { depth_fun = DepthFun }) -> + ok = DepthFun(), noreply(State); handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> @@ -359,11 +370,6 @@ handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> handle_cast({delete_and_terminate, Reason}, State) -> {stop, Reason, State}. 
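To make the depth bookkeeping described in the comments above concrete (figures hypothetical):

    %% depth = BQ:len(BQS) + messages pending acknowledgement
    %% master: len 3, pending acks 2  => depth 5
    %% slave:  len 3, pending acks 2  => depth 5, delta 0 (synchronised)
    %% slave:  len 2, pending acks 2  => depth 4, delta 1 (catching up)

The slave records only the delta (its depth_delta field, later in this patch), which reaches 0 exactly when the slave is synchronised.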
-handle_info(send_gm_heartbeat, State = #state { gm = GM }) -> - gm:broadcast(GM, heartbeat), - ensure_gm_heartbeat(), - noreply(State); - handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, State = #state { monitors = Mons, death_fun = DeathFun }) -> @@ -399,9 +405,7 @@ members_changed([_CPid], _Births, []) -> members_changed([CPid], _Births, Deaths) -> ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). -handle_msg([_CPid], _From, heartbeat) -> - ok; -handle_msg([CPid], _From, request_length = Msg) -> +handle_msg([CPid], _From, request_depth = Msg) -> ok = gen_server2:cast(CPid, Msg); handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> ok = gen_server2:cast(CPid, Msg); @@ -420,6 +424,3 @@ noreply(State) -> reply(Reply, State) -> {reply, Reply, State, hibernate}. - -ensure_gm_heartbeat() -> - erlang:send_after(?ONE_SECOND, self(), send_gm_heartbeat). diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 750bcd56..cce19c90 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -17,15 +17,17 @@ -module(rabbit_mirror_queue_master). -export([init/3, terminate/2, delete_and_terminate/2, - purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, - requeue/2, len/1, is_empty/1, drain_confirmed/1, dropwhile/3, - set_ram_duration_target/2, ram_duration/1, + purge/1, publish/4, publish_delivered/4, discard/3, fetch/2, ack/2, + requeue/2, len/1, is_empty/1, depth/1, drain_confirmed/1, + dropwhile/3, set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2, discard/3, fold/3]). + status/1, invoke/3, is_duplicate/2, fold/3]). -export([start/1, stop/0]). --export([promote_backing_queue_state/6, sender_death_fun/0, length_fun/0]). +-export([promote_backing_queue_state/7, sender_death_fun/0, depth_fun/0]). + +-export([init_with_existing_bq/3, stop_mirroring/1]). -behaviour(rabbit_backing_queue). @@ -44,10 +46,10 @@ -ifdef(use_specs). --export_type([death_fun/0, length_fun/0]). +-export_type([death_fun/0, depth_fun/0]). -type(death_fun() :: fun ((pid()) -> 'ok')). --type(length_fun() :: fun (() -> 'ok')). +-type(depth_fun() :: fun (() -> 'ok')). -type(master_state() :: #state { gm :: pid(), coordinator :: pid(), backing_queue :: atom(), @@ -59,10 +61,14 @@ known_senders :: set() }). --spec(promote_backing_queue_state/6 :: - (pid(), atom(), any(), pid(), dict(), [pid()]) -> master_state()). +-spec(promote_backing_queue_state/7 :: + (pid(), atom(), any(), pid(), [any()], dict(), [pid()]) -> + master_state()). -spec(sender_death_fun/0 :: () -> death_fun()). --spec(length_fun/0 :: () -> length_fun()). +-spec(depth_fun/0 :: () -> depth_fun()). +-spec(init_with_existing_bq/3 :: (rabbit_types:amqqueue(), atom(), any()) -> + master_state()). +-spec(stop_mirroring/1 :: (master_state()) -> {atom(), any()}). -endif. @@ -82,21 +88,27 @@ stop() -> %% Same as start/1. exit({not_valid_for_generic_backing_queue, ?MODULE}). 
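Piecing the clauses above together with the master's depth_fun/0 (further down in this patch), the depth-request round trip is roughly:

    %% slave:       ok = gm:broadcast(GM, request_depth),
    %% coordinator: the gm callback handle_msg([CPid], _From, request_depth)
    %%              casts it to the coordinator, whose
    %%              handle_cast(request_depth, ...) runs DepthFun(),
    %% master:      depth_fun() -> ... gm:broadcast(GM, {depth, BQ:depth(BQS)}).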
-init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, - AsyncCallback) -> - {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( - Q, undefined, sender_death_fun(), length_fun()), - GM = rabbit_mirror_queue_coordinator:get_gm(CPid), - MNodes1 = - (case MNodes of - all -> rabbit_mnesia:all_clustered_nodes(); - undefined -> []; - _ -> MNodes - end) -- [node()], - [rabbit_mirror_queue_misc:add_mirror(QName, Node) || Node <- MNodes1], +init(Q = #amqqueue{name = QName}, Recover, AsyncCallback) -> {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover, AsyncCallback), - ok = gm:broadcast(GM, {length, BQ:len(BQS)}), + State = #state{gm = GM} = init_with_existing_bq(Q, BQ, BQS), + {_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q), + rabbit_mirror_queue_misc:add_mirrors(QName, SNodes), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), + State. + +init_with_existing_bq(Q = #amqqueue{name = QName}, BQ, BQS) -> + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q, undefined, sender_death_fun(), depth_fun()), + GM = rabbit_mirror_queue_coordinator:get_gm(CPid), + Self = self(), + ok = rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q1 = #amqqueue{gm_pids = GMPids}] + = mnesia:read({rabbit_queue, QName}), + ok = rabbit_amqqueue:store_queue( + Q1#amqqueue{gm_pids = [{GM, Self} | GMPids]}) + end), #state { gm = GM, coordinator = CPid, backing_queue = BQ, @@ -107,8 +119,16 @@ init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, ack_msg_id = dict:new(), known_senders = sets:new() }. +stop_mirroring(State = #state { coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS }) -> + unlink(CPid), + stop_all_slaves(shutdown, State), + {BQ, BQS}. + terminate({shutdown, dropped} = Reason, - State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> %% Backing queue termination - this node has been explicitly %% dropped. Normally, non-durable queues would be tidied up on %% startup, but there's a possibility that we will be added back @@ -124,28 +144,35 @@ terminate(Reason, %% node. Thus just let some other slave take over. State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. -delete_and_terminate(Reason, State = #state { gm = GM, - backing_queue = BQ, +delete_and_terminate(Reason, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - Slaves = [Pid || Pid <- gm:group_members(GM), node(Pid) =/= node()], - MRefs = [erlang:monitor(process, S) || S <- Slaves], - ok = gm:broadcast(GM, {delete_and_terminate, Reason}), - monitor_wait(MRefs), + stop_all_slaves(Reason, State), State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), set_delivered = 0 }. -monitor_wait([]) -> - ok; -monitor_wait([MRef | MRefs]) -> - receive({'DOWN', MRef, process, _Pid, _Info}) -> - ok - end, - monitor_wait(MRefs). +stop_all_slaves(Reason, #state{gm = GM}) -> + Info = gm:info(GM), + Slaves = [Pid || Pid <- proplists:get_value(group_members, Info), + node(Pid) =/= node()], + MRefs = [erlang:monitor(process, S) || S <- Slaves], + ok = gm:broadcast(GM, {delete_and_terminate, Reason}), + [receive {'DOWN', MRef, process, _Pid, _Info} -> ok end || MRef <- MRefs], + %% Normally when we remove a slave another slave or master will + %% notice and update Mnesia. But we just removed them all, and + %% have stopped listening ourselves. So manually clean up. 
+ QName = proplists:get_value(group_name, Info), + rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q] = mnesia:read({rabbit_queue, QName}), + rabbit_mirror_queue_misc:store_updated_slaves( + Q #amqqueue { gm_pids = [], slave_pids = [] }) + end), + ok = gm:forget_group(QName). purge(State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {set_length, 0, false}), + ok = gm:broadcast(GM, {drop, 0, BQ:len(BQS), false}), {Count, BQS1} = BQ:purge(BQS), {Count, State #state { backing_queue_state = BQS1, set_delivered = 0 }}. @@ -156,28 +183,42 @@ publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, backing_queue = BQ, backing_queue_state = BQS }) -> false = dict:is_key(MsgId, SS), %% ASSERTION - ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), + ok = gm:broadcast(GM, {publish, ChPid, MsgProps, Msg}), BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). -publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, +publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, State = #state { gm = GM, seen_status = SS, backing_queue = BQ, backing_queue_state = BQS, ack_msg_id = AM }) -> false = dict:is_key(MsgId, SS), %% ASSERTION - %% Must use confirmed_broadcast here in order to guarantee that - %% all slaves are forced to interpret this publish_delivered at - %% the same point, especially if we die and a slave is promoted. - ok = gm:confirmed_broadcast( - GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), - {AckTag, BQS1} = - BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), + ok = gm:broadcast(GM, {publish_delivered, ChPid, MsgProps, Msg}), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, BQS), AM1 = maybe_store_acktag(AckTag, MsgId, AM), - {AckTag, - ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1, - ack_msg_id = AM1 })}. + State1 = State #state { backing_queue_state = BQS1, ack_msg_id = AM1 }, + {AckTag, ensure_monitoring(ChPid, State1)}. + +discard(MsgId, ChPid, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS }) -> + %% It's a massive error if we get told to discard something that's + %% already been published or published-and-confirmed. To do that + %% would require non FIFO access. Hence we should not find + %% 'published' or 'confirmed' in this dict:find. + case dict:find(MsgId, SS) of + error -> + ok = gm:broadcast(GM, {discard, ChPid, MsgId}), + BQS1 = BQ:discard(MsgId, ChPid, BQS), + ensure_monitoring( + ChPid, State #state { + backing_queue_state = BQS1, + seen_status = dict:erase(MsgId, SS) }); + {ok, discarded} -> + State + end. dropwhile(Pred, AckRequired, State = #state{gm = GM, @@ -185,13 +226,13 @@ dropwhile(Pred, AckRequired, set_delivered = SetDelivered, backing_queue_state = BQS }) -> Len = BQ:len(BQS), - {Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS), + {Next, Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS), Len1 = BQ:len(BQS1), - ok = gm:broadcast(GM, {set_length, Len1, AckRequired}), Dropped = Len - Len1, + ok = gm:broadcast(GM, {drop, Len1, Dropped, AckRequired}), SetDelivered1 = lists:max([0, SetDelivered - Dropped]), - {Msgs, State #state { backing_queue_state = BQS1, - set_delivered = SetDelivered1 } }. + {Next, Msgs, State #state { backing_queue_state = BQS1, + set_delivered = SetDelivered1 } }. 
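A quick worked example of the new {drop, ...} broadcast in dropwhile/3 above: if the queue held Len = 10 messages and the predicate dropped 4, then Len1 = 6 and the slaves receive

    ok = gm:broadcast(GM, {drop, 6, 4, AckRequired}).

i.e. {drop, RemainingLen, DroppedCount, AckRequired}; purge/1 above is the degenerate case {drop, 0, Len, false}.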
drain_confirmed(State = #state { backing_queue = BQ, backing_queue_state = BQS, @@ -274,6 +315,9 @@ len(#state { backing_queue = BQ, backing_queue_state = BQS }) -> is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:is_empty(BQS). +depth(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:depth(BQS). + set_ram_duration_target(Target, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = @@ -348,35 +392,20 @@ is_duplicate(Message = #basic_message { id = MsgId }, {discarded, State} end. -discard(Msg = #basic_message { id = MsgId }, ChPid, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - seen_status = SS }) -> - %% It's a massive error if we get told to discard something that's - %% already been published or published-and-confirmed. To do that - %% would require non FIFO access. Hence we should not find - %% 'published' or 'confirmed' in this dict:find. - case dict:find(MsgId, SS) of - error -> - ok = gm:broadcast(GM, {discard, ChPid, Msg}), - State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS), - seen_status = dict:erase(MsgId, SS) }; - {ok, discarded} -> - State - end. - %% --------------------------------------------------------------------------- %% Other exported functions %% --------------------------------------------------------------------------- -promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) -> - Len = BQ:len(BQS), - ok = gm:broadcast(GM, {length, Len}), +promote_backing_queue_state(CPid, BQ, BQS, GM, AckTags, SeenStatus, KS) -> + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + Len = BQ:len(BQS1), + Depth = BQ:depth(BQS1), + true = Len == Depth, %% ASSERTION: everything must have been requeued + ok = gm:broadcast(GM, {depth, Depth}), #state { gm = GM, coordinator = CPid, backing_queue = BQ, - backing_queue_state = BQS, + backing_queue_state = BQS1, set_delivered = Len, seen_status = SeenStatus, confirmed = [], @@ -395,7 +424,7 @@ sender_death_fun() -> end) end. -length_fun() -> +depth_fun() -> Self = self(), fun () -> rabbit_amqqueue:run_backing_queue( @@ -403,15 +432,13 @@ length_fun() -> fun (?MODULE, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {length, BQ:len(BQS)}), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), State end) end. -maybe_store_acktag(undefined, _MsgId, AM) -> - AM; -maybe_store_acktag(AckTag, MsgId, AM) -> - dict:store(AckTag, MsgId, AM). +maybe_store_acktag(undefined, _MsgId, AM) -> AM; +maybe_store_acktag(AckTag, MsgId, AM) -> dict:store(AckTag, MsgId, AM). ensure_monitoring(ChPid, State = #state { coordinator = CPid, known_senders = KS }) -> diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 180677fe..4a00846e 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -15,28 +15,45 @@ %% -module(rabbit_mirror_queue_misc). +-behaviour(rabbit_policy_validator). --export([remove_from_queue/2, on_node_up/0, - drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3, - report_deaths/4]). +-export([remove_from_queue/3, on_node_up/0, add_mirrors/2, add_mirror/2, + report_deaths/4, store_updated_slaves/1, suggested_queue_nodes/1, + is_mirrored/1, update_mirrors/2, validate_policy/1]). + +%% for testing only +-export([suggested_queue_nodes/4]). -include("rabbit.hrl"). 
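Returning to promote_backing_queue_state/7 in rabbit_mirror_queue_master above, the Len == Depth assertion follows directly from the depth definition; e.g. (figures hypothetical):

    %% the master dies with len 3 and 2 pending acks (depth 5); on promotion
    %% {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS) returns the 2 unacked
    %% messages to the queue, so BQ:len(BQS1) = 5 = BQ:depth(BQS1).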
+-rabbit_boot_step({?MODULE, + [{description, "HA policy validation"}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-mode">>, ?MODULE]}}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-params">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(remove_from_queue/2 :: - (rabbit_amqqueue:name(), [pid()]) +-spec(remove_from_queue/3 :: + (rabbit_amqqueue:name(), pid(), [pid()]) -> {'ok', pid(), [pid()]} | {'error', 'not_found'}). -spec(on_node_up/0 :: () -> 'ok'). --spec(drop_mirror/2 :: - (rabbit_amqqueue:name(), node()) -> rabbit_types:ok_or_error(any())). +-spec(add_mirrors/2 :: (rabbit_amqqueue:name(), [node()]) -> 'ok'). -spec(add_mirror/2 :: - (rabbit_amqqueue:name(), node()) -> rabbit_types:ok_or_error(any())). --spec(add_mirror/3 :: - (rabbit_types:vhost(), binary(), atom()) - -> rabbit_types:ok_or_error(any())). + (rabbit_amqqueue:name(), node()) -> + {'ok', atom()} | rabbit_types:error(any())). +-spec(store_updated_slaves/1 :: (rabbit_types:amqqueue()) -> + rabbit_types:amqqueue()). +-spec(suggested_queue_nodes/1 :: (rabbit_types:amqqueue()) -> + {node(), [node()]}). +-spec(is_mirrored/1 :: (rabbit_types:amqqueue()) -> boolean()). +-spec(update_mirrors/2 :: + (rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok'). -endif. @@ -50,29 +67,35 @@ %% slave (now master) receives messages it's not ready for (for %% example, new consumers). %% Returns {ok, NewMPid, DeadPids} -remove_from_queue(QueueName, DeadPids) -> - DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], +remove_from_queue(QueueName, Self, DeadGMPids) -> rabbit_misc:execute_mnesia_transaction( fun () -> %% Someone else could have deleted the queue before we %% get here. case mnesia:read({rabbit_queue, QueueName}) of [] -> {error, not_found}; - [Q = #amqqueue { pid = QPid, - slave_pids = SPids }] -> - [QPid1 | SPids1] = Alive = - [Pid || Pid <- [QPid | SPids], - not lists:member(node(Pid), DeadNodes)], + [Q = #amqqueue { pid = QPid, + slave_pids = SPids, + gm_pids = GMPids }] -> + {Dead, GMPids1} = lists:partition( + fun ({GM, _}) -> + lists:member(GM, DeadGMPids) + end, GMPids), + DeadPids = [Pid || {_GM, Pid} <- Dead], + Alive = [QPid | SPids] -- DeadPids, + {QPid1, SPids1} = promote_slave(Alive), case {{QPid, SPids}, {QPid1, SPids1}} of {Same, Same} -> + GMPids = GMPids1, %% ASSERTION {ok, QPid1, []}; - _ when QPid =:= QPid1 orelse node(QPid1) =:= node() -> + _ when QPid =:= QPid1 orelse QPid1 =:= Self -> %% Either master hasn't changed, so %% we're ok to update mnesia; or we have %% become the master. - Q1 = Q #amqqueue { pid = QPid1, - slave_pids = SPids1 }, - ok = rabbit_amqqueue:store_queue(Q1), + store_updated_slaves( + Q #amqqueue { pid = QPid1, + slave_pids = SPids1, + gm_pids = GMPids1 }), {ok, QPid1, [QPid | SPids] -- Alive}; _ -> %% Master has changed, and we're not it, @@ -85,32 +108,41 @@ remove_from_queue(QueueName, DeadPids) -> end). 
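To illustrate remove_from_queue/3 above with hypothetical pids: suppose GMPids = [{GMa, MPid}, {GMb, S1}, {GMc, S2}] and DeadGMPids = [GMa]:

    %% lists:partition/2 gives Dead = [{GMa, MPid}],
    %%                         GMPids1 = [{GMb, S1}, {GMc, S2}]
    %% DeadPids = [MPid],  Alive = [MPid, S1, S2] -- [MPid] = [S1, S2]
    %% promote_slave([S1, S2]) = {S1, [S2]}  %% the oldest slave takes over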
on_node_up() -> - Qs = + QNames = rabbit_misc:execute_mnesia_transaction( fun () -> mnesia:foldl( - fun (#amqqueue { mirror_nodes = undefined }, QsN) -> - QsN; - (#amqqueue { name = QName, - mirror_nodes = all }, QsN) -> - [QName | QsN]; - (#amqqueue { name = QName, - mirror_nodes = MNodes }, QsN) -> - case lists:member(node(), MNodes) of - true -> [QName | QsN]; - false -> QsN + fun (Q = #amqqueue{name = QName, + pid = Pid, + slave_pids = SPids}, QNames0) -> + %% We don't want to pass in the whole + %% cluster - we don't want a situation + %% where starting one node causes us to + %% decide to start a mirror on another + PossibleNodes0 = [node(P) || P <- [Pid | SPids]], + PossibleNodes = + case lists:member(node(), PossibleNodes0) of + true -> PossibleNodes0; + false -> [node() | PossibleNodes0] + end, + {_MNode, SNodes} = suggested_queue_nodes( + Q, PossibleNodes), + case lists:member(node(), SNodes) of + true -> [QName | QNames0]; + false -> QNames0 end end, [], rabbit_queue) end), - [add_mirror(Q, node()) || Q <- Qs], + [{ok, _} = add_mirror(QName, node()) || QName <- QNames], ok. -drop_mirror(VHostPath, QueueName, MirrorNode) -> - drop_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). +drop_mirrors(QName, Nodes) -> + [ok = drop_mirror(QName, Node) || Node <- Nodes], + ok. -drop_mirror(Queue, MirrorNode) -> +drop_mirror(QName, MirrorNode) -> if_mirrored_queue( - Queue, + QName, fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids }) -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of [] -> @@ -126,38 +158,61 @@ drop_mirror(Queue, MirrorNode) -> end end). -add_mirror(VHostPath, QueueName, MirrorNode) -> - add_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). +add_mirrors(QName, Nodes) -> + [{ok, _} = add_mirror(QName, Node) || Node <- Nodes], + ok. -add_mirror(Queue, MirrorNode) -> +add_mirror(QName, MirrorNode) -> if_mirrored_queue( - Queue, + QName, fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids } = Q) -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of - [] -> case rabbit_mirror_queue_slave_sup:start_child( - MirrorNode, [Q]) of - {ok, undefined} -> %% Already running - ok; - {ok, SPid} -> - rabbit_log:info( - "Adding mirror of ~s on node ~p: ~p~n", - [rabbit_misc:rs(Name), MirrorNode, SPid]), - ok; - Other -> - Other - end; - [_] -> {error, {queue_already_mirrored_on_node, MirrorNode}} + [] -> + start_child(Name, MirrorNode, Q); + [SPid] -> + case rabbit_misc:is_process_alive(SPid) of + true -> {ok, already_mirrored}; + false -> start_child(Name, MirrorNode, Q) + end end end). -if_mirrored_queue(Queue, Fun) -> - rabbit_amqqueue:with( - Queue, fun (#amqqueue { arguments = Args } = Q) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of - undefined -> ok; - _ -> Fun(Q) - end - end). +start_child(Name, MirrorNode, Q) -> + case rabbit_misc:with_exit_handler( + rabbit_misc:const({ok, down}), + fun () -> + rabbit_mirror_queue_slave_sup:start_child(MirrorNode, [Q]) + end) of + {ok, undefined} -> + %% this means the mirror process was + %% already running on the given node. + {ok, already_mirrored}; + {ok, down} -> + %% Node went down between us deciding to start a mirror + %% and actually starting it. Which is fine. 
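Looking back at on_node_up/0 above, a hedged example of the node restriction (nodes hypothetical): if a queue's master runs on rabbit@A with one slave on rabbit@B, and rabbit@C is the node starting up, then

    %% PossibleNodes0 = ['rabbit@A', 'rabbit@B'],
    %% node() = 'rabbit@C' is not a member, so
    %% PossibleNodes  = ['rabbit@C', 'rabbit@A', 'rabbit@B']

and rabbit@C starts a local mirror only if suggested_queue_nodes/2 places it in SNodes for that queue's policy, rather than consulting the whole cluster.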
+ {ok, node_down}; + {ok, SPid} -> + rabbit_log:info("Adding mirror of ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, SPid]), + {ok, started}; + {error, {{stale_master_pid, StalePid}, _}} -> + rabbit_log:warning("Detected stale HA master while adding " + "mirror of ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, StalePid]), + {ok, stale_master}; + {error, {{duplicate_live_master, _}=Err, _}} -> + Err; + Other -> + Other + end. + +if_mirrored_queue(QName, Fun) -> + rabbit_amqqueue:with(QName, fun (Q) -> + case is_mirrored(Q) of + false -> ok; + true -> Fun(Q) + end + end). report_deaths(_MirrorPid, _IsMaster, _QueueName, []) -> ok; @@ -172,3 +227,146 @@ report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) -> end, rabbit_misc:pid_to_string(MirrorPid), [[rabbit_misc:pid_to_string(P), $ ] || P <- DeadPids]]). + +store_updated_slaves(Q = #amqqueue{slave_pids = SPids, + sync_slave_pids = SSPids}) -> + SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)], + Q1 = Q#amqqueue{sync_slave_pids = SSPids1}, + ok = rabbit_amqqueue:store_queue(Q1), + %% Wake it up so that we emit a stats event + rabbit_amqqueue:wake_up(Q1), + Q1. + +%%---------------------------------------------------------------------------- + +promote_slave([SPid | SPids]) -> + %% The slave pids are maintained in descending order of age, so + %% the one to promote is the oldest. + {SPid, SPids}. + +suggested_queue_nodes(Q) -> + suggested_queue_nodes(Q, rabbit_mnesia:cluster_nodes(running)). + +%% This variant exists so we can pull a call to +%% rabbit_mnesia:cluster_nodes(running) out of a loop or +%% transaction or both. +suggested_queue_nodes(Q, PossibleNodes) -> + {MNode0, SNodes} = actual_queue_nodes(Q), + MNode = case MNode0 of + none -> node(); + _ -> MNode0 + end, + suggested_queue_nodes(policy(<<"ha-mode">>, Q), policy(<<"ha-params">>, Q), + {MNode, SNodes}, PossibleNodes). + +policy(Policy, Q) -> + case rabbit_policy:get(Policy, Q) of + {ok, P} -> P; + _ -> none + end. + +suggested_queue_nodes(<<"all">>, _Params, {MNode, _SNodes}, Possible) -> + {MNode, Possible -- [MNode]}; +suggested_queue_nodes(<<"nodes">>, Nodes0, {MNode, _SNodes}, Possible) -> + Nodes = [list_to_atom(binary_to_list(Node)) || Node <- Nodes0], + Unavailable = Nodes -- Possible, + Available = Nodes -- Unavailable, + case Available of + [] -> %% We have never heard of anything? Not much we can do but + %% keep the master alive. + {MNode, []}; + _ -> case lists:member(MNode, Available) of + true -> {MNode, Available -- [MNode]}; + false -> promote_slave(Available) + end + end; +%% When we need to add nodes, we randomise our candidate list as a +%% crude form of load-balancing. TODO it would also be nice to +%% randomise the list of ones to remove when we have too many - but +%% that would fail to take account of synchronisation... +suggested_queue_nodes(<<"exactly">>, Count, {MNode, SNodes}, Possible) -> + SCount = Count - 1, + {MNode, case SCount > length(SNodes) of + true -> Cand = shuffle((Possible -- [MNode]) -- SNodes), + SNodes ++ lists:sublist(Cand, SCount - length(SNodes)); + false -> lists:sublist(SNodes, SCount) + end}; +suggested_queue_nodes(_, _, {MNode, _}, _) -> + {MNode, []}. + +shuffle(L) -> + {A1,A2,A3} = now(), + random:seed(A1, A2, A3), + {_, L1} = lists:unzip(lists:keysort(1, [{random:uniform(), N} || N <- L])), + L1. + +actual_queue_nodes(#amqqueue{pid = MPid, slave_pids = SPids}) -> + {case MPid of + none -> none; + _ -> node(MPid) + end, [node(Pid) || Pid <- SPids]}. 
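A worked example of the <<"exactly">> clause above (node names hypothetical): with Count = 3, the master on n1, one existing slave on n2 and Possible = [n1, n2, n3, n4], SCount = 2 exceeds the single existing slave, so one extra node is drawn from the shuffled candidates [n3, n4]:

    %% suggested_queue_nodes(<<"exactly">>, 3, {n1, [n2]}, [n1, n2, n3, n4])
    %% => {n1, [n2, n3]} or {n1, [n2, n4]}  (candidate order is randomised)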
+ +is_mirrored(Q) -> + case policy(<<"ha-mode">>, Q) of + <<"all">> -> true; + <<"nodes">> -> true; + <<"exactly">> -> true; + _ -> false + end. + + +%% [1] - rabbit_amqqueue:start_mirroring/1 will turn unmirrored to +%% master and start any needed slaves. However, if node(QPid) is not +%% in the nodes for the policy, it won't switch it. So this is for the +%% case where we kill the existing queue and restart elsewhere. TODO: +%% is this TRTTD? All alternatives seem ugly. +update_mirrors(OldQ = #amqqueue{pid = QPid}, + NewQ = #amqqueue{pid = QPid}) -> + case {is_mirrored(OldQ), is_mirrored(NewQ)} of + {false, false} -> ok; + {true, false} -> rabbit_amqqueue:stop_mirroring(QPid); + {false, true} -> rabbit_amqqueue:start_mirroring(QPid), + update_mirrors0(OldQ, NewQ); %% [1] + {true, true} -> update_mirrors0(OldQ, NewQ) + end. + +update_mirrors0(OldQ = #amqqueue{name = QName}, + NewQ = #amqqueue{name = QName}) -> + All = fun ({A,B}) -> [A|B] end, + OldNodes = All(actual_queue_nodes(OldQ)), + NewNodes = All(suggested_queue_nodes(NewQ)), + add_mirrors(QName, NewNodes -- OldNodes), + drop_mirrors(QName, OldNodes -- NewNodes), + ok. + +%%---------------------------------------------------------------------------- + +validate_policy(KeyList) -> + validate_policy( + proplists:get_value(<<"ha-mode">>, KeyList), + proplists:get_value(<<"ha-params">>, KeyList, none)). + +validate_policy(<<"all">>, none) -> + ok; +validate_policy(<<"all">>, _Params) -> + {error, "ha-mode=\"all\" does not take parameters", []}; + +validate_policy(<<"nodes">>, []) -> + {error, "ha-mode=\"nodes\" list must be non-empty", []}; +validate_policy(<<"nodes">>, Nodes) when is_list(Nodes) -> + case [I || I <- Nodes, not is_binary(I)] of + [] -> ok; + Invalid -> {error, "ha-mode=\"nodes\" takes a list of strings, " + "~p was not a string", [Invalid]} + end; +validate_policy(<<"nodes">>, Params) -> + {error, "ha-mode=\"nodes\" takes a list, ~p given", [Params]}; + +validate_policy(<<"exactly">>, N) when is_integer(N) andalso N > 0 -> + ok; +validate_policy(<<"exactly">>, Params) -> + {error, "ha-mode=\"exactly\" takes an integer, ~p given", [Params]}; + +validate_policy(Mode, _Params) -> + {error, "~p is not a valid ha-mode value", [Mode]}. + diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 03fafc3e..1ba1420f 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -19,17 +19,8 @@ %% For general documentation of HA design, see %% rabbit_mirror_queue_coordinator %% -%% We join the GM group before we add ourselves to the amqqueue -%% record. As a result: -%% 1. We can receive msgs from GM that correspond to messages we will -%% never receive from publishers. -%% 2. When we receive a message from publishers, we must receive a -%% message from the GM group for it. -%% 3. However, that instruction from the GM group can arrive either -%% before or after the actual message. We need to be able to -%% distinguish between GM instructions arriving early, and case (1) -%% above. -%% +%% We receive messages from GM and from publishers, and the gm +%% messages can arrive either before or after the 'actual' message. %% All instructions from the GM group must be processed in the order %% in which they're received. 
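Looking back at validate_policy/1 in rabbit_mirror_queue_misc above, how a few hypothetical policy definitions fare:

    %% [{<<"ha-mode">>, <<"all">>}]                                   -> ok
    %% [{<<"ha-mode">>, <<"exactly">>}, {<<"ha-params">>, 2}]         -> ok
    %% [{<<"ha-mode">>, <<"nodes">>}, {<<"ha-params">>, [<<"a@b">>]}] -> ok
    %% [{<<"ha-mode">>, <<"all">>}, {<<"ha-params">>, 2}]
    %%     -> {error, "ha-mode=\"all\" does not take parameters", []}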
@@ -73,63 +64,59 @@ -record(state, { q, gm, - master_pid, backing_queue, backing_queue_state, sync_timer_ref, rate_timer_ref, - sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId} + sender_queues, %% :: Pid -> {Q Msg, Set MsgId} msg_id_ack, %% :: MsgId -> AckTag - ack_num, msg_id_status, known_senders, - synchronised + %% Master depth - local depth + depth_delta }). -start_link(Q) -> - gen_server2:start_link(?MODULE, Q, []). +start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). -info(QPid) -> - gen_server2:call(QPid, info, infinity). - -init(#amqqueue { name = QueueName } = Q) -> +info(QPid) -> gen_server2:call(QPid, info, infinity). + +init(Q = #amqqueue { name = QName }) -> + %% We join the GM group before we add ourselves to the amqqueue + %% record. As a result: + %% 1. We can receive msgs from GM that correspond to messages we will + %% never receive from publishers. + %% 2. When we receive a message from publishers, we must receive a + %% message from the GM group for it. + %% 3. However, that instruction from the GM group can arrive either + %% before or after the actual message. We need to be able to + %% distinguish between GM instructions arriving early, and case (1) + %% above. + %% + process_flag(trap_exit, true), %% amqqueue_process traps exits too. + {ok, GM} = gm:start_link(QName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), + receive {joined, GM} -> ok end, Self = self(), Node = node(), case rabbit_misc:execute_mnesia_transaction( - fun () -> - [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] = - mnesia:read({rabbit_queue, QueueName}), - case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of - [] -> MPids1 = MPids ++ [Self], - ok = rabbit_amqqueue:store_queue( - Q1 #amqqueue { slave_pids = MPids1 }), - {new, QPid}; - [SPid] -> true = rabbit_misc:is_process_alive(SPid), - existing - end - end) of - {new, MPid} -> - process_flag(trap_exit, true), %% amqqueue_process traps exits too. - {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), - receive {joined, GM} -> - ok - end, - erlang:monitor(process, MPid), + fun() -> init_it(Self, GM, Node, QName) end) of + {new, QPid} -> + erlang:monitor(process, QPid), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [Self]), ok = rabbit_memory_monitor:register( Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}), {ok, BQ} = application:get_env(backing_queue_module), - BQS = bq_init(BQ, Q, false), - State = #state { q = Q, + Q1 = Q #amqqueue { pid = QPid }, + BQS = bq_init(BQ, Q1, false), + State = #state { q = Q1, gm = GM, - master_pid = MPid, backing_queue = BQ, backing_queue_state = BQS, rate_timer_ref = undefined, @@ -137,70 +124,83 @@ init(#amqqueue { name = QueueName } = Q) -> sender_queues = dict:new(), msg_id_ack = dict:new(), - ack_num = 0, msg_id_status = dict:new(), known_senders = pmon:new(), - synchronised = false + depth_delta = undefined }, rabbit_event:notify(queue_slave_created, infos(?CREATION_EVENT_KEYS, State)), - ok = gm:broadcast(GM, request_length), + ok = gm:broadcast(GM, request_depth), {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}; + {stale, StalePid} -> + {stop, {stale_master_pid, StalePid}}; + duplicate_live_master -> + {stop, {duplicate_live_master, Node}}; existing -> + gm:leave(GM), ignore end. 
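For quick reference, a condensed sketch of how init/1 above reacts to
each outcome of the mnesia transaction (a summary of the code, not part
of the patch):

    %% {new, QPid}           -> monitor master, broadcast request_depth, run
    %% {stale, StalePid}     -> {stop, {stale_master_pid, StalePid}}
    %% duplicate_live_master -> {stop, {duplicate_live_master, Node}}
    %% existing              -> gm:leave(GM), ignore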
-handle_call({deliver, Delivery = #delivery { immediate = true }}, - From, State) -> - %% It is safe to reply 'false' here even if a) we've not seen the - %% msg via gm, or b) the master dies before we receive the msg via - %% gm. In the case of (a), we will eventually receive the msg via - %% gm, and it's only the master's result to the channel that is - %% important. In the case of (b), if the master does die and we do - %% get promoted then at that point we have no consumers, thus - %% 'false' is precisely the correct answer. However, we must be - %% careful to _not_ enqueue the message in this case. - - %% Note this is distinct from the case where we receive the msg - %% via gm first, then we're promoted to master, and only then do - %% we receive the msg from the channel. - gen_server2:reply(From, false), %% master may deliver it, not us - noreply(maybe_enqueue_message(Delivery, false, State)); - -handle_call({deliver, Delivery = #delivery { mandatory = true }}, - From, State) -> - gen_server2:reply(From, true), %% amqqueue throws away the result anyway - noreply(maybe_enqueue_message(Delivery, true, State)); +init_it(Self, GM, Node, QName) -> + [Q = #amqqueue { pid = QPid, slave_pids = SPids, gm_pids = GMPids }] = + mnesia:read({rabbit_queue, QName}), + case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of + [] -> add_slave(Q, Self, GM), + {new, QPid}; + [QPid] -> case rabbit_misc:is_process_alive(QPid) of + true -> duplicate_live_master; + false -> {stale, QPid} + end; + [SPid] -> case rabbit_misc:is_process_alive(SPid) of + true -> existing; + false -> Q1 = Q#amqqueue { + slave_pids = SPids -- [SPid], + gm_pids = [T || T = {_, S} <- GMPids, + S =/= SPid] }, + add_slave(Q1, Self, GM), + {new, QPid} + end + end. + +%% Add to the end, so they are in descending order of age, see +%% rabbit_mirror_queue_misc:promote_slave/1 +add_slave(Q = #amqqueue { slave_pids = SPids, gm_pids = GMPids }, New, GM) -> + rabbit_mirror_queue_misc:store_updated_slaves( + Q#amqqueue{slave_pids = SPids ++ [New], gm_pids = [{GM, New} | GMPids]}). + +handle_call({deliver, Delivery, true}, From, State) -> + %% Synchronous, "mandatory" deliver mode. + gen_server2:reply(From, ok), + noreply(maybe_enqueue_message(Delivery, State)); handle_call({gm_deaths, Deaths}, From, - State = #state { q = #amqqueue { name = QueueName }, - gm = GM, - master_pid = MPid }) -> - %% The GM has told us about deaths, which means we're not going to - %% receive any more messages from GM - case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + State = #state { q = Q = #amqqueue { name = QName, pid = MPid }}) -> + Self = self(), + case rabbit_mirror_queue_misc:remove_from_queue(QName, Self, Deaths) of {error, not_found} -> gen_server2:reply(From, ok), {stop, normal, State}; {ok, Pid, DeadPids} -> - rabbit_mirror_queue_misc:report_deaths(self(), false, QueueName, + rabbit_mirror_queue_misc:report_deaths(Self, false, QName, DeadPids), - if node(Pid) =:= node(MPid) -> + case Pid of + MPid -> %% master hasn't changed - reply(ok, State); - node(Pid) =:= node() -> + gen_server2:reply(From, ok), + noreply(State); + Self -> %% we've become master - promote_me(From, State); - true -> - %% master has changed to not us. 
+ QueueState = promote_me(From, State), + {become, rabbit_amqqueue_process, QueueState, hibernate}; + _ -> + %% master has changed to not us gen_server2:reply(From, ok), erlang:monitor(process, Pid), - ok = gm:broadcast(GM, heartbeat), - noreply(State #state { master_pid = Pid }) + noreply(State #state { q = Q #amqqueue { pid = Pid } }) end end; @@ -213,13 +213,14 @@ handle_cast({run_backing_queue, Mod, Fun}, State) -> handle_cast({gm, Instruction}, State) -> handle_process_result(process_instruction(Instruction, State)); -handle_cast({deliver, Delivery = #delivery{sender = Sender}, Flow}, State) -> - %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. +handle_cast({deliver, Delivery = #delivery{sender = Sender}, true, Flow}, + State) -> + %% Asynchronous, non-"mandatory", deliver mode. case Flow of flow -> credit_flow:ack(Sender); noflow -> ok end, - noreply(maybe_enqueue_message(Delivery, true, State)); + noreply(maybe_enqueue_message(Delivery, State)); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -249,8 +250,8 @@ handle_info(timeout, State) -> noreply(backing_queue_timeout(State)); handle_info({'DOWN', _MonitorRef, process, MPid, _Reason}, - State = #state { gm = GM, master_pid = MPid }) -> - ok = gm:broadcast(GM, {process_death, MPid}), + State = #state { gm = GM, q = #amqqueue { pid = MPid } }) -> + ok = gm:broadcast(GM, process_death), noreply(State); handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> @@ -286,7 +287,7 @@ terminate(Reason, #state { q = Q, rate_timer_ref = RateTRef }) -> ok = gm:leave(GM), QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q, BQ, BQS, RateTRef, [], [], pmon:new(), dict:new()), + Q, BQ, BQS, RateTRef, [], pmon:new(), dict:new()), rabbit_amqqueue_process:terminate(Reason, QueueState); terminate([_SPid], _Reason) -> %% gm case @@ -332,25 +333,26 @@ prioritise_info(Msg, _State) -> %% GM %% --------------------------------------------------------------------------- -joined([SPid], _Members) -> - SPid ! {joined, self()}, - ok. +joined([SPid], _Members) -> SPid ! {joined, self()}, ok. -members_changed([_SPid], _Births, []) -> - ok; -members_changed([SPid], _Births, Deaths) -> - inform_deaths(SPid, Deaths). +members_changed([_SPid], _Births, []) -> ok; +members_changed([ SPid], _Births, Deaths) -> inform_deaths(SPid, Deaths). -handle_msg([_SPid], _From, heartbeat) -> - ok; -handle_msg([_SPid], _From, request_length) -> +handle_msg([_SPid], _From, request_depth) -> %% This is only of value to the master ok; handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> %% This is only of value to the master ok; -handle_msg([SPid], _From, {process_death, Pid}) -> - inform_deaths(SPid, [Pid]); +handle_msg([_SPid], _From, process_death) -> + %% Since GM is by nature lazy we need to make sure there is some + %% traffic when a master dies, to make sure we get informed of the + %% death. That's all process_death does, create some traffic. We + %% must not take any notice of the master death here since it + %% comes without ordering guarantees - there could still be + %% messages from the master we have yet to receive. When we get + %% members_changed, then there will be no more messages. + ok; handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) -> ok = gen_server2:cast(CPid, {gm, Msg}), {stop, {shutdown, ring_shutdown}}; @@ -371,8 +373,8 @@ infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. 
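Putting the pieces above together, the intended sequence when a master
dies looks roughly like this (a sketch, not an exhaustive trace):

    %% 1. slave receives {'DOWN', ...} for the master pid
    %% 2. slave broadcasts process_death via gm - traffic only, ignored
    %%    on receipt
    %% 3. any in-flight broadcasts from the old master still drain, in order
    %% 4. gm delivers members_changed/3 with the death, after which no
    %%    more messages from that master can arrive; promotion (if any)
    %%    is then decided via the {gm_deaths, Deaths} call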
i(pid, _State) -> self(); i(name, #state { q = #amqqueue { name = Name } }) -> Name; -i(master_pid, #state { master_pid = MPid }) -> MPid; -i(is_synchronised, #state { synchronised = Synchronised }) -> Synchronised; +i(master_pid, #state { q = #amqqueue { pid = MPid } }) -> MPid; +i(is_synchronised, #state { depth_delta = DD }) -> DD =:= 0; i(Item, _State) -> throw({bad_argument, Item}). bq_init(BQ, Q, Recover) -> @@ -390,14 +392,20 @@ run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. -needs_confirming(#delivery{ msg_seq_no = undefined }, _State) -> - never; -needs_confirming(#delivery { message = #basic_message { - is_persistent = true } }, - #state { q = #amqqueue { durable = true } }) -> - eventually; -needs_confirming(_Delivery, _State) -> - immediately. +send_or_record_confirm(_, #delivery{ msg_seq_no = undefined }, MS, _State) -> + MS; +send_or_record_confirm(published, #delivery { sender = ChPid, + msg_seq_no = MsgSeqNo, + message = #basic_message { + id = MsgId, + is_persistent = true } }, + MS, #state { q = #amqqueue { durable = true } }) -> + dict:store(MsgId, {published, ChPid, MsgSeqNo} , MS); +send_or_record_confirm(_Status, #delivery { sender = ChPid, + msg_seq_no = MsgSeqNo }, + MS, _State) -> + ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), + MS. confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> {CMs, MS1} = @@ -409,16 +417,16 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> %% If it needed confirming, it'll have %% already been done. Acc; - {ok, {published, ChPid}} -> + {ok, published} -> %% Still not seen it from the channel, just %% record that it's been confirmed. - {CMsN, dict:store(MsgId, {confirmed, ChPid}, MSN)}; + {CMsN, dict:store(MsgId, confirmed, MSN)}; {ok, {published, ChPid, MsgSeqNo}} -> %% Seen from both GM and Channel. Can now %% confirm. {rabbit_misc:gb_trees_cons(ChPid, MsgSeqNo, CMsN), dict:erase(MsgId, MSN)}; - {ok, {confirmed, _ChPid}} -> + {ok, confirmed} -> %% It's already been confirmed. This is %% probably it's been both sync'd to disk %% and then delivered and ack'd before we've @@ -442,17 +450,14 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, msg_id_ack = MA, msg_id_status = MS, known_senders = KS }) -> - rabbit_event:notify(queue_slave_promoted, [{pid, self()}, - {name, QName}]), rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", [rabbit_misc:rs(QName), rabbit_misc:pid_to_string(self())]), Q1 = Q #amqqueue { pid = self() }, {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( Q1, GM, rabbit_mirror_queue_master:sender_death_fun(), - rabbit_mirror_queue_master:length_fun()), + rabbit_mirror_queue_master:depth_fun()), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), - ok = gm:confirmed_broadcast(GM, heartbeat), %% Everything that we're monitoring, we need to ensure our new %% coordinator is monitoring. @@ -460,8 +465,7 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, ok = rabbit_mirror_queue_coordinator:ensure_monitoring(CPid, MPids), %% We find all the messages that we've received from channels but - %% not from gm, and if they're due to be enqueued on promotion - %% then we pass them to the + %% not from gm, and pass them to the %% queue_process:init_with_backing_queue_state to be enqueued. 
%% %% We also have to requeue messages which are pending acks: the @@ -489,18 +493,18 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, %% %% MS contains the following three entry types: %% - %% a) {published, ChPid}: + %% a) published: %% published via gm only; pending arrival of publication from %% channel, maybe pending confirm. %% %% b) {published, ChPid, MsgSeqNo}: %% published via gm and channel; pending confirm. %% - %% c) {confirmed, ChPid}: + %% c) confirmed: %% published via gm only, and confirmed; pending publication %% from channel. %% - %% d) discarded + %% d) discarded: %% seen via gm only as discarded. Pending publication from %% channel %% @@ -517,34 +521,24 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, %% those messages are then requeued. However, as discussed above, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. - %% - %% Everything that's in MA gets requeued. Consequently the new - %% master should start with a fresh AM as there are no messages - %% pending acks. - MSList = dict:to_list(MS), - SS = dict:from_list( - [E || E = {_MsgId, discarded} <- MSList] ++ - [{MsgId, Status} - || {MsgId, {Status, _ChPid}} <- MSList, - Status =:= published orelse Status =:= confirmed]), + St = [published, confirmed, discarded], + SS = dict:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS), + AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, SS, MPids), - - MTC = lists:foldl(fun ({MsgId, {published, ChPid, MsgSeqNo}}, MTC0) -> - gb_trees:insert(MsgId, {ChPid, MsgSeqNo}, MTC0); - (_, MTC0) -> - MTC0 - end, gb_trees:empty(), MSList), - NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)], - AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)], + CPid, BQ, BQS, GM, AckTags, SS, MPids), + + MTC = dict:fold(fun (MsgId, {published, ChPid, MsgSeqNo}, MTC0) -> + gb_trees:insert(MsgId, {ChPid, MsgSeqNo}, MTC0); + (_Msgid, _Status, MTC0) -> + MTC0 + end, gb_trees:empty(), MS), Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ), - {Delivery, true} <- queue:to_list(PubQ)], - QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q1, rabbit_mirror_queue_master, MasterState, RateTRef, - AckTags, Deliveries, KS, MTC), - {become, rabbit_amqqueue_process, QueueState, hibernate}. + Delivery <- queue:to_list(PubQ)], + rabbit_amqqueue_process:init_with_backing_queue_state( + Q1, rabbit_mirror_queue_master, MasterState, RateTRef, Deliveries, KS, + MTC). noreply(State) -> {NewState, Timeout} = next_state(State), @@ -560,9 +554,9 @@ next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> confirm_messages(MsgIds, State #state { backing_queue_state = BQS1 })), case BQ:needs_timeout(BQS1) of - false -> {stop_sync_timer(State1), hibernate}; - idle -> {stop_sync_timer(State1), 0 }; - timed -> {ensure_sync_timer(State1), 0 } + false -> {stop_sync_timer(State1), hibernate }; + idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL}; + timed -> {ensure_sync_timer(State1), 0 } end. backing_queue_timeout(State = #state { backing_queue = BQ }) -> @@ -638,49 +632,22 @@ confirm_sender_death(Pid) -> ok. 
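A tiny worked example of the MS -> SS/MTC split above (message ids, pid
and seq no are invented):

    %% MS = m1 -> published
    %%      m2 -> {published, ChPid, 42}
    %%      m3 -> confirmed
    %%      m4 -> discarded
    %%
    %% SS keeps m1, m3 and m4 (their status is in St); MTC gains
    %% m2 -> {ChPid, 42}, the only entry still pending its confirm.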
 maybe_enqueue_message(
-    Delivery = #delivery { message    = #basic_message { id = MsgId },
-                           msg_seq_no = MsgSeqNo,
-                           sender     = ChPid },
-    EnqueueOnPromotion,
+    Delivery = #delivery { message = #basic_message { id = MsgId },
+                           sender  = ChPid },
     State = #state { sender_queues = SQ, msg_id_status = MS }) ->
     State1 = ensure_monitoring(ChPid, State),
     %% We will never see {published, ChPid, MsgSeqNo} here.
     case dict:find(MsgId, MS) of
         error ->
             {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
-            MQ1 = queue:in({Delivery, EnqueueOnPromotion}, MQ),
+            MQ1 = queue:in(Delivery, MQ),
             SQ1 = dict:store(ChPid, {MQ1, PendingCh}, SQ),
             State1 #state { sender_queues = SQ1 };
-        {ok, {confirmed, ChPid}} ->
-            %% BQ has confirmed it but we didn't know what the
-            %% msg_seq_no was at the time. We do now!
-            ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]),
-            SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
-            State1 #state { sender_queues = SQ1,
-                            msg_id_status = dict:erase(MsgId, MS) };
-        {ok, {published, ChPid}} ->
-            %% It was published to the BQ and we didn't know the
-            %% msg_seq_no so couldn't confirm it at the time.
-            case needs_confirming(Delivery, State1) of
-                never ->
-                    SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
-                    State1 #state { msg_id_status = dict:erase(MsgId, MS),
-                                    sender_queues = SQ1 };
-                eventually ->
-                    State1 #state {
-                      msg_id_status =
-                          dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) };
-                immediately ->
-                    ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]),
-                    SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
-                    State1 #state { msg_id_status = dict:erase(MsgId, MS),
-                                    sender_queues = SQ1 }
-            end;
-        {ok, discarded} ->
-            %% We've already heard from GM that the msg is to be
-            %% discarded. We won't see this again.
+        {ok, Status} ->
+            MS1 = send_or_record_confirm(
+                    Status, Delivery, dict:erase(MsgId, MS), State1),
             SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
-            State1 #state { msg_id_status = dict:erase(MsgId, MS),
+            State1 #state { msg_id_status = MS1,
                             sender_queues = SQ1 }
     end.
@@ -698,45 +665,27 @@ remove_from_pending_ch(MsgId, ChPid, SQ) ->
             dict:store(ChPid, {MQ, sets:del_element(MsgId, PendingCh)}, SQ)
     end.
-process_instruction(
-  {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }},
-  State = #state { sender_queues       = SQ,
-                   backing_queue       = BQ,
-                   backing_queue_state = BQS,
-                   msg_id_status       = MS }) ->
-
-    %% We really are going to do the publish right now, even though we
-    %% may not have seen it directly from the channel. As a result, we
-    %% may know that it needs confirming without knowing its
-    %% msg_seq_no, which means that we can see the confirmation come
-    %% back from the backing queue without knowing the msg_seq_no,
-    %% which means that we're going to have to hang on to the fact
-    %% that we've seen the msg_id confirmed until we can associate it
-    %% with a msg_seq_no.
+publish_or_discard(Status, ChPid, MsgId,
+                   State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+    %% We really are going to do the publish/discard right now, even
+    %% though we may not have seen it directly from the channel. But
+    %% we cannot issue confirms until the latter has happened. So we
+    %% need to keep track of the MsgId and its confirmation status in
+    %% the meantime.
State1 = ensure_monitoring(ChPid, State), {MQ, PendingCh} = get_sender_queue(ChPid, SQ), {MQ1, PendingCh1, MS1} = case queue:out(MQ) of {empty, _MQ2} -> {MQ, sets:add_element(MsgId, PendingCh), - dict:store(MsgId, {published, ChPid}, MS)}; - {{value, {Delivery = #delivery { - msg_seq_no = MsgSeqNo, - message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ2} -> - %% We received the msg from the channel first. Thus we - %% need to deal with confirms here. - case needs_confirming(Delivery, State1) of - never -> - {MQ2, PendingCh, MS}; - eventually -> - {MQ2, PendingCh, - dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)}; - immediately -> - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - {MQ2, PendingCh, MS} - end; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + dict:store(MsgId, Status, MS)}; + {{value, Delivery = #delivery { + message = #basic_message { id = MsgId } }}, MQ2} -> + {MQ2, PendingCh, + %% We received the msg from the channel first. Thus + %% we need to deal with confirms here. + send_or_record_confirm(Status, Delivery, MS, State1)}; + {{value, #delivery {}}, _MQ2} -> %% The instruction was sent to us before we were %% within the slave_pids within the #amqqueue{} %% record. We'll never receive the message directly @@ -744,73 +693,48 @@ process_instruction( %% expecting any confirms from us. {MQ, PendingCh, MS} end, - SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), - State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 }, - - {ok, - case Deliver of - false -> - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State2 #state { backing_queue_state = BQS1 }; - {true, AckRequired} -> - {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, - ChPid, BQS), - maybe_store_ack(AckRequired, MsgId, AckTag, - State2 #state { backing_queue_state = BQS1 }) - end}; -process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, - State = #state { sender_queues = SQ, - backing_queue = BQ, - backing_queue_state = BQS, - msg_id_status = MS }) -> - %% Many of the comments around the publish head above apply here - %% too. - State1 = ensure_monitoring(ChPid, State), - {MQ, PendingCh} = get_sender_queue(ChPid, SQ), - {MQ1, PendingCh1, MS1} = - case queue:out(MQ) of - {empty, _MQ} -> - {MQ, sets:add_element(MsgId, PendingCh), - dict:store(MsgId, discarded, MS)}; - {{value, {#delivery { message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ2} -> - %% We've already seen it from the channel, we're not - %% going to see this again, so don't add it to MS - {MQ2, PendingCh, MS}; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> - %% The instruction was sent to us before we were - %% within the slave_pids within the #amqqueue{} - %% record. We'll never receive the message directly - %% from the channel. - {MQ, PendingCh, MS} - end, - SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), - BQS1 = BQ:discard(Msg, ChPid, BQS), - {ok, State1 #state { sender_queues = SQ1, - msg_id_status = MS1, - backing_queue_state = BQS1 }}; -process_instruction({set_length, Length, AckRequired}, + State1 #state { sender_queues = SQ1, msg_id_status = MS1 }. 
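The three queue:out/1 cases above, tabulated (a summary of the code,
not part of the patch):

    %% {empty, _}                    gm got here first: record MsgId as
    %%                               pending for ChPid, with Status
    %% head delivery has our MsgId   channel got here first: confirm now
    %%                               or record {published, ChPid, SeqNo}
    %% head delivery has another id  pre-join instruction: the channel
    %%                               copy will never arrive; leave MS alone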
+ + +process_instruction({publish, ChPid, MsgProps, + Msg = #basic_message { id = MsgId }}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(published, ChPid, MsgId, State), + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + {ok, State1 #state { backing_queue_state = BQS1 }}; +process_instruction({publish_delivered, ChPid, MsgProps, + Msg = #basic_message { id = MsgId }}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(published, ChPid, MsgId, State), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, BQS), + {ok, maybe_store_ack(true, MsgId, AckTag, + State1 #state { backing_queue_state = BQS1 })}; +process_instruction({discard, ChPid, MsgId}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(discarded, ChPid, MsgId, State), + BQS1 = BQ:discard(MsgId, ChPid, BQS), + {ok, State1 #state { backing_queue_state = BQS1 }}; +process_instruction({drop, Length, Dropped, AckRequired}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> QLen = BQ:len(BQS), - ToDrop = QLen - Length, - {ok, - case ToDrop >= 0 of - true -> - State1 = - lists:foldl( - fun (const, StateN = #state {backing_queue_state = BQSN}) -> - {{#basic_message{id = MsgId}, _IsDelivered, AckTag, - _Remaining}, BQSN1} = BQ:fetch(AckRequired, BQSN), - maybe_store_ack( - AckRequired, MsgId, AckTag, - StateN #state { backing_queue_state = BQSN1 }) - end, State, lists:duplicate(ToDrop, const)), - set_synchronised(true, State1); - false -> - State - end}; + ToDrop = case QLen - Length of + N when N > 0 -> N; + _ -> 0 + end, + State1 = lists:foldl( + fun (const, StateN = #state{backing_queue_state = BQSN}) -> + {{#basic_message{id = MsgId}, _, AckTag, _}, BQSN1} = + BQ:fetch(AckRequired, BQSN), + maybe_store_ack( + AckRequired, MsgId, AckTag, + StateN #state { backing_queue_state = BQSN1 }) + end, State, lists:duplicate(ToDrop, const)), + {ok, case AckRequired of + true -> State1; + false -> update_delta(ToDrop - Dropped, State1) + end}; process_instruction({fetch, AckRequired, MsgId, Remaining}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -821,11 +745,10 @@ process_instruction({fetch, AckRequired, MsgId, Remaining}, AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), maybe_store_ack(AckRequired, MsgId, AckTag, State #state { backing_queue_state = BQS1 }); - Other when Other + 1 =:= Remaining -> - set_synchronised(true, State); - Other when Other < Remaining -> - %% we must be shorter than the master - State + _ when QLen =< Remaining andalso AckRequired -> + State; + _ when QLen =< Remaining -> + update_delta(-1, State) end}; process_instruction({ack, MsgIds}, State = #state { backing_queue = BQ, @@ -834,27 +757,17 @@ process_instruction({ack, MsgIds}, {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), [] = MsgIds1 -- MsgIds, %% ASSERTION - {ok, State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }}; + {ok, update_delta(length(MsgIds1) - length(MsgIds), + State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 })}; process_instruction({requeue, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), - {ok, case length(AckTags) =:= length(MsgIds) of - true -> - {MsgIds, BQS1} = BQ:requeue(AckTags, BQS), - State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }; - false -> - %% The 
only thing we can safely do is nuke out our BQ - %% and MA. The interaction between this and confirms - %% doesn't really bear thinking about... - {_Count, BQS1} = BQ:purge(BQS), - {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1), - State #state { msg_id_ack = dict:new(), - backing_queue_state = BQS2 } - end}; + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + {ok, State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 }}; process_instruction({sender_death, ChPid}, State = #state { sender_queues = SQ, msg_id_status = MS, @@ -872,10 +785,11 @@ process_instruction({sender_death, ChPid}, msg_id_status = MS1, known_senders = pmon:demonitor(ChPid, KS) } end}; -process_instruction({length, Length}, - State = #state { backing_queue = BQ, +process_instruction({depth, Depth}, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - {ok, set_synchronised(Length =:= BQ:len(BQS), State)}; + {ok, set_delta(Depth - BQ:depth(BQS), State)}; + process_instruction({delete_and_terminate, Reason}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -887,31 +801,45 @@ msg_ids_to_acktags(MsgIds, MA) -> lists:foldl( fun (MsgId, {Acc, MAN}) -> case dict:find(MsgId, MA) of - error -> {Acc, MAN}; - {ok, {_Num, AckTag}} -> {[AckTag | Acc], - dict:erase(MsgId, MAN)} + error -> {Acc, MAN}; + {ok, AckTag} -> {[AckTag | Acc], dict:erase(MsgId, MAN)} end end, {[], MA}, MsgIds), {lists:reverse(AckTags), MA1}. -ack_all(BQ, MA, BQS) -> - BQ:ack([AckTag || {_MsgId, {_Num, AckTag}} <- dict:to_list(MA)], BQS). - maybe_store_ack(false, _MsgId, _AckTag, State) -> State; -maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA, - ack_num = Num }) -> - State #state { msg_id_ack = dict:store(MsgId, {Num, AckTag}, MA), - ack_num = Num + 1 }. - -%% We intentionally leave out the head where a slave becomes -%% unsynchronised: we assert that can never happen. -set_synchronised(true, State = #state { q = #amqqueue { name = QName }, - synchronised = false }) -> - rabbit_event:notify(queue_slave_synchronised, [{pid, self()}, - {name, QName}]), - State #state { synchronised = true }; -set_synchronised(true, State) -> +maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA }) -> + State #state { msg_id_ack = dict:store(MsgId, AckTag, MA) }. + +set_delta(0, State = #state { depth_delta = undefined }) -> + ok = record_synchronised(State#state.q), + State #state { depth_delta = 0 }; +set_delta(NewDelta, State = #state { depth_delta = undefined }) -> + true = NewDelta > 0, %% assertion + State #state { depth_delta = NewDelta }; +set_delta(NewDelta, State = #state { depth_delta = Delta }) -> + update_delta(NewDelta - Delta, State). + +update_delta(_DeltaChange, State = #state { depth_delta = undefined }) -> State; -set_synchronised(false, State = #state { synchronised = false }) -> - State. +update_delta( DeltaChange, State = #state { depth_delta = 0 }) -> + 0 = DeltaChange, %% assertion: we cannot become unsync'ed + State; +update_delta( DeltaChange, State = #state { depth_delta = Delta }) -> + true = DeltaChange =< 0, %% assertion: we cannot become 'less' sync'ed + set_delta(Delta + DeltaChange, State #state { depth_delta = undefined }). 
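A small worked example of the delta bookkeeping above (the depths are
invented):

    %% gm {depth, 10} arrives while BQ:depth/1 is 7
    %%   -> set_delta(3): slave is 3 behind, not yet synchronised
    %% three catch-up events each apply update_delta(-1)
    %%   -> 3, 2, 1, 0; on hitting 0, record_synchronised/1 adds this
    %%      slave to sync_slave_pids exactly once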
+ +record_synchronised(#amqqueue { name = QName }) -> + Self = self(), + rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:read({rabbit_queue, QName}) of + [] -> + ok; + [Q = #amqqueue { sync_slave_pids = SSPids }] -> + rabbit_mirror_queue_misc:store_updated_slaves( + Q #amqqueue { sync_slave_pids = [Self | SSPids] }), + ok + end + end). diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index d41aa09b..ab9a9ceb 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -19,7 +19,7 @@ -include("rabbit_framing.hrl"). -export([method_record_type/1, polite_pause/0, polite_pause/1]). --export([die/1, frame_error/2, amqp_error/4, quit/1, quit/2, +-export([die/1, frame_error/2, amqp_error/4, quit/1, protocol_error/3, protocol_error/4, protocol_error/1]). -export([not_found/1, assert_args_equivalence/4]). -export([dirty_read/1]). @@ -29,14 +29,14 @@ -export([enable_cover/1, report_cover/1]). -export([start_cover/1]). -export([confirm_to_sender/2]). --export([throw_on_error/2, with_exit_handler/2, filter_exit_map/2]). --export([is_abnormal_termination/1]). +-export([throw_on_error/2, with_exit_handler/2, is_abnormal_exit/1, + filter_exit_map/2]). -export([with_user/2, with_user_and_vhost/3]). -export([execute_mnesia_transaction/1]). -export([execute_mnesia_transaction/2]). -export([execute_mnesia_tx_with_tail/1]). -export([ensure_ok/2]). --export([tcp_name/3]). +-export([tcp_name/3, format_inet_error/1]). -export([upmap/2, map_in_order/2]). -export([table_filter/3]). -export([dirty_read_all/1, dirty_foreach_key/2, dirty_dump_log/1]). @@ -60,6 +60,15 @@ -export([multi_call/2]). -export([os_cmd/1]). -export([gb_sets_difference/2]). +-export([version/0]). +-export([sequence_error/1]). +-export([json_encode/1, json_decode/1, json_to_term/1, term_to_json/1]). +-export([base64url/1]). + +%% Horrible macro to use in guards +-define(IS_BENIGN_EXIT(R), + R =:= noproc; R =:= noconnection; R =:= nodedown; R =:= normal; + R =:= shutdown). %%---------------------------------------------------------------------------- @@ -87,7 +96,6 @@ (rabbit_framing:amqp_exception()) -> channel_or_connection_exit()). -spec(quit/1 :: (integer()) -> no_return()). --spec(quit/2 :: (string(), [term()]) -> no_return()). -spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) -> rabbit_types:connection_exit()). @@ -137,8 +145,8 @@ -spec(throw_on_error/2 :: (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A). -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A). +-spec(is_abnormal_exit/1 :: (any()) -> boolean()). -spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(is_abnormal_termination/1 :: (any()) -> boolean()). -spec(with_user/2 :: (rabbit_types:username(), thunk(A)) -> A). -spec(with_user_and_vhost/3 :: (rabbit_types:username(), rabbit_types:vhost(), thunk(A)) @@ -152,6 +160,7 @@ -spec(tcp_name/3 :: (atom(), inet:ip_address(), rabbit_networking:ip_port()) -> atom()). +-spec(format_inet_error/1 :: (atom()) -> string()). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(table_filter/3:: (fun ((A) -> boolean()), fun ((A, boolean()) -> 'ok'), @@ -212,6 +221,14 @@ ([pid()], any()) -> {[{pid(), any()}], [{pid(), any()}]}). -spec(os_cmd/1 :: (string()) -> string()). -spec(gb_sets_difference/2 :: (gb_set(), gb_set()) -> gb_set()). +-spec(version/0 :: () -> string()). +-spec(sequence_error/1 :: ([({'error', any()} | any())]) + -> {'error', any()} | any()). 
+-spec(json_encode/1 :: (any()) -> {'ok', string()} | {'error', any()}). +-spec(json_decode/1 :: (string()) -> {'ok', any()} | 'error'). +-spec(json_to_term/1 :: (any()) -> any()). +-spec(term_to_json/1 :: (any()) -> any()). +-spec(base64url/1 :: (binary()) -> string()). -endif. @@ -390,19 +407,9 @@ report_coverage_percentage(File, Cov, NotCov, Mod) -> confirm_to_sender(Pid, MsgSeqNos) -> gen_server2:cast(Pid, {confirm, MsgSeqNos, self()}). -%% -%% @doc Halts the emulator after printing out an error message io-formatted with -%% the supplied arguments. The exit status of the beam process will be set to 1. -%% -quit(Fmt, Args) -> - io:format("ERROR: " ++ Fmt ++ "~n", Args), - quit(1). - -%% %% @doc Halts the emulator returning the given status code to the os. %% On Windows this function will block indefinitely so as to give the io %% subsystem time to flush stdout completely. -%% quit(Status) -> case os:type() of {unix, _} -> halt(Status); @@ -423,13 +430,14 @@ with_exit_handler(Handler, Thunk) -> try Thunk() catch - exit:{R, _} when R =:= noproc; R =:= nodedown; - R =:= normal; R =:= shutdown -> - Handler(); - exit:{{R, _}, _} when R =:= nodedown; R =:= shutdown -> - Handler() + exit:{R, _} when ?IS_BENIGN_EXIT(R) -> Handler(); + exit:{{R, _}, _} when ?IS_BENIGN_EXIT(R) -> Handler() end. +is_abnormal_exit(R) when ?IS_BENIGN_EXIT(R) -> false; +is_abnormal_exit({R, _}) when ?IS_BENIGN_EXIT(R) -> false; +is_abnormal_exit(_) -> true. + filter_exit_map(F, L) -> Ref = make_ref(), lists:filter(fun (R) -> R =/= Ref end, @@ -437,11 +445,6 @@ filter_exit_map(F, L) -> fun () -> Ref end, fun () -> F(I) end) || I <- L]). -is_abnormal_termination(Reason) - when Reason =:= noproc; Reason =:= noconnection; - Reason =:= normal; Reason =:= shutdown -> false; -is_abnormal_termination({shutdown, _}) -> false; -is_abnormal_termination(_) -> true. with_user(Username, Thunk) -> fun () -> @@ -510,6 +513,10 @@ tcp_name(Prefix, IPAddress, Port) list_to_atom( format("~w_~s:~w", [Prefix, inet_parse:ntoa(IPAddress), Port])). +format_inet_error(address) -> "cannot connect to host/port"; +format_inet_error(timeout) -> "timed out"; +format_inet_error(Error) -> inet:format_error(Error). + %% This is a modified version of Luke Gorrie's pmap - %% http://lukego.livejournal.com/6753.html - that doesn't care about %% the order in which results are received. @@ -939,3 +946,53 @@ os_cmd(Command) -> gb_sets_difference(S1, S2) -> gb_sets:fold(fun gb_sets:delete_any/2, S1, S2). + +version() -> + {ok, VSN} = application:get_key(rabbit, vsn), + VSN. + +sequence_error([T]) -> T; +sequence_error([{error, _} = Error | _]) -> Error; +sequence_error([_ | Rest]) -> sequence_error(Rest). + +json_encode(Term) -> + try + {ok, mochijson2:encode(Term)} + catch + exit:{json_encode, E} -> + {error, E} + end. + +json_decode(Term) -> + try + {ok, mochijson2:decode(Term)} + catch + %% Sadly `mochijson2:decode/1' does not offer a nice way to catch + %% decoding errors... + error:_ -> error + end. + +json_to_term({struct, L}) -> + [{K, json_to_term(V)} || {K, V} <- L]; +json_to_term(L) when is_list(L) -> + [json_to_term(I) || I <- L]; +json_to_term(V) when is_binary(V) orelse is_number(V) orelse V =:= null orelse + V =:= true orelse V =:= false -> + V. + +%% This has the flaw that empty lists will never be JSON objects, so use with +%% care. 
+term_to_json([{_, _}|_] = L) -> + {struct, [{K, term_to_json(V)} || {K, V} <- L]}; +term_to_json(L) when is_list(L) -> + [term_to_json(I) || I <- L]; +term_to_json(V) when is_binary(V) orelse is_number(V) orelse V =:= null orelse + V =:= true orelse V =:= false -> + V. + +base64url(In) -> + lists:reverse(lists:foldl(fun ($\+, Acc) -> [$\- | Acc]; + ($\/, Acc) -> [$\_ | Acc]; + ($\=, Acc) -> Acc; + (Chr, Acc) -> [Chr | Acc] + end, [], base64:encode_to_string(In))). diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 7e9346f9..d6c6f360 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -14,23 +14,37 @@ %% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. %% - -module(rabbit_mnesia). --export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, force_cluster/1, reset/0, force_reset/0, init_db/3, - is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0, - empty_ram_only_tables/0, copy_db/1, wait_for_tables/1, - create_cluster_nodes_config/1, read_cluster_nodes_config/0, - record_running_nodes/0, read_previously_running_nodes/0, - running_nodes_filename/0, is_disc_node/0, on_node_down/1, - on_node_up/1]). - --export([table_names/0]). - -%% create_tables/0 exported for helping embed RabbitMQ in or alongside -%% other mnesia-using Erlang applications, such as ejabberd --export([create_tables/0]). +-export([init/0, + join_cluster/2, + reset/0, + force_reset/0, + update_cluster_nodes/1, + change_cluster_node_type/1, + forget_cluster_node/2, + + status/0, + is_clustered/0, + cluster_nodes/1, + node_type/0, + dir/0, + cluster_status_from_mnesia/0, + + init_db_unchecked/2, + copy_db/1, + check_cluster_consistency/0, + ensure_mnesia_dir/0, + + on_node_up/1, + on_node_down/1 + ]). + +%% Used internally in rpc calls +-export([node_info/0, + remove_node_if_mnesia_running/1, + is_running_remote/0 + ]). -include("rabbit.hrl"). @@ -38,314 +52,422 @@ -ifdef(use_specs). --export_type([node_type/0]). +-export_type([node_type/0, cluster_status/0]). --type(node_type() :: disc_only | disc | ram | unknown). --spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | - {'running_nodes', [node()]}]). --spec(dir/0 :: () -> file:filename()). --spec(ensure_mnesia_dir/0 :: () -> 'ok'). +-type(node_type() :: disc | ram). +-type(cluster_status() :: {[node()], [node()], [node()]}). + +%% Main interface -spec(init/0 :: () -> 'ok'). --spec(init_db/3 :: ([node()], boolean(), rabbit_misc:thunk('ok')) -> 'ok'). --spec(is_db_empty/0 :: () -> boolean()). --spec(cluster/1 :: ([node()]) -> 'ok'). --spec(force_cluster/1 :: ([node()]) -> 'ok'). --spec(cluster/2 :: ([node()], boolean()) -> 'ok'). +-spec(join_cluster/2 :: (node(), node_type()) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). +-spec(update_cluster_nodes/1 :: (node()) -> 'ok'). +-spec(change_cluster_node_type/1 :: (node_type()) -> 'ok'). +-spec(forget_cluster_node/2 :: (node(), boolean()) -> 'ok'). + +%% Various queries to get the status of the db +-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | + {'running_nodes', [node()]}]). -spec(is_clustered/0 :: () -> boolean()). --spec(running_clustered_nodes/0 :: () -> [node()]). --spec(all_clustered_nodes/0 :: () -> [node()]). --spec(empty_ram_only_tables/0 :: () -> 'ok'). --spec(create_tables/0 :: () -> 'ok'). +-spec(cluster_nodes/1 :: ('all' | 'disc' | 'ram' | 'running') -> [node()]). +-spec(node_type/0 :: () -> node_type()). +-spec(dir/0 :: () -> file:filename()). 
+-spec(cluster_status_from_mnesia/0 :: () -> rabbit_types:ok_or_error2( + cluster_status(), any())). + +%% Operations on the db and utils, mainly used in `rabbit_upgrade' and `rabbit' +-spec(init_db_unchecked/2 :: ([node()], node_type()) -> 'ok'). -spec(copy_db/1 :: (file:filename()) -> rabbit_types:ok_or_error(any())). --spec(wait_for_tables/1 :: ([atom()]) -> 'ok'). --spec(create_cluster_nodes_config/1 :: ([node()]) -> 'ok'). --spec(read_cluster_nodes_config/0 :: () -> [node()]). --spec(record_running_nodes/0 :: () -> 'ok'). --spec(read_previously_running_nodes/0 :: () -> [node()]). --spec(running_nodes_filename/0 :: () -> file:filename()). --spec(is_disc_node/0 :: () -> boolean()). +-spec(check_cluster_consistency/0 :: () -> 'ok'). +-spec(ensure_mnesia_dir/0 :: () -> 'ok'). + +%% Hooks used in `rabbit_node_monitor' -spec(on_node_up/1 :: (node()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). --spec(table_names/0 :: () -> [atom()]). - -endif. %%---------------------------------------------------------------------------- - -status() -> - [{nodes, case mnesia:system_info(is_running) of - yes -> [{Key, Nodes} || - {Key, CopyType} <- [{disc_only, disc_only_copies}, - {disc, disc_copies}, - {ram, ram_copies}], - begin - Nodes = nodes_of_type(CopyType), - Nodes =/= [] - end]; - no -> case all_clustered_nodes() of - [] -> []; - Nodes -> [{unknown, Nodes}] - end; - Reason when Reason =:= starting; Reason =:= stopping -> - exit({rabbit_busy, try_again_later}) - end}, - {running_nodes, running_clustered_nodes()}]. +%% Main interface +%%---------------------------------------------------------------------------- init() -> ensure_mnesia_running(), ensure_mnesia_dir(), - Nodes = read_cluster_nodes_config(), - ok = init_db(Nodes, should_be_disc_node(Nodes)), + case is_virgin_node() of + true -> init_from_config(); + false -> NodeType = node_type(), + init_db_and_upgrade(cluster_nodes(all), NodeType, + NodeType =:= ram) + end, %% We intuitively expect the global name server to be synced when - %% Mnesia is up. In fact that's not guaranteed to be the case - let's - %% make it so. + %% Mnesia is up. In fact that's not guaranteed to be the case - + %% let's make it so. ok = global:sync(), - ok = delete_previously_running_nodes(), ok. -is_db_empty() -> - lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, - table_names()). - -cluster(ClusterNodes) -> - cluster(ClusterNodes, false). -force_cluster(ClusterNodes) -> - cluster(ClusterNodes, true). - -%% Alter which disk nodes this node is clustered with. This can be a -%% subset of all the disk nodes in the cluster but can (and should) -%% include the node itself if it is to be a disk rather than a ram -%% node. If Force is false, only connections to online nodes are -%% allowed. -cluster(ClusterNodes, Force) -> - rabbit_misc:local_info_msg("Clustering with ~p~s~n", - [ClusterNodes, if Force -> " forcefully"; - true -> "" - end]), +init_from_config() -> + {ok, {TryNodes, NodeType}} = + application:get_env(rabbit, cluster_nodes), + case find_good_node(nodes_excl_me(TryNodes)) of + {ok, Node} -> + rabbit_log:info("Node '~p' selected for clustering from " + "configuration~n", [Node]), + {ok, {_, DiscNodes, _}} = discover_cluster(Node), + init_db_and_upgrade(DiscNodes, NodeType, true), + rabbit_node_monitor:notify_joined_cluster(); + none -> + rabbit_log:warning("Could not find any suitable node amongst the " + "ones provided in the configuration: ~p~n", + [TryNodes]), + init_db_and_upgrade([node()], disc, false) + end. 
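The code above expects the rabbit application's cluster_nodes
environment entry to have the shape {NodesToTry, NodeType}; a
hypothetical rabbitmq.config entry illustrating that shape (host names
invented):

    [{rabbit, [{cluster_nodes, {['rabbit@host1', 'rabbit@host2'], ram}}]}].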
+
+%% Make the node join a cluster. The node will be reset automatically
+%% before we actually cluster it. The nodes provided will be used to
+%% find out about the nodes in the cluster.
+%%
+%% This function will fail if:
+%%
+%% * The node is currently the only disc node of its cluster
+%% * We can't connect to any of the nodes provided
+%% * The node is currently already clustered with the cluster of the nodes
+%%   provided
+%%
+%% Note that we make no attempt to verify that the nodes provided are
+%% all in the same cluster; we simply pick the first online node and
+%% join its cluster.
+join_cluster(DiscoveryNode, NodeType) ->
     ensure_mnesia_not_running(),
     ensure_mnesia_dir(),
-
-    case not Force andalso is_clustered() andalso
-        is_only_disc_node(node(), false) andalso
-        not should_be_disc_node(ClusterNodes)
-    of
-        true -> log_both("last running disc node leaving cluster");
-        _    -> ok
+    case is_only_clustered_disc_node() of
+        true  -> e(clustering_only_disc_node);
+        false -> ok
     end,
-
-    %% Wipe mnesia if we're changing type from disc to ram
-    case {is_disc_node(), should_be_disc_node(ClusterNodes)} of
-        {true, false} -> rabbit_misc:with_local_io(
-                           fun () -> error_logger:warning_msg(
-                                       "changing node type; wiping "
-                                       "mnesia...~n~n")
-                           end),
-                         rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
-                                               cannot_delete_schema);
-        _             -> ok
+    {ClusterNodes, _, _} = case discover_cluster(DiscoveryNode) of
+                               {ok, Res}      -> Res;
+                               {error, _} = E -> throw(E)
+                           end,
+    case me_in_nodes(ClusterNodes) of
+        true  -> e(already_clustered);
+        false -> ok
     end,
-    %% Pre-emptively leave the cluster
-    %%
-    %% We're trying to handle the following two cases:
-    %% 1. We have a two-node cluster, where both nodes are disc nodes.
-    %%    One node is re-clustered as a ram node. When it tries to
-    %%    re-join the cluster, but before it has time to update its
-    %%    tables definitions, the other node will order it to re-create
-    %%    its disc tables. So, we need to leave the cluster before we
-    %%    can join it again.
-    %% 2. We have a two-node cluster, where both nodes are disc nodes.
-    %%    One node is forcefully reset (so, the other node thinks its
-    %%    still a part of the cluster). The reset node is re-clustered
-    %%    as a ram node. Same as above, we need to leave the cluster
-    %%    before we can join it. But, since we don't know if we're in a
-    %%    cluster or not, we just pre-emptively leave it before joining.
-    ProperClusterNodes = ClusterNodes -- [node()],
-    try
-        ok = leave_cluster(ProperClusterNodes, ProperClusterNodes)
-    catch
-        {error, {no_running_cluster_nodes, _, _}} when Force ->
-            ok
-    end,
+    %% Reset the node. This simplifies things and it will be needed in
+    %% this case - we're joining a new cluster with new nodes which
+    %% are not in sync with the current node. It also lifts the burden
+    %% of resetting the node from the user.
+    reset(false),
     %% Join the cluster
-    start_mnesia(),
-    try
-        ok = init_db(ClusterNodes, Force),
-        ok = create_cluster_nodes_config(ClusterNodes)
-    after
-        stop_mnesia()
-    end,
+    rabbit_misc:local_info_msg("Clustering with ~p as ~p node~n",
+                               [ClusterNodes, NodeType]),
+    ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true),
+    rabbit_node_monitor:notify_joined_cluster(),
     ok.

 %% return node to its virgin state, where it is not member of any
 %% cluster, has no cluster configuration, no local database, and no
 %% persisted messages
-reset()       -> reset(false).
-force_reset() -> reset(true).
-
-is_clustered() ->
-    RunningNodes = running_clustered_nodes(),
-    [node()] /= RunningNodes andalso [] /= RunningNodes.
-
-all_clustered_nodes() ->
-    mnesia:system_info(db_nodes).
-
-running_clustered_nodes() ->
-    mnesia:system_info(running_db_nodes).
-
-empty_ram_only_tables() ->
-    Node = node(),
-    lists:foreach(
-      fun (TabName) ->
-              case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of
-                  true  -> {atomic, ok} = mnesia:clear_table(TabName);
-                  false -> ok
-              end
-      end, table_names()),
+reset() ->
+    rabbit_misc:local_info_msg("Resetting Rabbit~n", []),
+    reset(false).
+
+force_reset() ->
+    rabbit_misc:local_info_msg("Resetting Rabbit forcefully~n", []),
+    reset(true).
+
+reset(Force) ->
+    ensure_mnesia_not_running(),
+    Nodes = case Force of
+                true ->
+                    nodes();
+                false ->
+                    AllNodes = cluster_nodes(all),
+                    %% Reconnecting so that we will get an up-to-date
+                    %% list of nodes. We don't need to check for
+                    %% consistency because we are resetting. Force=true
+                    %% here so that reset still works when clustered
+                    %% with a node which is down.
+                    init_db_with_mnesia(AllNodes, node_type(), false, false),
+                    case is_only_clustered_disc_node() of
+                        true  -> e(resetting_only_disc_node);
+                        false -> ok
+                    end,
+                    leave_cluster(),
+                    rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+                                          cannot_delete_schema),
+                    cluster_nodes(all)
+            end,
+    %% We need to make sure that we don't end up in a distributed
+    %% Erlang system with nodes while not being in an Mnesia cluster
+    %% with them. We don't handle that well.
+    [erlang:disconnect_node(N) || N <- Nodes],
+    %% remove persisted messages and any other garbage we find
+    ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")),
+    ok = rabbit_node_monitor:reset_cluster_status(),
     ok.

-%%--------------------------------------------------------------------
+change_cluster_node_type(Type) ->
+    ensure_mnesia_not_running(),
+    ensure_mnesia_dir(),
+    case is_clustered() of
+        false -> e(not_clustered);
+        true  -> ok
+    end,
+    {_, _, RunningNodes} = case discover_cluster(cluster_nodes(all)) of
+                               {ok, Status}     -> Status;
+                               {error, _Reason} -> e(cannot_connect_to_cluster)
+                           end,
+    %% We might still be marked as running by a remote node since the
+    %% information of us going down might not have propagated yet.
+    Node = case RunningNodes -- [node()] of
+               []        -> e(no_online_cluster_nodes);
+               [Node0|_] -> Node0
+           end,
+    ok = reset(),
+    ok = join_cluster(Node, Type).

-nodes_of_type(Type) ->
-    %% This function should return the nodes of a certain type (ram,
-    %% disc or disc_only) in the current cluster. The type of nodes
-    %% is determined when the cluster is initially configured.
-    mnesia:table_info(schema, Type).
-
-%% The tables aren't supposed to be on disk on a ram node
-table_definitions(disc) ->
-    table_definitions();
-table_definitions(ram) ->
-    [{Tab, copy_type_to_ram(TabDef)} || {Tab, TabDef} <- table_definitions()].
- -table_definitions() -> - [{rabbit_user, - [{record_name, internal_user}, - {attributes, record_info(fields, internal_user)}, - {disc_copies, [node()]}, - {match, #internal_user{_='_'}}]}, - {rabbit_user_permission, - [{record_name, user_permission}, - {attributes, record_info(fields, user_permission)}, - {disc_copies, [node()]}, - {match, #user_permission{user_vhost = #user_vhost{_='_'}, - permission = #permission{_='_'}, - _='_'}}]}, - {rabbit_vhost, - [{record_name, vhost}, - {attributes, record_info(fields, vhost)}, - {disc_copies, [node()]}, - {match, #vhost{_='_'}}]}, - {rabbit_listener, - [{record_name, listener}, - {attributes, record_info(fields, listener)}, - {type, bag}, - {match, #listener{_='_'}}]}, - {rabbit_durable_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {disc_copies, [node()]}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_semi_durable_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {type, ordered_set}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {type, ordered_set}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_reverse_route, - [{record_name, reverse_route}, - {attributes, record_info(fields, reverse_route)}, - {type, ordered_set}, - {match, #reverse_route{reverse_binding = reverse_binding_match(), - _='_'}}]}, - {rabbit_topic_trie_node, - [{record_name, topic_trie_node}, - {attributes, record_info(fields, topic_trie_node)}, - {type, ordered_set}, - {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]}, - {rabbit_topic_trie_edge, - [{record_name, topic_trie_edge}, - {attributes, record_info(fields, topic_trie_edge)}, - {type, ordered_set}, - {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, - {rabbit_topic_trie_binding, - [{record_name, topic_trie_binding}, - {attributes, record_info(fields, topic_trie_binding)}, - {type, ordered_set}, - {match, #topic_trie_binding{trie_binding = trie_binding_match(), - _='_'}}]}, - {rabbit_durable_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {disc_copies, [node()]}, - {match, #exchange{name = exchange_name_match(), _='_'}}]}, - {rabbit_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {match, #exchange{name = exchange_name_match(), _='_'}}]}, - {rabbit_exchange_serial, - [{record_name, exchange_serial}, - {attributes, record_info(fields, exchange_serial)}, - {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, - {rabbit_runtime_parameters, - [{record_name, runtime_parameters}, - {attributes, record_info(fields, runtime_parameters)}, - {disc_copies, [node()]}, - {match, #runtime_parameters{_='_'}}]}, - {rabbit_durable_queue, - [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}, - {disc_copies, [node()]}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}, - {rabbit_queue, - [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}] - ++ gm:table_definitions() - ++ mirrored_supervisor:table_definitions(). - -binding_match() -> - #binding{source = exchange_name_match(), - destination = binding_destination_match(), - _='_'}. -reverse_binding_match() -> - #reverse_binding{destination = binding_destination_match(), - source = exchange_name_match(), - _='_'}. -binding_destination_match() -> - resource_match('_'). 
-trie_node_match() ->
-    #trie_node{   exchange_name = exchange_name_match(), _='_'}.
-trie_edge_match() ->
-    #trie_edge{   exchange_name = exchange_name_match(), _='_'}.
-trie_binding_match() ->
-    #trie_binding{exchange_name = exchange_name_match(), _='_'}.
-exchange_name_match() ->
-    resource_match(exchange).
-queue_name_match() ->
-    resource_match(queue).
-resource_match(Kind) ->
-    #resource{kind = Kind, _='_'}.
-
-table_names() ->
-    [Tab || {Tab, _} <- table_definitions()].
-
-replicated_table_names() ->
-    [Tab || {Tab, TabDef} <- table_definitions(),
-            not lists:member({local_content, true}, TabDef)
-    ].
+update_cluster_nodes(DiscoveryNode) ->
+    ensure_mnesia_not_running(),
+    ensure_mnesia_dir(),
+    Status = {AllNodes, _, _} =
+        case discover_cluster(DiscoveryNode) of
+            {ok, Status0}    -> Status0;
+            {error, _Reason} -> e(cannot_connect_to_node)
+        end,
+    case me_in_nodes(AllNodes) of
+        true ->
+            %% As in `check_consistency/0', we can safely delete the
+            %% schema here, since it'll be replicated from the other
+            %% nodes
+            mnesia:delete_schema([node()]),
+            rabbit_node_monitor:write_cluster_status(Status),
+            rabbit_misc:local_info_msg("Updating cluster nodes from ~p~n",
+                                       [DiscoveryNode]),
+            init_db_with_mnesia(AllNodes, node_type(), true, true);
+        false ->
+            e(inconsistent_cluster)
+    end,
+    ok.
+
+%% We proceed like this: try to remove the node locally. If the node
+%% is offline, we remove the node if:
+%% * This node is a disc node
+%% * All other nodes are offline
+%% * This node was, to the best of our knowledge (see comment below),
+%%   the last node, or the second-to-last after the node we're
+%%   removing, to go down
+forget_cluster_node(Node, RemoveWhenOffline) ->
+    case lists:member(Node, cluster_nodes(all)) of
+        true  -> ok;
+        false -> e(not_a_cluster_node)
+    end,
+    case {RemoveWhenOffline, mnesia:system_info(is_running)} of
+        {true,  no}  -> remove_node_offline_node(Node);
+        {true,  yes} -> e(online_node_offline_flag);
+        {false, no}  -> e(offline_node_no_offline_flag);
+        {false, yes} -> rabbit_misc:local_info_msg(
+                          "Removing node ~p from cluster~n", [Node]),
+                        case remove_node_if_mnesia_running(Node) of
+                            ok               -> ok;
+                            {error, _} = Err -> throw(Err)
+                        end
+    end.
+
+remove_node_offline_node(Node) ->
+    %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we
+    %% want - we need to know the running nodes *now*. If the current node is a
+    %% RAM node it will return bogus results, but we don't care since we only do
+    %% this operation from disc nodes.
+    case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of
+        {[], disc} ->
+            %% Note that while we check whether the node was the last to go
+            %% down, apart from the node we're removing, this is still unsafe.
+            %% Consider the situation in which A and B are clustered. A goes
+            %% down, and records B as the running node. Then B gets clustered
+            %% with C, C goes down and B goes down. In this case, C is the
+            %% second-to-last, but we don't know that and we'll remove B from A
+            %% anyway, even if that will lead to bad things.
+            case cluster_nodes(running) -- [node(), Node] of
+                [] -> start_mnesia(),
+                      try
+                          %% What we want to do here is replace the last node to
+                          %% go down with the current node. The way we do this
+                          %% is by force loading the tables, and making sure
+                          %% that they are loaded.
+                          rabbit_table:force_load(),
+                          rabbit_table:wait_for_replicated(),
+                          forget_cluster_node(Node, false)
+                      after
+                          stop_mnesia()
+                      end;
+                _  -> e(not_last_node_to_go_down)
+            end;
+        {_, _} ->
+            e(removing_node_from_offline_node)
+    end.
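The four {RemoveWhenOffline, mnesia running} combinations in
forget_cluster_node/2 above, tabulated for quick reference:

    %% {true,  no}  -> remove_node_offline_node(Node)
    %% {true,  yes} -> e(online_node_offline_flag)
    %% {false, no}  -> e(offline_node_no_offline_flag)
    %% {false, yes} -> remove_node_if_mnesia_running(Node)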
+
+
+%%----------------------------------------------------------------------------
+%% Queries
+%%----------------------------------------------------------------------------
+
+status() ->
+    IfNonEmpty = fun (_, []) -> [];
+                     (Type, Nodes) -> [{Type, Nodes}]
+                 end,
+    [{nodes, (IfNonEmpty(disc, cluster_nodes(disc)) ++
+                  IfNonEmpty(ram, cluster_nodes(ram)))}] ++
+        case mnesia:system_info(is_running) of
+            yes -> RunningNodes = cluster_nodes(running),
+                   [{running_nodes, RunningNodes},
+                    {partitions, mnesia_partitions(RunningNodes)}];
+            no -> []
+        end.
+
+mnesia_partitions(Nodes) ->
+    {Replies, _BadNodes} = rpc:multicall(
+                             Nodes, rabbit_node_monitor, partitions, []),
+    [Reply || Reply = {_, R} <- Replies, R =/= []].
+
+is_clustered() -> AllNodes = cluster_nodes(all),
+                  AllNodes =/= [] andalso AllNodes =/= [node()].
+
+cluster_nodes(WhichNodes) -> cluster_status(WhichNodes).
+
+%% This function is the actual source of information, since it gets
+%% the data from mnesia. Obviously it'll work only when mnesia is
+%% running.
+cluster_status_from_mnesia() ->
+    case mnesia:system_info(is_running) of
+        no ->
+            {error, mnesia_not_running};
+        yes ->
+            %% If the tables are not present, it means that
+            %% `init_db/3' hasn't been run yet. In other words, either
+            %% we are a virgin node or a restarted RAM node. In both
+            %% cases we're not interested in what mnesia has to say.
+            NodeType = case mnesia:system_info(use_dir) of
+                           true -> disc;
+                           false -> ram
+                       end,
+            case rabbit_table:is_present() of
+                true -> AllNodes = mnesia:system_info(db_nodes),
+                        DiscCopies = mnesia:table_info(schema, disc_copies),
+                        DiscNodes = case NodeType of
+                                        disc -> nodes_incl_me(DiscCopies);
+                                        ram -> DiscCopies
+                                    end,
+                        %% `mnesia:system_info(running_db_nodes)' is safe since
+                        %% we know that mnesia is running
+                        RunningNodes = mnesia:system_info(running_db_nodes),
+                        {ok, {AllNodes, DiscNodes, RunningNodes}};
+                false -> {error, tables_not_present}
+            end
+    end.
+
+cluster_status(WhichNodes) ->
+    {AllNodes, DiscNodes, RunningNodes} = Nodes =
+        case cluster_status_from_mnesia() of
+            {ok, Nodes0} ->
+                Nodes0;
+            {error, _Reason} ->
+                {AllNodes0, DiscNodes0, RunningNodes0} =
+                    rabbit_node_monitor:read_cluster_status(),
+                %% The cluster status file records the status when the node is
+                %% online, but we know for sure that the node is offline now, so
+                %% we can remove it from the list of running nodes.
+                {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)}
+        end,
+    case WhichNodes of
+        status  -> Nodes;
+        all     -> AllNodes;
+        disc    -> DiscNodes;
+        ram     -> AllNodes -- DiscNodes;
+        running -> RunningNodes
+    end.
+
+node_info() ->
+    {erlang:system_info(otp_release), rabbit_misc:version(),
+     cluster_status_from_mnesia()}.
+
+node_type() ->
+    DiscNodes = cluster_nodes(disc),
+    case DiscNodes =:= [] orelse me_in_nodes(DiscNodes) of
+        true -> disc;
+        false -> ram
+    end.
 
 dir() -> mnesia:system_info(directory).
 
+%%----------------------------------------------------------------------------
+%% Operations on the db
+%%----------------------------------------------------------------------------
+
+%% Adds the provided nodes to the mnesia cluster, creating a new
+%% schema if needed, and catching up if there are other nodes in the
+%% cluster already. It also updates the cluster status file.
+init_db(ClusterNodes, NodeType, CheckOtherNodes) -> + Nodes = change_extra_db_nodes(ClusterNodes, CheckOtherNodes), + %% Note that we use `system_info' here and not the cluster status + %% since when we start rabbit for the first time the cluster + %% status will say we are a disc node but the tables won't be + %% present yet. + WasDiscNode = mnesia:system_info(use_dir), + case {Nodes, WasDiscNode, NodeType} of + {[], _, ram} -> + %% Standalone ram node, we don't want that + throw({error, cannot_create_standalone_ram_node}); + {[], false, disc} -> + %% RAM -> disc, starting from scratch + ok = create_schema(); + {[], true, disc} -> + %% First disc node up + ok; + {[AnotherNode | _], _, _} -> + %% Subsequent node in cluster, catch up + ensure_version_ok( + rpc:call(AnotherNode, rabbit_version, recorded, [])), + ok = rabbit_table:wait_for_replicated(), + ok = rabbit_table:create_local_copy(NodeType) + end, + ensure_schema_integrity(), + rabbit_node_monitor:update_cluster_status(), + ok. + +init_db_unchecked(ClusterNodes, NodeType) -> + init_db(ClusterNodes, NodeType, false). + +init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes) -> + ok = init_db(ClusterNodes, NodeType, CheckOtherNodes), + ok = case rabbit_upgrade:maybe_upgrade_local() of + ok -> ok; + starting_from_scratch -> rabbit_version:record_desired(); + version_not_available -> schema_ok_or_move() + end, + %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget + %% about the cluster + case NodeType of + ram -> start_mnesia(), + change_extra_db_nodes(ClusterNodes, false), + rabbit_table:wait_for_replicated(); + disc -> ok + end, + ok. + +init_db_with_mnesia(ClusterNodes, NodeType, + CheckOtherNodes, CheckConsistency) -> + start_mnesia(CheckConsistency), + try + init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes) + after + stop_mnesia() + end. + ensure_mnesia_dir() -> MnesiaDir = dir() ++ "/", case filelib:ensure_dir(MnesiaDir) of @@ -378,210 +500,111 @@ ensure_mnesia_not_running() -> end. ensure_schema_integrity() -> - case check_schema_integrity() of + case rabbit_table:check_schema_integrity() of ok -> ok; {error, Reason} -> throw({error, {schema_integrity_check_failed, Reason}}) end. -check_schema_integrity() -> - Tables = mnesia:system_info(tables), - case check_tables(fun (Tab, TabDef) -> - case lists:member(Tab, Tables) of - false -> {error, {table_missing, Tab}}; - true -> check_table_attributes(Tab, TabDef) - end - end) of - ok -> ok = wait_for_tables(), - check_tables(fun check_table_content/2); - Other -> Other - end. - -check_table_attributes(Tab, TabDef) -> - {_, ExpAttrs} = proplists:lookup(attributes, TabDef), - case mnesia:table_info(Tab, attributes) of - ExpAttrs -> ok; - Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} - end. +copy_db(Destination) -> + ok = ensure_mnesia_not_running(), + rabbit_file:recursive_copy(dir(), Destination). -check_table_content(Tab, TabDef) -> - {_, Match} = proplists:lookup(match, TabDef), - case mnesia:dirty_first(Tab) of - '$end_of_table' -> +%% This does not guarantee us much, but it avoids some situations that +%% will definitely end up badly +check_cluster_consistency() -> + %% We want to find 0 or 1 consistent nodes. 
+ case lists:foldl( + fun (Node, {error, _}) -> check_cluster_consistency(Node); + (_Node, {ok, Status}) -> {ok, Status} + end, {error, not_found}, nodes_excl_me(cluster_nodes(all))) + of + {ok, Status = {RemoteAllNodes, _, _}} -> + case ordsets:is_subset(ordsets:from_list(cluster_nodes(all)), + ordsets:from_list(RemoteAllNodes)) of + true -> + ok; + false -> + %% We delete the schema here since we think we are + %% clustered with nodes that are no longer in the + %% cluster and there is no other way to remove + %% them from our schema. On the other hand, we are + %% sure that there is another online node that we + %% can use to sync the tables with. There is a + %% race here: if between this check and the + %% `init_db' invocation the cluster gets + %% disbanded, we're left with a node with no + %% mnesia data that will try to connect to offline + %% nodes. + mnesia:delete_schema([node()]) + end, + rabbit_node_monitor:write_cluster_status(Status); + {error, not_found} -> ok; - Key -> - ObjList = mnesia:dirty_read(Tab, Key), - MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), - case ets:match_spec_run(ObjList, MatchComp) of - ObjList -> ok; - _ -> {error, {table_content_invalid, Tab, Match, ObjList}} - end + {error, _} = E -> + throw(E) end. -check_tables(Fun) -> - case [Error || {Tab, TabDef} <- table_definitions( - case is_disc_node() of - true -> disc; - false -> ram - end), - case Fun(Tab, TabDef) of - ok -> Error = none, false; - {error, Error} -> true - end] of - [] -> ok; - Errors -> {error, Errors} +check_cluster_consistency(Node) -> + case rpc:call(Node, rabbit_mnesia, node_info, []) of + {badrpc, _Reason} -> + {error, not_found}; + {_OTP, _Rabbit, {error, _}} -> + {error, not_found}; + {OTP, Rabbit, {ok, Status}} -> + case check_consistency(OTP, Rabbit, Node, Status) of + {error, _} = E -> E; + {ok, Res} -> {ok, Res} + end end. -%% The cluster node config file contains some or all of the disk nodes -%% that are members of the cluster this node is / should be a part of. -%% -%% If the file is absent, the list is empty, or only contains the -%% current node, then the current node is a standalone (disk) -%% node. Otherwise it is a node that is part of a cluster as either a -%% disk node, if it appears in the cluster node config, or ram node if -%% it doesn't. - -cluster_nodes_config_filename() -> - dir() ++ "/cluster_nodes.config". - -create_cluster_nodes_config(ClusterNodes) -> - FileName = cluster_nodes_config_filename(), - case rabbit_file:write_term_file(FileName, [ClusterNodes]) of - ok -> ok; - {error, Reason} -> - throw({error, {cannot_create_cluster_nodes_config, - FileName, Reason}}) - end. +%%-------------------------------------------------------------------- +%% Hooks for `rabbit_node_monitor' +%%-------------------------------------------------------------------- -read_cluster_nodes_config() -> - FileName = cluster_nodes_config_filename(), - case rabbit_file:read_term_file(FileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - {ok, ClusterNodes} = application:get_env(rabbit, cluster_nodes), - ClusterNodes; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - FileName, Reason}}) +on_node_up(Node) -> + case running_disc_nodes() of + [Node] -> rabbit_log:info("cluster contains disc nodes again~n"); + _ -> ok end. 
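Every query above reduces to the {AllNodes, DiscNodes, RunningNodes} triple that cluster_status/1 returns; a worked example with hypothetical node names, where rabbit@b is a ram node and both nodes are up:

    {All, Disc, Running} = {['rabbit@a', 'rabbit@b'],
                            ['rabbit@a'],
                            ['rabbit@a', 'rabbit@b']},
    ['rabbit@b'] = All -- Disc,  %% what cluster_nodes(ram) returns
    %% the disc/running intersection computed by running_disc_nodes/0 below:
    ['rabbit@a'] = [N || N <- Disc, lists:member(N, Running)].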
-delete_cluster_nodes_config() -> - FileName = cluster_nodes_config_filename(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> - throw({error, {cannot_delete_cluster_nodes_config, - FileName, Reason}}) +on_node_down(_Node) -> + case running_disc_nodes() of + [] -> rabbit_log:info("only running disc node went down~n"); + _ -> ok end. -running_nodes_filename() -> - filename:join(dir(), "nodes_running_at_shutdown"). - -record_running_nodes() -> - FileName = running_nodes_filename(), - Nodes = running_clustered_nodes() -- [node()], - %% Don't check the result: we're shutting down anyway and this is - %% a best-effort-basis. - rabbit_file:write_term_file(FileName, [Nodes]), - ok. - -read_previously_running_nodes() -> - FileName = running_nodes_filename(), - case rabbit_file:read_term_file(FileName) of - {ok, [Nodes]} -> Nodes; - {error, enoent} -> []; - {error, Reason} -> throw({error, {cannot_read_previous_nodes_file, - FileName, Reason}}) - end. +running_disc_nodes() -> + {_AllNodes, DiscNodes, RunningNodes} = cluster_status(status), + ordsets:to_list(ordsets:intersection(ordsets:from_list(DiscNodes), + ordsets:from_list(RunningNodes))). -delete_previously_running_nodes() -> - FileName = running_nodes_filename(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> throw({error, {cannot_delete_previous_nodes_file, - FileName, Reason}}) - end. - -init_db(ClusterNodes, Force) -> - init_db( - ClusterNodes, Force, - fun () -> - case rabbit_upgrade:maybe_upgrade_local() of - ok -> ok; - %% If we're just starting up a new node we won't have a - %% version - starting_from_scratch -> ok = rabbit_version:record_desired() - end - end). - -%% Take a cluster node config and create the right kind of node - a -%% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. If Force is false, don't allow -%% connections to offline nodes. 
-init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) -> - UClusterNodes = lists:usort(ClusterNodes), - ProperClusterNodes = UClusterNodes -- [node()], - case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of - {ok, []} when not Force andalso ProperClusterNodes =/= [] -> - throw({error, {failed_to_cluster_with, ProperClusterNodes, - "Mnesia could not connect to any disc nodes."}}); - {ok, Nodes} -> - WasDiscNode = is_disc_node(), - WantDiscNode = should_be_disc_node(ClusterNodes), - %% We create a new db (on disk, or in ram) in the first - %% two cases and attempt to upgrade the in the other two - case {Nodes, WasDiscNode, WantDiscNode} of - {[], _, false} -> - %% New ram node; start from scratch - ok = create_schema(ram); - {[], false, true} -> - %% Nothing there at all, start from scratch - ok = create_schema(disc); - {[], true, true} -> - %% We're the first node up - case rabbit_upgrade:maybe_upgrade_local() of - ok -> ensure_schema_integrity(); - version_not_available -> ok = schema_ok_or_move() - end; - {[AnotherNode|_], _, _} -> - %% Subsequent node in cluster, catch up - ensure_version_ok( - rpc:call(AnotherNode, rabbit_version, recorded, [])), - {CopyType, CopyTypeAlt} = - case WantDiscNode of - true -> {disc, disc_copies}; - false -> {ram, ram_copies} - end, - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, CopyTypeAlt), - ok = create_local_table_copies(CopyType), - - ok = SecondaryPostMnesiaFun(), - %% We've taken down mnesia, so ram nodes will need - %% to re-sync - case is_disc_node() of - false -> start_mnesia(), - mnesia:change_config(extra_db_nodes, - ProperClusterNodes), - wait_for_replicated_tables(); - true -> ok - end, +%%-------------------------------------------------------------------- +%% Internal helpers +%%-------------------------------------------------------------------- - ensure_schema_integrity(), - ok - end; - {error, Reason} -> - %% one reason we may end up here is if we try to join - %% nodes together that are currently running standalone or - %% are members of a different cluster - throw({error, {unable_to_join_cluster, ClusterNodes, Reason}}) +discover_cluster(Nodes) when is_list(Nodes) -> + lists:foldl(fun (_, {ok, Res}) -> {ok, Res}; + (Node, {error, _}) -> discover_cluster(Node) + end, {error, no_nodes_provided}, Nodes); +discover_cluster(Node) -> + OfflineError = + {error, {cannot_discover_cluster, + "The nodes provided are either offline or not running"}}, + case node() of + Node -> {error, {cannot_discover_cluster, + "Cannot cluster node with itself"}}; + _ -> case rpc:call(Node, + rabbit_mnesia, cluster_status_from_mnesia, []) of + {badrpc, _Reason} -> OfflineError; + {error, mnesia_not_running} -> OfflineError; + {ok, Res} -> {ok, Res} + end end. schema_ok_or_move() -> - case check_schema_integrity() of + case rabbit_table:check_schema_integrity() of ok -> ok; {error, Reason} -> @@ -592,7 +615,7 @@ schema_ok_or_move() -> "and recreating schema from scratch~n", [Reason]), ok = move_db(), - ok = create_schema(disc) + ok = create_schema() end. ensure_version_ok({ok, DiscVersion}) -> @@ -604,25 +627,16 @@ ensure_version_ok({ok, DiscVersion}) -> ensure_version_ok({error, _}) -> ok = rabbit_version:record_desired(). 
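For contrast with the monolithic init_db/3 removed here, a condensed sketch of the path a joining node now takes (cf. update_cluster_nodes/1 and init_db_with_mnesia/4 earlier in this file); DiscoveryNode is hypothetical:

    join_sketch(DiscoveryNode, NodeType) ->
        {ok, {AllNodes, _Disc, _Running}} = discover_cluster(DiscoveryNode),
        %% starts mnesia, runs init_db_and_upgrade/3, stops mnesia again;
        %% rabbit proper restarts it afterwards
        ok = init_db_with_mnesia(AllNodes, NodeType, true, true).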
-create_schema(Type) ->
+%% We only care about disc nodes since ram nodes are supposed to catch
+%% up only
+create_schema() ->
     stop_mnesia(),
-    case Type of
-        disc -> rabbit_misc:ensure_ok(mnesia:create_schema([node()]),
-                                      cannot_create_schema);
-        ram  -> %% remove the disc schema since this is a ram node
-                rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
-                                      cannot_delete_schema)
-    end,
+    rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema),
     start_mnesia(),
-    ok = create_tables(Type),
+    ok = rabbit_table:create(),
     ensure_schema_integrity(),
     ok = rabbit_version:record_desired().
 
-is_disc_node() -> mnesia:system_info(use_dir).
-
-should_be_disc_node(ClusterNodes) ->
-    ClusterNodes == [] orelse lists:member(node(), ClusterNodes).
-
 move_db() ->
     stop_mnesia(),
     MnesiaDir = filename:dirname(dir() ++ "/"),
@@ -644,186 +658,187 @@ move_db() ->
     start_mnesia(),
     ok.
 
-copy_db(Destination) ->
-    ok = ensure_mnesia_not_running(),
-    rabbit_file:recursive_copy(dir(), Destination).
-
-create_tables() -> create_tables(disc).
-
-create_tables(Type) ->
-    lists:foreach(fun ({Tab, TabDef}) ->
-                          TabDef1 = proplists:delete(match, TabDef),
-                          case mnesia:create_table(Tab, TabDef1) of
-                              {atomic, ok} -> ok;
-                              {aborted, Reason} ->
-                                  throw({error, {table_creation_failed,
-                                                 Tab, TabDef1, Reason}})
-                          end
-                  end,
-                  table_definitions(Type)),
-    ok.
-
-copy_type_to_ram(TabDef) ->
-    [{disc_copies, []}, {ram_copies, [node()]}
-     | proplists:delete(ram_copies, proplists:delete(disc_copies, TabDef))].
-
-table_has_copy_type(TabDef, DiscType) ->
-    lists:member(node(), proplists:get_value(DiscType, TabDef, [])).
-
-create_local_table_copies(Type) ->
-    lists:foreach(
-      fun ({Tab, TabDef}) ->
-              HasDiscCopies = table_has_copy_type(TabDef, disc_copies),
-              HasDiscOnlyCopies = table_has_copy_type(TabDef, disc_only_copies),
-              LocalTab = proplists:get_bool(local_content, TabDef),
-              StorageType =
-                  if
-                      Type =:= disc orelse LocalTab ->
-                          if
-                              HasDiscCopies -> disc_copies;
-                              HasDiscOnlyCopies -> disc_only_copies;
-                              true -> ram_copies
-                          end;
-%%% unused code - commented out to keep dialyzer happy
-%%%                      Type =:= disc_only ->
-%%%                          if
-%%%                              HasDiscCopies or HasDiscOnlyCopies ->
-%%%                                  disc_only_copies;
-%%%                              true -> ram_copies
-%%%                          end;
-                      Type =:= ram ->
-                          ram_copies
-                  end,
-              ok = create_local_table_copy(Tab, StorageType)
-      end,
-      table_definitions(Type)),
-    ok.
-
-create_local_table_copy(Tab, Type) ->
-    StorageType = mnesia:table_info(Tab, storage_type),
-    {atomic, ok} =
-        if
-            StorageType == unknown ->
-                mnesia:add_table_copy(Tab, node(), Type);
-            StorageType /= Type ->
-                mnesia:change_table_copy_type(Tab, node(), Type);
-            true -> {atomic, ok}
-        end,
-    ok.
-
-wait_for_replicated_tables() -> wait_for_tables(replicated_table_names()).
-
-wait_for_tables() -> wait_for_tables(table_names()).
-
-wait_for_tables(TableNames) ->
-    case mnesia:wait_for_tables(TableNames, 30000) of
-        ok ->
-            ok;
-        {timeout, BadTabs} ->
-            throw({error, {timeout_waiting_for_tables, BadTabs}});
-        {error, Reason} ->
-            throw({error, {failed_waiting_for_tables, Reason}})
+remove_node_if_mnesia_running(Node) ->
+    case mnesia:system_info(is_running) of
+        yes ->
+            %% Deleting the schema copy of the node will result in
+            %% the node being removed from the cluster, with that
+            %% change being propagated to all nodes
+            case mnesia:del_table_copy(schema, Node) of
+                {atomic, ok} ->
+                    rabbit_node_monitor:notify_left_cluster(Node),
+                    ok;
+                {aborted, Reason} ->
+                    {error, {failed_to_remove_node, Node, Reason}}
+            end;
+        no ->
+            {error, mnesia_not_running}
     end.
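mnesia:create_schema/1 only works while mnesia is stopped, hence the stop/start bracket in the new create_schema/0; the raw sequence it wraps:

    stopped = mnesia:stop(),
    ok = mnesia:create_schema([node()]),  %% writes the disc schema
    ok = mnesia:start().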
-reset(Force) -> - rabbit_misc:local_info_msg("Resetting Rabbit~s~n", [if Force -> " forcefully"; - true -> "" - end]), - ensure_mnesia_not_running(), - case not Force andalso is_clustered() andalso - is_only_disc_node(node(), false) - of - true -> log_both("no other disc nodes running"); - false -> ok - end, - Node = node(), - Nodes = all_clustered_nodes() -- [Node], - case Force of - true -> ok; - false -> - ensure_mnesia_dir(), - start_mnesia(), - RunningNodes = - try - %% Force=true here so that reset still works when clustered - %% with a node which is down - ok = init_db(read_cluster_nodes_config(), true), - running_clustered_nodes() -- [Node] - after - stop_mnesia() - end, - leave_cluster(Nodes, RunningNodes), - rabbit_misc:ensure_ok(mnesia:delete_schema([Node]), - cannot_delete_schema) - end, - %% We need to make sure that we don't end up in a distributed - %% Erlang system with nodes while not being in an Mnesia cluster - %% with them. We don't handle that well. - [erlang:disconnect_node(N) || N <- Nodes], - ok = delete_cluster_nodes_config(), - %% remove persisted messages and any other garbage we find - ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")), - ok. +leave_cluster() -> + case nodes_excl_me(cluster_nodes(all)) of + [] -> ok; + AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of + true -> ok; + false -> e(no_running_cluster_nodes) + end + end. -leave_cluster([], _) -> ok; -leave_cluster(Nodes, RunningNodes) -> - %% find at least one running cluster node and instruct it to - %% remove our schema copy which will in turn result in our node - %% being removed as a cluster node from the schema, with that - %% change being propagated to all nodes - case lists:any( - fun (Node) -> - case rpc:call(Node, mnesia, del_table_copy, - [schema, node()]) of - {atomic, ok} -> true; - {badrpc, nodedown} -> false; - {aborted, {node_not_running, _}} -> false; - {aborted, Reason} -> - throw({error, {failed_to_leave_cluster, - Nodes, RunningNodes, Reason}}) - end - end, - RunningNodes) of - true -> ok; - false -> throw({error, {no_running_cluster_nodes, - Nodes, RunningNodes}}) +leave_cluster(Node) -> + case rpc:call(Node, + rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of + ok -> true; + {error, mnesia_not_running} -> false; + {error, Reason} -> throw({error, Reason}); + {badrpc, nodedown} -> false end. wait_for(Condition) -> error_logger:info_msg("Waiting for ~p...~n", [Condition]), timer:sleep(1000). -on_node_up(Node) -> - case is_only_disc_node(Node, true) of - true -> rabbit_log:info("cluster contains disc nodes again~n"); - false -> ok - end. - -on_node_down(Node) -> - case is_only_disc_node(Node, true) of - true -> rabbit_log:info("only running disc node went down~n"); +start_mnesia(CheckConsistency) -> + case CheckConsistency of + true -> check_cluster_consistency(); false -> ok - end. - -is_only_disc_node(Node, _MnesiaRunning = true) -> - RunningSet = sets:from_list(running_clustered_nodes()), - DiscSet = sets:from_list(nodes_of_type(disc_copies)), - [Node] =:= sets:to_list(sets:intersection(RunningSet, DiscSet)); -is_only_disc_node(Node, false) -> - start_mnesia(), - Res = is_only_disc_node(Node, true), - stop_mnesia(), - Res. - -log_both(Warning) -> - io:format("Warning: ~s~n", [Warning]), - rabbit_misc:with_local_io( - fun () -> error_logger:warning_msg("~s~n", [Warning]) end). - -start_mnesia() -> + end, rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), ensure_mnesia_running(). +start_mnesia() -> + start_mnesia(true). 
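The two halves of leaving a cluster now meet over RPC: leave_cluster/1 asks a peer to run remove_node_if_mnesia_running/1 (defined above) against the leaver, moving on to the next peer when one cannot help. Condensed, with Peer hypothetical:

    leave_via(Peer) ->
        case rpc:call(Peer, rabbit_mnesia,
                      remove_node_if_mnesia_running, [node()]) of
            ok                          -> done;
            {error, mnesia_not_running} -> try_next_peer;
            {badrpc, nodedown}          -> try_next_peer;
            {error, Reason}             -> throw({error, Reason})
        end.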
+
 stop_mnesia() ->
     stopped = mnesia:stop(),
     ensure_mnesia_not_running().
+
+change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) ->
+    ClusterNodes = nodes_excl_me(ClusterNodes0),
+    case {mnesia:change_config(extra_db_nodes, ClusterNodes), ClusterNodes} of
+        {{ok, []}, [_|_]} when CheckOtherNodes ->
+            throw({error, {failed_to_cluster_with, ClusterNodes,
+                           "Mnesia could not connect to any nodes."}});
+        {{ok, Nodes}, _} ->
+            Nodes
+    end.
+
+is_running_remote() -> {mnesia:system_info(is_running) =:= yes, node()}.
+
+check_consistency(OTP, Rabbit) ->
+    rabbit_misc:sequence_error(
+      [check_otp_consistency(OTP), check_rabbit_consistency(Rabbit)]).
+
+check_consistency(OTP, Rabbit, Node, Status) ->
+    rabbit_misc:sequence_error(
+      [check_otp_consistency(OTP),
+       check_rabbit_consistency(Rabbit),
+       check_nodes_consistency(Node, Status)]).
+
+check_nodes_consistency(Node, RemoteStatus = {RemoteAllNodes, _, _}) ->
+    case me_in_nodes(RemoteAllNodes) of
+        true ->
+            {ok, RemoteStatus};
+        false ->
+            {error, {inconsistent_cluster,
+                     rabbit_misc:format("Node ~p thinks it's clustered "
+                                        "with node ~p, but ~p disagrees",
+                                        [node(), Node, Node])}}
+    end.
+
+check_version_consistency(This, Remote, _) when This =:= Remote ->
+    ok;
+check_version_consistency(This, Remote, Name) ->
+    {error, {inconsistent_cluster,
+             rabbit_misc:format("~s version mismatch: local node is ~s, "
+                                "remote node ~s", [Name, This, Remote])}}.
+
+check_otp_consistency(Remote) ->
+    check_version_consistency(erlang:system_info(otp_release), Remote, "OTP").
+
+check_rabbit_consistency(Remote) ->
+    check_version_consistency(rabbit_misc:version(), Remote, "Rabbit").
+
+%% This is fairly tricky. We want to know if the node is in the state
+%% that a `reset' would leave it in. We cannot simply check if the
+%% mnesia tables aren't there because restarted RAM nodes won't have
+%% tables while still being non-virgin. What we do instead is to
+%% check if the mnesia directory is non-existent or empty, with the
+%% exception of the cluster status files, which will be there thanks to
+%% `rabbit_node_monitor:prepare_cluster_status_files/0'.
+is_virgin_node() ->
+    case rabbit_file:list_dir(dir()) of
+        {error, enoent} ->
+            true;
+        {ok, []} ->
+            true;
+        {ok, [File1, File2]} ->
+            lists:usort([dir() ++ "/" ++ File1, dir() ++ "/" ++ File2]) =:=
+                lists:usort([rabbit_node_monitor:cluster_status_filename(),
+                             rabbit_node_monitor:running_nodes_filename()]);
+        {ok, _} ->
+            false
+    end.
+
+find_good_node([]) ->
+    none;
+find_good_node([Node | Nodes]) ->
+    case rpc:call(Node, rabbit_mnesia, node_info, []) of
+        {badrpc, _Reason} -> find_good_node(Nodes);
+        {OTP, Rabbit, _} -> case check_consistency(OTP, Rabbit) of
+                                {error, _} -> find_good_node(Nodes);
+                                ok -> {ok, Node}
+                            end
+    end.
+
+is_only_clustered_disc_node() ->
+    node_type() =:= disc andalso is_clustered() andalso
+        cluster_nodes(disc) =:= [node()].
+
+me_in_nodes(Nodes) -> lists:member(node(), Nodes).
+
+nodes_incl_me(Nodes) -> lists:usort([node()|Nodes]).
+
+nodes_excl_me(Nodes) -> Nodes -- [node()].
+
+e(Tag) -> throw({error, {Tag, error_description(Tag)}}).
+
+error_description(clustering_only_disc_node) ->
+    "You cannot cluster a node if it is the only disc node in its existing "
+    "cluster. If new nodes joined while this node was offline, use "
+    "\"update_cluster_nodes\" to add them manually.";
+error_description(resetting_only_disc_node) ->
+    "You cannot reset a node when it is the only disc node in a cluster. "
+    "Please convert another node of the cluster to a disc node first.";
+error_description(already_clustered) ->
+    "You are already clustered with the nodes you have selected.";
+error_description(not_clustered) ->
+    "Non-clustered nodes can only be disc nodes.";
+error_description(cannot_connect_to_cluster) ->
+    "Could not connect to the cluster nodes present in this node's "
+    "status file. If the cluster has changed, you can use the "
+    "\"update_cluster_nodes\" command to point to the new cluster nodes.";
+error_description(no_online_cluster_nodes) ->
+    "Could not find any online cluster nodes. If the cluster has changed, "
+    "you can use the \"update_cluster_nodes\" command.";
+error_description(cannot_connect_to_node) ->
+    "Could not connect to the cluster node provided.";
+error_description(inconsistent_cluster) ->
+    "The nodes provided do not have this node as part of the cluster.";
+error_description(not_a_cluster_node) ->
+    "The node selected is not in the cluster.";
+error_description(online_node_offline_flag) ->
+    "You set the --offline flag, which is used to remove nodes remotely from "
+    "offline nodes, but this node is online.";
+error_description(offline_node_no_offline_flag) ->
+    "You are trying to remove a node from an offline node. That is dangerous, "
+    "but can be done with the --offline flag. Please consult the manual "
+    "for rabbitmqctl for more information.";
+error_description(not_last_node_to_go_down) ->
+    "The node you're trying to remove from was not the last to go down "
+    "(excluding the node you are removing). Please use the last node "
+    "to go down to remove nodes when the cluster is offline.";
+error_description(removing_node_from_offline_node) ->
+    "To remove a node remotely from an offline node, the node you're removing "
+    "from must be a disc node and all the other nodes must be offline.";
+error_description(no_running_cluster_nodes) ->
+    "You cannot leave a cluster if no online nodes are present.".
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index d69dad1f..c2e55022 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -1394,7 +1394,7 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION.
 
 filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)).
 
-list_sorted_file_names(Dir, Ext) ->
+list_sorted_filenames(Dir, Ext) ->
     lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end,
                filelib:wildcard("*" ++ Ext, Dir)).
 
@@ -1531,8 +1531,8 @@ count_msg_refs(Gen, Seed, State) ->
     end.
 recover_crashed_compactions(Dir) ->
-    FileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION),
-    TmpFileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION_TMP),
+    FileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION),
+    TmpFileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION_TMP),
     lists:foreach(
       fun (TmpFileName) ->
               NonTmpRelatedFileName =
@@ -1609,7 +1609,7 @@ build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit},
     ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State),
     {ok, Pid} = gatherer:start_link(),
     case [filename_to_num(FileName) ||
-             FileName <- list_sorted_file_names(Dir, ?FILE_EXTENSION)] of
+             FileName <- list_sorted_filenames(Dir, ?FILE_EXTENSION)] of
         [] -> build_index(Pid, undefined, [State #msstate.current_file],
                           State);
         Files -> {Offset, State1} = build_index(Pid, undefined, Files, State),
@@ -2023,7 +2023,7 @@ transform_dir(BaseDir, Store, TransformFun) ->
     CopyFile = fun (Src, Dst) -> {ok, _Bytes} = file:copy(Src, Dst), ok end,
     case filelib:is_dir(TmpDir) of
         true  -> throw({error, transform_failed_previously});
-        false -> FileList = list_sorted_file_names(Dir, ?FILE_EXTENSION),
+        false -> FileList = list_sorted_filenames(Dir, ?FILE_EXTENSION),
                  foreach_file(Dir, TmpDir, TransformFile, FileList),
                  foreach_file(Dir, fun file:delete/1, FileList),
                  foreach_file(TmpDir, Dir, CopyFile, FileList),
diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl
index bedf5142..038154c3 100644
--- a/src/rabbit_net.erl
+++ b/src/rabbit_net.erl
@@ -19,7 +19,7 @@
 -export([is_ssl/1, ssl_info/1, controlling_process/2, getstat/2,
          recv/1, async_recv/3, port_command/2, getopts/2, setopts/2, send/2,
-         close/1, maybe_fast_close/1, sockname/1, peername/1, peercert/1,
+         close/1, fast_close/1, sockname/1, peername/1, peercert/1,
          tune_buffer_size/1, connection_string/2]).
 
 %%---------------------------------------------------------------------------
@@ -59,7 +59,7 @@
 -spec(setopts/2 :: (socket(), opts()) -> ok_or_any_error()).
 -spec(send/2 :: (socket(), binary() | iolist()) -> ok_or_any_error()).
 -spec(close/1 :: (socket()) -> ok_or_any_error()).
--spec(maybe_fast_close/1 :: (socket()) -> ok_or_any_error()).
+-spec(fast_close/1 :: (socket()) -> ok_or_any_error()).
 -spec(sockname/1 ::
         (socket())
         -> ok_val_or_error({inet:ip_address(), rabbit_networking:ip_port()})).
@@ -77,6 +77,8 @@
 
 %%---------------------------------------------------------------------------
 
+-define(SSL_CLOSE_TIMEOUT, 5000).
+
 -define(IS_SSL(Sock), is_record(Sock, ssl_socket)).
 
 is_ssl(Sock) -> ?IS_SSL(Sock).
@@ -148,8 +150,31 @@ send(Sock, Data) when is_port(Sock) -> gen_tcp:send(Sock, Data).
 
 close(Sock) when ?IS_SSL(Sock) -> ssl:close(Sock#ssl_socket.ssl);
 close(Sock) when is_port(Sock) -> gen_tcp:close(Sock).
 
-maybe_fast_close(Sock) when ?IS_SSL(Sock) -> ok;
-maybe_fast_close(Sock) when is_port(Sock) -> erlang:port_close(Sock), ok.
+fast_close(Sock) when ?IS_SSL(Sock) ->
+    %% We cannot simply port_close the underlying tcp socket since the
+    %% TLS protocol is quite insistent that a proper closing handshake
+    %% should take place (see RFC 5246 s7.2.1). So we call ssl:close
+    %% instead, but that can block for a very long time, e.g. when
+    %% there is lots of pending output and there is tcp backpressure,
+    %% or the ssl_connection process has entered the
+    %% workaround_transport_delivery_problems function during
+    %% termination, which, inexplicably, does a gen_tcp:recv(Socket,
+    %% 0), which may never return if the client doesn't send a FIN or
+    %% that gets swallowed by the network.
Since there is no timeout + %% variant of ssl:close, we construct our own. + {Pid, MRef} = spawn_monitor(fun () -> ssl:close(Sock#ssl_socket.ssl) end), + erlang:send_after(?SSL_CLOSE_TIMEOUT, self(), {Pid, ssl_close_timeout}), + receive + {Pid, ssl_close_timeout} -> + erlang:demonitor(MRef, [flush]), + exit(Pid, kill); + {'DOWN', MRef, process, Pid, _Reason} -> + ok + end, + catch port_close(Sock#ssl_socket.tcp), + ok; +fast_close(Sock) when is_port(Sock) -> + catch port_close(Sock), ok. sockname(Sock) when ?IS_SSL(Sock) -> ssl:sockname(Sock#ssl_socket.ssl); sockname(Sock) when is_port(Sock) -> inet:sockname(Sock). diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 94a5a2b7..5cf8d1ae 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -160,7 +160,19 @@ ssl_transform_fun(SslOpts) -> case catch ssl:ssl_accept(Sock, SslOpts, ?SSL_TIMEOUT * 1000) of {ok, SslSock} -> {ok, #ssl_socket{tcp = Sock, ssl = SslSock}}; + {error, timeout} -> + {error, {ssl_upgrade_error, timeout}}; {error, Reason} -> + %% We have no idea what state the ssl_connection + %% process is in - it could still be happily + %% going, it might be stuck, or it could be just + %% about to fail. There is little that our caller + %% can do but close the TCP socket, but this could + %% cause ssl alerts to get dropped (which is bad + %% form, according to the TLS spec). So we give + %% the ssl_connection a little bit of time to send + %% such alerts. + timer:sleep(?SSL_TIMEOUT * 1000), {error, {ssl_upgrade_error, Reason}}; {'EXIT', Reason} -> {error, {ssl_upgrade_failure, Reason}} @@ -283,7 +295,7 @@ start_ssl_client(SslOpts, Sock) -> start_client(Sock, ssl_transform_fun(SslOpts)). connections() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_networking, connections_local, []). connections_local() -> diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 323cf0ce..b11c9d04 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -19,68 +19,232 @@ -behaviour(gen_server). -export([start_link/0]). +-export([running_nodes_filename/0, + cluster_status_filename/0, prepare_cluster_status_files/0, + write_cluster_status/1, read_cluster_status/0, + update_cluster_status/0, reset_cluster_status/0]). +-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]). +-export([partitions/0]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). --export([notify_cluster/0, rabbit_running_on/1]). +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -define(RABBIT_UP_RPC_TIMEOUT, 2000). +-record(state, {monitors, partitions}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). -spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). --spec(rabbit_running_on/1 :: (node()) -> 'ok'). --spec(notify_cluster/0 :: () -> 'ok'). + +-spec(running_nodes_filename/0 :: () -> string()). +-spec(cluster_status_filename/0 :: () -> string()). +-spec(prepare_cluster_status_files/0 :: () -> 'ok'). +-spec(write_cluster_status/1 :: (rabbit_mnesia:cluster_status()) -> 'ok'). +-spec(read_cluster_status/0 :: () -> rabbit_mnesia:cluster_status()). +-spec(update_cluster_status/0 :: () -> 'ok'). +-spec(reset_cluster_status/0 :: () -> 'ok'). + +-spec(notify_node_up/0 :: () -> 'ok'). 
+-spec(notify_joined_cluster/0 :: () -> 'ok'). +-spec(notify_left_cluster/1 :: (node()) -> 'ok'). + +-spec(partitions/0 :: () -> {node(), [{atom(), node()}]}). -endif. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Start +%%---------------------------------------------------------------------------- + +start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +%%---------------------------------------------------------------------------- +%% Cluster file operations +%%---------------------------------------------------------------------------- + +%% The cluster file information is kept in two files. The "cluster +%% status file" contains all the clustered nodes and the disc nodes. +%% The "running nodes file" contains the currently running nodes or +%% the running nodes at shutdown when the node is down. +%% +%% We strive to keep the files up to date and we rely on this +%% assumption in various situations. Obviously when mnesia is offline +%% the information we have will be outdated, but it cannot be +%% otherwise. + +running_nodes_filename() -> + filename:join(rabbit_mnesia:dir(), "nodes_running_at_shutdown"). + +cluster_status_filename() -> + rabbit_mnesia:dir() ++ "/cluster_nodes.config". + +prepare_cluster_status_files() -> + rabbit_mnesia:ensure_mnesia_dir(), + CorruptFiles = fun () -> throw({error, corrupt_cluster_status_files}) end, + RunningNodes1 = case try_read_file(running_nodes_filename()) of + {ok, [Nodes]} when is_list(Nodes) -> Nodes; + {ok, _ } -> CorruptFiles(); + {error, enoent} -> [] + end, + ThisNode = [node()], + %% The running nodes file might contain a set or a list, in case + %% of the legacy file + RunningNodes2 = lists:usort(ThisNode ++ RunningNodes1), + {AllNodes1, WantDiscNode} = + case try_read_file(cluster_status_filename()) of + {ok, [{AllNodes, DiscNodes0}]} -> + {AllNodes, lists:member(node(), DiscNodes0)}; + {ok, [AllNodes0]} when is_list(AllNodes0) -> + {legacy_cluster_nodes(AllNodes0), + legacy_should_be_disc_node(AllNodes0)}; + {ok, _} -> + CorruptFiles(); + {error, enoent} -> + {legacy_cluster_nodes([]), true} + end, + AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2), + DiscNodes = case WantDiscNode of + true -> ThisNode; + false -> [] + end, + ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). + +write_cluster_status({All, Disc, Running}) -> + ClusterStatusFN = cluster_status_filename(), + Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of + ok -> + RunningNodesFN = running_nodes_filename(), + {RunningNodesFN, + rabbit_file:write_term_file(RunningNodesFN, [Running])}; + E1 = {error, _} -> + {ClusterStatusFN, E1} + end, + case Res of + {_, ok} -> ok; + {FN, {error, E2}} -> throw({error, {could_not_write_file, FN, E2}}) + end. + +read_cluster_status() -> + case {try_read_file(cluster_status_filename()), + try_read_file(running_nodes_filename())} of + {{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) -> + {All, Disc, Running}; + {_, _} -> + throw({error, corrupt_or_missing_cluster_files}) + end. -start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). - -rabbit_running_on(Node) -> - gen_server:cast(rabbit_node_monitor, {rabbit_running_on, Node}). 
- -notify_cluster() -> - Node = node(), - Nodes = rabbit_mnesia:running_clustered_nodes() -- [Node], - %% notify other rabbits of this rabbit - case rpc:multicall(Nodes, rabbit_node_monitor, rabbit_running_on, - [Node], ?RABBIT_UP_RPC_TIMEOUT) of - {_, [] } -> ok; - {_, Bad} -> rabbit_log:info("failed to contact nodes ~p~n", [Bad]) - end, +update_cluster_status() -> + {ok, Status} = rabbit_mnesia:cluster_status_from_mnesia(), + write_cluster_status(Status). + +reset_cluster_status() -> + write_cluster_status({[node()], [node()], [node()]}). + +%%---------------------------------------------------------------------------- +%% Cluster notifications +%%---------------------------------------------------------------------------- + +notify_node_up() -> + Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()], + gen_server:abcast(Nodes, ?SERVER, + {node_up, node(), rabbit_mnesia:node_type()}), %% register other active rabbits with this rabbit - [ rabbit_running_on(N) || N <- Nodes ], + DiskNodes = rabbit_mnesia:cluster_nodes(disc), + [gen_server:cast(?SERVER, {node_up, N, case lists:member(N, DiskNodes) of + true -> disc; + false -> ram + end}) || N <- Nodes], ok. -%%-------------------------------------------------------------------- +notify_joined_cluster() -> + Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()], + gen_server:abcast(Nodes, ?SERVER, + {joined_cluster, node(), rabbit_mnesia:node_type()}), + ok. + +notify_left_cluster(Node) -> + Nodes = rabbit_mnesia:cluster_nodes(running), + gen_server:abcast(Nodes, ?SERVER, {left_cluster, Node}), + ok. + +%%---------------------------------------------------------------------------- +%% Server calls +%%---------------------------------------------------------------------------- + +partitions() -> + gen_server:call(?SERVER, partitions, infinity). + +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- init([]) -> - {ok, ordsets:new()}. + {ok, _} = mnesia:subscribe(system), + {ok, #state{monitors = pmon:new(), + partitions = []}}. + +handle_call(partitions, _From, State = #state{partitions = Partitions}) -> + {reply, {node(), Partitions}, State}; handle_call(_Request, _From, State) -> {noreply, State}. -handle_cast({rabbit_running_on, Node}, Nodes) -> - case ordsets:is_element(Node, Nodes) of - true -> {noreply, Nodes}; +%% Note: when updating the status file, we can't simply write the +%% mnesia information since the message can (and will) overtake the +%% mnesia propagation. 
+handle_cast({node_up, Node, NodeType}, + State = #state{monitors = Monitors}) -> + case pmon:is_monitored({rabbit, Node}, Monitors) of + true -> {noreply, State}; false -> rabbit_log:info("rabbit on node ~p up~n", [Node]), - erlang:monitor(process, {rabbit, Node}), + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), - {noreply, ordsets:add_element(Node, Nodes)} + {noreply, State#state{ + monitors = pmon:monitor({rabbit, Node}, Monitors)}} end; +handle_cast({joined_cluster, Node, NodeType}, State) -> + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + RunningNodes}), + {noreply, State}; +handle_cast({left_cluster, Node}, State) -> + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes), + del_node(Node, RunningNodes)}), + {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, Nodes) -> +handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, + State = #state{monitors = Monitors}) -> rabbit_log:info("rabbit on node ~p down~n", [Node]), + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), - {noreply, ordsets:del_element(Node, Nodes)}; + {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}}; + +handle_info({mnesia_system_event, + {inconsistent_database, running_partitioned_network, Node}}, + State = #state{partitions = Partitions}) -> + Partitions1 = ordsets:to_list( + ordsets:add_element(Node, ordsets:from_list(Partitions))), + {noreply, State#state{partitions = Partitions1}}; + handle_info(_Info, State) -> {noreply, State}. @@ -90,7 +254,9 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Functions that call the module specific hooks when nodes go up/down +%%---------------------------------------------------------------------------- %% TODO: This may turn out to be a performance hog when there are lots %% of nodes. We really only need to execute some of these statements @@ -104,3 +270,27 @@ handle_dead_rabbit(Node) -> handle_live_rabbit(Node) -> ok = rabbit_alarm:on_node_up(Node), ok = rabbit_mnesia:on_node_up(Node). + +%%-------------------------------------------------------------------- +%% Internal utils +%%-------------------------------------------------------------------- + +try_read_file(FileName) -> + case rabbit_file:read_term_file(FileName) of + {ok, Term} -> {ok, Term}; + {error, enoent} -> {error, enoent}; + {error, E} -> throw({error, {cannot_read_file, FileName, E}}) + end. + +legacy_cluster_nodes(Nodes) -> + %% We get all the info that we can, including the nodes from + %% mnesia, which will be there if the node is a disc node (empty + %% list otherwise) + lists:usort(Nodes ++ mnesia:system_info(db_nodes)). + +legacy_should_be_disc_node(DiscNodes) -> + DiscNodes == [] orelse lists:member(node(), DiscNodes). 
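The monitor bookkeeping above moved from a bare ordsets of node names to RabbitMQ's pmon set of monitored items; the calls used by the callbacks, in isolation (node name hypothetical):

    Node = 'rabbit@b',
    M0 = pmon:new(),
    M1 = pmon:monitor({rabbit, Node}, M0),
    true = pmon:is_monitored({rabbit, Node}, M1),
    %% erase/2 just forgets the item; it is used once 'DOWN' has fired
    M2 = pmon:erase({rabbit, Node}, M1),
    false = pmon:is_monitored({rabbit, Node}, M2).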
+ +add_node(Node, Nodes) -> lists:usort([Node | Nodes]). + +del_node(Node, Nodes) -> Nodes -- [Node]. diff --git a/src/rabbit_nodes.erl b/src/rabbit_nodes.erl index 1c23632d..c8d77b0f 100644 --- a/src/rabbit_nodes.erl +++ b/src/rabbit_nodes.erl @@ -70,8 +70,8 @@ diagnostics0() -> diagnostics_host(Host) -> case names(Host) of {error, EpmdReason} -> - {"- unable to connect to epmd on ~s: ~w", - [Host, EpmdReason]}; + {"- unable to connect to epmd on ~s: ~w (~s)", + [Host, EpmdReason, rabbit_misc:format_inet_error(EpmdReason)]}; {ok, NamePorts} -> {"- ~s: ~p", [Host, [{list_to_atom(Name), Port} || diff --git a/src/rabbit_parameter_validation.erl b/src/rabbit_parameter_validation.erl index af940dde..24762a73 100644 --- a/src/rabbit_parameter_validation.erl +++ b/src/rabbit_parameter_validation.erl @@ -16,7 +16,7 @@ -module(rabbit_parameter_validation). --export([number/2, binary/2, list/2, proplist/3]). +-export([number/2, binary/2, boolean/2, list/2, regex/2, proplist/3]). number(_Name, Term) when is_number(Term) -> ok; @@ -30,12 +30,26 @@ binary(_Name, Term) when is_binary(Term) -> binary(Name, Term) -> {error, "~s should be binary, actually was ~p", [Name, Term]}. +boolean(_Name, Term) when is_boolean(Term) -> + ok; +boolean(Name, Term) -> + {error, "~s should be boolean, actually was ~p", [Name, Term]}. + list(_Name, Term) when is_list(Term) -> ok; list(Name, Term) -> {error, "~s should be list, actually was ~p", [Name, Term]}. +regex(Name, Term) when is_binary(Term) -> + case re:compile(Term) of + {ok, _} -> ok; + {error, Reason} -> {error, "~s should be regular expression " + "but is invalid: ~p", [Name, Reason]} + end; +regex(Name, Term) -> + {error, "~s should be a binary but was ~p", [Name, Term]}. + proplist(Name, Constraints, Term) when is_list(Term) -> {Results, Remainder} = lists:foldl( diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index 7cf6eea9..ecb19611 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -17,8 +17,7 @@ -module(rabbit_plugins). -include("rabbit.hrl"). --export([setup/0, active/0, read_enabled/1, - list/1, dependencies/3]). +-export([setup/0, active/0, read_enabled/1, list/1, dependencies/3]). -define(VERBOSE_DEF, {?VERBOSE_OPT, flag}). -define(MINIMAL_DEF, {?MINIMAL_OPT, flag}). @@ -36,28 +35,25 @@ -ifdef(use_specs). --spec(setup/0 :: () -> [atom()]). --spec(active/0 :: () -> [atom()]). +-type(plugin_name() :: atom()). + +-spec(setup/0 :: () -> [plugin_name()]). +-spec(active/0 :: () -> [plugin_name()]). -spec(list/1 :: (string()) -> [#plugin{}]). --spec(read_enabled/1 :: (file:filename()) -> [atom()]). --spec(dependencies/3 :: - (boolean(), [atom()], [#plugin{}]) -> [atom()]). +-spec(read_enabled/1 :: (file:filename()) -> [plugin_name()]). +-spec(dependencies/3 :: (boolean(), [plugin_name()], [#plugin{}]) -> + [plugin_name()]). -endif. %%---------------------------------------------------------------------------- -%% %% @doc Prepares the file system and installs all enabled plugins. -%% setup() -> - {ok, PluginDir} = application:get_env(rabbit, plugins_dir), - {ok, ExpandDir} = application:get_env(rabbit, plugins_expand_dir), - {ok, EnabledPluginsFile} = application:get_env(rabbit, - enabled_plugins_file), - prepare_plugins(EnabledPluginsFile, PluginDir, ExpandDir), - [prepare_dir_plugin(PluginName) || - PluginName <- filelib:wildcard(ExpandDir ++ "/*/ebin/*.app")]. 
+ {ok, PluginDir} = application:get_env(rabbit, plugins_dir), + {ok, ExpandDir} = application:get_env(rabbit, plugins_expand_dir), + {ok, EnabledFile} = application:get_env(rabbit, enabled_plugins_file), + prepare_plugins(EnabledFile, PluginDir, ExpandDir). %% @doc Lists the plugins which are currently running. active() -> @@ -77,8 +73,7 @@ list(PluginsDir) -> (Plugin = #plugin{}, {Plugins1, Problems1}) -> {[Plugin|Plugins1], Problems1} end, {[], []}, - [get_plugin_info(PluginsDir, Plug) || - Plug <- EZs ++ FreeApps]), + [plugin_info(PluginsDir, Plug) || Plug <- EZs ++ FreeApps]), case Problems of [] -> ok; _ -> io:format("Warning: Problem reading some plugins: ~p~n", @@ -98,11 +93,9 @@ read_enabled(PluginsFile) -> PluginsFile, Reason}}) end. -%% %% @doc Calculate the dependency graph from <i>Sources</i>. %% When Reverse =:= true the bottom/leaf level applications are returned in %% the resulting list, otherwise they're skipped. -%% dependencies(Reverse, Sources, AllPlugins) -> {ok, G} = rabbit_misc:build_acyclic_graph( fun (App, _Deps) -> [{App, App}] end, @@ -118,42 +111,38 @@ dependencies(Reverse, Sources, AllPlugins) -> %%---------------------------------------------------------------------------- -prepare_plugins(EnabledPluginsFile, PluginsDistDir, DestDir) -> +prepare_plugins(EnabledFile, PluginsDistDir, ExpandDir) -> AllPlugins = list(PluginsDistDir), - Enabled = read_enabled(EnabledPluginsFile), + Enabled = read_enabled(EnabledFile), ToUnpack = dependencies(false, Enabled, AllPlugins), ToUnpackPlugins = lookup_plugins(ToUnpack, AllPlugins), - Missing = Enabled -- plugin_names(ToUnpackPlugins), - case Missing of - [] -> ok; - _ -> io:format("Warning: the following enabled plugins were " - "not found: ~p~n", [Missing]) + case Enabled -- plugin_names(ToUnpackPlugins) of + [] -> ok; + Missing -> io:format("Warning: the following enabled plugins were " + "not found: ~p~n", [Missing]) end, %% Eliminate the contents of the destination directory - case delete_recursively(DestDir) of - ok -> ok; - {error, E} -> rabbit_misc:quit("Could not delete dir ~s (~p)", - [DestDir, E]) + case delete_recursively(ExpandDir) of + ok -> ok; + {error, E1} -> throw({error, {cannot_delete_plugins_expand_dir, + [ExpandDir, E1]}}) end, - case filelib:ensure_dir(DestDir ++ "/") of + case filelib:ensure_dir(ExpandDir ++ "/") of ok -> ok; - {error, E2} -> rabbit_misc:quit("Could not create dir ~s (~p)", - [DestDir, E2]) + {error, E2} -> throw({error, {cannot_create_plugins_expand_dir, + [ExpandDir, E2]}}) end, - [prepare_plugin(Plugin, DestDir) || Plugin <- ToUnpackPlugins]. + [prepare_plugin(Plugin, ExpandDir) || Plugin <- ToUnpackPlugins], -prepare_dir_plugin(PluginAppDescFn) -> - %% Add the plugin ebin directory to the load path - PluginEBinDirN = filename:dirname(PluginAppDescFn), - code:add_path(PluginEBinDirN), + [prepare_dir_plugin(PluginAppDescPath) || + PluginAppDescPath <- filelib:wildcard(ExpandDir ++ "/*/ebin/*.app")]. - %% We want the second-last token - NameTokens = string:tokens(PluginAppDescFn,"/."), - PluginNameString = lists:nth(length(NameTokens) - 1, NameTokens), - list_to_atom(PluginNameString). +prepare_dir_plugin(PluginAppDescPath) -> + code:add_path(filename:dirname(PluginAppDescPath)), + list_to_atom(filename:basename(PluginAppDescPath, ".app")). %%---------------------------------------------------------------------------- @@ -164,22 +153,19 @@ delete_recursively(Fn) -> Error -> Error end. 
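prepare_dir_plugin/1 now derives the application name with filename:basename/2 instead of tokenising the whole path; for example (path hypothetical):

    Path = "plugins-expand/rabbitmq_shovel/ebin/rabbitmq_shovel.app",
    rabbitmq_shovel = list_to_atom(filename:basename(Path, ".app")).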
-prepare_plugin(#plugin{type = ez, location = Location}, PluginDestDir) -> - zip:unzip(Location, [{cwd, PluginDestDir}]); +prepare_plugin(#plugin{type = ez, location = Location}, ExpandDir) -> + zip:unzip(Location, [{cwd, ExpandDir}]); prepare_plugin(#plugin{type = dir, name = Name, location = Location}, - PluginsDestDir) -> - rabbit_file:recursive_copy(Location, - filename:join([PluginsDestDir, Name])). + ExpandDir) -> + rabbit_file:recursive_copy(Location, filename:join([ExpandDir, Name])). -%% Get the #plugin{} from an .ez. -get_plugin_info(Base, {ez, EZ0}) -> +plugin_info(Base, {ez, EZ0}) -> EZ = filename:join([Base, EZ0]), case read_app_file(EZ) of {application, Name, Props} -> mkplugin(Name, Props, ez, EZ); {error, Reason} -> {error, EZ, Reason} end; -%% Get the #plugin{} from an .app. -get_plugin_info(Base, {app, App0}) -> +plugin_info(Base, {app, App0}) -> App = filename:join([Base, App0]), case rabbit_file:read_term_file(App) of {ok, [{application, Name, Props}]} -> @@ -198,7 +184,6 @@ mkplugin(Name, Props, Type, Location) -> #plugin{name = Name, version = Version, description = Description, dependencies = Dependencies, location = Location, type = Type}. -%% Read the .app file from an ez. read_app_file(EZ) -> case zip:list_dir(EZ) of {ok, [_|ZippedFiles]} -> @@ -214,13 +199,11 @@ read_app_file(EZ) -> {error, {invalid_ez, Reason}} end. -%% Return the path of the .app files in ebin/. find_app_files(ZippedFiles) -> {ok, RE} = re:compile("^.*/ebin/.*.app$"), [Path || {zip_file, Path, _, _, _, _} <- ZippedFiles, re:run(Path, RE, [{capture, none}]) =:= match]. -%% Parse a binary into a term. parse_binary(Bin) -> try {ok, Ts, _} = erl_scan:string(binary_to_list(Bin)), @@ -230,13 +213,10 @@ parse_binary(Bin) -> Err -> {error, {invalid_app, Err}} end. -%% Filter out applications that can be loaded *right now*. filter_applications(Applications) -> [Application || Application <- Applications, not is_available_app(Application)]. -%% Return whether is application is already available (and hence -%% doesn't need enabling). is_available_app(Application) -> case application:load(Application) of {error, {already_loaded, _}} -> true; @@ -245,10 +225,8 @@ is_available_app(Application) -> _ -> false end. -%% Return the names of the given plugins. plugin_names(Plugins) -> [Name || #plugin{name = Name} <- Plugins]. -%% Find plugins by name in a list of plugins. lookup_plugins(Names, AllPlugins) -> [P || P = #plugin{name = Name} <- AllPlugins, lists:member(Name, Names)]. diff --git a/src/rabbit_policy.erl b/src/rabbit_policy.erl index 1551795f..2717cc92 100644 --- a/src/rabbit_policy.erl +++ b/src/rabbit_policy.erl @@ -26,7 +26,9 @@ -export([register/0]). -export([name/1, get/2, set/1]). --export([validate/3, validate_clear/2, notify/3, notify_clear/2]). +-export([validate/4, validate_clear/3, notify/4, notify_clear/3]). +-export([parse_set/5, set/5, delete/2, lookup/2, list/0, list/1, + list_formatted/1, info_keys/0]). -rabbit_boot_step({?MODULE, [{description, "policy parameters"}, @@ -41,20 +43,21 @@ name(#amqqueue{policy = Policy}) -> name0(Policy); name(#exchange{policy = Policy}) -> name0(Policy). name0(undefined) -> none; -name0(Policy) -> pget(<<"name">>, Policy). +name0(Policy) -> pget(name, Policy). set(Q = #amqqueue{name = Name}) -> Q#amqqueue{policy = set0(Name)}; set(X = #exchange{name = Name}) -> X#exchange{policy = set0(Name)}. -set0(Name) -> match(Name, list()). +set0(Name = #resource{virtual_host = VHost}) -> match(Name, list(VHost)). 
get(Name, #amqqueue{policy = Policy}) -> get0(Name, Policy); get(Name, #exchange{policy = Policy}) -> get0(Name, Policy); %% Caution - SLOW. -get(Name, EntityName = #resource{}) -> get0(Name, match(EntityName, list())). +get(Name, EntityName = #resource{virtual_host = VHost}) -> + get0(Name, match(EntityName, list(VHost))). get0(_Name, undefined) -> {error, not_found}; -get0(Name, List) -> case pget(<<"policy">>, List) of +get0(Name, List) -> case pget(definition, List) of undefined -> {error, not_found}; Policy -> case pget(Name, Policy) of undefined -> {error, not_found}; @@ -64,54 +67,121 @@ get0(Name, List) -> case pget(<<"policy">>, List) of %%---------------------------------------------------------------------------- -validate(<<"policy">>, Name, Term) -> +parse_set(VHost, Name, Pattern, Definition, undefined) -> + parse_set0(VHost, Name, Pattern, Definition, 0); +parse_set(VHost, Name, Pattern, Definition, Priority) -> + try list_to_integer(Priority) of + Num -> parse_set0(VHost, Name, Pattern, Definition, Num) + catch + error:badarg -> {error, "~p priority must be a number", [Priority]} + end. + +parse_set0(VHost, Name, Pattern, Defn, Priority) -> + case rabbit_misc:json_decode(Defn) of + {ok, JSON} -> + set0(VHost, Name, + [{<<"pattern">>, list_to_binary(Pattern)}, + {<<"definition">>, rabbit_misc:json_to_term(JSON)}, + {<<"priority">>, Priority}]); + error -> + {error_string, "JSON decoding error"} + end. + +set(VHost, Name, Pattern, Definition, Priority) -> + PolicyProps = [{<<"pattern">>, Pattern}, + {<<"definition">>, Definition}, + {<<"priority">>, case Priority of + undefined -> 0; + _ -> Priority + end}], + set0(VHost, Name, PolicyProps). + +set0(VHost, Name, Term) -> + rabbit_runtime_parameters:set_any(VHost, <<"policy">>, Name, Term). + +delete(VHost, Name) -> + rabbit_runtime_parameters:clear_any(VHost, <<"policy">>, Name). + +lookup(VHost, Name) -> + case rabbit_runtime_parameters:lookup(VHost, <<"policy">>, Name) of + not_found -> not_found; + P -> p(P, fun ident/1) + end. + +list() -> + list('_'). + +list(VHost) -> + list0(VHost, fun ident/1). + +list_formatted(VHost) -> + order_policies(list0(VHost, fun format/1)). + +list0(VHost, DefnFun) -> + [p(P, DefnFun) || P <- rabbit_runtime_parameters:list(VHost, <<"policy">>)]. + +order_policies(PropList) -> + lists:sort(fun (A, B) -> pget(priority, A) < pget(priority, B) end, + PropList). + +p(Parameter, DefnFun) -> + Value = pget(value, Parameter), + [{vhost, pget(vhost, Parameter)}, + {name, pget(name, Parameter)}, + {pattern, pget(<<"pattern">>, Value)}, + {definition, DefnFun(pget(<<"definition">>, Value))}, + {priority, pget(<<"priority">>, Value)}]. + +format(Term) -> + {ok, JSON} = rabbit_misc:json_encode(rabbit_misc:term_to_json(Term)), + list_to_binary(JSON). + +ident(X) -> X. + +info_keys() -> [vhost, name, pattern, definition, priority]. + +%%---------------------------------------------------------------------------- + +validate(_VHost, <<"policy">>, Name, Term) -> rabbit_parameter_validation:proplist( Name, policy_validation(), Term). -validate_clear(<<"policy">>, _Name) -> +validate_clear(_VHost, <<"policy">>, _Name) -> ok. -notify(<<"policy">>, _Name, _Term) -> - update_policies(). +notify(VHost, <<"policy">>, _Name, _Term) -> + update_policies(VHost). -notify_clear(<<"policy">>, _Name) -> - update_policies(). +notify_clear(VHost, <<"policy">>, _Name) -> + update_policies(VHost). 
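Once parse_set0/5 has decoded the JSON, a policy lives as an ordinary runtime parameter; for a hypothetical invocation along the lines of

    rabbitmqctl set_policy ha-all "^ha\." '{"ha-mode":"all"}'

the stored value is the proplist that p/2 above picks apart:

    [{<<"pattern">>,    <<"^ha\\.">>},
     {<<"definition">>, [{<<"ha-mode">>, <<"all">>}]},
     {<<"priority">>,   0}]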
%%---------------------------------------------------------------------------- -list() -> - [[{<<"name">>, pget(key, P)} | pget(value, P)] - || P <- rabbit_runtime_parameters:list(<<"policy">>)]. - -update_policies() -> - Policies = list(), +update_policies(VHost) -> + Policies = list(VHost), {Xs, Qs} = rabbit_misc:execute_mnesia_transaction( fun() -> {[update_exchange(X, Policies) || - VHost <- rabbit_vhost:list(), - X <- rabbit_exchange:list(VHost)], + X <- rabbit_exchange:list(VHost)], [update_queue(Q, Policies) || - VHost <- rabbit_vhost:list(), - Q <- rabbit_amqqueue:list(VHost)]} + Q <- rabbit_amqqueue:list(VHost)]} end), [notify(X) || X <- Xs], [notify(Q) || Q <- Qs], ok. update_exchange(X = #exchange{name = XName, policy = OldPolicy}, Policies) -> - NewPolicy = match(XName, Policies), - case NewPolicy of + case match(XName, Policies) of OldPolicy -> no_change; - _ -> rabbit_exchange:update( + NewPolicy -> rabbit_exchange:update( XName, fun(X1) -> X1#exchange{policy = NewPolicy} end), {X, X#exchange{policy = NewPolicy}} end. update_queue(Q = #amqqueue{name = QName, policy = OldPolicy}, Policies) -> - NewPolicy = match(QName, Policies), - case NewPolicy of + case match(QName, Policies) of OldPolicy -> no_change; - _ -> rabbit_amqqueue:update( + NewPolicy -> rabbit_amqqueue:update( QName, fun(Q1) -> Q1#amqqueue{policy = NewPolicy} end), {Q, Q#amqqueue{policy = NewPolicy}} end. @@ -129,28 +199,53 @@ match(Name, Policies) -> [Policy | _Rest] -> Policy end. -matches(#resource{name = Name, virtual_host = VHost}, Policy) -> - Prefix = pget(<<"prefix">>, Policy), - case pget(<<"vhost">>, Policy) of - undefined -> prefix(Prefix, Name); - VHost -> prefix(Prefix, Name); - _ -> false - end. - -prefix(A, B) -> lists:prefix(binary_to_list(A), binary_to_list(B)). +matches(#resource{name = Name}, Policy) -> + match =:= re:run(Name, pget(pattern, Policy), [{capture, none}]). -sort_pred(A, B) -> - R = size(pget(<<"prefix">>, A)) >= size(pget(<<"prefix">>, B)), - case {pget(<<"vhost">>, A), pget(<<"vhost">>, B)} of - {undefined, undefined} -> R; - {undefined, _} -> true; - {_, undefined} -> false; - _ -> R - end. +sort_pred(A, B) -> pget(priority, A) >= pget(priority, B). %%---------------------------------------------------------------------------- policy_validation() -> - [{<<"vhost">>, fun rabbit_parameter_validation:binary/2, optional}, - {<<"prefix">>, fun rabbit_parameter_validation:binary/2, mandatory}, - {<<"policy">>, fun rabbit_parameter_validation:list/2, mandatory}]. + [{<<"priority">>, fun rabbit_parameter_validation:number/2, mandatory}, + {<<"pattern">>, fun rabbit_parameter_validation:regex/2, mandatory}, + {<<"definition">>, fun validation/2, mandatory}]. + +validation(_Name, []) -> + {error, "no policy provided", []}; +validation(_Name, Terms) when is_list(Terms) -> + {Keys, Modules} = lists:unzip( + rabbit_registry:lookup_all(policy_validator)), + [] = dups(Keys), %% ASSERTION + Validators = lists:zipwith(fun (M, K) -> {M, a2b(K)} end, Modules, Keys), + {TermKeys, _} = lists:unzip(Terms), + case dups(TermKeys) of + [] -> validation0(Validators, Terms); + Dup -> {error, "~p duplicate keys not allowed", [Dup]} + end; +validation(_Name, Term) -> + {error, "parse error while reading policy: ~p", [Term]}. 
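The prefix-plus-vhost matching is gone: matches/2 now runs the policy's regular expression over the resource name, and sort_pred/2 breaks ties by numeric priority, highest first. A small sketch of those semantics (names invented):

matches_demo() ->
    %% {capture, none} makes re:run/3 return just match | nomatch
    match   = re:run(<<"amq.direct">>, <<"^amq\\.">>, [{capture, none}]),
    nomatch = re:run(<<"myqueue">>,    <<"^amq\\.">>, [{capture, none}]),
    %% the highest-priority policy sorts first, as in sort_pred/2 above
    [[{priority, 2}], [{priority, 1}]] =
        lists:sort(fun (A, B) ->
                           proplists:get_value(priority, A) >=
                               proplists:get_value(priority, B)
                   end,
                   [[{priority, 1}], [{priority, 2}]]),
    ok.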
+ +validation0(Validators, Terms) -> + case lists:foldl( + fun (Mod, {ok, TermsLeft}) -> + ModKeys = proplists:get_all_values(Mod, Validators), + case [T || {Key, _} = T <- TermsLeft, + lists:member(Key, ModKeys)] of + [] -> {ok, TermsLeft}; + Scope -> {Mod:validate_policy(Scope), TermsLeft -- Scope} + end; + (_, Acc) -> + Acc + end, {ok, Terms}, proplists:get_keys(Validators)) of + {ok, []} -> + ok; + {ok, Unvalidated} -> + {error, "~p are not recognised policy settings", [Unvalidated]}; + {Error, _} -> + Error + end. + +a2b(A) -> list_to_binary(atom_to_list(A)). + +dups(L) -> L -- lists:usort(L). diff --git a/src/rabbit_policy_validator.erl b/src/rabbit_policy_validator.erl new file mode 100644 index 00000000..b59dec2b --- /dev/null +++ b/src/rabbit_policy_validator.erl @@ -0,0 +1,37 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_policy_validator). + +-ifdef(use_specs). + +-type(validate_results() :: + 'ok' | {error, string(), [term()]} | [validate_results()]). + +-callback validate_policy([{binary(), term()}]) -> validate_results(). + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [ + {validate_policy, 1} + ]; +behaviour_info(_Other) -> + undefined. + +-endif. diff --git a/src/rabbit_prelaunch.erl b/src/rabbit_prelaunch.erl index d56211b5..404afe3c 100644 --- a/src/rabbit_prelaunch.erl +++ b/src/rabbit_prelaunch.erl @@ -57,7 +57,7 @@ duplicate_node_check(NodeStr) -> case rabbit_nodes:names(NodeHost) of {ok, NamePorts} -> case proplists:is_defined(NodeName, NamePorts) of - true -> io:format("node with name ~p " + true -> io:format("ERROR: node with name ~p " "already running on ~p~n", [NodeName, NodeHost]), io:format(rabbit_nodes:diagnostics([Node]) ++ "~n"), @@ -65,11 +65,8 @@ duplicate_node_check(NodeStr) -> false -> ok end; {error, EpmdReason} -> - rabbit_misc:quit("epmd error for host ~p: ~p (~s)~n", + io:format("ERROR: epmd error for host ~p: ~p (~s)~n", [NodeHost, EpmdReason, - case EpmdReason of - address -> "unable to establish tcp connection"; - timeout -> "timed out establishing tcp connection"; - _ -> inet:format_error(EpmdReason) - end]) + rabbit_misc:format_inet_error(EpmdReason)]), + rabbit_misc:quit(?ERROR_CODE) end. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3ef769c7..21f58154 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -400,19 +400,19 @@ blank_state_dir(Dir) -> on_sync = fun (_) -> ok end, unsynced_msg_ids = gb_sets:new() }. -clean_file_name(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). +clean_filename(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). detect_clean_shutdown(Dir) -> - case rabbit_file:delete(clean_file_name(Dir)) of + case rabbit_file:delete(clean_filename(Dir)) of ok -> true; {error, enoent} -> false end. read_shutdown_terms(Dir) -> - rabbit_file:read_term_file(clean_file_name(Dir)). + rabbit_file:read_term_file(clean_filename(Dir)). 
store_clean_shutdown(Terms, Dir) -> - CleanFileName = clean_file_name(Dir), + CleanFileName = clean_filename(Dir), ok = rabbit_file:ensure_dir(CleanFileName), rabbit_file:write_term_file(CleanFileName, Terms). @@ -537,7 +537,7 @@ queue_index_walker_reader(QueueName, Gatherer) -> State = blank_state(QueueName), ok = scan_segments( fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok) -> - gatherer:in(Gatherer, {MsgId, 1}); + gatherer:sync_in(Gatherer, {MsgId, 1}); (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, _IsAcked, Acc) -> Acc diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index bd5cf588..aef48b20 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -173,6 +173,8 @@ server_capabilities(rabbit_framing_amqp_0_9_1) -> server_capabilities(_) -> []. +%%-------------------------------------------------------------------------- + log(Level, Fmt, Args) -> rabbit_log:log(connection, Level, Fmt, Args). inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F). @@ -182,6 +184,8 @@ socket_op(Sock, Fun) -> {ok, Res} -> Res; {error, Reason} -> log(error, "error on AMQP connection ~p: ~p~n", [self(), Reason]), + %% NB: this is tcp socket, even in case of ssl + rabbit_net:fast_close(Sock), exit(normal) end. @@ -234,15 +238,14 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, end, "closing AMQP connection ~p (~s):~n~p~n", [self(), ConnStr, Ex]) after - %% The reader is the controlling process and hence its - %% termination will close the socket. Furthermore, - %% gen_tcp:close/1 waits for pending output to be sent, which - %% results in unnecessary delays. However, to keep the - %% file_handle_cache accounting as accurate as possible it - %% would be good to close the socket immediately if we - %% can. But we can only do this for non-ssl sockets. - %% - rabbit_net:maybe_fast_close(ClientSock), + %% We don't call gen_tcp:close/1 here since it waits for + %% pending output to be sent, which results in unnecessary + %% delays. We could just terminate - the reader is the + %% controlling process and hence its termination will close + %% the socket. However, to keep the file_handle_cache + %% accounting as accurate as possible we ought to close the + %% socket w/o delay before termination. + rabbit_net:fast_close(ClientSock), rabbit_event:notify(connection_closed, [{pid, self()}]) end, done. @@ -311,7 +314,7 @@ handle_other(handshake_timeout, Deb, State) mainloop(Deb, State); handle_other(handshake_timeout, _Deb, State) -> throw({handshake_timeout, State#v1.callback}); -handle_other(timeout, Deb, State = #v1{connection_state = closed}) -> +handle_other(heartbeat_timeout, Deb, State = #v1{connection_state = closed}) -> mainloop(Deb, State); handle_other(heartbeat_timeout, _Deb, #v1{connection_state = S}) -> throw({heartbeat_timeout, S}); @@ -353,9 +356,9 @@ switch_callback(State, Callback, Length) -> State#v1{callback = Callback, recv_len = Length}. terminate(Explanation, State) when ?IS_RUNNING(State) -> - {normal, send_exception(State, 0, - rabbit_misc:amqp_error( - connection_forced, Explanation, [], none))}; + {normal, handle_exception(State, 0, + rabbit_misc:amqp_error( + connection_forced, Explanation, [], none))}; terminate(_Explanation, State) -> {force, State}. @@ -383,6 +386,9 @@ update_last_blocked_by(State = #v1{conserve_resources = true}) -> update_last_blocked_by(State = #v1{conserve_resources = false}) -> State#v1{last_blocked_by = flow}. 
+%%-------------------------------------------------------------------------- +%% error handling / termination + close_connection(State = #v1{queue_collector = Collector, connection = #connection{ timeout_sec = TimeoutSec}}) -> @@ -406,24 +412,10 @@ handle_dependent_exit(ChPid, Reason, State) -> {_Channel, controlled} -> maybe_close(control_throttle(State)); {Channel, uncontrolled} -> - log(error, "AMQP connection ~p, channel ~p - error:~n~p~n", - [self(), Channel, Reason]), maybe_close(handle_exception(control_throttle(State), Channel, Reason)) end. -channel_cleanup(ChPid) -> - case get({ch_pid, ChPid}) of - undefined -> undefined; - {Channel, MRef} -> credit_flow:peer_down(ChPid), - erase({channel, Channel}), - erase({ch_pid, ChPid}), - erlang:demonitor(MRef, [flush]), - Channel - end. - -all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. - terminate_channels() -> NChannels = length([rabbit_channel:shutdown(ChPid) || ChPid <- all_channels()]), @@ -477,6 +469,80 @@ maybe_close(State) -> termination_kind(normal) -> controlled; termination_kind(_) -> uncontrolled. +handle_exception(State = #v1{connection_state = closed}, Channel, Reason) -> + log(error, "AMQP connection ~p (~p), channel ~p - error:~n~p~n", + [self(), closed, Channel, Reason]), + State; +handle_exception(State = #v1{connection = #connection{protocol = Protocol}, + connection_state = CS}, + Channel, Reason) + when ?IS_RUNNING(State) orelse CS =:= closing -> + log(error, "AMQP connection ~p (~p), channel ~p - error:~n~p~n", + [self(), CS, Channel, Reason]), + {0, CloseMethod} = + rabbit_binary_generator:map_exception(Channel, Reason, Protocol), + terminate_channels(), + State1 = close_connection(State), + ok = send_on_channel0(State1#v1.sock, CloseMethod, Protocol), + State1; +handle_exception(State, Channel, Reason) -> + %% We don't trust the client at this point - force them to wait + %% for a bit so they can't DOS us with repeated failed logins etc. + timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({handshake_error, State#v1.connection_state, Channel, Reason}). + +frame_error(Error, Type, Channel, Payload, State) -> + {Str, Bin} = payload_snippet(Payload), + handle_exception(State, Channel, + rabbit_misc:amqp_error(frame_error, + "type ~p, ~s octets = ~p: ~p", + [Type, Str, Bin, Error], none)). + +unexpected_frame(Type, Channel, Payload, State) -> + {Str, Bin} = payload_snippet(Payload), + handle_exception(State, Channel, + rabbit_misc:amqp_error(unexpected_frame, + "type ~p, ~s octets = ~p", + [Type, Str, Bin], none)). + +payload_snippet(Payload) when size(Payload) =< 16 -> + {"all", Payload}; +payload_snippet(<<Snippet:16/binary, _/binary>>) -> + {"first 16", Snippet}. + +%%-------------------------------------------------------------------------- + +create_channel(Channel, State) -> + #v1{sock = Sock, queue_collector = Collector, + channel_sup_sup_pid = ChanSupSup, + connection = #connection{protocol = Protocol, + frame_max = FrameMax, + user = User, + vhost = VHost, + capabilities = Capabilities}} = State, + {ok, _ChSupPid, {ChPid, AState}} = + rabbit_channel_sup_sup:start_channel( + ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), name(Sock), + Protocol, User, VHost, Capabilities, Collector}), + MRef = erlang:monitor(process, ChPid), + put({ch_pid, ChPid}, {Channel, MRef}), + put({channel, Channel}, {ChPid, AState}), + {ChPid, AState}. 
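Frame errors now quote a bounded amount of the offending payload: payload_snippet/1 above keeps at most the first 16 octets. An illustrative in-module sketch of its behaviour:

payload_snippet_demo() ->
    {"all", <<"hello">>} = payload_snippet(<<"hello">>),
    %% "0123456789abcdef" is exactly 16 octets; the rest is dropped
    {"first 16", <<"0123456789abcdef">>} =
        payload_snippet(<<"0123456789abcdef-and-the-rest">>),
    ok.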
+ +channel_cleanup(ChPid) -> + case get({ch_pid, ChPid}) of + undefined -> undefined; + {Channel, MRef} -> credit_flow:peer_down(ChPid), + erase({channel, Channel}), + erase({ch_pid, ChPid}), + erlang:demonitor(MRef, [flush]), + Channel + end. + +all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. + +%%-------------------------------------------------------------------------- + handle_frame(Type, 0, Payload, State = #v1{connection_state = CS, connection = #connection{protocol = Protocol}}) @@ -492,34 +558,43 @@ handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) handle_frame(Type, 0, Payload, State = #v1{connection = #connection{protocol = Protocol}}) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of - error -> throw({unknown_frame, 0, Type, Payload}); + error -> frame_error(unknown_frame, Type, 0, Payload, State); heartbeat -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); - Other -> throw({unexpected_frame_on_channel0, Other}) + _Other -> unexpected_frame(Type, 0, Payload, State) end; handle_frame(Type, Channel, Payload, - State = #v1{connection = #connection{protocol = Protocol}}) -> + State = #v1{connection = #connection{protocol = Protocol}}) + when ?IS_RUNNING(State) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of - error -> throw({unknown_frame, Channel, Type, Payload}); - heartbeat -> throw({unexpected_heartbeat_frame, Channel}); - AnalyzedFrame -> process_frame(AnalyzedFrame, Channel, State) - end. + error -> frame_error(unknown_frame, Type, Channel, Payload, State); + heartbeat -> unexpected_frame(Type, Channel, Payload, State); + Frame -> process_frame(Frame, Channel, State) + end; +handle_frame(Type, Channel, Payload, State) -> + unexpected_frame(Type, Channel, Payload, State). process_frame(Frame, Channel, State) -> - case get({channel, Channel}) of - {ChPid, AState} -> - case process_channel_frame(Frame, ChPid, AState) of - {ok, NewAState} -> put({channel, Channel}, {ChPid, NewAState}), - post_process_frame(Frame, ChPid, State); - {error, Reason} -> handle_exception(State, Channel, Reason) - end; - undefined when ?IS_RUNNING(State) -> - ok = create_channel(Channel, State), - process_frame(Frame, Channel, State); - undefined -> - throw({channel_frame_while_starting, - Channel, State#v1.connection_state, Frame}) + {ChPid, AState} = case get({channel, Channel}) of + undefined -> create_channel(Channel, State); + Other -> Other + end, + case process_channel_frame(Frame, ChPid, AState) of + {ok, NewAState} -> put({channel, Channel}, {ChPid, NewAState}), + post_process_frame(Frame, ChPid, State); + {error, Reason} -> handle_exception(State, Channel, Reason) + end. + +process_channel_frame(Frame, ChPid, AState) -> + case rabbit_command_assembler:process(Frame, AState) of + {ok, NewAState} -> {ok, NewAState}; + {ok, Method, NewAState} -> rabbit_channel:do(ChPid, Method), + {ok, NewAState}; + {ok, Method, Content, NewAState} -> rabbit_channel:do_flow( + ChPid, Method, Content), + {ok, NewAState}; + {error, Reason} -> {error, Reason} end. post_process_frame({method, 'channel.close_ok', _}, ChPid, State) -> @@ -536,19 +611,20 @@ post_process_frame({method, MethodName, _}, _ChPid, post_process_frame(_Frame, _ChPid, State) -> control_throttle(State). 
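The relocated code above also shows the reader's bookkeeping: each channel lives in the process dictionary under two key shapes, {channel, N} for frame dispatch by number and {ch_pid, Pid} for 'DOWN' cleanup, which all_channels/0 scans. A runnable sketch of that double-index pattern (values invented):

channel_index_demo() ->
    ChPid = spawn(fun () -> receive stop -> ok end end),
    MRef = erlang:monitor(process, ChPid),
    put({ch_pid, ChPid}, {5, MRef}),       %% pid -> {channel number, monitor}
    put({channel, 5}, {ChPid, no_state}),  %% number -> {pid, assembler state}
    [ChPid] = [P || {{ch_pid, P}, _} <- get()],  %% the all_channels/0 scan
    ChPid ! stop,
    ok.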
+%%-------------------------------------------------------------------------- + handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> ensure_stats_timer( switch_callback(State, {frame_payload, Type, Channel, PayloadSize}, PayloadSize + 1)); -handle_input({frame_payload, Type, Channel, PayloadSize}, - PayloadAndMarker, State) -> - case PayloadAndMarker of - <<Payload:PayloadSize/binary, ?FRAME_END>> -> - switch_callback(handle_frame(Type, Channel, Payload, State), - frame_header, 7); - _ -> - throw({bad_payload, Type, Channel, PayloadSize, PayloadAndMarker}) +handle_input({frame_payload, Type, Channel, PayloadSize}, Data, State) -> + <<Payload:PayloadSize/binary, EndMarker>> = Data, + case EndMarker of + ?FRAME_END -> State1 = handle_frame(Type, Channel, Payload, State), + switch_callback(State1, frame_header, 7); + _ -> frame_error({invalid_frame_end_marker, EndMarker}, + Type, Channel, Payload, State) end; %% The two rules pertaining to version negotiation: @@ -619,24 +695,14 @@ ensure_stats_timer(State) -> handle_method0(MethodName, FieldsBin, State = #v1{connection = #connection{protocol = Protocol}}) -> - HandleException = - fun(R) -> - case ?IS_RUNNING(State) of - true -> send_exception(State, 0, R); - %% We don't trust the client at this point - force - %% them to wait for a bit so they can't DOS us with - %% repeated failed logins etc. - false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), - throw({channel0_error, State#v1.connection_state, R}) - end - end, try handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:#amqp_error{method = none} = Reason -> - HandleException(Reason#amqp_error{method = MethodName}); + handle_exception(State, 0, Reason#amqp_error{method = MethodName}); Type:Reason -> - HandleException({Type, Reason, MethodName, erlang:get_stacktrace()}) + Stack = erlang:get_stacktrace(), + handle_exception(State, 0, {Type, Reason, MethodName, Stack}) end. handle_method0(#'connection.start_ok'{mechanism = Mechanism, @@ -740,6 +806,10 @@ server_frame_max() -> {ok, FrameMax} = application:get_env(rabbit, frame_max), FrameMax. +server_heartbeat() -> + {ok, Heartbeat} = application:get_env(rabbit, heartbeat), + Heartbeat. + send_on_channel0(Sock, Method, Protocol) -> ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). @@ -791,7 +861,7 @@ auth_phase(Response, {ok, User} -> Tune = #'connection.tune'{channel_max = 0, frame_max = server_frame_max(), - heartbeat = 0}, + heartbeat = server_heartbeat()}, ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, connection = Connection#connection{user = User}} @@ -834,8 +904,8 @@ i(SockStat, #v1{sock = Sock}) when SockStat =:= recv_oct; SockStat =:= send_oct; SockStat =:= send_cnt; SockStat =:= send_pend -> - socket_info(fun () -> rabbit_net:getstat(Sock, [SockStat]) end, - fun ([{_, I}]) -> I end); + socket_info(fun (S) -> rabbit_net:getstat(S, [SockStat]) end, + fun ([{_, I}]) -> I end, Sock); i(state, #v1{connection_state = S}) -> S; i(last_blocked_by, #v1{last_blocked_by = By}) -> @@ -871,10 +941,7 @@ i(Item, #v1{}) -> throw({bad_argument, Item}). socket_info(Get, Select, Sock) -> - socket_info(fun() -> Get(Sock) end, Select). - -socket_info(Get, Select) -> - case Get() of + case Get(Sock) of {ok, T} -> Select(T); {error, _} -> '' end. @@ -897,51 +964,6 @@ cert_info(F, Sock) -> {ok, Cert} -> list_to_binary(F(Cert)) end. 
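handle_input/3 above works by binary pattern matching: a 7-octet header <<Type:8, Channel:16, PayloadSize:32>>, then PayloadSize octets of payload followed by one frame-end octet (16#CE in AMQP 0-9-1, which is what the ?FRAME_END comparison asserts). A self-contained sketch with illustrative bytes:

frame_parse_demo() ->
    FrameEnd = 16#CE,
    Wire = <<1:8, 0:16, 5:32, "hello", FrameEnd:8>>,
    <<Type:8, Channel:16, PayloadSize:32, Rest/binary>> = Wire,
    <<Payload:PayloadSize/binary, EndMarker>> = Rest,
    {1, 0, <<"hello">>, FrameEnd} = {Type, Channel, Payload, EndMarker},
    ok.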
-%%-------------------------------------------------------------------------- - -create_channel(Channel, State) -> - #v1{sock = Sock, queue_collector = Collector, - channel_sup_sup_pid = ChanSupSup, - connection = #connection{protocol = Protocol, - frame_max = FrameMax, - user = User, - vhost = VHost, - capabilities = Capabilities}} = State, - {ok, _ChSupPid, {ChPid, AState}} = - rabbit_channel_sup_sup:start_channel( - ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), name(Sock), - Protocol, User, VHost, Capabilities, Collector}), - MRef = erlang:monitor(process, ChPid), - put({ch_pid, ChPid}, {Channel, MRef}), - put({channel, Channel}, {ChPid, AState}), - ok. - -process_channel_frame(Frame, ChPid, AState) -> - case rabbit_command_assembler:process(Frame, AState) of - {ok, NewAState} -> {ok, NewAState}; - {ok, Method, NewAState} -> rabbit_channel:do(ChPid, Method), - {ok, NewAState}; - {ok, Method, Content, NewAState} -> rabbit_channel:do_flow( - ChPid, Method, Content), - {ok, NewAState}; - {error, Reason} -> {error, Reason} - end. - -handle_exception(State = #v1{connection_state = closed}, _Channel, _Reason) -> - State; -handle_exception(State, Channel, Reason) -> - send_exception(State, Channel, Reason). - -send_exception(State = #v1{connection = #connection{protocol = Protocol}}, - Channel, Reason) -> - {0, CloseMethod} = - rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - terminate_channels(), - State1 = close_connection(State), - ok = rabbit_writer:internal_send_command( - State1#v1.sock, 0, CloseMethod, Protocol), - State1. - emit_stats(State) -> rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)), rabbit_event:reset_stats_timer(State, #v1.stats_timer). diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl index e14bbba0..32709d24 100644 --- a/src/rabbit_registry.erl +++ b/src/rabbit_registry.erl @@ -107,7 +107,8 @@ sanity_check_module(ClassModule, Module) -> class_module(exchange) -> rabbit_exchange_type; class_module(auth_mechanism) -> rabbit_auth_mechanism; class_module(runtime_parameter) -> rabbit_runtime_parameter; -class_module(exchange_decorator) -> rabbit_exchange_decorator. +class_module(exchange_decorator) -> rabbit_exchange_decorator; +class_module(policy_validator) -> rabbit_policy_validator. %%--------------------------------------------------------------------------- diff --git a/src/rabbit_runtime_parameter.erl b/src/rabbit_runtime_parameter.erl index c7d30116..18668049 100644 --- a/src/rabbit_runtime_parameter.erl +++ b/src/rabbit_runtime_parameter.erl @@ -21,10 +21,12 @@ -type(validate_results() :: 'ok' | {error, string(), [term()]} | [validate_results()]). --callback validate(binary(), binary(), term()) -> validate_results(). --callback validate_clear(binary(), binary()) -> validate_results(). --callback notify(binary(), binary(), term()) -> 'ok'. --callback notify_clear(binary(), binary()) -> 'ok'. +-callback validate(rabbit_types:vhost(), binary(), binary(), + term()) -> validate_results(). +-callback validate_clear(rabbit_types:vhost(), binary(), + binary()) -> validate_results(). +-callback notify(rabbit_types:vhost(), binary(), binary(), term()) -> 'ok'. +-callback notify_clear(rabbit_types:vhost(), binary(), binary()) -> 'ok'. -else. @@ -32,10 +34,10 @@ behaviour_info(callbacks) -> [ - {validate, 3}, - {validate_clear, 2}, - {notify, 3}, - {notify_clear, 2} + {validate, 4}, + {validate_clear, 3}, + {notify, 4}, + {notify_clear, 3} ]; behaviour_info(_Other) -> undefined. 
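With policy_validator added to class_module/1 above, modules can hook into policy validation. A hypothetical validator (module name and key invented), following the behaviour introduced earlier in this commit and the registration style of the test module below:

-module(my_policy_validator).
-behaviour(rabbit_policy_validator).

-export([register/0, validate_policy/1]).

register() ->
    rabbit_registry:register(policy_validator, <<"max-things">>, ?MODULE).

%% Receives the [{Key, Value}] pairs of a policy definition that fall
%% under the keys this module registered for.
validate_policy([{<<"max-things">>, N}]) when is_integer(N), N > 0 ->
    ok;
validate_policy(Other) ->
    {error, "max-things must be a positive integer, got: ~p", [Other]}.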
diff --git a/src/rabbit_runtime_parameters.erl b/src/rabbit_runtime_parameters.erl index 3a54e8f6..49060409 100644 --- a/src/rabbit_runtime_parameters.erl +++ b/src/rabbit_runtime_parameters.erl @@ -18,8 +18,9 @@ -include("rabbit.hrl"). --export([parse_set/3, set/3, clear/2, list/0, list/1, list_strict/1, - list_formatted/0, lookup/2, value/2, value/3, info_keys/0]). +-export([parse_set/4, set/4, set_any/4, clear/3, clear_any/3, list/0, list/1, + list_strict/1, list/2, list_strict/2, list_formatted/1, lookup/3, + value/3, value/4, info_keys/0]). %%---------------------------------------------------------------------------- @@ -27,16 +28,29 @@ -type(ok_or_error_string() :: 'ok' | {'error_string', string()}). --spec(parse_set/3 :: (binary(), binary(), string()) -> ok_or_error_string()). --spec(set/3 :: (binary(), binary(), term()) -> ok_or_error_string()). --spec(clear/2 :: (binary(), binary()) -> ok_or_error_string()). +-spec(parse_set/4 :: (rabbit_types:vhost(), binary(), binary(), string()) + -> ok_or_error_string()). +-spec(set/4 :: (rabbit_types:vhost(), binary(), binary(), term()) + -> ok_or_error_string()). +-spec(set_any/4 :: (rabbit_types:vhost(), binary(), binary(), term()) + -> ok_or_error_string()). +-spec(clear/3 :: (rabbit_types:vhost(), binary(), binary()) + -> ok_or_error_string()). +-spec(clear_any/3 :: (rabbit_types:vhost(), binary(), binary()) + -> ok_or_error_string()). -spec(list/0 :: () -> [rabbit_types:infos()]). --spec(list/1 :: (binary()) -> [rabbit_types:infos()]). --spec(list_strict/1 :: (binary()) -> [rabbit_types:infos()] | 'not_found'). --spec(list_formatted/0 :: () -> [rabbit_types:infos()]). --spec(lookup/2 :: (binary(), binary()) -> rabbit_types:infos()). --spec(value/2 :: (binary(), binary()) -> term()). --spec(value/3 :: (binary(), binary(), term()) -> term()). +-spec(list/1 :: (rabbit_types:vhost() | '_') -> [rabbit_types:infos()]). +-spec(list_strict/1 :: (binary() | '_') + -> [rabbit_types:infos()] | 'not_found'). +-spec(list/2 :: (rabbit_types:vhost() | '_', binary() | '_') + -> [rabbit_types:infos()]). +-spec(list_strict/2 :: (rabbit_types:vhost() | '_', binary() | '_') + -> [rabbit_types:infos()] | 'not_found'). +-spec(list_formatted/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]). +-spec(lookup/3 :: (rabbit_types:vhost(), binary(), binary()) + -> rabbit_types:infos() | 'not_found'). +-spec(value/3 :: (rabbit_types:vhost(), binary(), binary()) -> term()). +-spec(value/4 :: (rabbit_types:vhost(), binary(), binary(), term()) -> term()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -endif. @@ -49,36 +63,39 @@ %%--------------------------------------------------------------------------- -parse_set(Component, Key, String) -> - case parse(String) of - {ok, Term} -> set(Component, Key, Term); - {errors, L} -> format_error(L) +parse_set(_, <<"policy">>, _, _) -> + {error_string, "policies may not be set using this method"}; +parse_set(VHost, Component, Name, String) -> + case rabbit_misc:json_decode(String) of + {ok, JSON} -> set(VHost, Component, Name, + rabbit_misc:json_to_term(JSON)); + error -> {error_string, "JSON decoding error"} end. -set(Component, Key, Term) -> - case set0(Component, Key, Term) of - ok -> ok; - {errors, L} -> format_error(L) - end. +set(_, <<"policy">>, _, _) -> + {error_string, "policies may not be set using this method"}; +set(VHost, Component, Name, Term) -> + set_any(VHost, Component, Name, Term). format_error(L) -> {error_string, rabbit_misc:format_many([{"Validation failed~n", []} | L])}. 
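Every read and write is now scoped by virtual host, and the <<"policy">> component is fenced off so that policies only flow through rabbit_policy. Illustrative calls (the <<"test">> component is the one registered by rabbit_runtime_parameters_test further down this diff; assumes that registration and a running Mnesia):

params_demo() ->
    ok = rabbit_runtime_parameters:parse_set(
           <<"/">>, <<"test">>, <<"good">>, "{\"key\": \"value\"}"),
    %% the policy component is reserved; it must go through rabbit_policy
    {error_string, _} = rabbit_runtime_parameters:parse_set(
                          <<"/">>, <<"policy">>, <<"x">>, "{}"),
    ok.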
-set0(Component, Key, Term) -> +set_any(VHost, Component, Name, Term) -> + case set_any0(VHost, Component, Name, Term) of + ok -> ok; + {errors, L} -> format_error(L) + end. + +set_any0(VHost, Component, Name, Term) -> case lookup_component(Component) of {ok, Mod} -> - case flatten_errors(validate(Term)) of + case flatten_errors(Mod:validate(VHost, Component, Name, Term)) of ok -> - case flatten_errors(Mod:validate(Component, Key, Term)) of - ok -> - case mnesia_update(Component, Key, Term) of - {old, Term} -> ok; - _ -> Mod:notify(Component, Key, Term) - end, - ok; - E -> - E - end; + case mnesia_update(VHost, Component, Name, Term) of + {old, Term} -> ok; + _ -> Mod:notify(VHost, Component, Name, Term) + end, + ok; E -> E end; @@ -86,102 +103,125 @@ set0(Component, Key, Term) -> E end. -mnesia_update(Component, Key, Term) -> +mnesia_update(VHost, Component, Name, Term) -> rabbit_misc:execute_mnesia_transaction( fun () -> - Res = case mnesia:read(?TABLE, {Component, Key}, read) of + Res = case mnesia:read(?TABLE, {VHost, Component, Name}, read) of [] -> new; [Params] -> {old, Params#runtime_parameters.value} end, - ok = mnesia:write(?TABLE, c(Component, Key, Term), write), + ok = mnesia:write(?TABLE, c(VHost, Component, Name, Term), write), Res end). -clear(Component, Key) -> - case clear0(Component, Key) of +clear(_, <<"policy">> , _) -> + {error_string, "policies may not be cleared using this method"}; +clear(VHost, Component, Name) -> + clear_any(VHost, Component, Name). + +clear_any(VHost, Component, Name) -> + case clear_any0(VHost, Component, Name) of ok -> ok; {errors, L} -> format_error(L) end. -clear0(Component, Key) -> +clear_any0(VHost, Component, Name) -> case lookup_component(Component) of - {ok, Mod} -> case flatten_errors(Mod:validate_clear(Component, Key)) of - ok -> mnesia_clear(Component, Key), - Mod:notify_clear(Component, Key), + {ok, Mod} -> case flatten_errors( + Mod:validate_clear(VHost, Component, Name)) of + ok -> mnesia_clear(VHost, Component, Name), + Mod:notify_clear(VHost, Component, Name), ok; E -> E end; E -> E end. -mnesia_clear(Component, Key) -> +mnesia_clear(VHost, Component, Name) -> ok = rabbit_misc:execute_mnesia_transaction( fun () -> - ok = mnesia:delete(?TABLE, {Component, Key}, write) + ok = mnesia:delete(?TABLE, {VHost, Component, Name}, write) end). list() -> - [p(P) || P <- rabbit_misc:dirty_read_all(?TABLE)]. - -list(Component) -> list(Component, []). -list_strict(Component) -> list(Component, not_found). - -list(Component, Default) -> - case lookup_component(Component) of - {ok, _} -> Match = #runtime_parameters{key = {Component, '_'}, _ = '_'}, - [p(P) || P <- mnesia:dirty_match_object(?TABLE, Match)]; - _ -> Default + [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <- + rabbit_misc:dirty_read_all(?TABLE), Comp /= <<"policy">>]. + +list(VHost) -> list(VHost, '_', []). +list_strict(Component) -> list('_', Component, not_found). +list(VHost, Component) -> list(VHost, Component, []). +list_strict(VHost, Component) -> list(VHost, Component, not_found). + +list(VHost, Component, Default) -> + case component_good(Component) of + true -> Match = #runtime_parameters{key = {VHost, Component, '_'}, + _ = '_'}, + [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <- + mnesia:dirty_match_object(?TABLE, Match), + Comp =/= <<"policy">> orelse + Component =:= <<"policy">>]; + _ -> Default end. -list_formatted() -> - [pset(value, format(pget(value, P)), P) || P <- list()]. 
+list_formatted(VHost) -> + [pset(value, format(pget(value, P)), P) || P <- list(VHost)]. -lookup(Component, Key) -> - case lookup0(Component, Key, rabbit_misc:const(not_found)) of +lookup(VHost, Component, Name) -> + case lookup0(VHost, Component, Name, rabbit_misc:const(not_found)) of not_found -> not_found; Params -> p(Params) end. -value(Component, Key) -> - case lookup0(Component, Key, rabbit_misc:const(not_found)) of +value(VHost, Component, Name) -> + case lookup0(VHost, Component, Name, rabbit_misc:const(not_found)) of not_found -> not_found; Params -> Params#runtime_parameters.value end. -value(Component, Key, Default) -> - Params = lookup0(Component, Key, - fun () -> lookup_missing(Component, Key, Default) end), +value(VHost, Component, Name, Default) -> + Params = lookup0(VHost, Component, Name, + fun () -> + lookup_missing(VHost, Component, Name, Default) + end), Params#runtime_parameters.value. -lookup0(Component, Key, DefaultFun) -> - case mnesia:dirty_read(?TABLE, {Component, Key}) of +lookup0(VHost, Component, Name, DefaultFun) -> + case mnesia:dirty_read(?TABLE, {VHost, Component, Name}) of [] -> DefaultFun(); [R] -> R end. -lookup_missing(Component, Key, Default) -> +lookup_missing(VHost, Component, Name, Default) -> rabbit_misc:execute_mnesia_transaction( fun () -> - case mnesia:read(?TABLE, {Component, Key}, read) of - [] -> Record = c(Component, Key, Default), + case mnesia:read(?TABLE, {VHost, Component, Name}, read) of + [] -> Record = c(VHost, Component, Name, Default), mnesia:write(?TABLE, Record, write), Record; [R] -> R end end). -c(Component, Key, Default) -> #runtime_parameters{key = {Component, Key}, - value = Default}. +c(VHost, Component, Name, Default) -> + #runtime_parameters{key = {VHost, Component, Name}, + value = Default}. -p(#runtime_parameters{key = {Component, Key}, value = Value}) -> - [{component, Component}, - {key, Key}, +p(#runtime_parameters{key = {VHost, Component, Name}, value = Value}) -> + [{vhost, VHost}, + {component, Component}, + {name, Name}, {value, Value}]. -info_keys() -> [component, key, value]. +info_keys() -> [component, name, value]. %%--------------------------------------------------------------------------- +component_good('_') -> true; +component_good(Component) -> case lookup_component(Component) of + {ok, _} -> true; + _ -> false + end. + lookup_component(Component) -> case rabbit_registry:lookup_module( runtime_parameter, list_to_atom(binary_to_list(Component))) of @@ -190,51 +230,9 @@ lookup_component(Component) -> {ok, Module} -> {ok, Module} end. -parse(Src0) -> - Src1 = string:strip(Src0), - Src = case lists:reverse(Src1) of - [$. |_] -> Src1; - _ -> Src1 ++ "." - end, - case erl_scan:string(Src) of - {ok, Scanned, _} -> - case erl_parse:parse_term(Scanned) of - {ok, Parsed} -> - {ok, Parsed}; - {error, E} -> - {errors, - [{"Could not parse value: ~s", [format_parse_error(E)]}]} - end; - {error, E, _} -> - {errors, [{"Could not scan value: ~s", [format_parse_error(E)]}]} - end. - -format_parse_error({_Line, Mod, Err}) -> - lists:flatten(Mod:format_error(Err)). - format(Term) -> - list_to_binary(rabbit_misc:format("~p", [Term])). - -%%--------------------------------------------------------------------------- - -%% We will want to be able to biject these to JSON. So we have some -%% generic restrictions on what we consider acceptable. 
-validate(Proplist = [T | _]) when is_tuple(T) -> validate_proplist(Proplist); -validate(L) when is_list(L) -> validate_list(L); -validate(T) when is_tuple(T) -> {error, "tuple: ~p", [T]}; -validate(B) when is_boolean(B) -> ok; -validate(null) -> ok; -validate(A) when is_atom(A) -> {error, "atom: ~p", [A]}; -validate(N) when is_number(N) -> ok; -validate(B) when is_binary(B) -> ok; -validate(B) when is_bitstring(B) -> {error, "bitstring: ~p", [B]}. - -validate_list(L) -> [validate(I) || I <- L]. -validate_proplist(L) -> [vp(I) || I <- L]. - -vp({K, V}) when is_binary(K) -> validate(V); -vp({K, _V}) -> {error, "bad key: ~p", [K]}; -vp(H) -> {error, "not two tuple: ~p", [H]}. + {ok, JSON} = rabbit_misc:json_encode(rabbit_misc:term_to_json(Term)), + list_to_binary(JSON). flatten_errors(L) -> case [{F, A} || I <- lists:flatten([L]), {error, F, A} <- [I]] of diff --git a/src/rabbit_runtime_parameters_test.erl b/src/rabbit_runtime_parameters_test.erl index f23b3227..d4d7271e 100644 --- a/src/rabbit_runtime_parameters_test.erl +++ b/src/rabbit_runtime_parameters_test.erl @@ -16,9 +16,14 @@ -module(rabbit_runtime_parameters_test). -behaviour(rabbit_runtime_parameter). +-behaviour(rabbit_policy_validator). --export([validate/3, validate_clear/2, notify/3, notify_clear/2]). +-export([validate/4, validate_clear/3, notify/4, notify_clear/3]). -export([register/0, unregister/0]). +-export([validate_policy/1]). +-export([register_policy_validator/0, unregister_policy_validator/0]). + +%---------------------------------------------------------------------------- register() -> rabbit_registry:register(runtime_parameter, <<"test">>, ?MODULE). @@ -26,13 +31,38 @@ register() -> unregister() -> rabbit_registry:unregister(runtime_parameter, <<"test">>). -validate(<<"test">>, <<"good">>, _Term) -> ok; -validate(<<"test">>, <<"maybe">>, <<"good">>) -> ok; -validate(<<"test">>, _, _) -> {error, "meh", []}. +validate(_, <<"test">>, <<"good">>, _Term) -> ok; +validate(_, <<"test">>, <<"maybe">>, <<"good">>) -> ok; +validate(_, <<"test">>, _, _) -> {error, "meh", []}. + +validate_clear(_, <<"test">>, <<"good">>) -> ok; +validate_clear(_, <<"test">>, <<"maybe">>) -> ok; +validate_clear(_, <<"test">>, _) -> {error, "meh", []}. + +notify(_, _, _, _) -> ok. +notify_clear(_, _, _) -> ok. + +%---------------------------------------------------------------------------- + +register_policy_validator() -> + rabbit_registry:register(policy_validator, <<"testeven">>, ?MODULE), + rabbit_registry:register(policy_validator, <<"testpos">>, ?MODULE). + +unregister_policy_validator() -> + rabbit_registry:unregister(policy_validator, <<"testeven">>), + rabbit_registry:unregister(policy_validator, <<"testpos">>). + +validate_policy([{<<"testeven">>, Terms}]) when is_list(Terms) -> + case length(Terms) rem 2 =:= 0 of + true -> ok; + false -> {error, "meh", []} + end; -validate_clear(<<"test">>, <<"good">>) -> ok; -validate_clear(<<"test">>, <<"maybe">>) -> ok; -validate_clear(<<"test">>, _) -> {error, "meh", []}. +validate_policy([{<<"testpos">>, Terms}]) when is_list(Terms) -> + case lists:all(fun (N) -> is_integer(N) andalso N > 0 end, Terms) of + true -> ok; + false -> {error, "meh", []} + end; -notify(_, _, _) -> ok. -notify_clear(_, _) -> ok. +validate_policy(_) -> + {error, "meh", []}. 
diff --git a/src/rabbit_table.erl b/src/rabbit_table.erl new file mode 100644 index 00000000..fa1c5bbd --- /dev/null +++ b/src/rabbit_table.erl @@ -0,0 +1,311 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_table). + +-export([create/0, create_local_copy/1, wait_for_replicated/0, wait/1, + force_load/0, is_present/0, is_empty/0, + check_schema_integrity/0, clear_ram_only_tables/0]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(create/0 :: () -> 'ok'). +-spec(create_local_copy/1 :: ('disc' | 'ram') -> 'ok'). +-spec(wait_for_replicated/0 :: () -> 'ok'). +-spec(wait/1 :: ([atom()]) -> 'ok'). +-spec(force_load/0 :: () -> 'ok'). +-spec(is_present/0 :: () -> boolean()). +-spec(is_empty/0 :: () -> boolean()). +-spec(check_schema_integrity/0 :: () -> rabbit_types:ok_or_error(any())). +-spec(clear_ram_only_tables/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- +%% Main interface +%%---------------------------------------------------------------------------- + +create() -> + lists:foreach(fun ({Tab, TabDef}) -> + TabDef1 = proplists:delete(match, TabDef), + case mnesia:create_table(Tab, TabDef1) of + {atomic, ok} -> ok; + {aborted, Reason} -> + throw({error, {table_creation_failed, + Tab, TabDef1, Reason}}) + end + end, definitions()), + ok. + +%% The sequence in which we delete the schema and then the other +%% tables is important: if we delete the schema first when moving to +%% RAM mnesia will loudly complain since it doesn't make much sense to +%% do that. But when moving to disc, we need to move the schema first. +create_local_copy(disc) -> + create_local_copy(schema, disc_copies), + create_local_copies(disc); +create_local_copy(ram) -> + create_local_copies(ram), + create_local_copy(schema, ram_copies). + +wait_for_replicated() -> + wait([Tab || {Tab, TabDef} <- definitions(), + not lists:member({local_content, true}, TabDef)]). + +wait(TableNames) -> + case mnesia:wait_for_tables(TableNames, 30000) of + ok -> + ok; + {timeout, BadTabs} -> + throw({error, {timeout_waiting_for_tables, BadTabs}}); + {error, Reason} -> + throw({error, {failed_waiting_for_tables, Reason}}) + end. + +force_load() -> [mnesia:force_load_table(T) || T <- names()], ok. + +is_present() -> names() -- mnesia:system_info(tables) =:= []. + +is_empty() -> + lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, + names()). + +check_schema_integrity() -> + Tables = mnesia:system_info(tables), + case check(fun (Tab, TabDef) -> + case lists:member(Tab, Tables) of + false -> {error, {table_missing, Tab}}; + true -> check_attributes(Tab, TabDef) + end + end) of + ok -> ok = wait(names()), + check(fun check_content/2); + Other -> Other + end. 
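With this module, callers can block until Mnesia has loaded a given subset of the tables; wait/1 above throws on the 30-second timeout rather than returning an error tuple, and wait_for_replicated/0 deliberately skips local_content tables since those are never shared across nodes. An illustrative call, with table names taken from definitions() below:

ok = rabbit_table:wait([rabbit_user, rabbit_vhost, rabbit_durable_queue]).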
+ +clear_ram_only_tables() -> + Node = node(), + lists:foreach( + fun (TabName) -> + case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of + true -> {atomic, ok} = mnesia:clear_table(TabName); + false -> ok + end + end, names()), + ok. + +%%-------------------------------------------------------------------- +%% Internal helpers +%%-------------------------------------------------------------------- + +create_local_copies(Type) -> + lists:foreach( + fun ({Tab, TabDef}) -> + HasDiscCopies = has_copy_type(TabDef, disc_copies), + HasDiscOnlyCopies = has_copy_type(TabDef, disc_only_copies), + LocalTab = proplists:get_bool(local_content, TabDef), + StorageType = + if + Type =:= disc orelse LocalTab -> + if + HasDiscCopies -> disc_copies; + HasDiscOnlyCopies -> disc_only_copies; + true -> ram_copies + end; + Type =:= ram -> + ram_copies + end, + ok = create_local_copy(Tab, StorageType) + end, definitions(Type)), + ok. + +create_local_copy(Tab, Type) -> + StorageType = mnesia:table_info(Tab, storage_type), + {atomic, ok} = + if + StorageType == unknown -> + mnesia:add_table_copy(Tab, node(), Type); + StorageType /= Type -> + mnesia:change_table_copy_type(Tab, node(), Type); + true -> {atomic, ok} + end, + ok. + +has_copy_type(TabDef, DiscType) -> + lists:member(node(), proplists:get_value(DiscType, TabDef, [])). + +check_attributes(Tab, TabDef) -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + case mnesia:table_info(Tab, attributes) of + ExpAttrs -> ok; + Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} + end. + +check_content(Tab, TabDef) -> + {_, Match} = proplists:lookup(match, TabDef), + case mnesia:dirty_first(Tab) of + '$end_of_table' -> + ok; + Key -> + ObjList = mnesia:dirty_read(Tab, Key), + MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), + case ets:match_spec_run(ObjList, MatchComp) of + ObjList -> ok; + _ -> {error, {table_content_invalid, Tab, Match, ObjList}} + end + end. + +check(Fun) -> + case [Error || {Tab, TabDef} <- definitions(), + case Fun(Tab, TabDef) of + ok -> Error = none, false; + {error, Error} -> true + end] of + [] -> ok; + Errors -> {error, Errors} + end. + +%%-------------------------------------------------------------------- +%% Table definitions +%%-------------------------------------------------------------------- + +names() -> [Tab || {Tab, _} <- definitions()]. + +%% The tables aren't supposed to be on disk on a ram node +definitions(disc) -> + definitions(); +definitions(ram) -> + [{Tab, [{disc_copies, []}, {ram_copies, [node()]} | + proplists:delete( + ram_copies, proplists:delete(disc_copies, TabDef))]} || + {Tab, TabDef} <- definitions()]. 
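check_content/2 above compiles each table's declared match pattern into an ets match spec and requires every row read back to satisfy it. A self-contained sketch of that match-spec technique (record shape and rows invented):

match_spec_demo() ->
    %% {Pattern, Guards, Body}: return the whole object ('$_') for any
    %% tuple matching the pattern
    MS = ets:match_spec_compile([{{point, '_', '_'}, [], ['$_']}]),
    Rows = [{point, 1, a}, {point, 2, b}],
    Rows = ets:match_spec_run(Rows, MS),       %% every row conforms
    [] = ets:match_spec_run([{line, 1}], MS),  %% non-conforming rows drop out
    ok.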
+ +definitions() -> + [{rabbit_user, + [{record_name, internal_user}, + {attributes, record_info(fields, internal_user)}, + {disc_copies, [node()]}, + {match, #internal_user{_='_'}}]}, + {rabbit_user_permission, + [{record_name, user_permission}, + {attributes, record_info(fields, user_permission)}, + {disc_copies, [node()]}, + {match, #user_permission{user_vhost = #user_vhost{_='_'}, + permission = #permission{_='_'}, + _='_'}}]}, + {rabbit_vhost, + [{record_name, vhost}, + {attributes, record_info(fields, vhost)}, + {disc_copies, [node()]}, + {match, #vhost{_='_'}}]}, + {rabbit_listener, + [{record_name, listener}, + {attributes, record_info(fields, listener)}, + {type, bag}, + {match, #listener{_='_'}}]}, + {rabbit_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {disc_copies, [node()]}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_semi_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_reverse_route, + [{record_name, reverse_route}, + {attributes, record_info(fields, reverse_route)}, + {type, ordered_set}, + {match, #reverse_route{reverse_binding = reverse_binding_match(), + _='_'}}]}, + {rabbit_topic_trie_node, + [{record_name, topic_trie_node}, + {attributes, record_info(fields, topic_trie_node)}, + {type, ordered_set}, + {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]}, + {rabbit_topic_trie_edge, + [{record_name, topic_trie_edge}, + {attributes, record_info(fields, topic_trie_edge)}, + {type, ordered_set}, + {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, + {rabbit_topic_trie_binding, + [{record_name, topic_trie_binding}, + {attributes, record_info(fields, topic_trie_binding)}, + {type, ordered_set}, + {match, #topic_trie_binding{trie_binding = trie_binding_match(), + _='_'}}]}, + {rabbit_durable_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {disc_copies, [node()]}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange_serial, + [{record_name, exchange_serial}, + {attributes, record_info(fields, exchange_serial)}, + {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, + {rabbit_runtime_parameters, + [{record_name, runtime_parameters}, + {attributes, record_info(fields, runtime_parameters)}, + {disc_copies, [node()]}, + {match, #runtime_parameters{_='_'}}]}, + {rabbit_durable_queue, + [{record_name, amqqueue}, + {attributes, record_info(fields, amqqueue)}, + {disc_copies, [node()]}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}, + {rabbit_queue, + [{record_name, amqqueue}, + {attributes, record_info(fields, amqqueue)}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}] + ++ gm:table_definitions() + ++ mirrored_supervisor:table_definitions(). + +binding_match() -> + #binding{source = exchange_name_match(), + destination = binding_destination_match(), + _='_'}. +reverse_binding_match() -> + #reverse_binding{destination = binding_destination_match(), + source = exchange_name_match(), + _='_'}. +binding_destination_match() -> + resource_match('_'). 
+trie_node_match() ->
+    #trie_node{ exchange_name = exchange_name_match(), _='_'}.
+trie_edge_match() ->
+    #trie_edge{ exchange_name = exchange_name_match(), _='_'}.
+trie_binding_match() ->
+    #trie_binding{exchange_name = exchange_name_match(), _='_'}.
+exchange_name_match() ->
+    resource_match(exchange).
+queue_name_match() ->
+    resource_match(queue).
+resource_match(Kind) ->
+    #resource{kind = Kind, _='_'}.
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index bb60bd12..962bb648 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -32,6 +32,8 @@
 -define(TIMEOUT, 5000).
 
 all_tests() ->
+    ok = setup_cluster(),
+    ok = supervisor2_tests:test_all(),
     passed = gm_tests:all_tests(),
     passed = mirrored_supervisor_tests:all_tests(),
     application:set_env(rabbit, file_handles_high_watermark, 10, infinity),
@@ -52,36 +54,63 @@
     passed = test_log_management_during_startup(),
     passed = test_statistics(),
     passed = test_arguments_parser(),
-    passed = test_cluster_management(),
+    passed = test_dynamic_mirroring(),
     passed = test_user_management(),
     passed = test_runtime_parameters(),
+    passed = test_policy_validation(),
     passed = test_server_status(),
     passed = test_confirms(),
-    passed = maybe_run_cluster_dependent_tests(),
+    passed =
+        do_if_secondary_node(
+          fun run_cluster_dependent_tests/1,
+          fun (SecondaryNode) ->
+                  io:format("Skipping cluster dependent tests with node ~p~n",
+                            [SecondaryNode]),
+                  passed
+          end),
     passed = test_configurable_server_properties(),
     passed.
 
-maybe_run_cluster_dependent_tests() ->
+do_if_secondary_node(Up, Down) ->
     SecondaryNode = rabbit_nodes:make("hare"),
 
     case net_adm:ping(SecondaryNode) of
-        pong -> passed = run_cluster_dependent_tests(SecondaryNode);
-        pang -> io:format("Skipping cluster dependent tests with node ~p~n",
-                          [SecondaryNode])
-    end,
-    passed.
+        pong -> Up(SecondaryNode);
+        pang -> Down(SecondaryNode)
+    end.
 
-run_cluster_dependent_tests(SecondaryNode) ->
-    SecondaryNodeS = atom_to_list(SecondaryNode),
+setup_cluster() ->
+    do_if_secondary_node(
+      fun (SecondaryNode) ->
+              cover:stop(SecondaryNode),
+              ok = control_action(stop_app, []),
+              %% 'cover' does not cope at all well with nodes disconnecting,
+              %% which happens as part of reset. So we turn it off
+              %% temporarily. That is ok even if we're not in general using
+              %% cover; it just turns the engine on / off and doesn't log
+              %% anything. Note that this way cover won't be on when joining
+              %% the cluster, but this is OK since we're testing the clustering
+              %% interface elsewhere anyway.
+              cover:stop(nodes()),
+              ok = control_action(join_cluster,
+                                  [atom_to_list(SecondaryNode)]),
+              cover:start(nodes()),
+              ok = control_action(start_app, []),
+              ok = control_action(start_app, SecondaryNode, [], [])
+      end,
+      fun (_) -> ok end).
 
-    cover:stop(SecondaryNode),
-    ok = control_action(stop_app, []),
-    ok = control_action(reset, []),
-    ok = control_action(cluster, [SecondaryNodeS]),
-    ok = control_action(start_app, []),
-    cover:start(SecondaryNode),
-    ok = control_action(start_app, SecondaryNode, [], []),
+maybe_run_cluster_dependent_tests() ->
+    do_if_secondary_node(
+      fun (SecondaryNode) ->
+              passed = run_cluster_dependent_tests(SecondaryNode)
+      end,
+      fun (SecondaryNode) ->
+              io:format("Skipping cluster dependent tests with node ~p~n",
+                        [SecondaryNode])
+      end).
+run_cluster_dependent_tests(SecondaryNode) -> io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]), passed = test_delegates_async(SecondaryNode), passed = test_delegates_sync(SecondaryNode), @@ -629,7 +658,6 @@ test_topic_expect_match(X, List) -> #'P_basic'{}, <<>>), Res = rabbit_exchange_type_topic:route( X, #delivery{mandatory = false, - immediate = false, sender = self(), message = Message}), ExpectedRes = lists:map( @@ -747,7 +775,9 @@ test_log_management_during_startup() -> ok = case catch control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotation_tty_no_handlers_test}); - {error, {cannot_log_to_tty, _, _}} -> ok + {badrpc, {'EXIT', {rabbit,failure_during_boot, + {error,{cannot_log_to_tty, + _, not_installed}}}}} -> ok end, %% fix sasl logging @@ -771,7 +801,9 @@ test_log_management_during_startup() -> ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotation_no_write_permission_dir_test}); - {error, {cannot_log_to_file, _, _}} -> ok + {badrpc, {'EXIT', + {rabbit, failure_during_boot, + {error, {cannot_log_to_file, _, _}}}}} -> ok end, %% start application with logging to a subdirectory which @@ -782,8 +814,11 @@ test_log_management_during_startup() -> ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotatation_parent_dirs_test}); - {error, {cannot_log_to_file, _, - {error, {cannot_create_parent_dirs, _, eacces}}}} -> ok + {badrpc, + {'EXIT', {rabbit,failure_during_boot, + {error, {cannot_log_to_file, _, + {error, + {cannot_create_parent_dirs, _, eacces}}}}}}} -> ok end, ok = set_permissions(TmpDir, 8#00700), ok = set_permissions(TmpLog, 8#00600), @@ -856,199 +891,51 @@ test_arguments_parser() -> passed. -test_cluster_management() -> - %% 'cluster' and 'reset' should only work if the app is stopped - {error, _} = control_action(cluster, []), - {error, _} = control_action(reset, []), - {error, _} = control_action(force_reset, []), - - ok = control_action(stop_app, []), - - %% various ways of creating a standalone node - NodeS = atom_to_list(node()), - ClusteringSequence = [[], - [NodeS], - ["invalid@invalid", NodeS], - [NodeS, "invalid@invalid"]], - - ok = control_action(reset, []), - lists:foreach(fun (Arg) -> - ok = control_action(force_cluster, Arg), - ok - end, - ClusteringSequence), - lists:foreach(fun (Arg) -> - ok = control_action(reset, []), - ok = control_action(force_cluster, Arg), - ok - end, - ClusteringSequence), - ok = control_action(reset, []), - lists:foreach(fun (Arg) -> - ok = control_action(force_cluster, Arg), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok - end, - ClusteringSequence), - lists:foreach(fun (Arg) -> - ok = control_action(reset, []), - ok = control_action(force_cluster, Arg), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok - end, - ClusteringSequence), - - %% convert a disk node into a ram node - ok = control_action(reset, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - %% join a non-existing cluster as a ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), +test_dynamic_mirroring() -> + %% Just unit tests of the node selection logic, see multi node + %% tests for the rest... 
+ Test = fun ({NewM, NewSs, ExtraSs}, Policy, Params, {OldM, OldSs}, All) -> + {NewM, NewSs0} = + rabbit_mirror_queue_misc:suggested_queue_nodes( + Policy, Params, {OldM, OldSs}, All), + NewSs1 = lists:sort(NewSs0), + case dm_list_match(NewSs, NewSs1, ExtraSs) of + ok -> ok; + error -> exit({no_match, NewSs, NewSs1, ExtraSs}) + end + end, + + Test({a,[b,c],0},<<"all">>,'_',{a,[]}, [a,b,c]), + Test({a,[b,c],0},<<"all">>,'_',{a,[b,c]},[a,b,c]), + Test({a,[b,c],0},<<"all">>,'_',{a,[d]}, [a,b,c]), + + %% Add a node + Test({a,[b,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{a,[b]},[a,b,c,d]), + Test({b,[a,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{b,[a]},[a,b,c,d]), + %% Add two nodes and drop one + Test({a,[b,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{a,[d]},[a,b,c,d]), + %% Promote slave to master by policy + Test({a,[b,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{d,[a]},[a,b,c,d]), + %% Don't try to include nodes that are not running + Test({a,[b], 0},<<"nodes">>,[<<"a">>,<<"b">>,<<"f">>],{a,[b]},[a,b,c,d]), + %% If we can't find any of the nodes listed then just keep the master + Test({a,[], 0},<<"nodes">>,[<<"f">>,<<"g">>,<<"h">>],{a,[b]},[a,b,c,d]), + + Test({a,[], 1},<<"exactly">>,2,{a,[]}, [a,b,c,d]), + Test({a,[], 2},<<"exactly">>,3,{a,[]}, [a,b,c,d]), + Test({a,[c], 0},<<"exactly">>,2,{a,[c]}, [a,b,c,d]), + Test({a,[c], 1},<<"exactly">>,3,{a,[c]}, [a,b,c,d]), + Test({a,[c], 0},<<"exactly">>,2,{a,[c,d]},[a,b,c,d]), + Test({a,[c,d],0},<<"exactly">>,3,{a,[c,d]},[a,b,c,d]), - ok = control_action(reset, []), - - SecondaryNode = rabbit_nodes:make("hare"), - case net_adm:ping(SecondaryNode) of - pong -> passed = test_cluster_management2(SecondaryNode); - pang -> io:format("Skipping clustering tests with node ~p~n", - [SecondaryNode]) - end, - - ok = control_action(start_app, []), passed. 
-test_cluster_management2(SecondaryNode) -> - NodeS = atom_to_list(node()), - SecondaryNodeS = atom_to_list(SecondaryNode), - - %% make a disk node - ok = control_action(cluster, [NodeS]), - ok = assert_disc_node(), - %% make a ram node - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = assert_ram_node(), - - %% join cluster as a ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% ram node will not start by itself - ok = control_action(stop_app, []), - ok = control_action(stop_app, SecondaryNode, [], []), - {error, _} = control_action(start_app, []), - ok = control_action(start_app, SecondaryNode, [], []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - - %% change cluster config while remaining in same cluster - ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - - %% join non-existing cluster as a ram node - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - {error, _} = control_action(start_app, []), - ok = assert_ram_node(), - - %% join empty cluster as a ram node (converts to disc) - ok = control_action(cluster, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% make a new ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% turn ram node into disk node - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% convert a disk node into a ram node - ok = assert_disc_node(), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - %% make a new disk node - ok = control_action(force_reset, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% turn a disk node into a ram node - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% NB: this will log an inconsistent_database error, which is harmless - %% Turning cover on / off is OK even if we're not in general using cover, - %% it just turns the engine on / off, doesn't actually log anything. 
- cover:stop([SecondaryNode]), - true = disconnect_node(SecondaryNode), - pong = net_adm:ping(SecondaryNode), - cover:start([SecondaryNode]), - - %% leaving a cluster as a ram node - ok = control_action(reset, []), - %% ...and as a disk node - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - cover:stop(SecondaryNode), - ok = control_action(reset, []), - cover:start(SecondaryNode), - - %% attempt to leave cluster when no other node is alive - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, SecondaryNode, [], []), - ok = control_action(stop_app, []), - {error, {no_running_cluster_nodes, _, _}} = - control_action(reset, []), - - %% attempt to change type when no other node is alive - {error, {no_running_cluster_nodes, _, _}} = - control_action(cluster, [SecondaryNodeS]), - - %% leave system clustered, with the secondary node as a ram node - ok = control_action(force_reset, []), - ok = control_action(start_app, []), - %% Yes, this is rather ugly. But since we're a clustered Mnesia - %% node and we're telling another clustered node to reset itself, - %% we will get disconnected half way through causing a - %% badrpc. This never happens in real life since rabbitmqctl is - %% not a clustered Mnesia node. - cover:stop(SecondaryNode), - {badrpc, nodedown} = control_action(force_reset, SecondaryNode, [], []), - pong = net_adm:ping(SecondaryNode), - cover:start(SecondaryNode), - ok = control_action(cluster, SecondaryNode, [NodeS], []), - ok = control_action(start_app, SecondaryNode, [], []), - - passed. +%% Does the first list match the second where the second is required +%% to have exactly Extra superfluous items? +dm_list_match([], [], 0) -> ok; +dm_list_match(_, [], _Extra) -> error; +dm_list_match([H|T1], [H |T2], Extra) -> dm_list_match(T1, T2, Extra); +dm_list_match(L1, [_H|T2], Extra) -> dm_list_match(L1, T2, Extra - 1). test_user_management() -> @@ -1135,22 +1022,21 @@ test_runtime_parameters() -> Bad = fun(L) -> {error_string, _} = control_action(set_parameter, L) end, %% Acceptable for bijection - Good(["test", "good", "<<\"ignore\">>"]), + Good(["test", "good", "\"ignore\""]), Good(["test", "good", "123"]), Good(["test", "good", "true"]), Good(["test", "good", "false"]), Good(["test", "good", "null"]), - Good(["test", "good", "[{<<\"key\">>, <<\"value\">>}]"]), + Good(["test", "good", "{\"key\": \"value\"}"]), - %% Various forms of fail due to non-bijectability + %% Invalid json Bad(["test", "good", "atom"]), - Bad(["test", "good", "{tuple, foo}"]), - Bad(["test", "good", "[{<<\"key\">>, <<\"value\">>, 1}]"]), - Bad(["test", "good", "[{key, <<\"value\">>}]"]), + Bad(["test", "good", "{\"foo\": \"bar\""]), + Bad(["test", "good", "{foo: \"bar\"}"]), %% Test actual validation hook - Good(["test", "maybe", "<<\"good\">>"]), - Bad(["test", "maybe", "<<\"bad\">>"]), + Good(["test", "maybe", "\"good\""]), + Bad(["test", "maybe", "\"bad\""]), ok = control_action(list_parameters, []), @@ -1161,6 +1047,26 @@ test_runtime_parameters() -> rabbit_runtime_parameters_test:unregister(), passed. 
+test_policy_validation() -> + rabbit_runtime_parameters_test:register_policy_validator(), + SetPol = + fun (Key, Val) -> + control_action( + set_policy, + ["name", ".*", rabbit_misc:format("{\"~s\":~p}", [Key, Val])]) + end, + + ok = SetPol("testeven", []), + ok = SetPol("testeven", [1, 2]), + ok = SetPol("testeven", [1, 2, 3, 4]), + ok = SetPol("testpos", [2, 5, 5678]), + + {error_string, _} = SetPol("testpos", [-1, 0, 1]), + {error_string, _} = SetPol("testeven", [ 1, 2, 3]), + + rabbit_runtime_parameters_test:unregister_policy_validator(), + passed. + test_server_status() -> %% create a few things so there is some useful information to list Writer = spawn(fun () -> receive shutdown -> ok end end), @@ -1216,7 +1122,15 @@ test_server_status() -> ok = control_action(list_consumers, []), %% set vm memory high watermark + HWM = vm_memory_monitor:get_vm_memory_high_watermark(), + ok = control_action(set_vm_memory_high_watermark, ["1"]), ok = control_action(set_vm_memory_high_watermark, ["1.0"]), + ok = control_action(set_vm_memory_high_watermark, [float_to_list(HWM)]), + + %% eval + {error_string, _} = control_action(eval, ["\""]), + {error_string, _} = control_action(eval, ["a("]), + ok = control_action(eval, ["a."]), %% cleanup [{ok, _} = rabbit_amqqueue:delete(QR, false, false) || QR <- [Q, Q2]], @@ -1672,15 +1586,15 @@ clean_logs(Files, Suffix) -> ok. assert_ram_node() -> - case rabbit_mnesia:is_disc_node() of - true -> exit('not_ram_node'); - false -> ok + case rabbit_mnesia:node_type() of + disc -> exit('not_ram_node'); + ram -> ok end. assert_disc_node() -> - case rabbit_mnesia:is_disc_node() of - true -> ok; - false -> exit('not_disc_node') + case rabbit_mnesia:node_type() of + disc -> ok; + ram -> exit('not_disc_node') end. delete_file(File) -> @@ -2354,8 +2268,8 @@ publish_and_confirm(Q, Payload, Count) -> Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), <<>>, #'P_basic'{delivery_mode = 2}, Payload), - Delivery = #delivery{mandatory = false, immediate = false, - sender = self(), message = Msg, msg_seq_no = Seq}, + Delivery = #delivery{mandatory = false, sender = self(), + message = Msg, msg_seq_no = Seq}, {routed, _} = rabbit_amqqueue:deliver([Q], Delivery) end || Seq <- Seqs], wait_for_confirms(gb_sets:from_list(Seqs)). @@ -2447,10 +2361,10 @@ test_dropwhile(VQ0) -> fun (N, Props) -> Props#message_properties{expiry = N} end, VQ0), %% drop the first 5 messages - {undefined, VQ2} = rabbit_variable_queue:dropwhile( - fun(#message_properties { expiry = Expiry }) -> - Expiry =< 5 - end, false, VQ1), + {_, undefined, VQ2} = rabbit_variable_queue:dropwhile( + fun(#message_properties { expiry = Expiry }) -> + Expiry =< 5 + end, false, VQ1), %% fetch five now VQ3 = lists:foldl(fun (_N, VQN) -> @@ -2467,11 +2381,11 @@ test_dropwhile(VQ0) -> test_dropwhile_varying_ram_duration(VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), VQ2 = rabbit_variable_queue:set_ram_duration_target(0, VQ1), - {undefined, VQ3} = rabbit_variable_queue:dropwhile( - fun(_) -> false end, false, VQ2), + {_, undefined, VQ3} = rabbit_variable_queue:dropwhile( + fun(_) -> false end, false, VQ2), VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), VQ5 = variable_queue_publish(false, 1, VQ4), - {undefined, VQ6} = + {_, undefined, VQ6} = rabbit_variable_queue:dropwhile(fun(_) -> false end, false, VQ5), VQ6. 
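The dropwhile calls above pin down its new return shape: a triple whose first element is the #message_properties{} of the first message that failed the predicate ('undefined' once the queue is drained), and whose second element is the accumulated messages when AckRequired is true, 'undefined' otherwise. A hypothetical call, assuming a queue state VQ0 holding messages with expiry 1..10 as in test_dropwhile above:

    %% Illustration only: VQ0 stands for a variable-queue state holding
    %% ten messages published with expiry = 1..10.
    DropExpired = fun (#message_properties { expiry = E }) -> E =< 5 end,
    %% With AckRequired = false the middle element is 'undefined' and the
    %% first element is the properties of the first survivor (expiry 6):
    {#message_properties { expiry = 6 }, undefined, VQ1} =
        rabbit_variable_queue:dropwhile(DropExpired, false, VQ0).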
diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index 732c29b6..5bc3d9f5 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -64,12 +64,11 @@ #basic_message{exchange_name :: rabbit_exchange:name(), routing_keys :: [rabbit_router:routing_key()], content :: content(), - id :: msg_id(), + id :: msg_id(), is_persistent :: boolean()}). -type(message() :: basic_message()). -type(delivery() :: #delivery{mandatory :: boolean(), - immediate :: boolean(), sender :: pid(), message :: message()}). -type(message_properties() :: @@ -118,8 +117,7 @@ exclusive_owner :: rabbit_types:maybe(pid()), arguments :: rabbit_framing:amqp_table(), pid :: rabbit_types:maybe(pid()), - slave_pids :: [pid()], - mirror_nodes :: [node()] | 'undefined' | 'all'}). + slave_pids :: [pid()]}). -type(exchange() :: #exchange{name :: rabbit_exchange:name(), diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index e1a7bcae..455134da 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -66,11 +66,11 @@ %% into the boot process by prelaunch before the mnesia application is %% started. By the time Mnesia is started the upgrades have happened %% (on the primary), or Mnesia has been reset (on the secondary) and -%% rabbit_mnesia:init_db/3 can then make the node rejoin the cluster +%% rabbit_mnesia:init_db_unchecked/2 can then make the node rejoin the cluster %% in the normal way. %% %% The non-mnesia upgrades are then triggered by -%% rabbit_mnesia:init_db/3. Of course, it's possible for a given +%% rabbit_mnesia:init_db_unchecked/2. Of course, it's possible for a given %% upgrade process to only require Mnesia upgrades, or only require %% non-Mnesia upgrades. In the latter case no Mnesia resets and %% reclusterings occur. @@ -121,19 +121,16 @@ remove_backup() -> info("upgrades: Mnesia backup removed~n", []). maybe_upgrade_mnesia() -> - %% rabbit_mnesia:all_clustered_nodes/0 will return [] at this point - %% if we are a RAM node since Mnesia has not started yet. - AllNodes = lists:usort(rabbit_mnesia:all_clustered_nodes() ++ - rabbit_mnesia:read_cluster_nodes_config()), + AllNodes = rabbit_mnesia:cluster_nodes(all), case rabbit_version:upgrades_required(mnesia) of {error, starting_from_scratch} -> ok; {error, version_not_available} -> case AllNodes of - [_] -> ok; - _ -> die("Cluster upgrade needed but upgrading from " - "< 2.1.1.~nUnfortunately you will need to " - "rebuild the cluster.", []) + [] -> die("Cluster upgrade needed but upgrading from " + "< 2.1.1.~nUnfortunately you will need to " + "rebuild the cluster.", []); + _ -> ok end; {error, _} = Err -> throw(Err); @@ -150,12 +147,12 @@ maybe_upgrade_mnesia() -> upgrade_mode(AllNodes) -> case nodes_running(AllNodes) of [] -> - AfterUs = rabbit_mnesia:read_previously_running_nodes(), - case {is_disc_node_legacy(), AfterUs} of - {true, []} -> + AfterUs = rabbit_mnesia:cluster_nodes(running) -- [node()], + case {node_type_legacy(), AfterUs} of + {disc, []} -> primary; - {true, _} -> - Filename = rabbit_mnesia:running_nodes_filename(), + {disc, _} -> + Filename = rabbit_node_monitor:running_nodes_filename(), die("Cluster upgrade needed but other disc nodes shut " "down after this one.~nPlease first start the last " "disc node to shut down.~n~nNote: if several disc " @@ -163,7 +160,7 @@ upgrade_mode(AllNodes) -> "all~nshow this message. In which case, remove " "the lock file on one of them and~nstart that node. 
" "The lock file on this node is:~n~n ~s ", [Filename]); - {false, _} -> + {ram, _} -> die("Cluster upgrade needed but this is a ram node.~n" "Please first start the last disc node to shut down.", []) @@ -204,7 +201,7 @@ primary_upgrade(Upgrades, Nodes) -> mnesia, Upgrades, fun () -> - force_tables(), + rabbit_table:force_load(), case Others of [] -> ok; _ -> info("mnesia upgrades: Breaking cluster~n", []), @@ -214,23 +211,13 @@ primary_upgrade(Upgrades, Nodes) -> end), ok. -force_tables() -> - [mnesia:force_load_table(T) || T <- rabbit_mnesia:table_names()]. - secondary_upgrade(AllNodes) -> %% must do this before we wipe out schema - IsDiscNode = is_disc_node_legacy(), + NodeType = node_type_legacy(), rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema), - %% Note that we cluster with all nodes, rather than all disc nodes - %% (as we can't know all disc nodes at this point). This is safe as - %% we're not writing the cluster config, just setting up Mnesia. - ClusterNodes = case IsDiscNode of - true -> AllNodes; - false -> AllNodes -- [node()] - end, rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - ok = rabbit_mnesia:init_db(ClusterNodes, true, fun () -> ok end), + ok = rabbit_mnesia:init_db_unchecked(AllNodes, NodeType), ok = rabbit_version:record_desired_for_scope(mnesia), ok. @@ -278,13 +265,16 @@ lock_filename() -> lock_filename(dir()). lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME). backup_dir() -> dir() ++ "-upgrade-backup". -is_disc_node_legacy() -> +node_type_legacy() -> %% This is pretty ugly but we can't start Mnesia and ask it (will %% hang), we can't look at the config file (may not include us %% even if we're a disc node). We also can't use - %% rabbit_mnesia:is_disc_node/0 because that will give false + %% rabbit_mnesia:node_type/0 because that will give false %% postivies on Rabbit up to 2.5.1. - filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")). + case filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")) of + true -> disc; + false -> ram + end. %% NB: we cannot use rabbit_log here since it may not have been %% started yet diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl index 18704807..21fdcd66 100644 --- a/src/rabbit_upgrade_functions.erl +++ b/src/rabbit_upgrade_functions.erl @@ -40,6 +40,9 @@ -rabbit_upgrade({exchange_scratches, mnesia, [exchange_scratch]}). -rabbit_upgrade({policy, mnesia, [exchange_scratches, ha_mirrors]}). +-rabbit_upgrade({sync_slave_pids, mnesia, [policy]}). +-rabbit_upgrade({no_mirror_nodes, mnesia, [sync_slave_pids]}). +-rabbit_upgrade({gm_pids, mnesia, [no_mirror_nodes]}). %% ------------------------------------------------------------------- @@ -62,6 +65,9 @@ -spec(topic_trie_node/0 :: () -> 'ok'). -spec(runtime_parameters/0 :: () -> 'ok'). -spec(policy/0 :: () -> 'ok'). +-spec(sync_slave_pids/0 :: () -> 'ok'). +-spec(no_mirror_nodes/0 :: () -> 'ok'). +-spec(gm_pids/0 :: () -> 'ok'). -endif. @@ -240,15 +246,53 @@ queue_policy(Table) -> [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids, mirror_nodes, policy]). 
+sync_slave_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddSyncSlavesFun = + fun ({amqqueue, N, D, AD, Excl, Args, Pid, SPids, MNodes, Pol}) -> + {amqqueue, N, D, AD, Excl, Args, Pid, SPids, [], MNodes, Pol} + end, + [ok = transform(T, AddSyncSlavesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, mirror_nodes, policy]) + || T <- Tables], + ok. + +no_mirror_nodes() -> + Tables = [rabbit_queue, rabbit_durable_queue], + RemoveMirrorNodesFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, _MNodes, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol} + end, + [ok = transform(T, RemoveMirrorNodesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy]) + || T <- Tables], + ok. + +gm_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddGMPidsFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol, []} + end, + [ok = transform(T, AddGMPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy, gm_pids]) + || T <- Tables], + ok. + + + %%-------------------------------------------------------------------- transform(TableName, Fun, FieldList) -> - rabbit_mnesia:wait_for_tables([TableName]), + rabbit_table:wait([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList), ok. transform(TableName, Fun, FieldList, NewRecordName) -> - rabbit_mnesia:wait_for_tables([TableName]), + rabbit_table:wait([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList, NewRecordName), ok. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 49213c95..8a3fd9d9 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -17,11 +17,11 @@ -module(rabbit_variable_queue). -export([init/3, terminate/2, delete_and_terminate/2, purge/1, - publish/4, publish_delivered/5, drain_confirmed/1, + publish/4, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/3, fetch/2, ack/2, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_timeout/1, - timeout/1, handle_pre_hibernate/1, status/1, invoke/3, - is_duplicate/2, discard/3, multiple_routing_keys/0, fold/3]). + depth/1, set_ram_duration_target/2, ram_duration/1, + needs_timeout/1, timeout/1, handle_pre_hibernate/1, status/1, invoke/3, + is_duplicate/2, multiple_routing_keys/0, fold/3]). -export([start/1, stop/0]). @@ -545,17 +545,8 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId }, ram_msg_count = RamMsgCount + 1, unconfirmed = UC1 })). -publish_delivered(false, #basic_message { id = MsgId }, - #message_properties { needs_confirming = NeedsConfirming }, - _ChPid, State = #vqstate { async_callback = Callback, - len = 0 }) -> - case NeedsConfirming of - true -> blind_confirm(Callback, gb_sets:singleton(MsgId)); - false -> ok - end, - {undefined, a(State)}; -publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, - id = MsgId }, +publish_delivered(Msg = #basic_message { is_persistent = IsPersistent, + id = MsgId }, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, _ChPid, State = #vqstate { len = 0, @@ -579,6 +570,8 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, persistent_count = PCount1, unconfirmed = UC1 }))}. +discard(_MsgId, _ChPid, State) -> State. 
+ drain_confirmed(State = #vqstate { confirmed = C }) -> case gb_sets:is_empty(C) of true -> {[], State}; %% common case @@ -589,12 +582,12 @@ drain_confirmed(State = #vqstate { confirmed = C }) -> dropwhile(Pred, AckRequired, State) -> dropwhile(Pred, AckRequired, State, []). dropwhile(Pred, AckRequired, State, Msgs) -> - End = fun(S) when AckRequired -> {lists:reverse(Msgs), S}; - (S) -> {undefined, S} + End = fun(Next, S) when AckRequired -> {Next, lists:reverse(Msgs), S}; + (Next, S) -> {Next, undefined, S} end, case queue_out(State) of {empty, State1} -> - End(a(State1)); + End(undefined, a(State1)); {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} -> case {Pred(MsgProps), AckRequired} of {true, true} -> @@ -606,7 +599,7 @@ dropwhile(Pred, AckRequired, State, Msgs) -> {_, State2} = internal_fetch(false, MsgStatus, State1), dropwhile(Pred, AckRequired, State2, undefined); {false, _} -> - End(a(in_r(MsgStatus, State1))) + End(MsgProps, a(in_r(MsgStatus, State1))) end end. @@ -681,6 +674,9 @@ len(#vqstate { len = Len }) -> Len. is_empty(State) -> 0 == len(State). +depth(State = #vqstate { pending_ack = Ack }) -> + len(State) + gb_trees:size(Ack). + set_ram_duration_target( DurationTarget, State = #vqstate { rates = #rates { avg_egress = AvgEgressRate, @@ -818,8 +814,6 @@ invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). is_duplicate(_Msg, State) -> {false, State}. -discard(_Msg, _ChPid, State) -> State. - %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- @@ -871,9 +865,10 @@ gb_sets_maybe_insert(false, _Val, Set) -> Set; gb_sets_maybe_insert(true, Val, Set) -> gb_sets:add(Val, Set). msg_status(IsPersistent, SeqId, Msg = #basic_message { id = MsgId }, - MsgProps) -> + MsgProps = #message_properties { delivered = Delivered }) -> + %% TODO would it make sense to remove #msg_status.is_delivered? #msg_status { seq_id = SeqId, msg_id = MsgId, msg = Msg, - is_persistent = IsPersistent, is_delivered = false, + is_persistent = IsPersistent, is_delivered = Delivered, msg_on_disk = false, index_on_disk = false, msg_props = MsgProps }. @@ -1321,12 +1316,9 @@ must_sync_index(#vqstate { msg_indices_on_disk = MIOD, %% subtraction. not (gb_sets:is_empty(UC) orelse gb_sets:is_subset(UC, MIOD)). -blind_confirm(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end). - msgs_written_to_disk(Callback, MsgIdSet, ignored) -> - blind_confirm(Callback, MsgIdSet); + Callback(?MODULE, + fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end); msgs_written_to_disk(Callback, MsgIdSet, written) -> Callback(?MODULE, fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 5548ef6d..297fa56f 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -90,12 +90,15 @@ delete(VHostPath) -> R. 
internal_delete(VHostPath) -> - lists:foreach( - fun (Info) -> - ok = rabbit_auth_backend_internal:clear_permissions( - proplists:get_value(user, Info), VHostPath) - end, - rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)), + [ok = rabbit_auth_backend_internal:clear_permissions( + proplists:get_value(user, Info), VHostPath) + || Info <- rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)], + [ok = rabbit_runtime_parameters:clear(VHostPath, + proplists:get_value(component, Info), + proplists:get_value(key, Info)) + || Info <- rabbit_runtime_parameters:list(VHostPath)], + [ok = rabbit_policy:delete(VHostPath, proplists:get_value(key, Info)) + || Info <- rabbit_policy:list(VHostPath)], ok = mnesia:delete({rabbit_vhost, VHostPath}), ok. diff --git a/src/rabbit_vm.erl b/src/rabbit_vm.erl new file mode 100644 index 00000000..53f3df18 --- /dev/null +++ b/src/rabbit_vm.erl @@ -0,0 +1,129 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_vm). + +-export([memory/0]). + +-define(MAGIC_PLUGINS, ["mochiweb", "webmachine", "cowboy", "sockjs", + "rfc4627_jsonrpc"]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(memory/0 :: () -> rabbit_types:infos()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% Like erlang:memory(), but with awareness of rabbit-y things +memory() -> + Conns = (sup_memory(rabbit_tcp_client_sup) + + sup_memory(ssl_connection_sup) + + sup_memory(amqp_sup)), + Qs = (sup_memory(rabbit_amqqueue_sup) + + sup_memory(rabbit_mirror_queue_slave_sup)), + Mnesia = mnesia_memory(), + MsgIndexETS = ets_memory(rabbit_msg_store_ets_index), + MsgIndexProc = (pid_memory(msg_store_transient) + + pid_memory(msg_store_persistent)), + MgmtDbETS = ets_memory(rabbit_mgmt_db), + MgmtDbProc = sup_memory(rabbit_mgmt_sup), + Plugins = plugin_memory() - MgmtDbProc, + + [{total, Total}, + {processes, Processes}, + {ets, ETS}, + {atom, Atom}, + {binary, Bin}, + {code, Code}, + {system, System}] = + erlang:memory([total, processes, ets, atom, binary, code, system]), + + OtherProc = Processes - Conns - Qs - MsgIndexProc - MgmtDbProc - Plugins, + + [{total, Total}, + {connection_procs, Conns}, + {queue_procs, Qs}, + {plugins, Plugins}, + {other_proc, lists:max([0, OtherProc])}, %% [1] + {mnesia, Mnesia}, + {mgmt_db, MgmtDbETS + MgmtDbProc}, + {msg_index, MsgIndexETS + MsgIndexProc}, + {other_ets, ETS - Mnesia - MsgIndexETS - MgmtDbETS}, + {binary, Bin}, + {code, Code}, + {atom, Atom}, + {other_system, System - ETS - Atom - Bin - Code}]. + +%% [1] - erlang:memory(processes) can be less than the sum of its +%% parts. Rather than display something nonsensical, just silence any +%% claims about negative memory. 
See +%% http://erlang.org/pipermail/erlang-questions/2012-September/069320.html + +%%---------------------------------------------------------------------------- + +sup_memory(Sup) -> + lists:sum([child_memory(P, T) || {_, P, T, _} <- sup_children(Sup)]) + + pid_memory(Sup). + +sup_children(Sup) -> + rabbit_misc:with_exit_handler( + rabbit_misc:const([]), fun () -> supervisor:which_children(Sup) end). + +pid_memory(Pid) when is_pid(Pid) -> case process_info(Pid, memory) of + {memory, M} -> M; + _ -> 0 + end; +pid_memory(Name) when is_atom(Name) -> case whereis(Name) of + P when is_pid(P) -> pid_memory(P); + _ -> 0 + end. + +child_memory(Pid, worker) when is_pid (Pid) -> pid_memory(Pid); +child_memory(Pid, supervisor) when is_pid (Pid) -> sup_memory(Pid); +child_memory(_, _) -> 0. + +mnesia_memory() -> + case mnesia:system_info(is_running) of + yes -> lists:sum([bytes(mnesia:table_info(Tab, memory)) || + Tab <- mnesia:system_info(tables)]); + no -> 0 + end. + +ets_memory(Name) -> + lists:sum([bytes(ets:info(T, memory)) || T <- ets:all(), + N <- [ets:info(T, name)], + N =:= Name]). + +bytes(Words) -> Words * erlang:system_info(wordsize). + +plugin_memory() -> + lists:sum([plugin_memory(App) || + {App, _, _} <- application:which_applications(), + is_plugin(atom_to_list(App))]). + +plugin_memory(App) -> + case catch application_master:get_child( + application_controller:get_master(App)) of + {Pid, _} -> sup_memory(Pid); + _ -> 0 + end. + +is_plugin("rabbitmq_" ++ _) -> true; +is_plugin(App) -> lists:member(App, ?MAGIC_PLUGINS). diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 3d3623d7..5af38573 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -255,10 +255,10 @@ behaviour_info(_Other) -> %%% --------------------------------------------------- start_link(Mod, Args) -> gen_server:start_link(?MODULE, {self, Mod, Args}, []). - + start_link(SupName, Mod, Args) -> gen_server:start_link(SupName, ?MODULE, {SupName, Mod, Args}, []). - + %%% --------------------------------------------------- %%% Interface functions. %%% --------------------------------------------------- @@ -298,9 +298,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> check_childspecs(X) -> {error, {badarg, X}}. %%% --------------------------------------------------- -%%% +%%% %%% Initialize the supervisor. -%%% +%%% %%% --------------------------------------------------- init({SupName, Mod, Args}) -> process_flag(trap_exit, true), @@ -319,7 +319,7 @@ init({SupName, Mod, Args}) -> Error -> {stop, {bad_return, {Mod, init, Error}}} end. - + init_children(State, StartSpec) -> SupName = State#state.name, case check_startspec(StartSpec) of @@ -349,7 +349,7 @@ init_dynamic(_State, StartSpec) -> %% Func: start_children/2 %% Args: Children = [#child] in start order %% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Purpose: Start all children. The new list contains #child's +%% Purpose: Start all children. The new list contains #child's %% with pids. %% Returns: {ok, NChildren} | {error, NChildren} %% NChildren = [#child] in termination order (reversed @@ -381,7 +381,7 @@ do_start_child(SupName, Child) -> NChild = Child#child{pid = Pid}, report_progress(NChild, SupName), {ok, Pid, Extra}; - ignore -> + ignore -> {ok, undefined}; {error, What} -> {error, What}; What -> {error, What} @@ -400,12 +400,12 @@ do_start_child_i(M, F, A) -> What -> {error, What} end. - + %%% --------------------------------------------------- -%%% +%%% %%% Callback functions. 
-%%% +%%% %%% --------------------------------------------------- handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> #child{mfa = {M, F, A}} = hd(State#state.children), @@ -414,11 +414,11 @@ handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> {ok, undefined} -> {reply, {ok, undefined}, State}; {ok, Pid} -> - NState = State#state{dynamics = + NState = State#state{dynamics = ?DICT:store(Pid, Args, State#state.dynamics)}, {reply, {ok, Pid}, NState}; {ok, Pid, Extra} -> - NState = State#state{dynamics = + NState = State#state{dynamics = ?DICT:store(Pid, Args, State#state.dynamics)}, {reply, {ok, Pid, Extra}, NState}; What -> @@ -497,7 +497,7 @@ handle_call(which_children, _From, State) -> %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> - error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", []), {noreply, State}. @@ -527,7 +527,7 @@ handle_info({'EXIT', Pid, Reason}, State) -> end; handle_info(Msg, State) -> - error_logger:error_msg("Supervisor received unexpected message: ~p~n", + error_logger:error_msg("Supervisor received unexpected message: ~p~n", [Msg]), {noreply, State}. %% @@ -577,13 +577,13 @@ check_flags({Strategy, MaxIntensity, Period}) -> check_flags(What) -> {bad_flags, What}. -update_childspec(State, StartSpec) when ?is_simple(State) -> - case check_startspec(StartSpec) of - {ok, [Child]} -> - {ok, State#state{children = [Child]}}; - Error -> - {error, Error} - end; +update_childspec(State, StartSpec) when ?is_simple(State) -> + case check_startspec(StartSpec) of + {ok, [Child]} -> + {ok, State#state{children = [Child]}}; + Error -> + {error, Error} + end; update_childspec(State, StartSpec) -> case check_startspec(StartSpec) of @@ -604,7 +604,7 @@ update_childspec1([Child|OldC], Children, KeepOld) -> end; update_childspec1([], Children, KeepOld) -> % Return them in (keeped) reverse start order. - lists:reverse(Children ++ KeepOld). + lists:reverse(Children ++ KeepOld). update_chsp(OldCh, Children) -> case lists:map(fun (Ch) when OldCh#child.name =:= Ch#child.name -> @@ -618,7 +618,7 @@ update_chsp(OldCh, Children) -> NewC -> {ok, NewC} end. - + %%% --------------------------------------------------- %%% Start a new child. 
%%% --------------------------------------------------- @@ -630,12 +630,12 @@ handle_start_child(Child, State) -> {ok, Pid} -> Children = State#state.children, {{ok, Pid}, - State#state{children = + State#state{children = [Child#child{pid = Pid}|Children]}}; {ok, Pid, Extra} -> Children = State#state.children, {{ok, Pid, Extra}, - State#state{children = + State#state{children = [Child#child{pid = Pid}|Children]}}; {error, What} -> {{error, {What, Child}}, State} @@ -816,29 +816,32 @@ terminate_simple_children(Child, Dynamics, SupName) -> {Replies, Timedout} = lists:foldl( fun (_Pid, {Replies, Timedout}) -> - {Reply, Timedout1} = + {Pid1, Reason1, Timedout1} = receive TimeoutMsg -> Remaining = Pids -- [P || {P, _} <- Replies], [exit(P, kill) || P <- Remaining], - receive {'DOWN', _MRef, process, Pid, Reason} -> - {{error, Reason}, true} + receive + {'DOWN', _MRef, process, Pid, Reason} -> + {Pid, Reason, true} end; {'DOWN', _MRef, process, Pid, Reason} -> - {child_res(Child, Reason, Timedout), Timedout}; - {'EXIT', Pid, Reason} -> - receive {'DOWN', _MRef, process, Pid, _} -> - {{error, Reason}, Timedout} - end + {Pid, Reason, Timedout} end, - {[{Pid, Reply} | Replies], Timedout1} + {[{Pid1, child_res(Child, Reason1, Timedout1)} | Replies], + Timedout1} end, {[], false}, Pids), timeout_stop(Child, TRef, TimeoutMsg, Timedout), ReportError = shutdown_error_reporter(SupName), - [case Reply of - {_Pid, ok} -> ok; - {Pid, {error, R}} -> ReportError(R, Child#child{pid = Pid}) - end || Reply <- Replies], + Report = fun(_, ok) -> ok; + (Pid, {error, R}) -> ReportError(R, Child#child{pid = Pid}) + end, + [receive + {'EXIT', Pid, Reason} -> + Report(Pid, child_res(Child, Reason, Timedout)) + after + 0 -> Report(Pid, Reply) + end || {Pid, Reply} <- Replies], ok. child_exit_reason(#child{shutdown = brutal_kill}) -> kill; @@ -863,7 +866,7 @@ timeout_stop(#child{shutdown = Time}, TRef, Msg, false) when is_integer(Time) -> after 0 -> ok end; -timeout_stop(#child{}, ok, _Msg, _Timedout) -> +timeout_stop(#child{}, _TRef, _Msg, _Timedout) -> ok. do_terminate(Child, SupName) when Child#child.pid =/= undefined -> @@ -885,17 +888,17 @@ do_terminate(Child, _SupName) -> Child. %%----------------------------------------------------------------- -%% Shutdowns a child. We must check the EXIT value +%% Shutdowns a child. We must check the EXIT value %% of the child, because it might have died with another reason than -%% the wanted. In that case we want to report the error. We put a -%% monitor on the child an check for the 'DOWN' message instead of -%% checking for the 'EXIT' message, because if we check the 'EXIT' -%% message a "naughty" child, who does unlink(Sup), could hang the -%% supervisor. +%% the wanted. In that case we want to report the error. We put a +%% monitor on the child an check for the 'DOWN' message instead of +%% checking for the 'EXIT' message, because if we check the 'EXIT' +%% message a "naughty" child, who does unlink(Sup), could hang the +%% supervisor. 
%% Returns: ok | {error, OtherReason} (this should be reported) %%----------------------------------------------------------------- shutdown(Pid, brutal_kill) -> - + case monitor_child(Pid) of ok -> exit(Pid, kill), @@ -905,16 +908,16 @@ shutdown(Pid, brutal_kill) -> {'DOWN', _MRef, process, Pid, OtherReason} -> {error, OtherReason} end; - {error, Reason} -> + {error, Reason} -> {error, Reason} end; shutdown(Pid, Time) -> - + case monitor_child(Pid) of ok -> exit(Pid, shutdown), %% Try to shutdown gracefully - receive + receive {'DOWN', _MRef, process, Pid, shutdown} -> ok; {'DOWN', _MRef, process, Pid, OtherReason} -> @@ -926,14 +929,14 @@ shutdown(Pid, Time) -> {error, OtherReason} end end; - {error, Reason} -> + {error, Reason} -> {error, Reason} end. %% Help function to shutdown/2 switches from link to monitor approach monitor_child(Pid) -> - - %% Do the monitor operation first so that if the child dies + + %% Do the monitor operation first so that if the child dies %% before the monitoring is done causing a 'DOWN'-message with %% reason noproc, we will get the real reason in the 'EXIT'-message %% unless a naughty child has already done unlink... @@ -943,22 +946,22 @@ monitor_child(Pid) -> receive %% If the child dies before the unlik we must empty %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. - {'EXIT', Pid, Reason} -> - receive + {'EXIT', Pid, Reason} -> + receive {'DOWN', _, process, Pid, _} -> {error, Reason} end - after 0 -> + after 0 -> %% If a naughty child did unlink and the child dies before - %% monitor the result will be that shutdown/2 receives a + %% monitor the result will be that shutdown/2 receives a %% 'DOWN'-message with reason noproc. %% If the child should die after the unlink there %% will be a 'DOWN'-message with a correct reason - %% that will be handled in shutdown/2. - ok + %% that will be handled in shutdown/2. + ok end. - - + + %%----------------------------------------------------------------- %% Child/State manipulating functions. %%----------------------------------------------------------------- @@ -1012,7 +1015,7 @@ remove_child(Child, State) -> %% Args: SupName = {local, atom()} | {global, atom()} | self %% Type = {Strategy, MaxIntensity, Period} %% Strategy = one_for_one | one_for_all | simple_one_for_one | -%% rest_for_one +%% rest_for_one %% MaxIntensity = integer() %% Period = integer() %% Mod :== atom() @@ -1107,10 +1110,10 @@ validChildType(supervisor) -> true; validChildType(worker) -> true; validChildType(What) -> throw({invalid_child_type, What}). -validName(_Name) -> true. +validName(_Name) -> true. -validFunc({M, F, A}) when is_atom(M), - is_atom(F), +validFunc({M, F, A}) when is_atom(M), + is_atom(F), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). @@ -1128,7 +1131,7 @@ validDelay(Delay) when is_number(Delay), Delay >= 0 -> true; validDelay(What) -> throw({invalid_delay, What}). -validShutdown(Shutdown, _) +validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; validShutdown(infinity, supervisor) -> true; validShutdown(brutal_kill, _) -> true; @@ -1154,7 +1157,7 @@ validMods(Mods) -> throw({invalid_modules, Mods}). 
%%% Returns: {ok, State'} | {terminate, State'} %%% ------------------------------------------------------ -add_restart(State) -> +add_restart(State) -> I = State#state.intensity, P = State#state.period, R = State#state.restarts, diff --git a/src/supervisor2_tests.erl b/src/supervisor2_tests.erl new file mode 100644 index 00000000..e42ded7b --- /dev/null +++ b/src/supervisor2_tests.erl @@ -0,0 +1,70 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% + +-module(supervisor2_tests). +-behaviour(supervisor2). + +-export([test_all/0, start_link/0]). +-export([init/1]). + +test_all() -> + ok = check_shutdown(stop, 200, 200, 2000), + ok = check_shutdown(ignored, 1, 2, 2000). + +check_shutdown(SigStop, Iterations, ChildCount, SupTimeout) -> + {ok, Sup} = supervisor2:start_link(?MODULE, [SupTimeout]), + Res = lists:foldl( + fun (I, ok) -> + TestSupPid = erlang:whereis(?MODULE), + ChildPids = + [begin + {ok, ChildPid} = + supervisor2:start_child(TestSupPid, []), + ChildPid + end || _ <- lists:seq(1, ChildCount)], + MRef = erlang:monitor(process, TestSupPid), + [P ! SigStop || P <- ChildPids], + ok = supervisor2:terminate_child(Sup, test_sup), + {ok, _} = supervisor2:restart_child(Sup, test_sup), + receive + {'DOWN', MRef, process, TestSupPid, shutdown} -> + ok; + {'DOWN', MRef, process, TestSupPid, Reason} -> + {error, {I, Reason}} + end; + (_, R) -> + R + end, ok, lists:seq(1, Iterations)), + unlink(Sup), + exit(Sup, shutdown), + Res. + +start_link() -> + Pid = spawn_link(fun () -> + process_flag(trap_exit, true), + receive stop -> ok end + end), + {ok, Pid}. + +init([Timeout]) -> + {ok, {{one_for_one, 0, 1}, + [{test_sup, {supervisor2, start_link, + [{local, ?MODULE}, ?MODULE, []]}, + transient, Timeout, supervisor, [?MODULE]}]}}; +init([]) -> + {ok, {{simple_one_for_one_terminate, 0, 1}, + [{test_worker, {?MODULE, start_link, []}, + temporary, 1000, worker, [?MODULE]}]}}. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index fb184d1a..5ce894a9 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -27,7 +27,7 @@ -behaviour(gen_server). --export([start_link/1]). +-export([start_link/1, start_link/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -49,9 +49,11 @@ -record(state, {total_memory, memory_limit, + memory_fraction, timeout, timer, - alarmed + alarmed, + alarm_funs }). %%---------------------------------------------------------------------------- @@ -59,6 +61,8 @@ -ifdef(use_specs). -spec(start_link/1 :: (float()) -> rabbit_types:ok_pid_or_error()). +-spec(start_link/3 :: (float(), fun ((any()) -> 'ok'), + fun ((any()) -> 'ok')) -> rabbit_types:ok_pid_or_error()). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). -spec(get_vm_limit/0 :: () -> non_neg_integer()). -spec(get_check_interval/0 :: () -> non_neg_integer()). 
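The start_link/3 declared above makes the alarm sink pluggable. A minimal sketch of supplying custom callbacks (the logging funs are this example's invention; start_link/1, per the body later in this diff, defaults to alarm_handler:set_alarm/1 and alarm_handler:clear_alarm/1):

    %% Hypothetical wiring: 40% watermark, alarms routed to the error
    %% logger rather than to alarm_handler.
    {ok, _Pid} = vm_memory_monitor:start_link(
                   0.4,
                   fun (Alarm) -> error_logger:warning_msg("set: ~p~n", [Alarm]) end,
                   fun (Alarm) -> error_logger:warning_msg("clear: ~p~n", [Alarm]) end).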
@@ -73,11 +77,9 @@ %% Public API %%---------------------------------------------------------------------------- -get_total_memory() -> - get_total_memory(os:type()). +get_total_memory() -> get_total_memory(os:type()). -get_vm_limit() -> - get_vm_limit(os:type()). +get_vm_limit() -> get_vm_limit(os:type()). get_check_interval() -> gen_server:call(?MODULE, get_check_interval, infinity). @@ -99,24 +101,27 @@ get_memory_limit() -> %% gen_server callbacks %%---------------------------------------------------------------------------- -start_link(Args) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []). +start_link(MemFraction) -> + start_link(MemFraction, + fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). -init([MemFraction]) -> +start_link(MemFraction, AlarmSet, AlarmClear) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, + [MemFraction, {AlarmSet, AlarmClear}], []). + +init([MemFraction, AlarmFuns]) -> TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), - State = #state { timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, - timer = TRef, - alarmed = false}, + State = #state { timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, + timer = TRef, + alarmed = false, + alarm_funs = AlarmFuns }, {ok, set_mem_limits(State, MemFraction)}. handle_call(get_vm_memory_high_watermark, _From, State) -> - {reply, State#state.memory_limit / State#state.total_memory, State}; + {reply, State#state.memory_fraction, State}; handle_call({set_vm_memory_high_watermark, MemFraction}, _From, State) -> - State1 = set_mem_limits(State, MemFraction), - error_logger:info_msg("Memory alarm changed to ~p, ~p bytes.~n", - [MemFraction, State1#state.memory_limit]), - {reply, ok, State1}; + {reply, ok, set_mem_limits(State, MemFraction)}; handle_call(get_check_interval, _From, State) -> {reply, State#state.timeout, State}; @@ -168,32 +173,41 @@ set_mem_limits(State, MemFraction) -> ?MEMORY_SIZE_FOR_UNKNOWN_OS; M -> M end, - MemLim = get_mem_limit(MemFraction, TotalMemory), + UsableMemory = case get_vm_limit() of + Limit when Limit < TotalMemory -> + error_logger:warning_msg( + "Only ~pMB of ~pMB memory usable due to " + "limited address space.~n", + [trunc(V/?ONE_MB) || V <- [Limit, TotalMemory]]), + Limit; + _ -> + TotalMemory + end, + MemLim = trunc(MemFraction * UsableMemory), error_logger:info_msg("Memory limit set to ~pMB of ~pMB total.~n", [trunc(MemLim/?ONE_MB), trunc(TotalMemory/?ONE_MB)]), - internal_update(State #state { total_memory = TotalMemory, - memory_limit = MemLim }). + internal_update(State #state { total_memory = TotalMemory, + memory_limit = MemLim, + memory_fraction = MemFraction}). internal_update(State = #state { memory_limit = MemLimit, - alarmed = Alarmed}) -> + alarmed = Alarmed, + alarm_funs = {AlarmSet, AlarmClear} }) -> MemUsed = erlang:memory(total), NewAlarmed = MemUsed > MemLimit, case {Alarmed, NewAlarmed} of - {false, true} -> - emit_update_info(set, MemUsed, MemLimit), - alarm_handler:set_alarm({{resource_limit, memory, node()}, []}); - {true, false} -> - emit_update_info(clear, MemUsed, MemLimit), - alarm_handler:clear_alarm({resource_limit, memory, node()}); - _ -> - ok + {false, true} -> emit_update_info(set, MemUsed, MemLimit), + AlarmSet({{resource_limit, memory, node()}, []}); + {true, false} -> emit_update_info(clear, MemUsed, MemLimit), + AlarmClear({resource_limit, memory, node()}); + _ -> ok end, State #state {alarmed = NewAlarmed}. 
-emit_update_info(State, MemUsed, MemLimit) ->
+emit_update_info(AlarmState, MemUsed, MemLimit) ->
 error_logger:info_msg(
 "vm_memory_high_watermark ~p. Memory used:~p allowed:~p~n",
- [State, MemUsed, MemLimit]).
+ [AlarmState, MemUsed, MemLimit]).
 start_timer(Timeout) ->
 {ok, TRef} = timer:send_interval(Timeout, update),
@@ -207,7 +221,7 @@ get_vm_limit({win32,_OSname}) ->
 8 -> 8*1024*1024*1024*1024 %% 8 TB for 64 bits 2^42
 end;
-%% On a 32-bit machine, if you're using more than 2 gigs of RAM you're
+%% On a 32-bit machine, if you're using more than 4 gigs of RAM you're
 %% in big trouble anyway.
 get_vm_limit(_OsType) ->
 case erlang:system_info(wordsize) of
@@ -216,10 +230,6 @@ get_vm_limit(_OsType) ->
 %%http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
 end.
-get_mem_limit(MemFraction, TotalMemory) ->
- AvMem = lists:min([TotalMemory, get_vm_limit()]),
- trunc(AvMem * MemFraction).
-
 %%----------------------------------------------------------------------------
 %% Internal Helpers
 %%----------------------------------------------------------------------------
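To make the usable-memory clamp in the new set_mem_limits/2 concrete, a worked example on a hypothetical 32-bit host (assuming the 32-bit branch of get_vm_limit/1 reports a 4GB address space and the stock 0.4 watermark):

    TotalMemory  = 8 * 1024 * 1024 * 1024,            %% 8GB physical RAM
    VmLimit      = 4 * 1024 * 1024 * 1024,            %% 2^32 address space
    UsableMemory = erlang:min(VmLimit, TotalMemory),  %% clamped to 4GB
    MemLim       = trunc(0.4 * UsableMemory).         %% 1717986918 bytes, ~1.6GB

Since VmLimit < TotalMemory here, the new warning branch would also report that only 4096MB of the 8192MB of memory is usable due to limited address space.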