Merge bug25158.

author: Simon MacMullen <simon@rabbitmq.com> 2012-09-18 14:09:58 +0100
committer: Simon MacMullen <simon@rabbitmq.com> 2012-09-18 14:09:58 +0100
commit: 4ac537045776c03e790946f3ca0e4543fcd63c06 (patch)
tree: 4961a4b641e864adca64bfb977369a298209fa85
parent: cc904e726e83529e4a1a58f4072f66d50e9359d2 (diff)
parent: 21c979a5c5fe71290bd67cd0834a383cf2dcf22c (diff)
download: rabbitmq-server-4ac537045776c03e790946f3ca0e4543fcd63c06.tar.gz
3 files changed, 266 insertions, 267 deletions
diff --git a/src/gm.erl b/src/gm.erl
index f88ed18f..90433e84 100644
--- a/src/gm.erl
+++ b/src/gm.erl
@@ -77,9 +77,13 @@
 %% confirmed_broadcast/2 directly from the callback module otherwise
 %% you will deadlock the entire group.
 %%
-%% group_members/1
-%% Provide the Pid. Returns a list of the current group members.
+%% info/1
+%% Provide the Pid. Returns a proplist with various facts, including
+%% the group name and the current group members.
 %%
+%% forget_group/1
+%% Provide the group name. Removes its mnesia record. Makes no attempt
+%% to ensure the group is empty.
 %%
 %% Implementation Overview
 %% -----------------------
@@ -373,7 +377,7 @@
 -behaviour(gen_server2).
 
 -export([create_tables/0, start_link/3, leave/1, broadcast/2,
-         confirmed_broadcast/2, group_members/1]).
+         confirmed_broadcast/2, info/1, forget_group/1]).
 
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
          code_change/3, prioritise_info/2]).
@@ -431,7 +435,8 @@
 -spec(leave/1 :: (pid()) -> 'ok').
 -spec(broadcast/2 :: (pid(), any()) -> 'ok').
 -spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok').
--spec(group_members/1 :: (pid()) -> [pid()]).
+-spec(info/1 :: (pid()) -> rabbit_types:infos()).
+-spec(forget_group/1 :: (group_name()) -> 'ok').
 
 %% The joined, members_changed and handle_msg callbacks can all return
 %% any of the following terms:
@@ -514,9 +519,15 @@ broadcast(Server, Msg) ->
 confirmed_broadcast(Server, Msg) ->
     gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity).
 
-group_members(Server) ->
-    gen_server2:call(Server, group_members, infinity).
+info(Server) ->
+    gen_server2:call(Server, info, infinity).
 
+forget_group(GroupName) ->
+    {atomic, ok} = mnesia:sync_transaction(
+                     fun () ->
+                             mnesia:delete({?GROUP_TABLE, GroupName})
+                     end),
+    ok.
 
 init([GroupName, Module, Args]) ->
     {MegaSecs, Secs, MicroSecs} = now(),
@@ -553,12 +564,16 @@ handle_call({confirmed_broadcast, Msg}, _From,
 handle_call({confirmed_broadcast, Msg}, From, State) ->
     internal_broadcast(Msg, From, State);
 
-handle_call(group_members, _From,
+handle_call(info, _From,
             State = #state { members_state = undefined }) ->
     reply(not_joined, State);
 
-handle_call(group_members, _From, State = #state { view = View }) ->
-    reply(get_pids(alive_view_members(View)), State);
+handle_call(info, _From, State = #state { group_name = GroupName,
+                                          module     = Module,
+                                          view       = View }) ->
+    reply([{group_name,    GroupName},
+           {module,        Module},
+           {group_members, get_pids(alive_view_members(View))}], State);
 
 handle_call({add_on_right, _NewMember}, _From,
             State = #state { members_state = undefined }) ->
diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl
index fb9f7e34..c11a8ff7 100644
--- a/src/rabbit_mirror_queue_master.erl
+++ b/src/rabbit_mirror_queue_master.erl
@@ -127,10 +127,13 @@ terminate(Reason,
 delete_and_terminate(Reason, State = #state { gm                  = GM,
                                               backing_queue       = BQ,
                                               backing_queue_state = BQS }) ->
-    Slaves = [Pid || Pid <- gm:group_members(GM), node(Pid) =/= node()],
+    Info = gm:info(GM),
+    Slaves = [Pid || Pid <- proplists:get_value(group_members, Info),
+                     node(Pid) =/= node()],
     MRefs = [erlang:monitor(process, S) || S <- Slaves],
     ok = gm:broadcast(GM, {delete_and_terminate, Reason}),
     monitor_wait(MRefs),
+    ok = gm:forget_group(proplists:get_value(group_name, Info)),
     State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS),
                    set_delivered       = 0 }.
 
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 40600063..f19046a0 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -123,37 +123,38 @@ init() ->
     ensure_mnesia_dir(),
     case is_virgin_node() of
         true  -> init_from_config();
-        false -> normal_init(is_disc_node(), all_clustered_nodes())
+        false -> init(is_disc_node(), all_clustered_nodes())
     end,
     %% We intuitively expect the global name server to be synced when
-    %% Mnesia is up. In fact that's not guaranteed to be the case - let's
-    %% make it so.
+    %% Mnesia is up. In fact that's not guaranteed to be the case -
+    %% let's make it so.
     ok = global:sync(),
     ok.
 
-normal_init(DiscNode, AllNodes) ->
-    init_db_and_upgrade(AllNodes, DiscNode, DiscNode).
+init(WantDiscNode, AllNodes) ->
+    init_db_and_upgrade(AllNodes, WantDiscNode, WantDiscNode).
 
 init_from_config() ->
-    {ok, {TryNodes, DiscNode}} =
+    {ok, {TryNodes, WantDiscNode}} =
         application:get_env(rabbit, cluster_nodes),
     case find_good_node(TryNodes -- [node()]) of
         {ok, Node} ->
             rabbit_log:info("Node '~p' selected for clustering from "
                             "configuration~n", [Node]),
             {ok, {_, DiscNodes, _}} = discover_cluster(Node),
-            init_db_and_upgrade(DiscNodes, DiscNode, false),
+            init_db_and_upgrade(DiscNodes, WantDiscNode, false),
             rabbit_node_monitor:notify_joined_cluster();
         none ->
             rabbit_log:warning("Could not find any suitable node amongst the "
                                "ones provided in the configuration: ~p~n",
                                [TryNodes]),
-            normal_init(true, [node()])
+            init(true, [node()])
     end.
 
-%% Make the node join a cluster. The node will be reset automatically before we
-%% actually cluster it. The nodes provided will be used to find out about the
-%% nodes in the cluster.
+%% Make the node join a cluster. The node will be reset automatically
+%% before we actually cluster it. The nodes provided will be used to
+%% find out about the nodes in the cluster.
+%%
 %% This function will fail if:
 %%
 %%   * The node is currently the only disc node of its cluster
@@ -161,17 +162,12 @@ init_from_config() ->
 %%   * The node is currently already clustered with the cluster of the nodes
 %%     provided
 %%
-%% Note that we make no attempt to verify that the nodes provided are all in the
-%% same cluster, we simply pick the first online node and we cluster to its
-%% cluster.
+%% Note that we make no attempt to verify that the nodes provided are
+%% all in the same cluster, we simply pick the first online node and
+%% we cluster to its cluster.
 join_cluster(DiscoveryNode, WantDiscNode) ->
     case is_disc_and_clustered() andalso [node()] =:= clustered_disc_nodes() of
-        true -> throw({error,
-                       {standalone_ram_node,
-                        "You can't cluster a node if it's the only "
-                        "disc node in its existing cluster. If new nodes "
-                        "joined while this node was offline, use "
-                        "\"update_cluster_nodes\" to add them manually"}});
+        true -> e(clustering_only_disc_node);
         _    -> ok
     end,
 
@@ -184,16 +180,14 @@ join_cluster(DiscoveryNode, WantDiscNode) ->
                            end,
 
     case lists:member(node(), ClusterNodes) of
-        true  -> throw({error, {already_clustered,
-                                "You are already clustered with the nodes you "
-                                "have selected"}});
+        true  -> e(already_clustered);
         false -> ok
     end,
 
-    %% reset the node. this simplifies things and it will be needed in this case
-    %% - we're joining a new cluster with new nodes which are not in synch with
-    %% the current node. I also lifts the burden of reseting the node from the
-    %% user.
+    %% reset the node. this simplifies things and it will be needed in
+    %% this case - we're joining a new cluster with new nodes which
+    %% are not in synch with the current node. It also lifts the burden
+    %% of resetting the node from the user.
     reset(false),
 
     rabbit_misc:local_info_msg("Clustering with ~p~n", [ClusterNodes]),
@@ -224,18 +218,14 @@ reset(Force) ->
         false ->
             AllNodes = all_clustered_nodes(),
             %% Reconnecting so that we will get an up to date nodes.
-            %% We don't need to check for consistency because we are resetting.
-            %% Force=true here so that reset still works when clustered with a
-            %% node which is down.
+            %% We don't need to check for consistency because we are
+            %% resetting.  Force=true here so that reset still works
+            %% when clustered with a node which is down.
             init_db_with_mnesia(AllNodes, is_disc_node(), false, true),
             case is_disc_and_clustered() andalso
                  [node()] =:= clustered_disc_nodes()
             of
-                true  -> throw({error, {standalone_ram_node,
-                                        "You can't reset a node if it's the "
-                                        "only disc node in a cluster. Please "
-                                        "convert another node of the cluster "
-                                        "to a disc node first."}});
+                true  -> e(resetting_only_disc_node);
                 false -> ok
             end,
             leave_cluster(),
@@ -249,40 +239,26 @@ reset(Force) ->
     ok = rabbit_node_monitor:reset_cluster_status(),
     ok.
 
-%% We need to make sure that we don't end up in a distributed Erlang system with
-%% nodes while not being in an Mnesia cluster with them. We don't handle that
-%% well.
+%% We need to make sure that we don't end up in a distributed Erlang
+%% system with nodes while not being in an Mnesia cluster with
+%% them. We don't handle that well.
 disconnect_nodes(Nodes) -> [erlang:disconnect_node(N) || N <- Nodes].
 
 change_cluster_node_type(Type) ->
     ensure_mnesia_dir(),
     ensure_mnesia_not_running(),
     case is_clustered() of
-        false -> throw({error, {not_clustered,
-                                "Non-clustered nodes can only be disc nodes"}});
+        false -> e(not_clustered);
         true  -> ok
     end,
     {_, _, RunningNodes} =
         case discover_cluster(all_clustered_nodes()) of
-            {ok, Status} ->
-                Status;
-            {error, _Reason} ->
-                throw({error,
-                       {cannot_connect_to_cluster,
-                        "Could not connect to the cluster nodes present in "
-                        "this node status file. If the cluster has changed, "
-                        "you can use the \"update_cluster_nodes\" command to "
-                        "point to the new cluster nodes"}})
-    end,
+            {ok, Status}     -> Status;
+            {error, _Reason} -> e(cannot_connect_to_cluster)
+        end,
     Node = case RunningNodes of
-               [] ->
-                   throw({error,
-                          {no_online_cluster_nodes,
-                           "Could not find any online cluster nodes. If the "
-                           "cluster has changed, you can use the 'recluster' "
-                           "command."}});
-               [Node0|_] ->
-                   Node0
+               []        -> e(no_online_cluster_nodes);
+               [Node0|_] -> Node0
            end,
     ok = reset(false),
     ok = join_cluster(Node, case Type of
@@ -296,81 +272,63 @@ update_cluster_nodes(DiscoveryNode) ->
 
     Status = {AllNodes, _, _} =
         case discover_cluster(DiscoveryNode) of
-            {ok, Status0} ->
-                Status0;
-            {error, _Reason} ->
-                throw({error,
-                       {cannot_connect_to_node,
-                        "Could not connect to the cluster node provided"}})
+            {ok, Status0}    -> Status0;
+            {error, _Reason} -> e(cannot_connect_to_node)
         end,
     case ordsets:is_element(node(), AllNodes) of
-        true  -> %% As in `check_consistency/0', we can safely delete the schema
-                 %% here, since it'll be replicated from the other nodes
-                 mnesia:delete_schema([node()]),
-                 rabbit_node_monitor:write_cluster_status(Status),
-                 init_db_with_mnesia(AllNodes, is_disc_node(), false);
-        false -> throw({error,
-                        {inconsistent_cluster,
-                         "The nodes provided do not have this node as part of "
-                         "the cluster"}})
+        true ->
+            %% As in `check_consistency/0', we can safely delete the
+            %% schema here, since it'll be replicated from the other
+            %% nodes
+            mnesia:delete_schema([node()]),
+            rabbit_node_monitor:write_cluster_status(Status),
+            init_db_with_mnesia(AllNodes, is_disc_node(), false);
+        false ->
+            e(inconsistent_cluster)
     end,
-
     ok.
 
-%% We proceed like this: try to remove the node locally. If the node is offline,
-%% we remove the node if:
+%% We proceed like this: try to remove the node locally. If the node
+%% is offline, we remove the node if:
 %%   * This node is a disc node
 %%   * All other nodes are offline
-%%   * This node was, at the best of our knowledge (see comment below) the last
-%%     or second to last after the node we're removing to go down
+%%   * This node was, at the best of our knowledge (see comment below)
+%%     the last or second to last after the node we're removing to go
+%%     down
 forget_cluster_node(Node, RemoveWhenOffline) ->
     case ordsets:is_element(Node, all_clustered_nodes()) of
         true  -> ok;
-        false -> throw({error, {not_a_cluster_node,
-                                "The node selected is not in the cluster."}})
+        false -> e(not_a_cluster_node)
     end,
     case {mnesia:system_info(is_running), RemoveWhenOffline} of
-        {yes, true} -> throw({error, {online_node_offline_flag,
-                                      "You set the --offline flag, which is "
-                                      "used to remove nodes remotely from "
-                                      "offline nodes, but this node is "
-                                      "online. "}});
+        {yes, true} -> e(online_node_offline_flag);
         _           -> ok
     end,
     case remove_node_if_mnesia_running(Node) of
         ok ->
             ok;
+        {error, mnesia_not_running} when RemoveWhenOffline ->
+            remove_node_offline_node(Node);
         {error, mnesia_not_running} ->
-            case RemoveWhenOffline of
-                true  -> remove_node_offline_node(Node);
-                false -> throw({error,
-                                {offline_node_no_offline_flag,
-                                 "You are trying to remove a node from an "
-                                 "offline node. That's dangerous, but can be "
-                                 "done with the --offline flag. Please consult "
-                                 "the manual for rabbitmqctl for more "
-                                 "information."}})
-            end;
+            e(offline_node_no_offline_flag);
         Err = {error, _} ->
             throw(Err)
     end.
 
 remove_node_offline_node(Node) ->
-    case {ordsets:del_element(Node,
-                              running_nodes(all_clustered_nodes())),
-          is_disc_node()}
-    of
+    case {ordsets:del_element(Node, running_nodes(all_clustered_nodes())),
+          is_disc_node()} of
         {[], true} ->
-            %% Note that while we check if the nodes was the last to go down,
-            %% apart from the node we're removing from, this is still unsafe.
-            %% Consider the situation in which A and B are clustered. A goes
-            %% down, and records B as the running node. Then B gets clustered
-            %% with C, C goes down and B goes down. In this case, C is the
-            %% second-to-last, but we don't know that and we'll remove B from A
+            %% Note that while we check if the node was the last to
+            %% go down, apart from the node we're removing from, this
+            %% is still unsafe.  Consider the situation in which A and
+            %% B are clustered. A goes down, and records B as the
+            %% running node. Then B gets clustered with C, C goes down
+            %% and B goes down. In this case, C is the second-to-last,
+            %% but we don't know that and we'll remove B from A
             %% anyway, even if that will lead to bad things.
             case ordsets:subtract(running_clustered_nodes(),
-                                  ordsets:from_list([node(), Node]))
-            of
+                                  ordsets:from_list([node(), Node])) of
                 [] -> start_mnesia(),
                       try
                           [mnesia:force_load_table(T) ||
@@ -380,20 +338,10 @@ remove_node_offline_node(Node) ->
                       after
                           stop_mnesia()
                       end;
-                _  -> throw({error,
-                             {not_last_node_to_go_down,
-                              "The node you're trying to remove from was not "
-                              "the last to go down (excluding the node you are "
-                              "removing). Please use the the last node to go "
-                              "down to remove nodes when the cluster is "
-                              "offline."}})
+                _  -> e(not_last_node_to_go_down)
             end;
         {_, _} ->
-            throw({error,
-                   {removing_node_from_offline_node,
-                    "To remove a node remotely from an offline node, the node "
-                    "you're removing from must be a disc node and all the "
-                    "other nodes must be offline."}})
+            e(removing_node_from_offline_node)
     end.
 
 
@@ -420,63 +368,62 @@ is_clustered() ->
     Nodes = all_clustered_nodes(),
     [node()] =/= Nodes andalso [] =/= Nodes.
 
-is_disc_and_clustered() ->
-    is_disc_node() andalso is_clustered().
+is_disc_and_clustered() -> is_disc_node() andalso is_clustered().
 
-%% Functions that retrieve the nodes in the cluster will rely on the status file
-%% if offline.
+%% Functions that retrieve the nodes in the cluster will rely on the
+%% status file if offline.
 
-all_clustered_nodes() ->
-    cluster_status(all).
+all_clustered_nodes() -> cluster_status(all).
 
-clustered_disc_nodes() ->
-    cluster_status(disc).
+clustered_disc_nodes() -> cluster_status(disc).
 
-clustered_ram_nodes() ->
-    ordsets:subtract(cluster_status(all), cluster_status(disc)).
+clustered_ram_nodes() -> ordsets:subtract(cluster_status(all),
+                                          cluster_status(disc)).
 
-running_clustered_nodes() ->
-    cluster_status(running).
+running_clustered_nodes() -> cluster_status(running).
 
 running_clustered_disc_nodes() ->
     {_, DiscNodes, RunningNodes} = cluster_status(),
     ordsets:intersection(DiscNodes, RunningNodes).
 
-%% This function is the actual source of information, since it gets the data
-%% from mnesia. Obviously it'll work only when mnesia is running.
+%% This function is the actual source of information, since it gets
+%% the data from mnesia. Obviously it'll work only when mnesia is
+%% running.
 mnesia_nodes() ->
     case mnesia:system_info(is_running) of
-        no  -> {error, mnesia_not_running};
-        yes -> %% If the tables are not present, it means that `init_db/3'
-               %% hasn't been run yet. In other words, either we are a virgin
-               %% node or a restarted RAM node. In both cases we're not
-               %% interested in what mnesia has to say.
-               IsDiscNode = mnesia:system_info(use_dir),
-               Tables = mnesia:system_info(tables),
-               {Table, _} = case table_definitions(case IsDiscNode of
-                                                       true  -> disc;
-                                                       false -> ram
-                                                   end) of [T|_] -> T end,
-               case lists:member(Table, Tables) of
-                   true ->
-                       AllNodes =
-                           ordsets:from_list(mnesia:system_info(db_nodes)),
-                       DiscCopies = ordsets:from_list(
-                                      mnesia:table_info(schema, disc_copies)),
-                       DiscNodes =
-                           case IsDiscNode of
-                               true  -> ordsets:add_element(node(), DiscCopies);
-                               false -> DiscCopies
-                           end,
-                       {ok, {AllNodes, DiscNodes}};
-                   false ->
-                       {error, tables_not_present}
-               end
+        no ->
+            {error, mnesia_not_running};
+        yes ->
+            %% If the tables are not present, it means that
+            %% `init_db/3' hasn't been run yet. In other words, either
+            %% we are a virgin node or a restarted RAM node. In both
+            %% cases we're not interested in what mnesia has to say.
+            IsDiscNode = mnesia:system_info(use_dir),
+            Tables = mnesia:system_info(tables),
+            {Table, _} = case table_definitions(case IsDiscNode of
+                                                    true  -> disc;
+                                                    false -> ram
+                                                end) of [T|_] -> T end,
+            case lists:member(Table, Tables) of
+                true ->
+                    AllNodes =
+                        ordsets:from_list(mnesia:system_info(db_nodes)),
+                    DiscCopies = ordsets:from_list(
+                                   mnesia:table_info(schema, disc_copies)),
+                    DiscNodes =
+                        case IsDiscNode of
+                            true  -> ordsets:add_element(node(), DiscCopies);
+                            false -> DiscCopies
+                        end,
+                    {ok, {AllNodes, DiscNodes}};
+                false ->
+                    {error, tables_not_present}
+            end
     end.
 
 cluster_status(WhichNodes, ForceMnesia) ->
-    %% I don't want to call `running_nodes/1' unless if necessary, since it can
-    %% deadlock when stopping applications.
+    %% I don't want to call `running_nodes/1' unless necessary,
+    %% since it can deadlock when stopping applications.
     Nodes = case mnesia_nodes() of
                 {ok, {AllNodes, DiscNodes}} ->
                     {ok, {AllNodes, DiscNodes,
@@ -484,9 +431,10 @@ cluster_status(WhichNodes, ForceMnesia) ->
                 {error, _Reason} when not ForceMnesia ->
                     {AllNodes, DiscNodes, RunningNodes} =
                         rabbit_node_monitor:read_cluster_status(),
-                    %% The cluster status file records the status when the node
-                    %% is online, but we know for sure that the node is offline
-                    %% now, so we can remove it from the list of running nodes.
+                    %% The cluster status file records the status when
+                    %% the node is online, but we know for sure that
+                    %% the node is offline now, so we can remove it
+                    %% from the list of running nodes.
                     {ok,
                      {AllNodes, DiscNodes,
                       fun() -> ordsets:del_element(node(), RunningNodes) end}};
@@ -509,11 +457,9 @@ cluster_status(WhichNodes) ->
     {ok, Status} = cluster_status(WhichNodes, false),
     Status.
 
-cluster_status() ->
-    cluster_status(status).
+cluster_status() -> cluster_status(status).
 
-cluster_status_from_mnesia() ->
-    cluster_status(status, true).
+cluster_status_from_mnesia() -> cluster_status(status, true).
 
 node_info() ->
     {erlang:system_info(otp_release), rabbit_misc:version(),
@@ -525,21 +471,22 @@ is_disc_node() ->
 
 dir() -> mnesia:system_info(directory).
 
-table_names() ->
-    [Tab || {Tab, _} <- table_definitions()].
+table_names() -> [Tab || {Tab, _} <- table_definitions()].
 
 %%----------------------------------------------------------------------------
 %% Operations on the db
 %%----------------------------------------------------------------------------
 
-%% Adds the provided nodes to the mnesia cluster, creating a new schema if there
-%% is the need to and catching up if there are other nodes in the cluster
-%% already. It also updates the cluster status file.
+%% Adds the provided nodes to the mnesia cluster, creating a new
+%% schema if there is the need to and catching up if there are other
+%% nodes in the cluster already. It also updates the cluster status
+%% file.
 init_db(ClusterNodes, WantDiscNode, Force) ->
     Nodes = change_extra_db_nodes(ClusterNodes, Force),
-    %% Note that we use `system_info' here and not the cluster status since when
-    %% we start rabbit for the first time the cluster status will say we are a
-    %% disc node but the tables won't be present yet.
+    %% Note that we use `system_info' here and not the cluster status
+    %% since when we start rabbit for the first time the cluster
+    %% status will say we are a disc node but the tables won't be
+    %% present yet.
     WasDiscNode = mnesia:system_info(use_dir),
     case {Nodes, WasDiscNode, WantDiscNode} of
         {[], _, false} ->
@@ -556,11 +503,11 @@ init_db(ClusterNodes, WantDiscNode, Force) ->
             ensure_version_ok(
               rpc:call(AnotherNode, rabbit_version, recorded, [])),
             ok = wait_for_replicated_tables(),
-            %% The sequence in which we delete the schema and then the other
-            %% tables is important: if we delete the schema first when moving to
-            %% RAM mnesia will loudly complain since it doesn't make much sense
-            %% to do that. But when moving to disc, we need to move the schema
-            %% first.
+            %% The sequence in which we delete the schema and then the
+            %% other tables is important: if we delete the schema
+            %% first when moving to RAM mnesia will loudly complain
+            %% since it doesn't make much sense to do that. But when
+            %% moving to disc, we need to move the schema first.
             case WantDiscNode of
                 true  -> create_local_table_copy(schema, disc_copies),
                          create_local_table_copies(disc);
@@ -579,8 +526,8 @@ init_db_and_upgrade(ClusterNodes, WantDiscNode, Force) ->
              starting_from_scratch -> rabbit_version:record_desired();
              version_not_available -> schema_ok_or_move()
          end,
-    %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget about the
-    %% cluster
+    %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget
+    %% about the cluster
     case WantDiscNode of
         false -> start_mnesia(),
                  change_extra_db_nodes(ClusterNodes, true),
@@ -696,8 +643,8 @@ wait_for_tables(TableNames) ->
             throw({error, {failed_waiting_for_tables, Reason}})
     end.
 
-%% This does not guarantee us much, but it avoids some situations that will
-%% definitely end up badly
+%% This does not guarantee us much, but it avoids some situations that
+%% will definitely end up badly
 check_cluster_consistency() ->
     %% We want to find 0 or 1 consistent nodes.
     case lists:foldl(
@@ -708,18 +655,21 @@ check_cluster_consistency() ->
     of
         {ok, Status = {RemoteAllNodes, _, _}} ->
             case ordsets:is_subset(all_clustered_nodes(), RemoteAllNodes) of
-                true  -> ok;
-                false -> %% We delete the schema here since we think we are
-                         %% clustered with nodes that are no longer in the
-                         %% cluster and there is no other way to remove them
-                         %% from our schema. On the other hand, we are sure
-                         %% that there is another online node that we can use
-                         %% to sync the tables with. There is a race here: if
-                         %% between this check and the `init_db' invocation the
-                         %% cluster gets disbanded, we're left with a node with
-                         %% no mnesia data that will try to connect to offline
-                         %% nodes.
-                         mnesia:delete_schema([node()])
+                true  ->
+                    ok;
+                false ->
+                    %% We delete the schema here since we think we are
+                    %% clustered with nodes that are no longer in the
+                    %% cluster and there is no other way to remove
+                    %% them from our schema. On the other hand, we are
+                    %% sure that there is another online node that we
+                    %% can use to sync the tables with. There is a
+                    %% race here: if between this check and the
+                    %% `init_db' invocation the cluster gets
+                    %% disbanded, we're left with a node with no
+                    %% mnesia data that will try to connect to offline
+                    %% nodes.
+                    mnesia:delete_schema([node()])
             end,
             rabbit_node_monitor:write_cluster_status(Status);
         {error, not_found} ->
@@ -764,9 +714,7 @@ on_node_down(_Node) ->
 discover_cluster(Nodes) when is_list(Nodes) ->
     lists:foldl(fun (_, {ok, Res})     -> {ok, Res};
                     (Node, {error, _}) -> discover_cluster(Node)
-                end,
-                {error, no_nodes_provided},
-                Nodes);
+                end, {error, no_nodes_provided}, Nodes);
 discover_cluster(Node) ->
     OfflineError =
         {error, {cannot_discover_cluster,
@@ -776,7 +724,8 @@ discover_cluster(Node) ->
             {error, {cannot_discover_cluster,
                      "You provided the current node as node to cluster with"}};
         false ->
-            case rpc:call(Node, rabbit_mnesia, cluster_status_from_mnesia, []) of
+            case rpc:call(Node,
+                          rabbit_mnesia, cluster_status_from_mnesia, []) of
                 {badrpc, _Reason}           -> OfflineError;
                 {error, mnesia_not_running} -> OfflineError;
                 {ok, Res}                   -> {ok, Res}
@@ -966,7 +915,8 @@ ensure_version_ok({ok, DiscVersion}) ->
 ensure_version_ok({error, _}) ->
     ok = rabbit_version:record_desired().
 
-%% We only care about disc nodes since ram nodes are supposed to catch up only
+%% We only care about disc nodes since ram nodes are supposed to catch
+%% up only
 create_schema() ->
     stop_mnesia(),
     rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema),
@@ -1039,50 +989,39 @@ create_local_table_copy(Tab, Type) ->
 
 remove_node_if_mnesia_running(Node) ->
     case mnesia:system_info(is_running) of
-        yes -> %% Deleting the the schema copy of the node will result in the
-               %% node being removed from the cluster, with that change being
-               %% propagated to all nodes
-               case mnesia:del_table_copy(schema, Node) of
-                   {atomic, ok} ->
-                       rabbit_node_monitor:notify_left_cluster(Node),
-                       ok;
-                   {aborted, Reason} ->
-                       {error, {failed_to_remove_node, Node, Reason}}
-               end;
-        no  -> {error, mnesia_not_running}
+        yes ->
+            %% Deleting the schema copy of the node will result in
+            %% the node being removed from the cluster, with that
+            %% change being propagated to all nodes
+            case mnesia:del_table_copy(schema, Node) of
+                {atomic, ok} ->
+                    rabbit_node_monitor:notify_left_cluster(Node),
+                    ok;
+                {aborted, Reason} ->
+                    {error, {failed_to_remove_node, Node, Reason}}
+            end;
+        no  ->
+            {error, mnesia_not_running}
     end.
 
 leave_cluster() ->
     case {is_clustered(),
           running_nodes(ordsets:del_element(node(), all_clustered_nodes()))}
     of
-        {false, []} ->
-            ok;
-        {_, AllNodes} ->
-            case lists:any(
-                   fun (Node) ->
-                           case rpc:call(Node, rabbit_mnesia,
-                                         remove_node_if_mnesia_running,
-                                         [node()])
-                           of
-                               ok ->
-                                   true;
-                               {error, mnesia_not_running} ->
-                                   false;
-                               {error, Reason} ->
-                                   throw({error, Reason});
-                               {badrpc, nodedown} ->
-                                   false
-                           end
-                   end,
-                   AllNodes)
-            of
-                true  -> ok;
-                false -> throw({error,
-                                {no_running_cluster_nodes,
-                                 "You cannot leave a cluster if no online "
-                                 "nodes are present"}})
-            end
+        {false, []}   -> ok;
+        {_, AllNodes} -> case lists:any(fun leave_cluster/1, AllNodes) of
+                             true  -> ok;
+                             false -> e(no_running_cluster_nodes)
+                         end
+    end.
+
+leave_cluster(Node) ->
+    case rpc:call(Node,
+                  rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of
+        ok                          -> true;
+        {error, mnesia_not_running} -> false;
+        {error, Reason}             -> throw({error, Reason});
+        {badrpc, nodedown}          -> false
     end.
 
 wait_for(Condition) ->
@@ -1114,10 +1053,10 @@ change_extra_db_nodes(ClusterNodes0, Force) ->
             Nodes
     end.
 
-%% What we really want is nodes running rabbit, not running mnesia. Using
-%% `rabbit_mnesia:system_info(running_db_nodes)' will return false positives
-%% when we are actually just doing cluster operations (e.g. joining the
-%% cluster).
+%% What we really want is nodes running rabbit, not running
+%% mnesia. Using `mnesia:system_info(running_db_nodes)' will
+%% return false positives when we are actually just doing cluster
+%% operations (e.g. joining the cluster).
 running_nodes(Nodes) ->
     {Replies, _BadNodes} =
         rpc:multicall(Nodes, rabbit_mnesia, is_running_remote, []),
@@ -1162,12 +1101,13 @@ check_otp_consistency(Remote) ->
 check_rabbit_consistency(Remote) ->
     check_version_consistency(rabbit_misc:version(), Remote, "Rabbit").
 
-%% This is fairly tricky.  We want to know if the node is in the state that a
-%% `reset' would leave it in.  We cannot simply check if the mnesia tables
-%% aren't there because restarted RAM nodes won't have tables while still being
-%% non-virgin.  What we do instead is to check if the mnesia directory is non
-%% existant or empty, with the exception of the cluster status file, which will
-%% be there thanks to `rabbit_node_monitor:prepare_cluster_status_file/0'.
+%% This is fairly tricky.  We want to know if the node is in the state
+%% that a `reset' would leave it in.  We cannot simply check if the
+%% mnesia tables aren't there because restarted RAM nodes won't have
+%% tables while still being non-virgin.  What we do instead is to
+%% check if the mnesia directory is non-existent or empty, with the
+%% exception of the cluster status file, which will be there thanks to
+%% `rabbit_node_monitor:prepare_cluster_status_file/0'.
 is_virgin_node() ->
     case rabbit_file:list_dir(dir()) of
         {error, enoent} -> true;
@@ -1182,11 +1122,52 @@ find_good_node([]) ->
     none;
 find_good_node([Node | Nodes]) ->
     case rpc:call(Node, rabbit_mnesia, node_info, []) of
-        {badrpc, _Reason} ->
-            find_good_node(Nodes);
-        {OTP, Rabbit, _} ->
-            case check_consistency(OTP, Rabbit) of
-                {error, _} -> find_good_node(Nodes);
-                ok         -> {ok, Node}
-            end
+        {badrpc, _Reason} -> find_good_node(Nodes);
+        {OTP, Rabbit, _}  -> case check_consistency(OTP, Rabbit) of
+                                 {error, _} -> find_good_node(Nodes);
+                                 ok         -> {ok, Node}
+                             end
     end.
+
+e(Tag) -> throw({error, {Tag, error_description(Tag)}}).
+
+error_description(clustering_only_disc_node) ->
+    "You cannot cluster a node if it is the only disc node in its existing "
+        " cluster. If new nodes joined while this node was offline, use "
+        "\"update_cluster_nodes\" to add them manually.";
+error_description(resetting_only_disc_node) ->
+    "You cannot reset a node when it is the only disc node in a cluster. "
+        "Please convert another node of the cluster to a disc node first.";
+error_description(already_clustered) ->
+    "You are already clustered with the nodes you have selected.";
+error_description(not_clustered) ->
+    "Non-clustered nodes can only be disc nodes.";
+error_description(cannot_connect_to_cluster) ->
+    "Could not connect to the cluster nodes present in this node's "
+        "status file. If the cluster has changed, you can use the "
+        "\"update_cluster_nodes\" command to point to the new cluster nodes.";
+error_description(no_online_cluster_nodes) ->
+    "Could not find any online cluster nodes. If the cluster has changed, "
+        "you can use the 'recluster' command.";
+error_description(cannot_connect_to_node) ->
+    "Could not connect to the cluster node provided.";
+error_description(inconsistent_cluster) ->
+    "The nodes provided do not have this node as part of the cluster.";
+error_description(not_a_cluster_node) ->
+    "The node selected is not in the cluster.";
+error_description(online_node_offline_flag) ->
+    "You set the --offline flag, which is used to remove nodes remotely from "
+        "offline nodes, but this node is online.";
+error_description(offline_node_no_offline_flag) ->
+    "You are trying to remove a node from an offline node. That is dangerous, "
+        "but can be done with the --offline flag. Please consult the manual "
+        "for rabbitmqctl for more information.";
+error_description(not_last_node_to_go_down) ->
+    "The node you're trying to remove from was not the last to go down "
+        "(excluding the node you are removing). Please use the the last node "
+        "to go down to remove nodes when the cluster is offline.";
+error_description(removing_node_from_offline_node) ->
+    "To remove a node remotely from an offline node, the node you're removing "
+        "from must be a disc node and all the other nodes must be offline.";
+error_description(no_running_cluster_nodes) ->
+    "You cannot leave a cluster if no online nodes are present.".
author	Simon MacMullen <simon@rabbitmq.com>	2012-09-18 14:09:58 +0100
committer	Simon MacMullen <simon@rabbitmq.com>	2012-09-18 14:09:58 +0100
commit	4ac537045776c03e790946f3ca0e4543fcd63c06 (patch)
tree	4961a4b641e864adca64bfb977369a298209fa85
parent	cc904e726e83529e4a1a58f4072f66d50e9359d2 (diff)
parent	21c979a5c5fe71290bd67cd0834a383cf2dcf22c (diff)
download	rabbitmq-server-4ac537045776c03e790946f3ca0e4543fcd63c06.tar.gz