diff options
author | Simon MacMullen <simon@rabbitmq.com> | 2012-09-18 14:09:58 +0100 |
---|---|---|
committer | Simon MacMullen <simon@rabbitmq.com> | 2012-09-18 14:09:58 +0100 |
commit | 4ac537045776c03e790946f3ca0e4543fcd63c06 (patch) | |
tree | 4961a4b641e864adca64bfb977369a298209fa85 | |
parent | cc904e726e83529e4a1a58f4072f66d50e9359d2 (diff) | |
parent | 21c979a5c5fe71290bd67cd0834a383cf2dcf22c (diff) | |
download | rabbitmq-server-4ac537045776c03e790946f3ca0e4543fcd63c06.tar.gz |
Merge bug25158.
-rw-r--r-- | src/gm.erl | 33 | ||||
-rw-r--r-- | src/rabbit_mirror_queue_master.erl | 5 | ||||
-rw-r--r-- | src/rabbit_mnesia.erl | 495 |
3 files changed, 266 insertions, 267 deletions
@@ -77,9 +77,13 @@ %% confirmed_broadcast/2 directly from the callback module otherwise %% you will deadlock the entire group. %% -%% group_members/1 -%% Provide the Pid. Returns a list of the current group members. +%% info/1 +%% Provide the Pid. Returns a proplist with various facts, including +%% the group name and the current group members. %% +%% forget_group/1 +%% Provide the group name. Removes its mnesia record. Makes no attempt +%% to ensure the group is empty. %% %% Implementation Overview %% ----------------------- @@ -373,7 +377,7 @@ -behaviour(gen_server2). -export([create_tables/0, start_link/3, leave/1, broadcast/2, - confirmed_broadcast/2, group_members/1]). + confirmed_broadcast/2, info/1, forget_group/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, prioritise_info/2]). @@ -431,7 +435,8 @@ -spec(leave/1 :: (pid()) -> 'ok'). -spec(broadcast/2 :: (pid(), any()) -> 'ok'). -spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok'). --spec(group_members/1 :: (pid()) -> [pid()]). +-spec(info/1 :: (pid()) -> rabbit_types:infos()). +-spec(forget_group/1 :: (group_name()) -> 'ok'). %% The joined, members_changed and handle_msg callbacks can all return %% any of the following terms: @@ -514,9 +519,15 @@ broadcast(Server, Msg) -> confirmed_broadcast(Server, Msg) -> gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity). -group_members(Server) -> - gen_server2:call(Server, group_members, infinity). +info(Server) -> + gen_server2:call(Server, info, infinity). +forget_group(GroupName) -> + {atomic, ok} = mnesia:sync_transaction( + fun () -> + mnesia:delete({?GROUP_TABLE, GroupName}) + end), + ok. init([GroupName, Module, Args]) -> {MegaSecs, Secs, MicroSecs} = now(), @@ -553,12 +564,16 @@ handle_call({confirmed_broadcast, Msg}, _From, handle_call({confirmed_broadcast, Msg}, From, State) -> internal_broadcast(Msg, From, State); -handle_call(group_members, _From, +handle_call(info, _From, State = #state { members_state = undefined }) -> reply(not_joined, State); -handle_call(group_members, _From, State = #state { view = View }) -> - reply(get_pids(alive_view_members(View)), State); +handle_call(info, _From, State = #state { group_name = GroupName, + module = Module, + view = View }) -> + reply([{group_name, GroupName}, + {module, Module}, + {group_members, get_pids(alive_view_members(View))}], State); handle_call({add_on_right, _NewMember}, _From, State = #state { members_state = undefined }) -> diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index fb9f7e34..c11a8ff7 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -127,10 +127,13 @@ terminate(Reason, delete_and_terminate(Reason, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - Slaves = [Pid || Pid <- gm:group_members(GM), node(Pid) =/= node()], + Info = gm:info(GM), + Slaves = [Pid || Pid <- proplists:get_value(group_members, Info), + node(Pid) =/= node()], MRefs = [erlang:monitor(process, S) || S <- Slaves], ok = gm:broadcast(GM, {delete_and_terminate, Reason}), monitor_wait(MRefs), + ok = gm:forget_group(proplists:get_value(group_name, Info)), State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), set_delivered = 0 }. diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 40600063..f19046a0 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -123,37 +123,38 @@ init() -> ensure_mnesia_dir(), case is_virgin_node() of true -> init_from_config(); - false -> normal_init(is_disc_node(), all_clustered_nodes()) + false -> init(is_disc_node(), all_clustered_nodes()) end, %% We intuitively expect the global name server to be synced when - %% Mnesia is up. In fact that's not guaranteed to be the case - let's - %% make it so. + %% Mnesia is up. In fact that's not guaranteed to be the case - + %% let's make it so. ok = global:sync(), ok. -normal_init(DiscNode, AllNodes) -> - init_db_and_upgrade(AllNodes, DiscNode, DiscNode). +init(WantDiscNode, AllNodes) -> + init_db_and_upgrade(AllNodes, WantDiscNode, WantDiscNode). init_from_config() -> - {ok, {TryNodes, DiscNode}} = + {ok, {TryNodes, WantDiscNode}} = application:get_env(rabbit, cluster_nodes), case find_good_node(TryNodes -- [node()]) of {ok, Node} -> rabbit_log:info("Node '~p' selected for clustering from " "configuration~n", [Node]), {ok, {_, DiscNodes, _}} = discover_cluster(Node), - init_db_and_upgrade(DiscNodes, DiscNode, false), + init_db_and_upgrade(DiscNodes, WantDiscNode, false), rabbit_node_monitor:notify_joined_cluster(); none -> rabbit_log:warning("Could not find any suitable node amongst the " "ones provided in the configuration: ~p~n", [TryNodes]), - normal_init(true, [node()]) + init(true, [node()]) end. -%% Make the node join a cluster. The node will be reset automatically before we -%% actually cluster it. The nodes provided will be used to find out about the -%% nodes in the cluster. +%% Make the node join a cluster. The node will be reset automatically +%% before we actually cluster it. The nodes provided will be used to +%% find out about the nodes in the cluster. +%% %% This function will fail if: %% %% * The node is currently the only disc node of its cluster @@ -161,17 +162,12 @@ init_from_config() -> %% * The node is currently already clustered with the cluster of the nodes %% provided %% -%% Note that we make no attempt to verify that the nodes provided are all in the -%% same cluster, we simply pick the first online node and we cluster to its -%% cluster. +%% Note that we make no attempt to verify that the nodes provided are +%% all in the same cluster, we simply pick the first online node and +%% we cluster to its cluster. join_cluster(DiscoveryNode, WantDiscNode) -> case is_disc_and_clustered() andalso [node()] =:= clustered_disc_nodes() of - true -> throw({error, - {standalone_ram_node, - "You can't cluster a node if it's the only " - "disc node in its existing cluster. If new nodes " - "joined while this node was offline, use " - "\"update_cluster_nodes\" to add them manually"}}); + true -> e(clustering_only_disc_node); _ -> ok end, @@ -184,16 +180,14 @@ join_cluster(DiscoveryNode, WantDiscNode) -> end, case lists:member(node(), ClusterNodes) of - true -> throw({error, {already_clustered, - "You are already clustered with the nodes you " - "have selected"}}); + true -> e(already_clustered); false -> ok end, - %% reset the node. this simplifies things and it will be needed in this case - %% - we're joining a new cluster with new nodes which are not in synch with - %% the current node. I also lifts the burden of reseting the node from the - %% user. + %% reset the node. this simplifies things and it will be needed in + %% this case - we're joining a new cluster with new nodes which + %% are not in synch with the current node. I also lifts the burden + %% of reseting the node from the user. reset(false), rabbit_misc:local_info_msg("Clustering with ~p~n", [ClusterNodes]), @@ -224,18 +218,14 @@ reset(Force) -> false -> AllNodes = all_clustered_nodes(), %% Reconnecting so that we will get an up to date nodes. - %% We don't need to check for consistency because we are resetting. - %% Force=true here so that reset still works when clustered with a - %% node which is down. + %% We don't need to check for consistency because we are + %% resetting. Force=true here so that reset still works + %% when clustered with a node which is down. init_db_with_mnesia(AllNodes, is_disc_node(), false, true), case is_disc_and_clustered() andalso [node()] =:= clustered_disc_nodes() of - true -> throw({error, {standalone_ram_node, - "You can't reset a node if it's the " - "only disc node in a cluster. Please " - "convert another node of the cluster " - "to a disc node first."}}); + true -> e(resetting_only_disc_node); false -> ok end, leave_cluster(), @@ -249,40 +239,26 @@ reset(Force) -> ok = rabbit_node_monitor:reset_cluster_status(), ok. -%% We need to make sure that we don't end up in a distributed Erlang system with -%% nodes while not being in an Mnesia cluster with them. We don't handle that -%% well. +%% We need to make sure that we don't end up in a distributed Erlang +%% system with nodes while not being in an Mnesia cluster with +%% them. We don't handle that well. disconnect_nodes(Nodes) -> [erlang:disconnect_node(N) || N <- Nodes]. change_cluster_node_type(Type) -> ensure_mnesia_dir(), ensure_mnesia_not_running(), case is_clustered() of - false -> throw({error, {not_clustered, - "Non-clustered nodes can only be disc nodes"}}); + false -> e(not_clustered); true -> ok end, {_, _, RunningNodes} = case discover_cluster(all_clustered_nodes()) of - {ok, Status} -> - Status; - {error, _Reason} -> - throw({error, - {cannot_connect_to_cluster, - "Could not connect to the cluster nodes present in " - "this node status file. If the cluster has changed, " - "you can use the \"update_cluster_nodes\" command to " - "point to the new cluster nodes"}}) - end, + {ok, Status} -> Status; + {error, _Reason} -> e(cannot_connect_to_cluster) + end, Node = case RunningNodes of - [] -> - throw({error, - {no_online_cluster_nodes, - "Could not find any online cluster nodes. If the " - "cluster has changed, you can use the 'recluster' " - "command."}}); - [Node0|_] -> - Node0 + [] -> e(no_online_cluster_nodes); + [Node0|_] -> Node0 end, ok = reset(false), ok = join_cluster(Node, case Type of @@ -296,81 +272,63 @@ update_cluster_nodes(DiscoveryNode) -> Status = {AllNodes, _, _} = case discover_cluster(DiscoveryNode) of - {ok, Status0} -> - Status0; - {error, _Reason} -> - throw({error, - {cannot_connect_to_node, - "Could not connect to the cluster node provided"}}) + {ok, Status0} -> Status0; + {error, _Reason} -> e(cannot_connect_to_node) end, case ordsets:is_element(node(), AllNodes) of - true -> %% As in `check_consistency/0', we can safely delete the schema - %% here, since it'll be replicated from the other nodes - mnesia:delete_schema([node()]), - rabbit_node_monitor:write_cluster_status(Status), - init_db_with_mnesia(AllNodes, is_disc_node(), false); - false -> throw({error, - {inconsistent_cluster, - "The nodes provided do not have this node as part of " - "the cluster"}}) + true -> + %% As in `check_consistency/0', we can safely delete the + %% schema here, since it'll be replicated from the other + %% nodes + mnesia:delete_schema([node()]), + rabbit_node_monitor:write_cluster_status(Status), + init_db_with_mnesia(AllNodes, is_disc_node(), false); + false -> + e(inconsistent_cluster) end, - ok. -%% We proceed like this: try to remove the node locally. If the node is offline, -%% we remove the node if: +%% We proceed like this: try to remove the node locally. If the node +%% is offline, we remove the node if: %% * This node is a disc node %% * All other nodes are offline -%% * This node was, at the best of our knowledge (see comment below) the last -%% or second to last after the node we're removing to go down +%% * This node was, at the best of our knowledge (see comment below) +%% the last or second to last after the node we're removing to go +%% down forget_cluster_node(Node, RemoveWhenOffline) -> case ordsets:is_element(Node, all_clustered_nodes()) of true -> ok; - false -> throw({error, {not_a_cluster_node, - "The node selected is not in the cluster."}}) + false -> e(not_a_cluster_node) end, case {mnesia:system_info(is_running), RemoveWhenOffline} of - {yes, true} -> throw({error, {online_node_offline_flag, - "You set the --offline flag, which is " - "used to remove nodes remotely from " - "offline nodes, but this node is " - "online. "}}); + {yes, true} -> e(online_node_offline_flag); _ -> ok end, case remove_node_if_mnesia_running(Node) of ok -> ok; + {error, mnesia_not_running} when RemoveWhenOffline -> + remove_node_offline_node(Node); {error, mnesia_not_running} -> - case RemoveWhenOffline of - true -> remove_node_offline_node(Node); - false -> throw({error, - {offline_node_no_offline_flag, - "You are trying to remove a node from an " - "offline node. That's dangerous, but can be " - "done with the --offline flag. Please consult " - "the manual for rabbitmqctl for more " - "information."}}) - end; + e(offline_node_no_offline_flag); Err = {error, _} -> throw(Err) end. remove_node_offline_node(Node) -> - case {ordsets:del_element(Node, - running_nodes(all_clustered_nodes())), - is_disc_node()} - of + case {ordsets:del_element(Node, running_nodes(all_clustered_nodes())), + is_disc_node()} of {[], true} -> - %% Note that while we check if the nodes was the last to go down, - %% apart from the node we're removing from, this is still unsafe. - %% Consider the situation in which A and B are clustered. A goes - %% down, and records B as the running node. Then B gets clustered - %% with C, C goes down and B goes down. In this case, C is the - %% second-to-last, but we don't know that and we'll remove B from A + %% Note that while we check if the nodes was the last to + %% go down, apart from the node we're removing from, this + %% is still unsafe. Consider the situation in which A and + %% B are clustered. A goes down, and records B as the + %% running node. Then B gets clustered with C, C goes down + %% and B goes down. In this case, C is the second-to-last, + %% but we don't know that and we'll remove B from A %% anyway, even if that will lead to bad things. case ordsets:subtract(running_clustered_nodes(), - ordsets:from_list([node(), Node])) - of + ordsets:from_list([node(), Node])) of [] -> start_mnesia(), try [mnesia:force_load_table(T) || @@ -380,20 +338,10 @@ remove_node_offline_node(Node) -> after stop_mnesia() end; - _ -> throw({error, - {not_last_node_to_go_down, - "The node you're trying to remove from was not " - "the last to go down (excluding the node you are " - "removing). Please use the the last node to go " - "down to remove nodes when the cluster is " - "offline."}}) + _ -> e(not_last_node_to_go_down) end; {_, _} -> - throw({error, - {removing_node_from_offline_node, - "To remove a node remotely from an offline node, the node " - "you're removing from must be a disc node and all the " - "other nodes must be offline."}}) + e(removing_node_from_offline_node) end. @@ -420,63 +368,62 @@ is_clustered() -> Nodes = all_clustered_nodes(), [node()] =/= Nodes andalso [] =/= Nodes. -is_disc_and_clustered() -> - is_disc_node() andalso is_clustered(). +is_disc_and_clustered() -> is_disc_node() andalso is_clustered(). -%% Functions that retrieve the nodes in the cluster will rely on the status file -%% if offline. +%% Functions that retrieve the nodes in the cluster will rely on the +%% status file if offline. -all_clustered_nodes() -> - cluster_status(all). +all_clustered_nodes() -> cluster_status(all). -clustered_disc_nodes() -> - cluster_status(disc). +clustered_disc_nodes() -> cluster_status(disc). -clustered_ram_nodes() -> - ordsets:subtract(cluster_status(all), cluster_status(disc)). +clustered_ram_nodes() -> ordsets:subtract(cluster_status(all), + cluster_status(disc)). -running_clustered_nodes() -> - cluster_status(running). +running_clustered_nodes() -> cluster_status(running). running_clustered_disc_nodes() -> {_, DiscNodes, RunningNodes} = cluster_status(), ordsets:intersection(DiscNodes, RunningNodes). -%% This function is the actual source of information, since it gets the data -%% from mnesia. Obviously it'll work only when mnesia is running. +%% This function is the actual source of information, since it gets +%% the data from mnesia. Obviously it'll work only when mnesia is +%% running. mnesia_nodes() -> case mnesia:system_info(is_running) of - no -> {error, mnesia_not_running}; - yes -> %% If the tables are not present, it means that `init_db/3' - %% hasn't been run yet. In other words, either we are a virgin - %% node or a restarted RAM node. In both cases we're not - %% interested in what mnesia has to say. - IsDiscNode = mnesia:system_info(use_dir), - Tables = mnesia:system_info(tables), - {Table, _} = case table_definitions(case IsDiscNode of - true -> disc; - false -> ram - end) of [T|_] -> T end, - case lists:member(Table, Tables) of - true -> - AllNodes = - ordsets:from_list(mnesia:system_info(db_nodes)), - DiscCopies = ordsets:from_list( - mnesia:table_info(schema, disc_copies)), - DiscNodes = - case IsDiscNode of - true -> ordsets:add_element(node(), DiscCopies); - false -> DiscCopies - end, - {ok, {AllNodes, DiscNodes}}; - false -> - {error, tables_not_present} - end + no -> + {error, mnesia_not_running}; + yes -> + %% If the tables are not present, it means that + %% `init_db/3' hasn't been run yet. In other words, either + %% we are a virgin node or a restarted RAM node. In both + %% cases we're not interested in what mnesia has to say. + IsDiscNode = mnesia:system_info(use_dir), + Tables = mnesia:system_info(tables), + {Table, _} = case table_definitions(case IsDiscNode of + true -> disc; + false -> ram + end) of [T|_] -> T end, + case lists:member(Table, Tables) of + true -> + AllNodes = + ordsets:from_list(mnesia:system_info(db_nodes)), + DiscCopies = ordsets:from_list( + mnesia:table_info(schema, disc_copies)), + DiscNodes = + case IsDiscNode of + true -> ordsets:add_element(node(), DiscCopies); + false -> DiscCopies + end, + {ok, {AllNodes, DiscNodes}}; + false -> + {error, tables_not_present} + end end. cluster_status(WhichNodes, ForceMnesia) -> - %% I don't want to call `running_nodes/1' unless if necessary, since it can - %% deadlock when stopping applications. + %% I don't want to call `running_nodes/1' unless if necessary, + %% since it can deadlock when stopping applications. Nodes = case mnesia_nodes() of {ok, {AllNodes, DiscNodes}} -> {ok, {AllNodes, DiscNodes, @@ -484,9 +431,10 @@ cluster_status(WhichNodes, ForceMnesia) -> {error, _Reason} when not ForceMnesia -> {AllNodes, DiscNodes, RunningNodes} = rabbit_node_monitor:read_cluster_status(), - %% The cluster status file records the status when the node - %% is online, but we know for sure that the node is offline - %% now, so we can remove it from the list of running nodes. + %% The cluster status file records the status when + %% the node is online, but we know for sure that + %% the node is offline now, so we can remove it + %% from the list of running nodes. {ok, {AllNodes, DiscNodes, fun() -> ordsets:del_element(node(), RunningNodes) end}}; @@ -509,11 +457,9 @@ cluster_status(WhichNodes) -> {ok, Status} = cluster_status(WhichNodes, false), Status. -cluster_status() -> - cluster_status(status). +cluster_status() -> cluster_status(status). -cluster_status_from_mnesia() -> - cluster_status(status, true). +cluster_status_from_mnesia() -> cluster_status(status, true). node_info() -> {erlang:system_info(otp_release), rabbit_misc:version(), @@ -525,21 +471,22 @@ is_disc_node() -> dir() -> mnesia:system_info(directory). -table_names() -> - [Tab || {Tab, _} <- table_definitions()]. +table_names() -> [Tab || {Tab, _} <- table_definitions()]. %%---------------------------------------------------------------------------- %% Operations on the db %%---------------------------------------------------------------------------- -%% Adds the provided nodes to the mnesia cluster, creating a new schema if there -%% is the need to and catching up if there are other nodes in the cluster -%% already. It also updates the cluster status file. +%% Adds the provided nodes to the mnesia cluster, creating a new +%% schema if there is the need to and catching up if there are other +%% nodes in the cluster already. It also updates the cluster status +%% file. init_db(ClusterNodes, WantDiscNode, Force) -> Nodes = change_extra_db_nodes(ClusterNodes, Force), - %% Note that we use `system_info' here and not the cluster status since when - %% we start rabbit for the first time the cluster status will say we are a - %% disc node but the tables won't be present yet. + %% Note that we use `system_info' here and not the cluster status + %% since when we start rabbit for the first time the cluster + %% status will say we are a disc node but the tables won't be + %% present yet. WasDiscNode = mnesia:system_info(use_dir), case {Nodes, WasDiscNode, WantDiscNode} of {[], _, false} -> @@ -556,11 +503,11 @@ init_db(ClusterNodes, WantDiscNode, Force) -> ensure_version_ok( rpc:call(AnotherNode, rabbit_version, recorded, [])), ok = wait_for_replicated_tables(), - %% The sequence in which we delete the schema and then the other - %% tables is important: if we delete the schema first when moving to - %% RAM mnesia will loudly complain since it doesn't make much sense - %% to do that. But when moving to disc, we need to move the schema - %% first. + %% The sequence in which we delete the schema and then the + %% other tables is important: if we delete the schema + %% first when moving to RAM mnesia will loudly complain + %% since it doesn't make much sense to do that. But when + %% moving to disc, we need to move the schema first. case WantDiscNode of true -> create_local_table_copy(schema, disc_copies), create_local_table_copies(disc); @@ -579,8 +526,8 @@ init_db_and_upgrade(ClusterNodes, WantDiscNode, Force) -> starting_from_scratch -> rabbit_version:record_desired(); version_not_available -> schema_ok_or_move() end, - %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget about the - %% cluster + %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget + %% about the cluster case WantDiscNode of false -> start_mnesia(), change_extra_db_nodes(ClusterNodes, true), @@ -696,8 +643,8 @@ wait_for_tables(TableNames) -> throw({error, {failed_waiting_for_tables, Reason}}) end. -%% This does not guarantee us much, but it avoids some situations that will -%% definitely end up badly +%% This does not guarantee us much, but it avoids some situations that +%% will definitely end up badly check_cluster_consistency() -> %% We want to find 0 or 1 consistent nodes. case lists:foldl( @@ -708,18 +655,21 @@ check_cluster_consistency() -> of {ok, Status = {RemoteAllNodes, _, _}} -> case ordsets:is_subset(all_clustered_nodes(), RemoteAllNodes) of - true -> ok; - false -> %% We delete the schema here since we think we are - %% clustered with nodes that are no longer in the - %% cluster and there is no other way to remove them - %% from our schema. On the other hand, we are sure - %% that there is another online node that we can use - %% to sync the tables with. There is a race here: if - %% between this check and the `init_db' invocation the - %% cluster gets disbanded, we're left with a node with - %% no mnesia data that will try to connect to offline - %% nodes. - mnesia:delete_schema([node()]) + true -> + ok; + false -> + %% We delete the schema here since we think we are + %% clustered with nodes that are no longer in the + %% cluster and there is no other way to remove + %% them from our schema. On the other hand, we are + %% sure that there is another online node that we + %% can use to sync the tables with. There is a + %% race here: if between this check and the + %% `init_db' invocation the cluster gets + %% disbanded, we're left with a node with no + %% mnesia data that will try to connect to offline + %% nodes. + mnesia:delete_schema([node()]) end, rabbit_node_monitor:write_cluster_status(Status); {error, not_found} -> @@ -764,9 +714,7 @@ on_node_down(_Node) -> discover_cluster(Nodes) when is_list(Nodes) -> lists:foldl(fun (_, {ok, Res}) -> {ok, Res}; (Node, {error, _}) -> discover_cluster(Node) - end, - {error, no_nodes_provided}, - Nodes); + end, {error, no_nodes_provided}, Nodes); discover_cluster(Node) -> OfflineError = {error, {cannot_discover_cluster, @@ -776,7 +724,8 @@ discover_cluster(Node) -> {error, {cannot_discover_cluster, "You provided the current node as node to cluster with"}}; false -> - case rpc:call(Node, rabbit_mnesia, cluster_status_from_mnesia, []) of + case rpc:call(Node, + rabbit_mnesia, cluster_status_from_mnesia, []) of {badrpc, _Reason} -> OfflineError; {error, mnesia_not_running} -> OfflineError; {ok, Res} -> {ok, Res} @@ -966,7 +915,8 @@ ensure_version_ok({ok, DiscVersion}) -> ensure_version_ok({error, _}) -> ok = rabbit_version:record_desired(). -%% We only care about disc nodes since ram nodes are supposed to catch up only +%% We only care about disc nodes since ram nodes are supposed to catch +%% up only create_schema() -> stop_mnesia(), rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema), @@ -1039,50 +989,39 @@ create_local_table_copy(Tab, Type) -> remove_node_if_mnesia_running(Node) -> case mnesia:system_info(is_running) of - yes -> %% Deleting the the schema copy of the node will result in the - %% node being removed from the cluster, with that change being - %% propagated to all nodes - case mnesia:del_table_copy(schema, Node) of - {atomic, ok} -> - rabbit_node_monitor:notify_left_cluster(Node), - ok; - {aborted, Reason} -> - {error, {failed_to_remove_node, Node, Reason}} - end; - no -> {error, mnesia_not_running} + yes -> + %% Deleting the the schema copy of the node will result in + %% the node being removed from the cluster, with that + %% change being propagated to all nodes + case mnesia:del_table_copy(schema, Node) of + {atomic, ok} -> + rabbit_node_monitor:notify_left_cluster(Node), + ok; + {aborted, Reason} -> + {error, {failed_to_remove_node, Node, Reason}} + end; + no -> + {error, mnesia_not_running} end. leave_cluster() -> case {is_clustered(), running_nodes(ordsets:del_element(node(), all_clustered_nodes()))} of - {false, []} -> - ok; - {_, AllNodes} -> - case lists:any( - fun (Node) -> - case rpc:call(Node, rabbit_mnesia, - remove_node_if_mnesia_running, - [node()]) - of - ok -> - true; - {error, mnesia_not_running} -> - false; - {error, Reason} -> - throw({error, Reason}); - {badrpc, nodedown} -> - false - end - end, - AllNodes) - of - true -> ok; - false -> throw({error, - {no_running_cluster_nodes, - "You cannot leave a cluster if no online " - "nodes are present"}}) - end + {false, []} -> ok; + {_, AllNodes} -> case lists:any(fun leave_cluster/1, AllNodes) of + true -> ok; + false -> e(no_running_cluster_nodes) + end + end. + +leave_cluster(Node) -> + case rpc:call(Node, + rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of + ok -> true; + {error, mnesia_not_running} -> false; + {error, Reason} -> throw({error, Reason}); + {badrpc, nodedown} -> false end. wait_for(Condition) -> @@ -1114,10 +1053,10 @@ change_extra_db_nodes(ClusterNodes0, Force) -> Nodes end. -%% What we really want is nodes running rabbit, not running mnesia. Using -%% `rabbit_mnesia:system_info(running_db_nodes)' will return false positives -%% when we are actually just doing cluster operations (e.g. joining the -%% cluster). +%% What we really want is nodes running rabbit, not running +%% mnesia. Using `mnesia:system_info(running_db_nodes)' will +%% return false positives when we are actually just doing cluster +%% operations (e.g. joining the cluster). running_nodes(Nodes) -> {Replies, _BadNodes} = rpc:multicall(Nodes, rabbit_mnesia, is_running_remote, []), @@ -1162,12 +1101,13 @@ check_otp_consistency(Remote) -> check_rabbit_consistency(Remote) -> check_version_consistency(rabbit_misc:version(), Remote, "Rabbit"). -%% This is fairly tricky. We want to know if the node is in the state that a -%% `reset' would leave it in. We cannot simply check if the mnesia tables -%% aren't there because restarted RAM nodes won't have tables while still being -%% non-virgin. What we do instead is to check if the mnesia directory is non -%% existant or empty, with the exception of the cluster status file, which will -%% be there thanks to `rabbit_node_monitor:prepare_cluster_status_file/0'. +%% This is fairly tricky. We want to know if the node is in the state +%% that a `reset' would leave it in. We cannot simply check if the +%% mnesia tables aren't there because restarted RAM nodes won't have +%% tables while still being non-virgin. What we do instead is to +%% check if the mnesia directory is non existant or empty, with the +%% exception of the cluster status file, which will be there thanks to +%% `rabbit_node_monitor:prepare_cluster_status_file/0'. is_virgin_node() -> case rabbit_file:list_dir(dir()) of {error, enoent} -> true; @@ -1182,11 +1122,52 @@ find_good_node([]) -> none; find_good_node([Node | Nodes]) -> case rpc:call(Node, rabbit_mnesia, node_info, []) of - {badrpc, _Reason} -> - find_good_node(Nodes); - {OTP, Rabbit, _} -> - case check_consistency(OTP, Rabbit) of - {error, _} -> find_good_node(Nodes); - ok -> {ok, Node} - end + {badrpc, _Reason} -> find_good_node(Nodes); + {OTP, Rabbit, _} -> case check_consistency(OTP, Rabbit) of + {error, _} -> find_good_node(Nodes); + ok -> {ok, Node} + end end. + +e(Tag) -> throw({error, {Tag, error_description(Tag)}}). + +error_description(clustering_only_disc_node) -> + "You cannot cluster a node if it is the only disc node in its existing " + " cluster. If new nodes joined while this node was offline, use " + "\"update_cluster_nodes\" to add them manually."; +error_description(resetting_only_disc_node) -> + "You cannot reset a node when it is the only disc node in a cluster. " + "Please convert another node of the cluster to a disc node first."; +error_description(already_clustered) -> + "You are already clustered with the nodes you have selected."; +error_description(not_clustered) -> + "Non-clustered nodes can only be disc nodes."; +error_description(cannot_connect_to_cluster) -> + "Could not connect to the cluster nodes present in this node's " + "status file. If the cluster has changed, you can use the " + "\"update_cluster_nodes\" command to point to the new cluster nodes."; +error_description(no_online_cluster_nodes) -> + "Could not find any online cluster nodes. If the cluster has changed, " + "you can use the 'recluster' command."; +error_description(cannot_connect_to_node) -> + "Could not connect to the cluster node provided."; +error_description(inconsistent_cluster) -> + "The nodes provided do not have this node as part of the cluster."; +error_description(not_a_cluster_node) -> + "The node selected is not in the cluster."; +error_description(online_node_offline_flag) -> + "You set the --offline flag, which is used to remove nodes remotely from " + "offline nodes, but this node is online."; +error_description(offline_node_no_offline_flag) -> + "You are trying to remove a node from an offline node. That is dangerous, " + "but can be done with the --offline flag. Please consult the manual " + "for rabbitmqctl for more information."; +error_description(not_last_node_to_go_down) -> + "The node you're trying to remove from was not the last to go down " + "(excluding the node you are removing). Please use the the last node " + "to go down to remove nodes when the cluster is offline."; +error_description(removing_node_from_offline_node) -> + "To remove a node remotely from an offline node, the node you're removing " + "from must be a disc node and all the other nodes must be offline."; +error_description(no_running_cluster_nodes) -> + "You cannot leave a cluster if no online nodes are present.". |