diff options
-rw-r--r-- | docs/rabbitmqctl.1.xml | 30 |
-rw-r--r-- | src/rabbit_control.erl | 6 |
-rw-r--r-- | src/rabbit_mnesia.erl | 98 |
-rw-r--r-- | src/rabbit_tests.erl | 36 |
4 files changed, 117 insertions(+), 53 deletions(-)
diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index a2038cf0..26863ae7 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -270,8 +270,8 @@ <title>Cluster management</title> <variablelist> - <varlistentry> - <term><cmdsynopsis><command>cluster</command> <arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <varlistentry id="cluster"> + <term><cmdsynopsis><command>cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> @@ -281,7 +281,8 @@ </variablelist> <para> Instruct the node to become member of a cluster with the - specified nodes. + specified nodes. To cluster with currently offline nodes, + use <link linkend="force_cluster"><command>force_cluster</command></link>. </para> <para> Cluster nodes can be of two types: disk or ram. Disk nodes @@ -334,6 +335,29 @@ </para> </listitem> </varlistentry> + <varlistentry id="force_cluster"> + <term><cmdsynopsis><command>force_cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>clusternode</term> + <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem> + </varlistentry> + </variablelist> + <para> + Instruct the node to become member of a cluster with the + specified nodes. This will succeed even if the specified nodes + are offline. For a more detailed description, see + <link linkend="cluster"><command>cluster</command>.</link> + </para> + <para> + Note that this variant of the cluster command just + ignores the current status of the specified nodes. + Clustering may still fail for a variety of other + reasons. 
+ </para> + </listitem> + </varlistentry> </variablelist> </refsect2> diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 95a49f86..6e6ad06c 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -160,6 +160,12 @@ action(cluster, Node, ClusterNodeSs, Inform) -> [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); +action(force_cluster, Node, ClusterNodeSs, Inform) -> + ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), + Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", + [Node, ClusterNodes]), + rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); + action(status, Node, [], Inform) -> Inform("Status of node ~p", [Node]), case call(Node, {rabbit, status, []}) of diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 5c14ba7b..e2b6927f 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -33,8 +33,8 @@ -module(rabbit_mnesia). -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, reset/0, force_reset/0, is_clustered/0, - empty_ram_only_tables/0]). + cluster/1, force_cluster/1, reset/0, force_reset/0, + is_clustered/0, empty_ram_only_tables/0]). -export([table_names/0]). @@ -58,6 +58,8 @@ -spec(init/0 :: () -> 'ok'). -spec(is_db_empty/0 :: () -> boolean()). -spec(cluster/1 :: ([node()]) -> 'ok'). +-spec(force_cluster/1 :: ([node()]) -> 'ok'). +-spec(cluster/2 :: ([node()], boolean()) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). -spec(is_clustered/0 :: () -> boolean()). @@ -88,7 +90,7 @@ status() -> init() -> ok = ensure_mnesia_running(), ok = ensure_mnesia_dir(), - ok = init_db(read_cluster_nodes_config()), + ok = init_db(read_cluster_nodes_config(), true), ok = wait_for_tables(), ok. @@ -96,16 +98,22 @@ is_db_empty() -> lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, table_names()). +cluster(ClusterNodes) -> + cluster(ClusterNodes, false). 
+force_cluster(ClusterNodes) -> + cluster(ClusterNodes, true). + %% Alter which disk nodes this node is clustered with. This can be a %% subset of all the disk nodes in the cluster but can (and should) %% include the node itself if it is to be a disk rather than a ram -%% node. -cluster(ClusterNodes) -> +%% node. If Force is false, only connections to online nodes are +%% allowed. +cluster(ClusterNodes, Force) -> ok = ensure_mnesia_not_running(), ok = ensure_mnesia_dir(), rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), try - ok = init_db(ClusterNodes), + ok = init_db(ClusterNodes, Force), ok = wait_for_tables(), ok = create_cluster_nodes_config(ClusterNodes) after @@ -277,38 +285,56 @@ delete_cluster_nodes_config() -> %% Take a cluster node config and create the right kind of node - a %% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. -init_db(ClusterNodes) -> - case mnesia:change_config(extra_db_nodes, ClusterNodes -- [node()]) of - {ok, []} -> - case mnesia:system_info(use_dir) of - true -> - case check_schema_integrity() of - ok -> - ok; - {error, Reason} -> - %% NB: we cannot use rabbit_log here since - %% it may not have been started yet - error_logger:warning_msg( - "schema integrity check failed: ~p~n" - "moving database to backup location " - "and recreating schema from scratch~n", - [Reason]), - ok = move_db(), +%% specified cluster nodes. If Force is false, don't allow +%% connections to offline nodes. 
+init_db(ClusterNodes, Force) -> + UClusterNodes = lists:usort(ClusterNodes), + ProperClusterNodes = UClusterNodes -- [node()], + case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of + {ok, Nodes} -> + case Force of + false -> + FailedClusterNodes = ProperClusterNodes -- Nodes, + case FailedClusterNodes of + [] -> ok; + _ -> + throw({error, {failed_to_cluster_with, + FailedClusterNodes, + "Mnesia could not connect to some nodes."}}) + end; + _ -> ok + end, + case Nodes of + [] -> + case mnesia:system_info(use_dir) of + true -> + case check_schema_integrity() of + ok -> + ok; + {error, Reason} -> + %% NB: we cannot use rabbit_log here since + %% it may not have been started yet + error_logger:warning_msg( + "schema integrity check failed: ~p~n" + "moving database to backup location " + "and recreating schema from scratch~n", + [Reason]), + ok = move_db(), + ok = create_schema() + end; + false -> ok = create_schema() end; - false -> - ok = create_schema() - end; - {ok, [_|_]} -> - IsDiskNode = ClusterNodes == [] orelse - lists:member(node(), ClusterNodes), - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, disc_copies), - ok = create_local_table_copies(case IsDiskNode of - true -> disc; - false -> ram - end); + [_|_] -> + IsDiskNode = ClusterNodes == [] orelse + lists:member(node(), ClusterNodes), + ok = wait_for_replicated_tables(), + ok = create_local_table_copy(schema, disc_copies), + ok = create_local_table_copies(case IsDiskNode of + true -> disc; + false -> ram + end) + end; {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7fbbc1ea..ff7c07e3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -597,19 +597,19 @@ test_cluster_management() -> ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, 
Arg), ok end, ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -617,7 +617,7 @@ test_cluster_management() -> ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -628,13 +628,13 @@ test_cluster_management() -> ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% join a non-existing cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), SecondaryNode = rabbit_misc:makenode("hare"), case net_adm:ping(SecondaryNode) of @@ -659,18 +659,26 @@ test_cluster_management2(SecondaryNode) -> %% join cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS, "invalid1@invalid"]), + ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% change cluster config while remaining in same cluster - ok = control_action(cluster, ["invalid2@invalid", SecondaryNodeS]), + ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% join non-existing cluster as a ram node - ok = 
control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + + %% join empty cluster as a ram node + ok = control_action(cluster, []), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + %% turn ram node into disk node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS, NodeS]), @@ -678,8 +686,8 @@ test_cluster_management2(SecondaryNode) -> ok = control_action(stop_app, []), %% convert a disk node into a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% turn a disk node into a ram node ok = control_action(reset, []), |