summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexandru Scvortov <alexandru@rabbitmq.com> 2011-07-20 16:53:17 +0100
committer: Alexandru Scvortov <alexandru@rabbitmq.com> 2011-07-20 16:53:17 +0100
commit: 287f643f8c443f4d019f5a14a09d1dbd94baa4e7 (patch)
tree: 304f1ef0fef74a0f423a6e642d8337eb570b4c52
parent: c2bc79c3aa64213305e79d61895127d3d0fff507 (diff)
download: rabbitmq-server-287f643f8c443f4d019f5a14a09d1dbd94baa4e7.tar.gz
always reset and preemptively leave clusters
Always back up and reset during clustering if the node type has changed. In the worst case, this ensures that new ram nodes actually start with an empty mnesia dir. Also, pre-emptively leave a cluster before joining it. Suppose we had a two-node cluster: the first node goes down, the second hard-resets, the first node comes back up, and the second node then tries to rejoin the cluster with a different type. Since it hard-reset, the second node doesn't know that it used to be part of the cluster, and the first node is unaware that the second node is supposed to have left the cluster. So, when clustering, we always try to leave a cluster before joining it. In leave_cluster/2, I added {aborted, {node_not_running, _}} to the "not error" returns, because it looks similar to {badrpc, nodedown}, which was already there. This may be wrong.
-rw-r--r-- src/rabbit_mnesia.erl 35
1 files changed, 23 insertions, 12 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 6b901eae..56bd2bfa 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -118,18 +118,27 @@ cluster(ClusterNodes, Force) ->
ensure_mnesia_not_running(),
ensure_mnesia_dir(),
- %% Reset the node if we're in a cluster and have just changed node type
- rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
- AllClusteredNodes =
- lists:usort(all_clustered_nodes() ++
- read_cluster_nodes_config()) -- [node()],
- mnesia:stop(),
- case {AllClusteredNodes =/= [],
- is_disc_node() =/= should_be_disc_node(ClusterNodes)} of
- {true, true} -> error_logger:warning_msg("changing node type; "
- "resetting...~n"),
- reset();
- {_, _} -> ok
+ %% Reset the node if we've just changed node type
+ case {is_disc_node(), should_be_disc_node(ClusterNodes)} of
+ {true, false} -> error_logger:warning_msg(
+ "changing node type; backing up db and "
+ "resetting...~n"),
+ ok = move_db(),
+ mnesia:stop(),
+ reset();
+ _ -> ok
+ end,
+
+ %% Pre-emptively leave the cluster (in case we had been part of it
+ %% and had since been force_reset)
+ ProperClusterNodes = ClusterNodes -- [node()],
+ try leave_cluster(ProperClusterNodes, ProperClusterNodes) of
+ ok -> ok
+ catch
+ throw:({error, {no_running_cluster_nodes, _, _}} = E) ->
+ if Force -> ok;
+ true -> throw(E)
+ end
end,
%% Join the cluster
@@ -496,6 +505,7 @@ init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) ->
false -> {ram, ram_copies}
end,
ok = wait_for_replicated_tables(),
+
ok = create_local_table_copy(schema, CopyTypeAlt),
ok = create_local_table_copies(CopyType),
ok = SecondaryPostMnesiaFun(),
@@ -722,6 +732,7 @@ leave_cluster(Nodes, RunningNodes) ->
[schema, node()]) of
{atomic, ok} -> true;
{badrpc, nodedown} -> false;
+ {aborted, {node_not_running, _}} -> false;
{aborted, Reason} ->
throw({error, {failed_to_leave_cluster,
Nodes, RunningNodes, Reason}})