diff options
author | Alexandru Scvortov <alexandru@rabbitmq.com> | 2011-07-20 16:53:17 +0100 |
---|---|---|
committer | Alexandru Scvortov <alexandru@rabbitmq.com> | 2011-07-20 16:53:17 +0100 |
commit | 287f643f8c443f4d019f5a14a09d1dbd94baa4e7 (patch) | |
tree | 304f1ef0fef74a0f423a6e642d8337eb570b4c52 | |
parent | c2bc79c3aa64213305e79d61895127d3d0fff507 (diff) | |
download | rabbitmq-server-287f643f8c443f4d019f5a14a09d1dbd94baa4e7.tar.gz |
always reset and preemptively leave clusters
Always back up and reset during clustering if the node type has
changed. Worst case, it ensures that we actually have an empty mnesia
dir with new ram nodes.
Also, preemptively leave a cluster before joining it. Suppose we had
a two-node cluster, the first node goes down, the second hard resets,
the first node comes back up, the second node tries to rejoin the
cluster with a different type. Since it hard-reset, it doesn't know
that it used to be part of the cluster, and the other node is unaware
that our node is supposed to have left the cluster. So, when
clustering, we always try to leave a cluster before joining it.
In leave_cluster/2, I added {aborted, {node_not_running, _}} to the
"not error" returns, because it looks similar to {badrpc, nodedown},
which was already there. This may be wrong.
-rw-r--r-- | src/rabbit_mnesia.erl | 35 |
1 files changed, 23 insertions, 12 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 6b901eae..56bd2bfa 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -118,18 +118,27 @@ cluster(ClusterNodes, Force) -> ensure_mnesia_not_running(), ensure_mnesia_dir(), - %% Reset the node if we're in a cluster and have just changed node type - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - AllClusteredNodes = - lists:usort(all_clustered_nodes() ++ - read_cluster_nodes_config()) -- [node()], - mnesia:stop(), - case {AllClusteredNodes =/= [], - is_disc_node() =/= should_be_disc_node(ClusterNodes)} of - {true, true} -> error_logger:warning_msg("changing node type; " - "resetting...~n"), - reset(); - {_, _} -> ok + %% Reset the node if we've just changed node type + case {is_disc_node(), should_be_disc_node(ClusterNodes)} of + {true, false} -> error_logger:warning_msg( + "changing node type; backing up db and " + "resetting...~n"), + ok = move_db(), + mnesia:stop(), + reset(); + _ -> ok + end, + + %% Pre-emtively leave the cluster (in case we had been part of it + %% and force_reseted) + ProperClusterNodes = ClusterNodes -- [node()], + try leave_cluster(ProperClusterNodes, ProperClusterNodes) of + ok -> ok + catch + throw:({error, {no_running_cluster_nodes, _, _}} = E) -> + if Force -> ok; + true -> throw(E) + end end, %% Join the cluster @@ -496,6 +505,7 @@ init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) -> false -> {ram, ram_copies} end, ok = wait_for_replicated_tables(), + ok = create_local_table_copy(schema, CopyTypeAlt), ok = create_local_table_copies(CopyType), ok = SecondaryPostMnesiaFun(), @@ -722,6 +732,7 @@ leave_cluster(Nodes, RunningNodes) -> [schema, node()]) of {atomic, ok} -> true; {badrpc, nodedown} -> false; + {aborted, {node_not_running, _}} -> false; {aborted, Reason} -> throw({error, {failed_to_leave_cluster, Nodes, RunningNodes, Reason}}) |