From 79abe0891a9ce0e240c82d49bafe6d224482501a Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 27 Nov 2014 12:59:49 +0000 Subject: Rearrange things a bit. We now have something that seems to work. Merging a modified database back into a running cluster seems to be a bit of a non-starter; Mnesia blows up with "Failed to merge schema: Incompatible schema cookies." So instead we do something similar to what we do for upgrades: we always create the modified backup, but only load it if we are the first node in the cluster to start; if not, we reset our mnesia state, resync with the cluster, then tell the cluster to forget our old incarnation and update its queue records for the new one. This also has the advantage that we don't need two rename commands; we just have the local offline one, which is hopefully nicer operationally. It does mean that we go to the trouble of taking the backup and modifying it only to throw it away if we turn out to be a secondary node, but it should not be too expensive. 
--- src/rabbit_control_main.erl | 8 ------ src/rabbit_mnesia_offline.erl | 62 +++++++++++++++++++++++++++++++++++-------- src/rabbit_upgrade.erl | 6 +++-- 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/src/rabbit_control_main.erl b/src/rabbit_control_main.erl index fc049da1..b19971fc 100644 --- a/src/rabbit_control_main.erl +++ b/src/rabbit_control_main.erl @@ -41,7 +41,6 @@ update_cluster_nodes, {forget_cluster_node, [?OFFLINE_DEF]}, rename_current_node, - rename_other_node, force_boot, cluster_status, {sync_queue, [?VHOST_DEF]}, @@ -242,13 +241,6 @@ action(rename_current_node, _Node, [FromNodeS, ToNodeS], _Opts, Inform) -> Inform("Renaming local cluster node ~s to ~s", [FromNode, ToNode]), rabbit_mnesia_offline:rename_local_node(FromNode, ToNode); -action(rename_other_node, Node, [FromNodeS, ToNodeS], _Opts, Inform) -> - FromNode = list_to_atom(FromNodeS), - ToNode = list_to_atom(ToNodeS), - Inform("Renaming remote cluster node ~s to ~s", [FromNode, ToNode]), - rpc_call(Node, rabbit_mnesia_offline, rename_remote_node, - [FromNode, ToNode]); - action(force_boot, Node, [], _Opts, Inform) -> Inform("Forcing boot for Mnesia dir ~s", [mnesia:system_info(directory)]), case rabbit:is_running(Node) of diff --git a/src/rabbit_mnesia_offline.erl b/src/rabbit_mnesia_offline.erl index 3d9d8642..ae72462e 100644 --- a/src/rabbit_mnesia_offline.erl +++ b/src/rabbit_mnesia_offline.erl @@ -15,9 +15,11 @@ %% -module(rabbit_mnesia_offline). +-include("rabbit.hrl"). -export([rename_local_node/2]). -export([rename_remote_node/2]). +-export([maybe_complete_rename/2]). 
%%---------------------------------------------------------------------------- @@ -43,17 +45,16 @@ rename_local_node(FromNode, ToNode) -> end, rabbit_table:force_load(), rabbit_table:wait_for_replicated(), - FromBackup = rabbit_mnesia:dir() ++ "/rename-backup-from", - ToBackup = rabbit_mnesia:dir() ++ "/rename-backup-to", + FromBackup = from_backup_name(), + ToBackup = to_backup_name(), io:format(" * Backing up to '~s'~n", [FromBackup]), ok = mnesia:backup(FromBackup), stop_mnesia(), rabbit_control_main:become(ToNode), io:format(" * Converting backup '~s'~n", [ToBackup]), convert_backup(FromNode, ToNode, FromBackup, ToBackup), - ok = mnesia:install_fallback(ToBackup, [{scope, local}]), - io:format(" * Loading backup '~s'~n", [ToBackup]), - start_mnesia(), + ok = rabbit_file:write_term_file(rename_config_name(), + [{FromNode, ToNode}]), io:format(" * Converting config files~n", []), convert_config_file(FromNode, ToNode, rabbit_node_monitor:running_nodes_filename()), @@ -64,6 +65,42 @@ rename_local_node(FromNode, ToNode) -> stop_mnesia() end. 
+maybe_complete_rename(primary, _AllNodes) -> + case rabbit_file:read_term_file(rename_config_name()) of + {ok, [{_FromNode, _ToNode}]} -> + %% We are alone, restore the backup we previously took + ToBackup = to_backup_name(), + io:format(" * Loading backup '~s'~n", [ToBackup]), + ok = mnesia:install_fallback(ToBackup, [{scope, local}]), + start_mnesia(), + stop_mnesia(), + rabbit_file:delete(rename_config_name()), + rabbit_file:delete(from_backup_name()), + rabbit_file:delete(to_backup_name()), + ok; + _ -> + ok + end; + +maybe_complete_rename(secondary, AllNodes) -> + case rabbit_file:read_term_file(rename_config_name()) of + {ok, [{FromNode, ToNode}]} -> + rabbit_upgrade:secondary_upgrade(AllNodes), + [Another | _] = rabbit_mnesia:cluster_nodes(running) -- [node()], + ok = rpc:call(Another, ?MODULE, rename_remote_node, + [FromNode, ToNode]), + rabbit_file:delete(rename_config_name()), + rabbit_file:delete(from_backup_name()), + rabbit_file:delete(to_backup_name()), + ok; + _ -> + ok + end. + +from_backup_name() -> rabbit_mnesia:dir() ++ "/rename-backup-from". +to_backup_name() -> rabbit_mnesia:dir() ++ "/rename-backup-to". +rename_config_name() -> rabbit_mnesia:dir() ++ "/rename-pending.config". + start_mnesia() -> rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia). stop_mnesia() -> stopped = mnesia:stop(). 
@@ -138,12 +175,15 @@ rename_remote_node(FromNode, ToNode) -> case {lists:member(FromNode, All), lists:member(FromNode, Running), lists:member(ToNode, All)} of - {true, false, false} -> ok; - {false, _, _} -> exit({node_not_in_cluster, FromNode}); - {_, true, _} -> exit({node_running, FromNode}); - {_, _, true} -> exit({node_already_in_cluster, ToNode}) + {true, false, true} -> ok; + {false, _, _} -> exit({old_node_not_in_cluster, FromNode}); + {_, true, _} -> exit({old_node_running, FromNode}); + {_, _, false} -> exit({new_node_not_in_cluster, ToNode}) end, mnesia:del_table_copy(schema, FromNode), - mnesia:change_config(extra_db_nodes, [ToNode]), - mnesia:add_table_copy(schema, ToNode, ram_copies), + {atomic, ok} = mnesia:transform_table( + rabbit_durable_queue, + fun (Q) -> update_term(FromNode, ToNode, Q) end, + record_info(fields, amqqueue)), ok. + diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index 72bf7855..420aa205 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -16,7 +16,7 @@ -module(rabbit_upgrade). --export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0]). +-export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0, secondary_upgrade/1]). -include("rabbit.hrl"). @@ -122,6 +122,8 @@ remove_backup() -> maybe_upgrade_mnesia() -> AllNodes = rabbit_mnesia:cluster_nodes(all), + Mode = upgrade_mode(AllNodes), + ok = rabbit_mnesia_offline:maybe_complete_rename(Mode, AllNodes), case rabbit_version:upgrades_required(mnesia) of {error, starting_from_scratch} -> ok; @@ -138,7 +140,7 @@ maybe_upgrade_mnesia() -> ok; {ok, Upgrades} -> ensure_backup_taken(), - ok = case upgrade_mode(AllNodes) of + ok = case Mode of primary -> primary_upgrade(Upgrades, AllNodes); secondary -> secondary_upgrade(AllNodes) end -- cgit v1.2.1