summary refs log tree commit diff
diff options
context:
space:
mode:
author Tim Watson <tim@rabbitmq.com> 2012-08-14 05:15:56 -0400
committer Tim Watson <tim@rabbitmq.com> 2012-08-14 05:15:56 -0400
commit b68b1f91598084862b2e0b109c43522e4dca6b85 (patch)
tree b7201957cde448aa2909c4b25b6538638324debb
parent 3ae88950fdadad18d604e3d22d43846f194cce94 (diff)
download rabbitmq-server-b68b1f91598084862b2e0b109c43522e4dca6b85.tar.gz
Backport 5e80230e2225 (Merge of bug25094; unclustering ram nodes broken)
-rw-r--r-- src/rabbit_mnesia.erl 26
-rw-r--r-- src/rabbit_tests.erl 58
2 files changed, 51 insertions, 33 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index c714d3a7..73ea02c9 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -726,40 +726,42 @@ reset(Force) ->
end]),
ensure_mnesia_not_running(),
case not Force andalso is_clustered() andalso
- is_only_disc_node(node(), false)
+ is_only_disc_node(node(), false)
of
true -> log_both("no other disc nodes running");
false -> ok
end,
- Node = node(),
- Nodes = all_clustered_nodes() -- [Node],
case Force of
- true -> ok;
+ true ->
+ disconnect_nodes(nodes());
false ->
ensure_mnesia_dir(),
start_mnesia(),
- RunningNodes =
+ {Nodes, RunningNodes} =
try
%% Force=true here so that reset still works when clustered
%% with a node which is down
ok = init_db(read_cluster_nodes_config(), true),
- running_clustered_nodes() -- [Node]
+ {all_clustered_nodes() -- [node()],
+ running_clustered_nodes() -- [node()]}
after
stop_mnesia()
end,
leave_cluster(Nodes, RunningNodes),
- rabbit_misc:ensure_ok(mnesia:delete_schema([Node]),
- cannot_delete_schema)
+ rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+ cannot_delete_schema),
+ disconnect_nodes(Nodes)
end,
- %% We need to make sure that we don't end up in a distributed
- %% Erlang system with nodes while not being in an Mnesia cluster
- %% with them. We don't handle that well.
- [erlang:disconnect_node(N) || N <- Nodes],
ok = delete_cluster_nodes_config(),
%% remove persisted messages and any other garbage we find
ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")),
ok.
+%% We need to make sure that we don't end up in a distributed Erlang
+%% system with nodes while not being in an Mnesia cluster with
+%% them. We don't handle that well.
+disconnect_nodes(Nodes) -> [erlang:disconnect_node(N) || N <- Nodes].
+
leave_cluster([], _) -> ok;
leave_cluster(Nodes, RunningNodes) ->
%% find at least one running cluster node and instruct it to
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 04ee6ef2..91465cae 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -72,12 +72,10 @@ maybe_run_cluster_dependent_tests() ->
run_cluster_dependent_tests(SecondaryNode) ->
SecondaryNodeS = atom_to_list(SecondaryNode),
- cover:stop(SecondaryNode),
ok = control_action(stop_app, []),
- ok = control_action(reset, []),
+ ok = safe_reset(),
ok = control_action(cluster, [SecondaryNodeS]),
ok = control_action(start_app, []),
- cover:start(SecondaryNode),
ok = control_action(start_app, SecondaryNode, [], []),
io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]),
@@ -908,7 +906,7 @@ test_cluster_management2(SecondaryNode) ->
ok = assert_ram_node(),
%% join cluster as a ram node
- ok = control_action(reset, []),
+ ok = safe_reset(),
ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]),
ok = control_action(start_app, []),
ok = control_action(stop_app, []),
@@ -965,29 +963,30 @@ test_cluster_management2(SecondaryNode) ->
ok = assert_disc_node(),
%% turn a disk node into a ram node
- ok = control_action(reset, []),
+ %%
+ %% can't use safe_reset here since for some reason nodes()==[] and
+ %% yet w/o stopping coverage things break
+ with_suspended_cover(
+ [SecondaryNode], fun () -> ok = control_action(reset, []) end),
ok = control_action(cluster, [SecondaryNodeS]),
ok = control_action(start_app, []),
ok = control_action(stop_app, []),
ok = assert_ram_node(),
%% NB: this will log an inconsistent_database error, which is harmless
- %% Turning cover on / off is OK even if we're not in general using cover,
- %% it just turns the engine on / off, doesn't actually log anything.
- cover:stop([SecondaryNode]),
- true = disconnect_node(SecondaryNode),
- pong = net_adm:ping(SecondaryNode),
- cover:start([SecondaryNode]),
+ with_suspended_cover(
+ [SecondaryNode], fun () ->
+ true = disconnect_node(SecondaryNode),
+ pong = net_adm:ping(SecondaryNode)
+ end),
%% leaving a cluster as a ram node
- ok = control_action(reset, []),
+ ok = safe_reset(),
%% ...and as a disk node
ok = control_action(cluster, [SecondaryNodeS, NodeS]),
ok = control_action(start_app, []),
ok = control_action(stop_app, []),
- cover:stop(SecondaryNode),
- ok = control_action(reset, []),
- cover:start(SecondaryNode),
+ ok = safe_reset(),
%% attempt to leave cluster when no other node is alive
ok = control_action(cluster, [SecondaryNodeS, NodeS]),
@@ -1002,22 +1001,39 @@ test_cluster_management2(SecondaryNode) ->
control_action(cluster, [SecondaryNodeS]),
%% leave system clustered, with the secondary node as a ram node
- ok = control_action(force_reset, []),
+ with_suspended_cover(
+ [SecondaryNode], fun () -> ok = control_action(force_reset, []) end),
ok = control_action(start_app, []),
%% Yes, this is rather ugly. But since we're a clustered Mnesia
%% node and we're telling another clustered node to reset itself,
%% we will get disconnected half way through causing a
%% badrpc. This never happens in real life since rabbitmqctl is
- %% not a clustered Mnesia node.
- cover:stop(SecondaryNode),
- {badrpc, nodedown} = control_action(force_reset, SecondaryNode, [], []),
- pong = net_adm:ping(SecondaryNode),
- cover:start(SecondaryNode),
+ %% not a clustered Mnesia node and is a hidden node.
+ with_suspended_cover(
+ [SecondaryNode],
+ fun () ->
+ {badrpc, nodedown} =
+ control_action(force_reset, SecondaryNode, [], []),
+ pong = net_adm:ping(SecondaryNode)
+ end),
ok = control_action(cluster, SecondaryNode, [NodeS], []),
ok = control_action(start_app, SecondaryNode, [], []),
passed.
+%% 'cover' does not cope at all well with nodes disconnecting, which
+%% happens as part of reset. So we turn it off temporarily. That is ok
+%% even if we're not in general using cover, it just turns the engine
+%% on / off and doesn't log anything.
+safe_reset() -> with_suspended_cover(
+ nodes(), fun () -> control_action(reset, []) end).
+
+with_suspended_cover(Nodes, Fun) ->
+ cover:stop(Nodes),
+ Res = Fun(),
+ cover:start(Nodes),
+ Res.
+
test_user_management() ->
%% lots if stuff that should fail