summary refs log tree commit diff
diff options
context:
space:
mode:
author Tim Watson <tim@rabbitmq.com> 2012-11-08 14:52:05 +0000
committer Tim Watson <tim@rabbitmq.com> 2012-11-08 14:52:05 +0000
commit 0c9e49560393811e6137ea9e5c454ed10ec5760b (patch)
tree bed9e5d125745494ec535402a71021a6fddff451
parent 1a4262f574568e17a8702ff6772a7ca8eed8c2d6 (diff)
download rabbitmq-server-0c9e49560393811e6137ea9e5c454ed10ec5760b.tar.gz
introduce a noop process at the head of rabbit_sup's children,
which we now monitor instead of the rabbit application's pid
-rw-r--r-- src/rabbit.erl 22
-rw-r--r-- src/rabbit_node_monitor.erl 58
2 files changed, 51 insertions, 29 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl
index c52c296a..8c13224f 100644
--- a/src/rabbit.erl
+++ b/src/rabbit.erl
@@ -21,7 +21,7 @@
-export([start/0, boot/0, stop/0,
stop_and_halt/0, await_startup/0, status/0, is_running/0,
is_running/1, environment/0, rotate_logs/1, force_event_refresh/0,
- start_fhc/0]).
+ start_fhc/0, start_app_marker/1, hibernate/0]).
-export([start/2, stop/1]).
@@ -174,10 +174,15 @@
[{mfa, {rabbit_networking, boot, []}},
{requires, log_relay}]}).
+-rabbit_boot_step({app_running,
+ [{description, "cluster membership"},
+ {mfa, {rabbit, start_app_marker, [boot]}},
+ {requires, networking}]}).
+
-rabbit_boot_step({notify_cluster,
[{description, "notify cluster nodes"},
{mfa, {rabbit_node_monitor, notify_node_up, []}},
- {requires, networking}]}).
+ {requires, app_running}]}).
%%---------------------------------------------------------------------------
@@ -770,3 +775,16 @@ start_fhc() ->
rabbit_sup:start_restartable_child(
file_handle_cache,
[fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]).
+
+start_app_marker(boot) ->
+ supervisor:start_child(rabbit_sup,
+ {rabbit_app, {?MODULE, start_app_marker, [spawn]},
+ transient, ?MAX_WAIT, worker, [?MODULE]});
+start_app_marker(spawn) ->
+ Pid = spawn_link(fun() -> erlang:hibernate(?MODULE, hibernate, []) end),
+ register(rabbit_running, Pid),
+ {ok, Pid}.
+
+hibernate() ->
+ erlang:hibernate(?MODULE, hibernate, []).
+
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index b11c9d04..ec2f8159 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -85,10 +85,10 @@ cluster_status_filename() ->
prepare_cluster_status_files() ->
rabbit_mnesia:ensure_mnesia_dir(),
- CorruptFiles = fun () -> throw({error, corrupt_cluster_status_files}) end,
+ Corrupt = fun(F) -> throw({error, corrupt_cluster_status_files, F}) end,
RunningNodes1 = case try_read_file(running_nodes_filename()) of
{ok, [Nodes]} when is_list(Nodes) -> Nodes;
- {ok, _ } -> CorruptFiles();
+ {ok, Other} -> Corrupt(Other);
{error, enoent} -> []
end,
ThisNode = [node()],
@@ -102,8 +102,8 @@ prepare_cluster_status_files() ->
{ok, [AllNodes0]} when is_list(AllNodes0) ->
{legacy_cluster_nodes(AllNodes0),
legacy_should_be_disc_node(AllNodes0)};
- {ok, _} ->
- CorruptFiles();
+ {ok, Files} ->
+ Corrupt(Files);
{error, enoent} ->
{legacy_cluster_nodes([]), true}
end,
@@ -114,7 +114,7 @@ prepare_cluster_status_files() ->
end,
ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}).
-write_cluster_status({All, Disc, Running}) ->
+write_cluster_status({All, Disc, Running}=St) ->
ClusterStatusFN = cluster_status_filename(),
Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of
ok ->
@@ -134,8 +134,8 @@ read_cluster_status() ->
try_read_file(running_nodes_filename())} of
{{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) ->
{All, Disc, Running};
- {_, _} ->
- throw({error, corrupt_or_missing_cluster_files})
+ {Stat, Run} ->
+ throw({error, {corrupt_or_missing_cluster_files, Stat, Run}})
end.
update_cluster_status() ->
@@ -199,44 +199,48 @@ handle_call(_Request, _From, State) ->
%% mnesia propagation.
handle_cast({node_up, Node, NodeType},
State = #state{monitors = Monitors}) ->
- case pmon:is_monitored({rabbit, Node}, Monitors) of
+ case pmon:is_monitored({rabbit_running, Node}, Monitors) of
true -> {noreply, State};
false -> rabbit_log:info("rabbit on node ~p up~n", [Node]),
{AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
- write_cluster_status({add_node(Node, AllNodes),
- case NodeType of
- disc -> add_node(Node, DiscNodes);
- ram -> DiscNodes
- end,
- add_node(Node, RunningNodes)}),
+ ok = write_cluster_status({add_node(Node, AllNodes),
+ case NodeType of
+ disc -> add_node(Node, DiscNodes);
+ ram -> DiscNodes
+ end,
+ add_node(Node, RunningNodes)}),
ok = handle_live_rabbit(Node),
- {noreply, State#state{
- monitors = pmon:monitor({rabbit, Node}, Monitors)}}
+ {noreply,
+ State#state{
+ monitors = pmon:monitor({rabbit_running, Node}, Monitors)}}
end;
handle_cast({joined_cluster, Node, NodeType}, State) ->
{AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
- write_cluster_status({add_node(Node, AllNodes),
- case NodeType of
- disc -> add_node(Node, DiscNodes);
- ram -> DiscNodes
- end,
- RunningNodes}),
+ ok = write_cluster_status({add_node(Node, AllNodes),
+ case NodeType of
+ disc -> add_node(Node, DiscNodes);
+ ram -> DiscNodes
+ end,
+ RunningNodes}),
{noreply, State};
handle_cast({left_cluster, Node}, State) ->
{AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
- write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes),
- del_node(Node, RunningNodes)}),
+ ok = write_cluster_status({del_node(Node, AllNodes),
+ del_node(Node, DiscNodes),
+ del_node(Node, RunningNodes)}),
{noreply, State};
handle_cast(_Msg, State) ->
{noreply, State}.
-handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason},
+handle_info({'DOWN', _MRef, process, {rabbit_running, Node}, _Reason},
State = #state{monitors = Monitors}) ->
rabbit_log:info("rabbit on node ~p down~n", [Node]),
{AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
- write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}),
+ ok = write_cluster_status({AllNodes, DiscNodes,
+ del_node(Node, RunningNodes)}),
ok = handle_dead_rabbit(Node),
- {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}};
+ {noreply, State#state{monitors = pmon:erase(
+ {rabbit_running, Node}, Monitors)}};
handle_info({mnesia_system_event,
{inconsistent_database, running_partitioned_network, Node}},