summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Radestock <matthias@rabbitmq.com>2010-07-29 22:20:15 +0100
committerMatthias Radestock <matthias@rabbitmq.com>2010-07-29 22:20:15 +0100
commitbe4a2b09d6b9867dda53461fe65f88eb50f33f26 (patch)
tree8ace77e812fb5d323487154e34d5e2972a2f7a80
parent9ea08a4b9360e7a2160d46bbae2ef40d11b92d5d (diff)
parentb599590d8da76e1e0004bf5ac75493a61c5dc98c (diff)
downloadrabbitmq-server-be4a2b09d6b9867dda53461fe65f88eb50f33f26.tar.gz
merge bug21396 into default
-rw-r--r--src/rabbit_tests.erl4
-rw-r--r--src/supervisor2.erl109
-rw-r--r--src/test_sup.erl94
3 files changed, 189 insertions, 18 deletions
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 0b92682a..090c714b 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -60,6 +60,7 @@ all_tests() ->
passed = test_bpqueue(),
passed = test_pg_local(),
passed = test_unfold(),
+ passed = test_supervisor_delayed_restart(),
passed = test_parsing(),
passed = test_content_framing(),
passed = test_topic_matching(),
@@ -1340,6 +1341,9 @@ bad_handle_hook(_, _, _) ->
extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) ->
handle_hook(Hookname, Handler, {Args, Extra1, Extra2}).
+test_supervisor_delayed_restart() ->
+ test_sup:test_supervisor_delayed_restart().
+
test_backing_queue() ->
case application:get_env(rabbit, backing_queue_module) of
{ok, rabbit_variable_queue} ->
diff --git a/src/supervisor2.erl b/src/supervisor2.erl
index 03dc0f99..fb4c9b02 100644
--- a/src/supervisor2.erl
+++ b/src/supervisor2.erl
@@ -4,11 +4,34 @@
%% 1) the module name is supervisor2
%%
%% 2) there is a new strategy called
-%% simple_one_for_one_terminate. This is exactly the same as for
-%% simple_one_for_one, except that children *are* explicitly
-%% terminated as per the shutdown component of the child_spec.
+%% simple_one_for_one_terminate. This is exactly the same as for
+%% simple_one_for_one, except that children *are* explicitly
+%% terminated as per the shutdown component of the child_spec.
%%
-%% All modifications are (C) 2010 LShift Ltd.
+%% 3) child specifications can contain, as the restart type, a tuple
+%% {permanent, Delay} | {transient, Delay} where Delay >= 0. The
+%% delay, in seconds, indicates what should happen if a child, upon
+%% being restarted, exceeds the MaxT and MaxR parameters. Thus, if
+%% a child exits, it is restarted as normal. If it exits
+%% sufficiently quickly and often to exceed the boundaries set by
+%% the MaxT and MaxR parameters, and a Delay is specified, then
+%% rather than stopping the supervisor, the supervisor instead
+%% continues and tries to start up the child again, Delay seconds
+%% later.
+%%
+%% Note that you can never restart more frequently than the MaxT
+%% and MaxR parameters allow: i.e. you must wait until *both* the
+%% Delay has passed *and* the MaxT and MaxR parameters allow the
+%% child to be restarted.
+%%
+%% Also note that the Delay is a *minimum*. There is no guarantee
+%% that the child will be restarted within that time, especially if
+%% other processes are dying and being restarted at the same time -
+%% essentially we have to wait for the delay to have passed and for
+%% the MaxT and MaxR parameters to permit the child to be
+%% restarted. This may require waiting for longer than Delay.
+%%
+%% All modifications are (C) 2010 Rabbit Technologies Ltd.
%%
%% %CopyrightBegin%
%%
@@ -43,6 +66,7 @@
%% Internal exports
-export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]).
-export([handle_cast/2]).
+-export([delayed_restart/2]).
-define(DICT, dict).
@@ -119,6 +143,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) ->
end;
check_childspecs(X) -> {error, {badarg, X}}.
+delayed_restart(Supervisor, RestartDetails) ->
+ gen_server:cast(Supervisor, {delayed_restart, RestartDetails}).
+
%%% ---------------------------------------------------
%%%
%%% Initialize the supervisor.
@@ -315,6 +342,20 @@ handle_call(which_children, _From, State) ->
{reply, Resp, State}.
+handle_cast({delayed_restart, {RestartType, Reason, Child}}, State)
+ when ?is_simple(State) ->
+ {ok, NState} = do_restart(RestartType, Reason, Child, State),
+ {noreply, NState};
+handle_cast({delayed_restart, {RestartType, Reason, Child}}, State)
+ when not (?is_simple(State)) ->
+ case get_child(Child#child.name, State) of
+ {value, Child} ->
+ {ok, NState} = do_restart(RestartType, Reason, Child, State),
+ {noreply, NState};
+ _ ->
+ {noreply, State}
+ end;
+
%%% Hopefully cause a function-clause as there is no API function
%%% that utilizes cast.
handle_cast(null, State) ->
@@ -480,6 +521,16 @@ restart_child(Pid, Reason, State) ->
{ok, State}
end.
+do_restart({RestartType, Delay}, Reason, Child, State) ->
+ case restart1(Child, State) of
+ {ok, NState} ->
+ {ok, NState};
+ {terminate, NState} ->
+ {ok, _TRef} = timer:apply_after(
+ trunc(Delay*1000), ?MODULE, delayed_restart,
+ [self(), {{RestartType, Delay}, Reason, Child}]),
+ {ok, NState}
+ end;
do_restart(permanent, Reason, Child, State) ->
report_error(child_terminated, Reason, Child, State#state.name),
restart(Child, State);
@@ -500,14 +551,27 @@ do_restart(temporary, Reason, Child, State) ->
restart(Child, State) ->
case add_restart(State) of
{ok, NState} ->
- restart(NState#state.strategy, Child, NState);
+ restart(NState#state.strategy, Child, NState, fun restart/2);
{terminate, NState} ->
report_error(shutdown, reached_max_restart_intensity,
Child, State#state.name),
{shutdown, remove_child(Child, NState)}
end.
-restart(Strategy, Child, State)
+restart1(Child, State) ->
+ case add_restart(State) of
+ {ok, NState} ->
+ restart(NState#state.strategy, Child, NState, fun restart1/2);
+ {terminate, _NState} ->
+ %% we've reached the max restart intensity, but the
+ %% add_restart will have added to the restarts
+ %% field. Given we don't want to die here, we need to go
+ %% back to the old restarts field otherwise we'll never
+ %% attempt to restart later.
+ {terminate, State}
+ end.
+
+restart(Strategy, Child, State, Restart)
when Strategy =:= simple_one_for_one orelse
Strategy =:= simple_one_for_one_terminate ->
#child{mfa = {M, F, A}} = Child,
@@ -521,9 +585,9 @@ restart(Strategy, Child, State)
{ok, NState};
{error, Error} ->
report_error(start_error, Error, Child, State#state.name),
- restart(Child, State)
+ Restart(Child, State)
end;
-restart(one_for_one, Child, State) ->
+restart(one_for_one, Child, State, Restart) ->
case do_start_child(State#state.name, Child) of
{ok, Pid} ->
NState = replace_child(Child#child{pid = Pid}, State),
@@ -533,25 +597,25 @@ restart(one_for_one, Child, State) ->
{ok, NState};
{error, Reason} ->
report_error(start_error, Reason, Child, State#state.name),
- restart(Child, State)
+ Restart(Child, State)
end;
-restart(rest_for_one, Child, State) ->
+restart(rest_for_one, Child, State, Restart) ->
{ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children),
ChAfter2 = terminate_children(ChAfter, State#state.name),
case start_children(ChAfter2, State#state.name) of
{ok, ChAfter3} ->
{ok, State#state{children = ChAfter3 ++ ChBefore}};
{error, ChAfter3} ->
- restart(Child, State#state{children = ChAfter3 ++ ChBefore})
+ Restart(Child, State#state{children = ChAfter3 ++ ChBefore})
end;
-restart(one_for_all, Child, State) ->
+restart(one_for_all, Child, State, Restart) ->
Children1 = del_child(Child#child.pid, State#state.children),
Children2 = terminate_children(Children1, State#state.name),
case start_children(Children2, State#state.name) of
{ok, NChs} ->
{ok, State#state{children = NChs}};
{error, NChs} ->
- restart(Child, State#state{children = NChs})
+ Restart(Child, State#state{children = NChs})
end.
%%-----------------------------------------------------------------
@@ -769,7 +833,9 @@ supname(N,_) -> N.
%%% {Name, Func, RestartType, Shutdown, ChildType, Modules}
%%% where Name is an atom
%%% Func is {Mod, Fun, Args} == {atom, atom, list}
-%%% RestartType is permanent | temporary | transient
+%%% RestartType is permanent | temporary | transient |
+%%% {permanent, Delay} |
+%%% {transient, Delay} where Delay >= 0
%%% Shutdown = integer() | infinity | brutal_kill
%%% ChildType = supervisor | worker
%%% Modules = [atom()] | dynamic
@@ -815,10 +881,17 @@ validFunc({M, F, A}) when is_atom(M),
is_list(A) -> true;
validFunc(Func) -> throw({invalid_mfa, Func}).
-validRestartType(permanent) -> true;
-validRestartType(temporary) -> true;
-validRestartType(transient) -> true;
-validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}).
+validRestartType(permanent) -> true;
+validRestartType(temporary) -> true;
+validRestartType(transient) -> true;
+validRestartType({permanent, Delay}) -> validDelay(Delay);
+validRestartType({transient, Delay}) -> validDelay(Delay);
+validRestartType(RestartType) -> throw({invalid_restart_type,
+ RestartType}).
+
+validDelay(Delay) when is_number(Delay),
+ Delay >= 0 -> true;
+validDelay(What) -> throw({invalid_delay, What}).
validShutdown(Shutdown, _)
when is_integer(Shutdown), Shutdown > 0 -> true;
diff --git a/src/test_sup.erl b/src/test_sup.erl
new file mode 100644
index 00000000..f41793bc
--- /dev/null
+++ b/src/test_sup.erl
@@ -0,0 +1,94 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%% Ltd. Portions created by Cohesive Financial Technologies LLC are
+%% Copyright (C) 2007-2010 Cohesive Financial Technologies
+%% LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%% (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%% All Rights Reserved.
+%%
+%% Contributor(s): ______________________________________.
+%%
+
+-module(test_sup).
+
+-behaviour(supervisor2).
+
+-export([test_supervisor_delayed_restart/0,
+ init/1, start_child/0]).
+
+test_supervisor_delayed_restart() ->
+ passed = with_sup(simple_one_for_one_terminate,
+ fun (SupPid) ->
+ {ok, _ChildPid} =
+ supervisor2:start_child(SupPid, []),
+ test_supervisor_delayed_restart(SupPid)
+ end),
+ passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1).
+
+test_supervisor_delayed_restart(SupPid) ->
+ ok = ping_child(SupPid),
+ ok = exit_child(SupPid),
+ timer:sleep(10),
+ ok = ping_child(SupPid),
+ ok = exit_child(SupPid),
+ timer:sleep(10),
+ timeout = ping_child(SupPid),
+ timer:sleep(1010),
+ ok = ping_child(SupPid),
+ passed.
+
+with_sup(RestartStrategy, Fun) ->
+ {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]),
+ Res = Fun(SupPid),
+ exit(SupPid, shutdown),
+ rabbit_misc:unlink_and_capture_exit(SupPid),
+ Res.
+
+init([RestartStrategy]) ->
+ {ok, {{RestartStrategy, 1, 1},
+ [{test, {test_sup, start_child, []}, {permanent, 1},
+ 16#ffffffff, worker, [test_sup]}]}}.
+
+start_child() ->
+ {ok, proc_lib:spawn_link(fun run_child/0)}.
+
+ping_child(SupPid) ->
+ Ref = make_ref(),
+ get_child_pid(SupPid) ! {ping, Ref, self()},
+ receive {pong, Ref} -> ok
+ after 1000 -> timeout
+ end.
+
+exit_child(SupPid) ->
+ true = exit(get_child_pid(SupPid), abnormal),
+ ok.
+
+get_child_pid(SupPid) ->
+ [{_Id, ChildPid, worker, [test_sup]}] =
+ supervisor2:which_children(SupPid),
+ ChildPid.
+
+run_child() ->
+ receive {ping, Ref, Pid} -> Pid ! {pong, Ref},
+ run_child()
+ end.