diff options
author | Matthias Radestock <matthias@rabbitmq.com> | 2010-07-29 22:20:15 +0100 |
---|---|---|
committer | Matthias Radestock <matthias@rabbitmq.com> | 2010-07-29 22:20:15 +0100 |
commit | be4a2b09d6b9867dda53461fe65f88eb50f33f26 (patch) | |
tree | 8ace77e812fb5d323487154e34d5e2972a2f7a80 | |
parent | 9ea08a4b9360e7a2160d46bbae2ef40d11b92d5d (diff) | |
parent | b599590d8da76e1e0004bf5ac75493a61c5dc98c (diff) | |
download | rabbitmq-server-be4a2b09d6b9867dda53461fe65f88eb50f33f26.tar.gz |
merge bug21396 into default
-rw-r--r-- | src/rabbit_tests.erl | 4 | ||||
-rw-r--r-- | src/supervisor2.erl | 109 | ||||
-rw-r--r-- | src/test_sup.erl | 94 |
3 files changed, 189 insertions, 18 deletions
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 0b92682a..090c714b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -60,6 +60,7 @@ all_tests() -> passed = test_bpqueue(), passed = test_pg_local(), passed = test_unfold(), + passed = test_supervisor_delayed_restart(), passed = test_parsing(), passed = test_content_framing(), passed = test_topic_matching(), @@ -1340,6 +1341,9 @@ bad_handle_hook(_, _, _) -> extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). +test_supervisor_delayed_restart() -> + test_sup:test_supervisor_delayed_restart(). + test_backing_queue() -> case application:get_env(rabbit, backing_queue_module) of {ok, rabbit_variable_queue} -> diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 03dc0f99..fb4c9b02 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -4,11 +4,34 @@ %% 1) the module name is supervisor2 %% %% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly -%% terminated as per the shutdown component of the child_spec. +%% simple_one_for_one_terminate. This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. %% -%% All modifications are (C) 2010 LShift Ltd. +%% 3) child specifications can contain, as the restart type, a tuple +%% {permanent, Delay} | {transient, Delay} where Delay >= 0. The +%% delay, in seconds, indicates what should happen if a child, upon +%% being restarted, exceeds the MaxT and MaxR parameters. Thus, if +%% a child exits, it is restarted as normal. If it exits +%% sufficiently quickly and often to exceed the boundaries set by +%% the MaxT and MaxR parameters, and a Delay is specified, then +%% rather than stopping the supervisor, the supervisor instead +%% continues and tries to start up the child again, Delay seconds +%% later. +%% +%% Note that you can never restart more frequently than the MaxT +%% and MaxR parameters allow: i.e. you must wait until *both* the +%% Delay has passed *and* the MaxT and MaxR parameters allow the +%% child to be restarted. +%% +%% Also note that the Delay is a *minimum*. There is no guarantee +%% that the child will be restarted within that time, especially if +%% other processes are dying and being restarted at the same time - +%% essentially we have to wait for the delay to have passed and for +%% the MaxT and MaxR parameters to permit the child to be +%% restarted. This may require waiting for longer than Delay. +%% +%% All modifications are (C) 2010 Rabbit Technologies Ltd. %% %% %CopyrightBegin% %% @@ -43,6 +66,7 @@ %% Internal exports -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). -export([handle_cast/2]). +-export([delayed_restart/2]). -define(DICT, dict). @@ -119,6 +143,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +delayed_restart(Supervisor, RestartDetails) -> + gen_server:cast(Supervisor, {delayed_restart, RestartDetails}). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -315,6 +342,20 @@ handle_call(which_children, _From, State) -> {reply, Resp, State}. +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when not (?is_simple(State)) -> + case get_child(Child#child.name, State) of + {value, Child} -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; + _ -> + {noreply, State} + end; + %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> @@ -480,6 +521,16 @@ restart_child(Pid, Reason, State) -> {ok, State} end. +do_restart({RestartType, Delay}, Reason, Child, State) -> + case restart1(Child, State) of + {ok, NState} -> + {ok, NState}; + {terminate, NState} -> + {ok, _TRef} = timer:apply_after( + trunc(Delay*1000), ?MODULE, delayed_restart, + [self(), {{RestartType, Delay}, Reason, Child}]), + {ok, NState} + end; do_restart(permanent, Reason, Child, State) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); @@ -500,14 +551,27 @@ do_restart(temporary, Reason, Child, State) -> restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState); + restart(NState#state.strategy, Child, NState, fun restart/2); {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), {shutdown, remove_child(Child, NState)} end. -restart(Strategy, Child, State) +restart1(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart1/2); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later. + {terminate, State} + end. + +restart(Strategy, Child, State, Restart) when Strategy =:= simple_one_for_one orelse Strategy =:= simple_one_for_one_terminate -> #child{mfa = {M, F, A}} = Child, @@ -521,9 +585,9 @@ restart(Strategy, Child, State) {ok, NState}; {error, Error} -> report_error(start_error, Error, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(one_for_one, Child, State) -> +restart(one_for_one, Child, State, Restart) -> case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -533,25 +597,25 @@ restart(one_for_one, Child, State) -> {ok, NState}; {error, Reason} -> report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(rest_for_one, Child, State) -> +restart(rest_for_one, Child, State, Restart) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), ChAfter2 = terminate_children(ChAfter, State#state.name), case start_children(ChAfter2, State#state.name) of {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) end; -restart(one_for_all, Child, State) -> +restart(one_for_all, Child, State, Restart) -> Children1 = del_child(Child#child.pid, State#state.children), Children2 = terminate_children(Children1, State#state.name), case start_children(Children2, State#state.name) of {ok, NChs} -> {ok, State#state{children = NChs}}; {error, NChs} -> - restart(Child, State#state{children = NChs}) + Restart(Child, State#state{children = NChs}) end. %%----------------------------------------------------------------- @@ -769,7 +833,9 @@ supname(N,_) -> N. %%% {Name, Func, RestartType, Shutdown, ChildType, Modules} %%% where Name is an atom %%% Func is {Mod, Fun, Args} == {atom, atom, list} -%%% RestartType is permanent | temporary | transient +%%% RestartType is permanent | temporary | transient | +%%% {permanent, Delay} | +%%% {transient, Delay} where Delay >= 0 %%% Shutdown = integer() | infinity | brutal_kill %%% ChildType = supervisor | worker %%% Modules = [atom()] | dynamic @@ -815,10 +881,17 @@ validFunc({M, F, A}) when is_atom(M), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). -validRestartType(permanent) -> true; -validRestartType(temporary) -> true; -validRestartType(transient) -> true; -validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). +validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType({permanent, Delay}) -> validDelay(Delay); +validRestartType({transient, Delay}) -> validDelay(Delay); +validRestartType(RestartType) -> throw({invalid_restart_type, + RestartType}). + +validDelay(Delay) when is_number(Delay), + Delay >= 0 -> true; +validDelay(What) -> throw({invalid_delay, What}). validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; diff --git a/src/test_sup.erl b/src/test_sup.erl new file mode 100644 index 00000000..f41793bc --- /dev/null +++ b/src/test_sup.erl @@ -0,0 +1,94 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(test_sup). + +-behaviour(supervisor2). + +-export([test_supervisor_delayed_restart/0, + init/1, start_child/0]). + +test_supervisor_delayed_restart() -> + passed = with_sup(simple_one_for_one_terminate, + fun (SupPid) -> + {ok, _ChildPid} = + supervisor2:start_child(SupPid, []), + test_supervisor_delayed_restart(SupPid) + end), + passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1). + +test_supervisor_delayed_restart(SupPid) -> + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + timeout = ping_child(SupPid), + timer:sleep(1010), + ok = ping_child(SupPid), + passed. + +with_sup(RestartStrategy, Fun) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]), + Res = Fun(SupPid), + exit(SupPid, shutdown), + rabbit_misc:unlink_and_capture_exit(SupPid), + Res. + +init([RestartStrategy]) -> + {ok, {{RestartStrategy, 1, 1}, + [{test, {test_sup, start_child, []}, {permanent, 1}, + 16#ffffffff, worker, [test_sup]}]}}. + +start_child() -> + {ok, proc_lib:spawn_link(fun run_child/0)}. + +ping_child(SupPid) -> + Ref = make_ref(), + get_child_pid(SupPid) ! {ping, Ref, self()}, + receive {pong, Ref} -> ok + after 1000 -> timeout + end. + +exit_child(SupPid) -> + true = exit(get_child_pid(SupPid), abnormal), + ok. + +get_child_pid(SupPid) -> + [{_Id, ChildPid, worker, [test_sup]}] = + supervisor2:which_children(SupPid), + ChildPid. + +run_child() -> + receive {ping, Ref, Pid} -> Pid ! {pong, Ref}, + run_child() + end. |