diff options
author | Erlang/OTP <otp@erlang.org> | 2022-03-28 12:39:17 +0200 |
---|---|---|
committer | Erlang/OTP <otp@erlang.org> | 2022-03-28 12:39:17 +0200 |
commit | 510f4736527d166e7e87cf42defbcaba23ccf977 (patch) | |
tree | a6c03fe665d4d40eabf00ef03f7eb6463fd2d91c | |
parent | a41a194004efd69f43355b65456a0e2756fc2ffd (diff) | |
parent | 3c1f611a843b7d971ac41151886576950fbf5015 (diff) | |
download | erlang-510f4736527d166e7e87cf42defbcaba23ccf977.tar.gz |
Merge branch 'rickard/net-tick-intensity/22.3.4/ERIERL-732/OTP-17905' into maint-22
* rickard/net-tick-intensity/22.3.4/ERIERL-732/OTP-17905:
Net tick intensity
# Conflicts:
# lib/kernel/src/kernel.app.src
# lib/kernel/test/erl_distribution_SUITE.erl
-rw-r--r-- | lib/kernel/doc/src/kernel_app.xml | 71 | ||||
-rw-r--r-- | lib/kernel/doc/src/net_kernel.xml | 115 | ||||
-rw-r--r-- | lib/kernel/src/dist_util.erl | 59 | ||||
-rw-r--r-- | lib/kernel/src/erl_distribution.erl | 30 | ||||
-rw-r--r-- | lib/kernel/src/kernel.app.src | 2 | ||||
-rw-r--r-- | lib/kernel/src/net_kernel.erl | 247 | ||||
-rw-r--r-- | lib/kernel/test/erl_distribution_SUITE.erl | 169 |
7 files changed, 537 insertions, 156 deletions
diff --git a/lib/kernel/doc/src/kernel_app.xml b/lib/kernel/doc/src/kernel_app.xml index aef5a9fbe2..627a5c752c 100644 --- a/lib/kernel/doc/src/kernel_app.xml +++ b/lib/kernel/doc/src/kernel_app.xml @@ -307,32 +307,67 @@ setup time, but rather each individual network operation during the connection setup and handshake.</p> </item> - <tag><c>net_ticktime = TickTime</c></tag> - <item> - <marker id="net_ticktime"></marker> - <p>Specifies the <c>net_kernel</c> tick time in seconds. This is the + <tag><marker id="net_tickintensity"/><c>net_tickintensity = NetTickIntensity</c></tag> + <item> + <p><i>Net tick intensity</i> specifies how many ticks to send during a + <seealso marker="#net_ticktime">net tick time</seealso> period when + no other data is sent over a connection to another node. This also + determines how often to check for data from the other node. The + higher net tick intensity, the closer to the chosen net tick time period + the node will detect an unresponsive node. The net tick intensity + defaults to <c>4</c>. The value of <c>NetTickIntensity</c> should be + an integer in the range <c>4..1000</c>. If the <c>NetTickIntensity</c> + is not an integer or an integer less than <c>4</c>, <c>4</c> will + silently be used. If <c>NetTickIntensity</c> is an integer larger than + <c>1000</c>, <c>1000</c> will silently be used. + </p> + <note> + <p>Note that all communicating nodes are expected to use the same + <i>net tick intensity</i> as well as the same <i>net tick time</i>.</p> + </note> + <warning> + <p>Be careful not to set a too high net tick intensity, since you + can overwhelm the node with work if it is set too high.</p> + </warning> + </item> + <tag><marker id="net_ticktime"/><c>net_ticktime = NetTickTime</c></tag> + <item> + <p>Specifies the <i>net tick time</i> in seconds. 
This is the approximate time a connected node may be unresponsive until it is considered down and thereby disconnected.</p> - <p>Once every <c>TickTime/4</c> seconds, each connected node is ticked - if nothing has been sent to it during that last <c>TickTime/4</c> - interval. A tick is a small package sent on the connection. A connected - node is considered to be down if no ticks or payload packages have been - received during the last four <c>TickTime/4</c> intervals. This ensures - that nodes that are not responding, for reasons such as hardware errors, - are considered to be down.</p> - <p>As the availability is only checked every <c>TickTime/4</c> seconds, + <p>Net tick time together with <seealso marker="#net_tickintensity">net + tick intensity</seealso> determines an interval <c>TickInterval = + NetTickTime/NetTickIntensity</c>. Once every <c>TickInterval</c> seconds, + each connected node is ticked if nothing has been sent to it during that + last <c>TickInterval</c> seconds. A tick is a small package sent on the + connection. + A connected node is considered to be down if no ticks or payload packages + have been received during the last <c>NetTickIntensity</c> number of + <c>TickInterval</c> seconds intervals. This ensures that nodes that + are not responding, for reasons such as hardware errors, are considered + to be down.</p> + <p>As the availability is only checked every <c>TickInterval</c> seconds, the actual time <c>T</c> a node have been unresponsive when detected may vary between <c>MinT</c> and <c>MaxT</c>, where:</p> <code type="none"> -MinT = TickTime - TickTime / 4 -MaxT = TickTime + TickTime / 4</code> - <p><c>TickTime</c> defaults to <c>60</c> seconds. Thus, +MinT = NetTickTime - NetTickTime / NetTickIntensity +MaxT = NetTickTime + NetTickTime / NetTickIntensity</code> + <p><c>NetTickTime</c> defaults to <c>60</c> seconds and + <c>NetTickIntensity</c> defaults to <c>4</c>. 
Thus, <c><![CDATA[45 < T < 75]]></c> seconds.</p> + <note> <p>Notice that <em>all</em> communicating nodes are to have the - <em>same</em> <c>TickTime</c> value specified, as it determines both the - frequency of outgoing ticks and the expected frequency of incominging - ticks.</p> + <em>same</em> <c>NetTickTime</c> and <c>NetTickIntensity</c> values + specified, as it determines both the frequency of outgoing ticks and + the expected frequency of incoming ticks.</p> + </note> + <p><c>NetTickTime</c> needs to be a multiple of <c>NetTickIntensity</c>. + If the configured values are not, <c>NetTickTime</c> will internally be + rounded up to the nearest millisecond. + <seealso marker="net_kernel#get_net_ticktime/0"><c>net_kernel:get_net_ticktime()</c></seealso> + will, however, report net tick time truncated to the nearest second. + </p> <p>Normally, a terminating node is detected immediately by the transport protocol (like TCP/IP).</p> </item> diff --git a/lib/kernel/doc/src/net_kernel.xml b/lib/kernel/doc/src/net_kernel.xml index 419d3cad84..84e0aaaf5d 100644 --- a/lib/kernel/doc/src/net_kernel.xml +++ b/lib/kernel/doc/src/net_kernel.xml @@ -40,10 +40,11 @@ <c>-name</c> or <c>-sname</c>:</p> <pre> $ <input>erl -sname foobar</input></pre> - <p>It is also possible to call <c>net_kernel:start([foobar])</c> + <p>It is also possible to call + <seealso marker="#start/2"><c>net_kernel:start(foobar, #{})</c></seealso> directly from the normal Erlang shell prompt:</p> <pre> -1> <input>net_kernel:start([foobar, shortnames]).</input> +1> <input>net_kernel:start(foobar, #{name_domain => shortnames}).</input> {ok,<0.64.0>} (foobar@gringotts)2></pre> <p>If the node is started with command-line flag <c>-sname</c>, @@ -113,8 +114,11 @@ $ <input>erl -sname foobar</input></pre> <name name="get_net_ticktime" arity="0" since=""/> <fsummary>Get <c>net_ticktime</c>.</fsummary> <desc> - <p>Gets <c>net_ticktime</c> (see - <seealso marker="kernel_app"><c>kernel(6)</c></seealso>).</p> + 
<p>Returns currently used net tick time in seconds. For more information + see the + <seealso marker="kernel_app#net_ticktime"><c>net_ticktime</c></seealso> + <c>kernel(6)</c> parameter.</p> + <p>Defined return values (<c><anno>Res</anno></c>):</p> <taglist> <tag><c><anno>NetTicktime</anno></c></tag> @@ -345,21 +349,96 @@ $ <input>erl -sname foobar</input></pre> </func> <func> - <name since="">start([Name]) -> {ok, pid()} | {error, Reason}</name> - <name since="">start([Name, NameType]) -> {ok, pid()} | {error, Reason}</name> - <name since="">start([Name, NameType, Ticktime]) -> {ok, pid()} | {error, Reason}</name> + <name name="start" arity="2" since="OTP @OTP-17905@"/> + <fsummary>Turn an Erlang runtime system into a distributed node.</fsummary> + <desc> + <p> + Turns a non-distributed node into a distributed node by + starting <c>net_kernel</c> and other necessary processes. + </p> + + <p>Currently supported options:</p> + <taglist> + <tag><c>name_domain => <anno>NameDomain</anno></c></tag> + <item><p> + Determines the host name part of the node name. If + <c><anno>NameDomain</anno></c> equals <c>longnames</c>, fully + qualified domain names will be used which also is the default. + If <c><anno>NameDomain</anno></c> equals <c>shortnames</c>, only the + short name of the host will be used. + </p></item> + <tag><c>net_ticktime => <anno>NetTickTime</anno></c></tag> + <item><p> + <i>Net tick time</i> to use in seconds. Defaults to the value of the + <seealso marker="kernel_app#net_ticktime"><c>net_ticktime</c></seealso> + <c>kernel(6)</c> parameter. For more information about <i>net tick + time</i>, see the <c>kernel</c> parameter. However, note that if the + value of the <c>kernel</c> parameter is invalid, it will silently be + replaced by a valid value, but if an invalid + <c><anno>NetTickTime</anno></c> value is passed as option value to + this function, the call will fail. 
+ </p></item> + <tag><c>net_tickintensity => <anno>NetTickIntensity</anno></c></tag> + <item><p> + <i>Net tick intensity</i> to use. Defaults to the value of the + <seealso marker="kernel_app#net_tickintensity"><c>net_tickintensity</c></seealso> + <c>kernel(6)</c> parameter. For more information about <i>net tick + intensity</i>, see the <c>kernel</c> parameter. However, note that if + the value of the <c>kernel</c> parameter is invalid, it will silently + be replaced by a valid value, but if an invalid + <c><anno>NetTickIntensity</anno></c> value is passed as option value + to this function, the call will fail. + </p></item> + </taglist> + </desc> + </func> + <func> + <name name="start" arity="1" since=""/> <fsummary>Turn an Erlang runtime system into a distributed node.</fsummary> - <type> - <v>Name = atom()</v> - <v>NameType = shortnames | longnames</v> - <v>Reason = {already_started, pid()} | term()</v> - </type> <desc> - <p>Turns a non-distributed node into a distributed node by - starting <c>net_kernel</c> and other necessary processes.</p> - <p>Notice that the argument is a list with exactly one, two, or - three arguments. <c>NameType</c> defaults to <c>longnames</c> - and <c>Ticktime</c> to <c>15000</c>.</p> + <warning><p> + <c>start/1</c> is deprecated. Use + <seealso marker="#start/2"><c>start/2</c></seealso> instead. + </p></warning> + <p> + Turns a non-distributed node into a distributed node by + starting <c>net_kernel</c> and other necessary processes. + </p> + <p> + <c><anno>Options</anno></c> list can only be exactly one of + the following lists (order is important): + </p> + <taglist> + <tag><c>[<anno>Name</anno>]</c></tag> + <item> + <p> + The same as <c>net_kernel:start([<anno>Name</anno>, + longnames, 15000])</c>. + </p> + </item> + <tag><c>[<anno>Name</anno>, <anno>NameDomain</anno>]</c></tag> + <item> + <p> + The same as <c>net_kernel:start([<anno>Name</anno>, + <anno>NameDomain</anno>, 15000])</c>. 
+ </p> + </item> + <tag><c>[<anno>Name</anno>, <anno>NameDomain</anno>, + <anno>TickTime</anno>]</c></tag> + <item> + <p> + The same as <seealso marker="#start/2"> + <c>net_kernel:start(<anno>Name</anno>, #{name_domain => + <anno>NameDomain</anno>, net_ticktime => + ((<anno>TickTime</anno>*4-1) div 1000) + 1, + net_tickintensity => 4})</c></seealso>. + Note that <c><anno>TickTime</anno></c> is <i>not</i> the same + as net tick time expressed in milliseconds. + <c><anno>TickTime</anno></c> is the time between ticks when + net tick intensity equals <c>4</c>. + </p> + </item> + </taglist> </desc> </func> @@ -370,7 +449,7 @@ $ <input>erl -sname foobar</input></pre> <p>Turns a distributed node into a non-distributed node. For other nodes in the network, this is the same as the node going down. Only possible when the net kernel was started using - <seealso marker="#start/1"><c>start/1</c></seealso>, + <seealso marker="#start/2"><c>start/2</c></seealso>, otherwise <c>{error, not_allowed}</c> is returned. Returns <c>{error, not_found}</c> if the local node is not alive.</p> </desc> diff --git a/lib/kernel/src/dist_util.erl b/lib/kernel/src/dist_util.erl index 6a4fac115a..931c62ff5b 100644 --- a/lib/kernel/src/dist_util.erl +++ b/lib/kernel/src/dist_util.erl @@ -397,6 +397,17 @@ convert_flags(_Undefined) -> %% The connection has been established. %% -------------------------------------------------------------- +-record(state, {kernel :: pid(), + node :: node(), + tick_intensity :: 4..1000, + socket :: term(), + publish_type :: 'hidden' | 'normal', + handle :: erlang:dist_handle(), + f_tick :: function(), + f_getstat :: function() | 'undefined', + f_setopts :: function() | 'undefined', + f_getopts :: function() | 'undefined'}). + connection(#hs_data{other_node = Node, socket = Socket, f_address = FAddress, @@ -408,22 +419,23 @@ connection(#hs_data{other_node = Node, ok -> DHandle = do_setnode(HSData), % Succeeds or exits the process. 
Address = FAddress(Socket,Node), - mark_nodeup(HSData,Address), + TickIntensity = mark_nodeup(HSData,Address), case FPostNodeup(Socket) of ok -> case HSData#hs_data.f_handshake_complete of undefined -> ok; HsComplete -> HsComplete(Socket, Node, DHandle) end, - con_loop({HSData#hs_data.kernel_pid, - Node, - Socket, - PType, - DHandle, - HSData#hs_data.mf_tick, - HSData#hs_data.mf_getstat, - HSData#hs_data.mf_setopts, - HSData#hs_data.mf_getopts}, + con_loop(#state{kernel = HSData#hs_data.kernel_pid, + node = Node, + socket = Socket, + tick_intensity = TickIntensity, + publish_type = PType, + handle = DHandle, + f_tick = HSData#hs_data.mf_tick, + f_getstat = HSData#hs_data.mf_getstat, + f_setopts = HSData#hs_data.mf_setopts, + f_getopts = HSData#hs_data.mf_getopts}, #tick{}); _ -> ?shutdown2(Node, connection_setup_failed) @@ -503,8 +515,8 @@ mark_nodeup(#hs_data{kernel_pid = Kernel, Kernel ! {self(), {nodeup,Node,Address,publish_type(Flags), true}}, receive - {Kernel, inserted} -> - ok; + {Kernel, inserted, TickIntensity} -> + TickIntensity; {Kernel, bad_request} -> TypeT = case OtherStarted of true -> @@ -523,8 +535,10 @@ getstat(DHandle, _Socket, undefined) -> getstat(_DHandle, Socket, MFGetstat) -> MFGetstat(Socket). 
-con_loop({Kernel, Node, Socket, Type, DHandle, MFTick, MFGetstat, - MFSetOpts, MFGetOpts}=ConData, +con_loop(#state{kernel = Kernel, node = Node, + socket = Socket, handle = DHandle, + f_getstat = MFGetstat, f_setopts = MFSetOpts, + f_getopts = MFGetOpts} = ConData, Tick) -> receive {tcp_closed, Socket} -> @@ -534,14 +548,13 @@ con_loop({Kernel, Node, Socket, Type, DHandle, MFTick, MFGetstat, {Kernel, aux_tick} -> case getstat(DHandle, Socket, MFGetstat) of {ok, _, _, PendWrite} -> - send_aux_tick(Type, Socket, PendWrite, MFTick); + send_aux_tick(ConData, PendWrite); _ -> ignore_it end, con_loop(ConData, Tick); {Kernel, tick} -> - case send_tick(DHandle, Socket, Tick, Type, - MFTick, MFGetstat) of + case send_tick(ConData, Tick) of {ok, NewTick} -> con_loop(ConData, NewTick); {error, not_responding} -> @@ -887,13 +900,16 @@ send_status(#hs_data{socket = Socket, other_node = Node, %% A HIDDEN node is always ticked if we haven't read anything %% as a (primitive) hidden node only ticks when it receives a TICK !! -send_tick(DHandle, Socket, Tick, Type, MFTick, MFGetstat) -> +send_tick(#state{handle = DHandle, socket = Socket, + tick_intensity = TickIntensity, + publish_type = Type, f_tick = MFTick, + f_getstat = MFGetstat}, Tick) -> #tick{tick = T0, read = Read, write = Write, ticked = Ticked0} = Tick, T = T0 + 1, - T1 = T rem 4, + T1 = T rem TickIntensity, case getstat(DHandle, Socket, MFGetstat) of {ok, Read, _, _} when Ticked0 =:= T -> {error, not_responding}; @@ -931,9 +947,10 @@ need_to_tick(hidden, 0, _, _) -> % nothing read from hidden need_to_tick(_, _, _, _) -> false. -send_aux_tick(normal, _, Pend, _) when Pend /= false, Pend /= 0 -> +send_aux_tick(#state{publish_type = normal}, Pend) when Pend /= false, + Pend /= 0 -> ok; %% Dont send tick if pending write. -send_aux_tick(_Type, Socket, _Pend, MFTick) -> +send_aux_tick(#state{socket = Socket, f_tick = MFTick}, _Pend) -> MFTick(Socket). 
%% ------------------------------------------------------------ diff --git a/lib/kernel/src/erl_distribution.erl b/lib/kernel/src/erl_distribution.erl index f07bd351eb..86c988e160 100644 --- a/lib/kernel/src/erl_distribution.erl +++ b/lib/kernel/src/erl_distribution.erl @@ -23,7 +23,7 @@ -include_lib("kernel/include/logger.hrl"). --export([start_link/0,start_link/2,init/1,start/1,stop/0]). +-export([start_link/0,start_link/1,init/1,start/1,stop/0]). -define(DBG,erlang:display([?MODULE,?LINE])). @@ -35,8 +35,8 @@ start_link() -> %% Called from net_kernel:start/1 to start distribution after the %% system has already started. -start(Args) -> - C = {net_sup_dynamic, {?MODULE,start_link,[Args,false]}, permanent, +start(Opts) -> + C = {net_sup_dynamic, {?MODULE,start_link,[Opts#{clean_halt => false}]}, permanent, 1000, supervisor, [erl_distribution]}, supervisor:start_child(kernel_sup, C). @@ -62,8 +62,8 @@ stop() -> %% Helper start function. -start_link(Args, CleanHalt) -> - supervisor:start_link({local,net_sup}, ?MODULE, [Args,CleanHalt]). +start_link(Opts) -> + supervisor:start_link({local,net_sup}, ?MODULE, [Opts]). init(NetArgs) -> Epmd = @@ -84,25 +84,17 @@ init(NetArgs) -> do_start_link([{Arg,Flag}|T]) -> case init:get_argument(Arg) of {ok,[[Name]]} -> - start_link([list_to_atom(Name),Flag|ticktime()], true); + start_link(#{name => list_to_atom(Name), + name_domain => Flag, + clean_halt => true}); {ok,[[Name]|_Rest]} -> ?LOG_WARNING("Multiple -~p given to erl, using the first, ~p", [Arg, Name]), - start_link([list_to_atom(Name),Flag|ticktime()], true); + start_link(#{name => list_to_atom(Name), + name_domain => Flag, + clean_halt => true}); _ -> do_start_link(T) end; do_start_link([]) -> ignore. - -ticktime() -> - %% catch, in case the system was started with boot file start_old, - %% i.e. running without the application_controller. - %% Time is given in seconds. The net_kernel tick time is - %% Time/4 milliseconds. 
- case catch application:get_env(net_ticktime) of - {ok, Value} when is_integer(Value), Value > 0 -> - [Value * 250]; %% i.e. 1000 / 4 = 250 ms. - _ -> - [] - end. diff --git a/lib/kernel/src/kernel.app.src b/lib/kernel/src/kernel.app.src index 234d71f745..53a16a2f74 100644 --- a/lib/kernel/src/kernel.app.src +++ b/lib/kernel/src/kernel.app.src @@ -147,6 +147,8 @@ {applications, []}, {env, [{logger_level, notice}, {logger_sasl_compatible, false}, + {net_tickintensity, 4}, + {net_ticktime, 60}, {prevent_overlapping_partitions, false} ]}, {mod, {kernel, []}}, diff --git a/lib/kernel/src/net_kernel.erl b/lib/kernel/src/net_kernel.erl index 4c8dd1e2b5..28f8b611ef 100644 --- a/lib/kernel/src/net_kernel.erl +++ b/lib/kernel/src/net_kernel.erl @@ -59,12 +59,13 @@ monitor_nodes/2, setopts/2, getopts/2, + start/2, start/1, stop/0]). %% Exports for internal use. --export([start_link/2, +-export([start_link/1, kernel_apply/3, longnames/0, protocol_childspecs/0, @@ -143,14 +144,18 @@ node %% remote node name }). --record(tick, {ticker, %% ticker : pid() - time %% Ticktime in milli seconds : integer() - }). +-record(tick, + {ticker :: pid(), %% ticker + time :: pos_integer(), %% net tick time (ms) + intensity :: 4..1000 %% ticks until timout + }). --record(tick_change, {ticker, %% Ticker : pid() - time, %% Ticktime in milli seconds : integer() - how %% What type of change : atom() - }). +-record(tick_change, + {ticker :: pid(), %% ticker + time :: pos_integer(), %% net tick time (ms) + intensity :: 4..1000, %% ticks until timout + how :: 'longer' | 'shorter' %% What type of change + }). %% Default connection setup timeout in milliseconds. %% This timeout is set for every distributed action during @@ -223,7 +228,7 @@ verbose(Level) when is_integer(Level) -> | {ongoing_change_to, NewNetTicktime}, NewNetTicktime :: pos_integer(). set_net_ticktime(T, TP) when is_integer(T), T > 0, is_integer(TP), TP >= 0 -> - ticktime_res(request({new_ticktime, T*250, TP*1000})). 
+ ticktime_res(request({new_ticktime, T*1000, TP*1000})). -spec set_net_ticktime(NetTicktime) -> Res when NetTicktime :: pos_integer(), @@ -268,8 +273,8 @@ monitor_nodes(Flag, Opts) -> end. %% ... -ticktime_res({A, I}) when is_atom(A), is_integer(I) -> {A, I div 250}; -ticktime_res(I) when is_integer(I) -> I div 250; +ticktime_res({A, I}) when is_atom(A), is_integer(I) -> {A, I div 1000}; +ticktime_res(I) when is_integer(I) -> I div 1000; ticktime_res(A) when is_atom(A) -> A. %% Called though BIF's @@ -329,21 +334,69 @@ request(Req) -> %% This function is used to dynamically start the %% distribution. -start(Args) -> - erl_distribution:start(Args). +-spec start(Name, Options) -> {ok, pid()} | {error, Reason} when + Options :: #{name_domain => NameDomain, + net_ticktime => NetTickTime, + net_tickintensity => NetTickIntensity}, + Name :: atom(), + NameDomain :: shortnames | longnames, + NetTickTime :: pos_integer(), + NetTickIntensity :: 4..1000, + Reason :: {already_started, pid()} | term(). + +start(Name, Options) when is_atom(Name), is_map(Options) -> + try + maps:fold(fun (name_domain, Val, _) when Val == shortnames; + Val == longnames -> + ok; + (net_ticktime, Val, _) when is_integer(Val), + Val > 0 -> + ok; + (net_tickintensity, Val, _) when is_integer(Val), + 4 =< Val, + Val =< 1000 -> + ok; + (Opt, Val, _) -> + error({invalid_option, Opt, Val}) + end, ok, Options) + catch error:Reason -> + error(Reason, [Name, Options]) + end, + erl_distribution:start(Options#{name => Name}); +start(Name, Options) when is_map(Options) -> + error(invalid_name, [Name, Options]); +start(Name, Options) -> + error(invalid_options, [Name, Options]). + +-spec start(Options) -> {ok, pid()} | {error, Reason} when + Options :: nonempty_list(Name | NameDomain | TickTime), + Name :: atom(), + NameDomain :: shortnames | longnames, + TickTime :: pos_integer(), + Reason :: {already_started, pid()} | term(). 
+ +start([Name]) when is_atom(Name) -> + start([Name, longnames, 15000]); +start([Name, NameDomain]) when is_atom(Name), + is_atom(NameDomain) -> + start([Name, NameDomain, 15000]); +start([Name, NameDomain, TickTime]) when is_atom(Name), + is_atom(NameDomain), + is_integer(TickTime), + TickTime > 0 -> + %% NetTickTime is in seconds. TickTime is time in milliseconds + %% between ticks when net tick intensity is 4. We round upwards... + NetTickTime = ((TickTime*4-1) div 1000)+1, + start(Name, #{name_domain => NameDomain, + net_ticktime => NetTickTime, + net_tickintensity => 4}). %% This is the main startup routine for net_kernel (only for internal -%% use by the Kernel application. - -start_link([Name], CleanHalt) -> - start_link([Name, longnames], CleanHalt); -start_link([Name, LongOrShortNames], CleanHalt) -> - start_link([Name, LongOrShortNames, 15000], CleanHalt); +%% use) by the Kernel application. -start_link([Name, LongOrShortNames, Ticktime], CleanHalt) -> - Args = {Name, LongOrShortNames, Ticktime, CleanHalt}, +start_link(StartOpts) -> case gen_server:start_link({local, net_kernel}, ?MODULE, - Args, []) of + make_init_opts(StartOpts), []) of {ok, Pid} -> {ok, Pid}; {error, {already_started, Pid}} -> @@ -352,17 +405,70 @@ start_link([Name, LongOrShortNames, Ticktime], CleanHalt) -> exit(nodistribution) end. -init({Name, LongOrShortNames, TickT, CleanHalt}) -> +make_init_opts(Opts) -> + %% Net tick time given in seconds, but kept in milliseconds... 
+ NTT1 = case maps:find(net_ticktime, Opts) of + {ok, NTT0} -> + NTT0*1000; + error -> + case application:get_env(kernel, net_ticktime) of + {ok, NTT0} when is_integer(NTT0), NTT0 < 1 -> + 1000; + {ok, NTT0} when is_integer(NTT0) -> + NTT0*1000; + _ -> + 60000 + end + end, + + NTI = case maps:find(net_tickintensity, Opts) of + {ok, NTI0} -> + NTI0; + error -> + case application:get_env(kernel, net_tickintensity) of + {ok, NTI0} when is_integer(NTI0), NTI0 < 4 -> + 4; + {ok, NTI0} when is_integer(NTI0), NTI0 > 1000 -> + 1000; + {ok, NTI0} when is_integer(NTI0) -> + NTI0; + _ -> + 4 + end + end, + + %% Net tick time needs to be a multiple of net tick intensity; + %% round net tick time upwards if not... + NTT = if NTT1 rem NTI =:= 0 -> NTT1; + true -> ((NTT1 div NTI) + 1) * NTI + end, + + ND = case maps:find(name_domain, Opts) of + {ok, ND0} -> + ND0; + error -> + longnames + end, + + Opts#{net_ticktime => NTT, net_tickintensity => NTI, name_domain => ND}. + +init(#{name := Name, + name_domain := NameDomain, + net_ticktime := NetTicktime, + net_tickintensity := NetTickIntensity, + clean_halt := CleanHalt}) -> process_flag(trap_exit,true), - case init_node(Name, LongOrShortNames, CleanHalt) of + case init_node(Name, NameDomain, CleanHalt) of {ok, Node, Listeners} -> process_flag(priority, max), - Ticktime = to_integer(TickT), - Ticker = spawn_link(net_kernel, ticker, [self(), Ticktime]), + TickInterval = NetTicktime div NetTickIntensity, + Ticker = spawn_link(net_kernel, ticker, [self(), TickInterval]), {ok, #state{name = Name, node = Node, - type = LongOrShortNames, - tick = #tick{ticker = Ticker, time = Ticktime}, + type = NameDomain, + tick = #tick{ticker = Ticker, + time = NetTicktime, + intensity = NetTickIntensity}, connecttime = connecttime(), connections = ets:new(sys_dist,[named_table, @@ -598,8 +704,7 @@ handle_call({verbose, Level}, From, State) -> %% %% The tick field of the state contains either a #tick{} or a -%% #tick_change{} record if the ticker 
process has been upgraded; -%% otherwise, an integer or an atom. +%% #tick_change{} record. handle_call(ticktime, From, #state{tick = #tick{time = T}} = State) -> async_reply({reply, T, State}, From); @@ -611,22 +716,46 @@ handle_call({new_ticktime,T,_TP}, From, #state{tick = #tick{time = T}} = State) async_reply({reply, unchanged, State}, From); handle_call({new_ticktime,T,TP}, From, #state{tick = #tick{ticker = Tckr, - time = OT}} = State) -> + time = OT, + intensity = I}} = State) -> ?tckr_dbg(initiating_tick_change), - start_aux_ticker(T, OT, TP), - How = case T > OT of - true -> - ?tckr_dbg(longer_ticktime), - Tckr ! {new_ticktime,T}, - longer; - false -> - ?tckr_dbg(shorter_ticktime), - shorter - end, - async_reply({reply, change_initiated, - State#state{tick = #tick_change{ticker = Tckr, - time = T, - how = How}}}, From); + %% We need to preserve tick intensity and net tick time needs to be a + %% multiple of tick intensity... + {NT, NIntrvl} = case T < I of + true -> + %% Max 1 tick per millisecond implies that + %% minimum net tick time equals intensity... + {I, 1}; + _ -> + NIntrvl0 = T div I, + case T rem I of + 0 -> + {T, NIntrvl0}; + _ -> + %% Round net tick time upwards... + {(NIntrvl0+1)*I, NIntrvl0+1} + end + end, + case NT == OT of + true -> + async_reply({reply, unchanged, State}, From); + false -> + start_aux_ticker(NIntrvl, OT div I, TP), + How = case NT > OT of + true -> + ?tckr_dbg(longer_ticktime), + Tckr ! {new_ticktime, NIntrvl}, + longer; + false -> + ?tckr_dbg(shorter_ticktime), + shorter + end, + async_reply({reply, change_initiated, + State#state{tick = #tick_change{ticker = Tckr, + time = NT, + intensity = I, + how = How}}}, From) + end; handle_call({new_ticktime,_T,_TP}, From, @@ -763,7 +892,8 @@ handle_info({dist_ctrlr, Ctrlr, Node, SetupPid} = Msg, %% %% A node has successfully been connected. 
%% -handle_info({SetupPid, {nodeup,Node,Address,Type,Immediate}}, State) -> +handle_info({SetupPid, {nodeup,Node,Address,Type,Immediate}}, + #state{tick = Tick} = State) -> case {Immediate, ets:lookup(sys_dist, Node)} of {true, [Conn]} when (Conn#connection.state =:= pending) andalso (Conn#connection.owner =:= SetupPid) @@ -772,7 +902,11 @@ handle_info({SetupPid, {nodeup,Node,Address,Type,Immediate}}, State) -> address = Address, waiting = [], type = Type}), - SetupPid ! {self(), inserted}, + TickIntensity = case Tick of + #tick{intensity = TI} -> TI; + #tick_change{intensity = TI} -> TI + end, + SetupPid ! {self(), inserted, TickIntensity}, reply_waiting(Node,Conn#connection.waiting, true), {noreply, State}; _ -> @@ -898,13 +1032,20 @@ handle_info(aux_tick, State) -> handle_info(transition_period_end, #state{tick = #tick_change{ticker = Tckr, time = T, + intensity = I, how = How}} = State) -> ?tckr_dbg(transition_period_ended), case How of - shorter -> Tckr ! {new_ticktime, T}, done; - _ -> done + shorter -> + Interval = T div I, + Tckr ! {new_ticktime, Interval}, + ok; + _ -> + ok end, - {noreply,State#state{tick = #tick{ticker = Tckr, time = T}}}; + {noreply,State#state{tick = #tick{ticker = Tckr, + time = T, + intensity = I}}}; handle_info(X, State) -> error_msg("Net kernel got ~tw~n",[X]), @@ -1245,12 +1386,6 @@ ticker(Kernel, Tick) when is_integer(Tick) -> ?tckr_dbg(ticker_started), ticker_loop(Kernel, Tick). -to_integer(T) when is_integer(T) -> T; -to_integer(T) when is_atom(T) -> - list_to_integer(atom_to_list(T)); -to_integer(T) when is_list(T) -> - list_to_integer(T). 
- ticker_loop(Kernel, Tick) -> receive {new_ticktime, NewTick} -> diff --git a/lib/kernel/test/erl_distribution_SUITE.erl b/lib/kernel/test/erl_distribution_SUITE.erl index 2b84a68c52..310b94bddf 100644 --- a/lib/kernel/test/erl_distribution_SUITE.erl +++ b/lib/kernel/test/erl_distribution_SUITE.erl @@ -24,7 +24,7 @@ -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2]). --export([tick/1, tick_change/1, +-export([tick/1, tick_intensity/1, tick_change/1, connect_node/1, nodenames/1, hostnames/1, illegal_nodenames/1, hidden_node/1, @@ -41,16 +41,19 @@ monitor_nodes_combinations/1, monitor_nodes_cleanup/1, monitor_nodes_many/1, - dist_ctrl_proc_smoke/1]). + dist_ctrl_proc_smoke/1, + net_kernel_start/1]). %% Performs the test at another node. -export([get_socket_priorities/0, - tick_cli_test/1, tick_cli_test1/1, + tick_cli_test/3, tick_cli_test1/3, tick_serv_test/2, tick_serv_test1/1, run_remote_test/1, setopts_do/2, keep_conn/1, time_ping/1]). +-export([net_kernel_start_do_test/1]). + -export([init_per_testcase/2, end_per_testcase/2]). -export([dist_cntrlr_output_test/2]). @@ -72,11 +75,12 @@ suite() -> all() -> [dist_ctrl_proc_smoke, - tick, tick_change, nodenames, hostnames, illegal_nodenames, - connect_node, + tick, tick_intensity, tick_change, nodenames, hostnames, + illegal_nodenames, connect_node, hidden_node, setopts, table_waste, net_setuptime, inet_dist_options_options, - {group, monitor_nodes}]. + {group, monitor_nodes}, + net_kernel_start]. groups() -> [{monitor_nodes, [], @@ -120,26 +124,45 @@ connect_node(Config) when is_list(Config) -> tick(Config) when is_list(Config) -> run_dist_configs(fun tick/2, Config). -tick(DCfg, _Config) -> +tick(DCfg, Config) -> + tick_test(DCfg, Config, false). + +tick_intensity(Config) when is_list(Config) -> + run_dist_configs(fun tick_intensity/2, Config). + +tick_intensity(DCfg, Config) -> + tick_test(DCfg, Config, true). 
+ +tick_test(DCfg, _Config, CheckIntensityArg) -> %% %% This test case use disabled "connect all" so that %% global wont interfere... %% - %% First check that the normal case is OK! [Name1, Name2] = get_nodenames(2, dist_test), + {ok, Node} = start_node(DCfg, Name1), - rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [node()]), - erlang:monitor_node(Node, true), - receive - {nodedown, Node} -> - ct:fail("nodedown from other node") - after 30000 -> - erlang:monitor_node(Node, false), - stop_node(Node) + case CheckIntensityArg of + true -> + %% Not for intensity test... + ok; + false -> + %% First check that the normal case is OK! + rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [node(), 8000, 16000]), + + erlang:monitor_node(Node, true), + receive + {nodedown, Node} -> + ct:fail("nodedown from other node") + after 30000 -> + erlang:monitor_node(Node, false) + end, + ok end, + stop_node(Node), + %% Now, set the net_ticktime for the other node to 12 secs. %% After the sleep(2sec) and cast the other node shall destroy %% the connection as it has not received anything on the connection. @@ -156,9 +179,20 @@ tick(DCfg, _Config) -> "-kernel net_ticktime 100 -connect_all false"), rpc:call(ServNode, erl_distribution_SUITE, tick_serv_test, [Node, node()]), + %% We set min/max half a second lower/higher than expected since it + %% takes time for termination dist controller, delivery of messages + %% scheduling of process receiving nodedown, etc... 
+ {IArg, Min, Max} = case CheckIntensityArg of + false -> + {"", 7500, 16500}; + true -> + {" -kernel net_tickintensity 24", 11000, 13000} + end, + {ok, Node} = start_node(DCfg, Name1, - "-kernel net_ticktime 12 -connect_all false"), - rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [ServNode]), + "-kernel net_ticktime 12 -connect_all false" ++ IArg), + + rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [ServNode, Min, Max]), spawn_link(erl_distribution_SUITE, keep_conn, [Node]), @@ -171,6 +205,7 @@ tick(DCfg, _Config) -> {tick_test, T} when is_integer(T) -> stop_node(ServNode), stop_node(Node), + io:format("Result: ~p~n", [T]), T; {tick_test, Error} -> stop_node(ServNode), @@ -393,10 +428,10 @@ tick_serv_test1(Node) -> end end. -tick_cli_test(Node) -> - spawn(erl_distribution_SUITE, tick_cli_test1, [Node]). +tick_cli_test(Node, Min, Max) -> + spawn(erl_distribution_SUITE, tick_cli_test1, [Node, Min, Max]). -tick_cli_test1(Node) -> +tick_cli_test1(Node, Min, Max) -> register(tick_test, self()), erlang:monitor_node(Node, true), sleep(2), @@ -410,11 +445,14 @@ tick_cli_test1(Node) -> Diff = erlang:convert_time_unit(T2-T1, native, millisecond), case Diff of - T when T > 8000, T < 16000 -> + T when Min =< T, T =< Max -> From ! {tick_test, T}; T -> From ! {tick_test, - {"T not in interval 8000 < T < 16000", + {"T not in interval " + ++ integer_to_list(Min) + ++ " =< T =< " + ++ integer_to_list(Max), T}} end end @@ -1532,6 +1570,89 @@ dist_ctrl_proc_smoke(Config) when is_list(Config) -> stop_node(Node2), ok. +net_kernel_start(Config) when is_list(Config) -> + MyName = net_kernel_start_tester, + register(MyName, self()), + net_kernel_start_test(MyName, 120, 8), + net_kernel_start_test(MyName, undefined, undefined). 
+ +net_kernel_start_test(MyName, NetTickTime, NetTickIntesity) -> + TestNameStr = "net_kernel_start_test_node-" + ++ integer_to_list(erlang:system_time(seconds)) + ++ "-" ++ integer_to_list(erlang:unique_integer([monotonic,positive])), + TestNode = list_to_atom(TestNameStr ++ "@" ++ atom_to_list(gethostname())), + CmdLine = net_kernel_start_cmdline(MyName, list_to_atom(TestNameStr), + NetTickTime, NetTickIntesity), + io:format("Starting test node ~p: ~s~n", [TestNode, CmdLine]), + case open_port({spawn, CmdLine}, []) of + Port when is_port(Port) -> + receive + {i_am_alive, Pid, Node, NTT} = Msg -> + io:format("Response from ~p: ~p~n", [Node, Msg]), + rpc:cast(Node, erlang, halt, []), + catch erlang:port_close(Port), + TestNode = node(Pid), + TestNode = Node, + case NetTickTime == undefined of + true -> + {ok, DefNTT} = application:get_env(kernel, net_ticktime), + DefNTT = NTT; + false -> + NetTickTime = NTT + end + end, + ok; + Error -> + error({open_port_failed, TestNode, Error}) + end. + +net_kernel_start_cmdline(TestName, Name, NetTickTime, NetTickIntensity) -> + Pa = filename:dirname(code:which(?MODULE)), + Prog = case catch init:get_argument(progname) of + {ok, [[Prg]]} -> Prg; + _ -> error(missing_progname) + end, + NameDomain = case net_kernel:longnames() of + false -> "shortnames"; + true -> "longnames" + end, + {ok, Pwd} = file:get_cwd(), + NameStr = atom_to_list(Name), + Prog ++ " -noinput -noshell -detached -pa " ++ Pa + ++ " -env ERL_CRASH_DUMP " ++ Pwd ++ "/erl_crash_dump." ++ NameStr + ++ " -setcookie " ++ atom_to_list(erlang:get_cookie()) + ++ " -run " ++ atom_to_list(?MODULE) ++ " net_kernel_start_do_test " + ++ atom_to_list(TestName) ++ " " ++ atom_to_list(node()) ++ " " + ++ NameStr ++ " " ++ NameDomain + ++ case NetTickTime == undefined of + true -> + ""; + false -> + " " ++ integer_to_list(NetTickTime) ++ + " " ++ integer_to_list(NetTickIntensity) + end. 
+ +net_kernel_start_do_test([TestName, TestNode, Name, NameDomain]) -> + net_kernel_start_do_test(TestName, TestNode, list_to_atom(Name), + #{name_domain => list_to_atom(NameDomain)}); + +net_kernel_start_do_test([TestName, TestNode, Name, NameDomain, NetTickTime, NetTickIntensity]) -> + net_kernel_start_do_test(TestName, TestNode, list_to_atom(Name), + #{net_ticktime => list_to_integer(NetTickTime), + name_domain => list_to_atom(NameDomain), + net_tickintensity => list_to_integer(NetTickIntensity)}). + +net_kernel_start_do_test(TestName, TestNode, Name, Options) -> + case net_kernel:start(Name, Options) of + {ok, _Pid} -> + Tester = {list_to_atom(TestName), list_to_atom(TestNode)}, + Tester ! {i_am_alive, self(), node(), net_kernel:get_net_ticktime()}, + receive after 60000 -> ok end, + erlang:halt(); + Error -> + erlang:halt(lists:flatten(io_lib:format("~p", [Error]))) + end. + %% Misc. functions run_dist_configs(Func, Config) -> @@ -1602,7 +1723,7 @@ print_my_messages() -> sleep(T) -> receive after T * 1000 -> ok end. -start_node(DCfg, Name, Param, this) -> +start_node(_DCfg, Name, Param, this) -> NewParam = Param ++ " -pa " ++ filename:dirname(code:which(?MODULE)), test_server:start_node(Name, peer, [{args, NewParam}, {erl, [this]}]); start_node(DCfg, Name, Param, "this") -> |