summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErlang/OTP <otp@erlang.org>2022-03-28 12:39:17 +0200
committerErlang/OTP <otp@erlang.org>2022-03-28 12:39:17 +0200
commit510f4736527d166e7e87cf42defbcaba23ccf977 (patch)
treea6c03fe665d4d40eabf00ef03f7eb6463fd2d91c
parenta41a194004efd69f43355b65456a0e2756fc2ffd (diff)
parent3c1f611a843b7d971ac41151886576950fbf5015 (diff)
downloaderlang-510f4736527d166e7e87cf42defbcaba23ccf977.tar.gz
Merge branch 'rickard/net-tick-intensity/22.3.4/ERIERL-732/OTP-17905' into maint-22
* rickard/net-tick-intensity/22.3.4/ERIERL-732/OTP-17905: Net tick intensity # Conflicts: # lib/kernel/src/kernel.app.src # lib/kernel/test/erl_distribution_SUITE.erl
-rw-r--r--lib/kernel/doc/src/kernel_app.xml71
-rw-r--r--lib/kernel/doc/src/net_kernel.xml115
-rw-r--r--lib/kernel/src/dist_util.erl59
-rw-r--r--lib/kernel/src/erl_distribution.erl30
-rw-r--r--lib/kernel/src/kernel.app.src2
-rw-r--r--lib/kernel/src/net_kernel.erl247
-rw-r--r--lib/kernel/test/erl_distribution_SUITE.erl169
7 files changed, 537 insertions, 156 deletions
diff --git a/lib/kernel/doc/src/kernel_app.xml b/lib/kernel/doc/src/kernel_app.xml
index aef5a9fbe2..627a5c752c 100644
--- a/lib/kernel/doc/src/kernel_app.xml
+++ b/lib/kernel/doc/src/kernel_app.xml
@@ -307,32 +307,67 @@
setup time, but rather each individual network operation during
the connection setup and handshake.</p>
</item>
- <tag><c>net_ticktime = TickTime</c></tag>
- <item>
- <marker id="net_ticktime"></marker>
- <p>Specifies the <c>net_kernel</c> tick time in seconds. This is the
+ <tag><marker id="net_tickintensity"/><c>net_tickintensity = NetTickIntensity</c></tag>
+ <item>
+ <p><i>Net tick intensity</i> specifies how many ticks to send during a
+ <seealso marker="#net_ticktime">net tick time</seealso> period when
+ no other data is sent over a connection to another node. This also
+ determines how often to check for data from the other node. The
+ higher the net tick intensity, the closer to the chosen net tick time period
+ the node will detect an unresponsive node. The net tick intensity
+ defaults to <c>4</c>. The value of <c>NetTickIntensity</c> should be
+ an integer in the range <c>4..1000</c>. If <c>NetTickIntensity</c>
+ is not an integer, or is an integer less than <c>4</c>, <c>4</c> will
+ silently be used. If <c>NetTickIntensity</c> is an integer larger than
+ <c>1000</c>, <c>1000</c> will silently be used.
+ </p>
+ <note>
+ <p>Note that all communicating nodes are expected to use the same
+ <i>net tick intensity</i> as well as the same <i>net tick time</i>.</p>
+ </note>
+ <warning>
+ <p>Be careful not to set the net tick intensity too high, since a
+ too high value can overwhelm the node with work.</p>
+ </warning>
+ </item>
+ <tag><marker id="net_ticktime"/><c>net_ticktime = NetTickTime</c></tag>
+ <item>
+ <p>Specifies the <i>net tick time</i> in seconds. This is the
approximate time a connected node may be unresponsive until it is
considered down and thereby disconnected.</p>
- <p>Once every <c>TickTime/4</c> seconds, each connected node is ticked
- if nothing has been sent to it during that last <c>TickTime/4</c>
- interval. A tick is a small package sent on the connection. A connected
- node is considered to be down if no ticks or payload packages have been
- received during the last four <c>TickTime/4</c> intervals. This ensures
- that nodes that are not responding, for reasons such as hardware errors,
- are considered to be down.</p>
- <p>As the availability is only checked every <c>TickTime/4</c> seconds,
+ <p>Net tick time together with <seealso marker="#net_tickintensity">net
+ tick intensity</seealso> determines an interval <c>TickInterval =
+ NetTickTime/NetTickIntensity</c>. Once every <c>TickInterval</c> seconds,
+ each connected node is ticked if nothing has been sent to it during that
+ last <c>TickInterval</c> seconds. A tick is a small package sent on the
+ connection.
+ A connected node is considered to be down if no ticks or payload packages
+ have been received during the last <c>NetTickIntensity</c> number of
+ <c>TickInterval</c> seconds intervals. This ensures that nodes that
+ are not responding, for reasons such as hardware errors, are considered
+ to be down.</p>
+ <p>As the availability is only checked every <c>TickInterval</c> seconds,
the actual time <c>T</c> a node have been unresponsive when
detected may vary between <c>MinT</c> and <c>MaxT</c>,
where:</p>
<code type="none">
-MinT = TickTime - TickTime / 4
-MaxT = TickTime + TickTime / 4</code>
- <p><c>TickTime</c> defaults to <c>60</c> seconds. Thus,
+MinT = NetTickTime - NetTickTime / NetTickIntensity
+MaxT = NetTickTime + NetTickTime / NetTickIntensity</code>
+ <p><c>NetTickTime</c> defaults to <c>60</c> seconds and
+ <c>NetTickIntensity</c> defaults to <c>4</c>. Thus,
<c><![CDATA[45 < T < 75]]></c> seconds.</p>
+ <note>
<p>Notice that <em>all</em> communicating nodes are to have the
- <em>same</em> <c>TickTime</c> value specified, as it determines both the
- frequency of outgoing ticks and the expected frequency of incominging
- ticks.</p>
+ <em>same</em> <c>NetTickTime</c> and <c>NetTickIntensity</c> values
+ specified, as they determine both the frequency of outgoing ticks and
+ the expected frequency of incoming ticks.</p>
+ </note>
+ <p><c>NetTickTime</c> expressed in milliseconds needs to be a multiple of
+ <c>NetTickIntensity</c>. If it is not, <c>NetTickTime</c> will internally be
+ rounded up to the nearest millisecond value that is such a multiple.
+ <seealso marker="net_kernel#get_net_ticktime/0"><c>net_kernel:get_net_ticktime()</c></seealso>
+ will, however, report net tick time truncated to the nearest second.
+ </p>
<p>Normally, a terminating node is detected immediately by the transport
protocol (like TCP/IP).</p>
</item>
diff --git a/lib/kernel/doc/src/net_kernel.xml b/lib/kernel/doc/src/net_kernel.xml
index 419d3cad84..84e0aaaf5d 100644
--- a/lib/kernel/doc/src/net_kernel.xml
+++ b/lib/kernel/doc/src/net_kernel.xml
@@ -40,10 +40,11 @@
<c>-name</c> or <c>-sname</c>:</p>
<pre>
$ <input>erl -sname foobar</input></pre>
- <p>It is also possible to call <c>net_kernel:start([foobar])</c>
+ <p>It is also possible to call
+ <seealso marker="#start/2"><c>net_kernel:start(foobar, #{})</c></seealso>
directly from the normal Erlang shell prompt:</p>
<pre>
-1> <input>net_kernel:start([foobar, shortnames]).</input>
+1> <input>net_kernel:start(foobar, #{name_domain => shortnames}).</input>
{ok,&lt;0.64.0>}
(foobar@gringotts)2></pre>
<p>If the node is started with command-line flag <c>-sname</c>,
@@ -113,8 +114,11 @@ $ <input>erl -sname foobar</input></pre>
<name name="get_net_ticktime" arity="0" since=""/>
<fsummary>Get <c>net_ticktime</c>.</fsummary>
<desc>
- <p>Gets <c>net_ticktime</c> (see
- <seealso marker="kernel_app"><c>kernel(6)</c></seealso>).</p>
+ <p>Returns the currently used net tick time in seconds. For more information
+ see the
+ <seealso marker="kernel_app#net_ticktime"><c>net_ticktime</c></seealso>
+ <c>kernel(6)</c> parameter.</p>
+
<p>Defined return values (<c><anno>Res</anno></c>):</p>
<taglist>
<tag><c><anno>NetTicktime</anno></c></tag>
@@ -345,21 +349,96 @@ $ <input>erl -sname foobar</input></pre>
</func>
<func>
- <name since="">start([Name]) -> {ok, pid()} | {error, Reason}</name>
- <name since="">start([Name, NameType]) -> {ok, pid()} | {error, Reason}</name>
- <name since="">start([Name, NameType, Ticktime]) -> {ok, pid()} | {error, Reason}</name>
+ <name name="start" arity="2" since="OTP @OTP-17905@"/>
+ <fsummary>Turn an Erlang runtime system into a distributed node.</fsummary>
+ <desc>
+ <p>
+ Turns a non-distributed node into a distributed node by
+ starting <c>net_kernel</c> and other necessary processes.
+ </p>
+
+ <p>Currently supported options:</p>
+ <taglist>
+ <tag><c>name_domain => <anno>NameDomain</anno></c></tag>
+ <item><p>
+ Determines the host name part of the node name. If
+ <c><anno>NameDomain</anno></c> equals <c>longnames</c>, fully
+ qualified domain names will be used which also is the default.
+ If <c><anno>NameDomain</anno></c> equals <c>shortnames</c>, only the
+ short name of the host will be used.
+ </p></item>
+ <tag><c>net_ticktime => <anno>NetTickTime</anno></c></tag>
+ <item><p>
+ <i>Net tick time</i> to use in seconds. Defaults to the value of the
+ <seealso marker="kernel_app#net_ticktime"><c>net_ticktime</c></seealso>
+ <c>kernel(6)</c> parameter. For more information about <i>net tick
+ time</i>, see the <c>kernel</c> parameter. However, note that if the
+ value of the <c>kernel</c> parameter is invalid, it will silently be
+ replaced by a valid value, but if an invalid
+ <c><anno>NetTickTime</anno></c> value is passed as option value to
+ this function, the call will fail.
+ </p></item>
+ <tag><c>net_tickintensity => <anno>NetTickIntensity</anno></c></tag>
+ <item><p>
+ <i>Net tick intensity</i> to use. Defaults to the value of the
+ <seealso marker="kernel_app#net_tickintensity"><c>net_tickintensity</c></seealso>
+ <c>kernel(6)</c> parameter. For more information about <i>net tick
+ intensity</i>, see the <c>kernel</c> parameter. However, note that if
+ the value of the <c>kernel</c> parameter is invalid, it will silently
+ be replaced by a valid value, but if an invalid
+ <c><anno>NetTickIntensity</anno></c> value is passed as option value
+ to this function, the call will fail.
+ </p></item>
+ </taglist>
+ </desc>
+ </func>
+ <func>
+ <name name="start" arity="1" since=""/>
<fsummary>Turn an Erlang runtime system into a distributed node.</fsummary>
- <type>
- <v>Name = atom()</v>
- <v>NameType = shortnames | longnames</v>
- <v>Reason = {already_started, pid()} | term()</v>
- </type>
<desc>
- <p>Turns a non-distributed node into a distributed node by
- starting <c>net_kernel</c> and other necessary processes.</p>
- <p>Notice that the argument is a list with exactly one, two, or
- three arguments. <c>NameType</c> defaults to <c>longnames</c>
- and <c>Ticktime</c> to <c>15000</c>.</p>
+ <warning><p>
+ <c>start/1</c> is deprecated. Use
+ <seealso marker="#start/2"><c>start/2</c></seealso> instead.
+ </p></warning>
+ <p>
+ Turns a non-distributed node into a distributed node by
+ starting <c>net_kernel</c> and other necessary processes.
+ </p>
+ <p>
+ <c><anno>Options</anno></c> list can only be exactly one of
+ the following lists (order is important):
+ </p>
+ <taglist>
+ <tag><c>[<anno>Name</anno>]</c></tag>
+ <item>
+ <p>
+ The same as <c>net_kernel:start([<anno>Name</anno>,
+ longnames, 15000])</c>.
+ </p>
+ </item>
+ <tag><c>[<anno>Name</anno>, <anno>NameDomain</anno>]</c></tag>
+ <item>
+ <p>
+ The same as <c>net_kernel:start([<anno>Name</anno>,
+ <anno>NameDomain</anno>, 15000])</c>.
+ </p>
+ </item>
+ <tag><c>[<anno>Name</anno>, <anno>NameDomain</anno>,
+ <anno>TickTime</anno>]</c></tag>
+ <item>
+ <p>
+ The same as <seealso marker="#start/2">
+ <c>net_kernel:start(<anno>Name</anno>, #{name_domain =>
+ <anno>NameDomain</anno>, net_ticktime =>
+ ((<anno>TickTime</anno>*4-1) div 1000) + 1,
+ net_tickintensity => 4})</c></seealso>.
+ Note that <c><anno>TickTime</anno></c> is <i>not</i> the same
+ as net tick time expressed in milliseconds.
+ <c><anno>TickTime</anno></c> is the time between ticks when
+ net tick intensity equals <c>4</c>.
+ </p>
+ </item>
+ </taglist>
</desc>
</func>
@@ -370,7 +449,7 @@ $ <input>erl -sname foobar</input></pre>
<p>Turns a distributed node into a non-distributed node. For
other nodes in the network, this is the same as the node
going down. Only possible when the net kernel was started using
- <seealso marker="#start/1"><c>start/1</c></seealso>,
+ <seealso marker="#start/2"><c>start/2</c></seealso>,
otherwise <c>{error, not_allowed}</c> is returned. Returns
<c>{error, not_found}</c> if the local node is not alive.</p>
</desc>
diff --git a/lib/kernel/src/dist_util.erl b/lib/kernel/src/dist_util.erl
index 6a4fac115a..931c62ff5b 100644
--- a/lib/kernel/src/dist_util.erl
+++ b/lib/kernel/src/dist_util.erl
@@ -397,6 +397,17 @@ convert_flags(_Undefined) ->
%% The connection has been established.
%% --------------------------------------------------------------
+-record(state, {kernel :: pid(),
+ node :: node(),
+ tick_intensity :: 4..1000,
+ socket :: term(),
+ publish_type :: 'hidden' | 'normal',
+ handle :: erlang:dist_handle(),
+ f_tick :: function(),
+ f_getstat :: function() | 'undefined',
+ f_setopts :: function() | 'undefined',
+ f_getopts :: function() | 'undefined'}).
+
connection(#hs_data{other_node = Node,
socket = Socket,
f_address = FAddress,
@@ -408,22 +419,23 @@ connection(#hs_data{other_node = Node,
ok ->
DHandle = do_setnode(HSData), % Succeeds or exits the process.
Address = FAddress(Socket,Node),
- mark_nodeup(HSData,Address),
+ TickIntensity = mark_nodeup(HSData,Address),
case FPostNodeup(Socket) of
ok ->
case HSData#hs_data.f_handshake_complete of
undefined -> ok;
HsComplete -> HsComplete(Socket, Node, DHandle)
end,
- con_loop({HSData#hs_data.kernel_pid,
- Node,
- Socket,
- PType,
- DHandle,
- HSData#hs_data.mf_tick,
- HSData#hs_data.mf_getstat,
- HSData#hs_data.mf_setopts,
- HSData#hs_data.mf_getopts},
+ con_loop(#state{kernel = HSData#hs_data.kernel_pid,
+ node = Node,
+ socket = Socket,
+ tick_intensity = TickIntensity,
+ publish_type = PType,
+ handle = DHandle,
+ f_tick = HSData#hs_data.mf_tick,
+ f_getstat = HSData#hs_data.mf_getstat,
+ f_setopts = HSData#hs_data.mf_setopts,
+ f_getopts = HSData#hs_data.mf_getopts},
#tick{});
_ ->
?shutdown2(Node, connection_setup_failed)
@@ -503,8 +515,8 @@ mark_nodeup(#hs_data{kernel_pid = Kernel,
Kernel ! {self(), {nodeup,Node,Address,publish_type(Flags),
true}},
receive
- {Kernel, inserted} ->
- ok;
+ {Kernel, inserted, TickIntensity} ->
+ TickIntensity;
{Kernel, bad_request} ->
TypeT = case OtherStarted of
true ->
@@ -523,8 +535,10 @@ getstat(DHandle, _Socket, undefined) ->
getstat(_DHandle, Socket, MFGetstat) ->
MFGetstat(Socket).
-con_loop({Kernel, Node, Socket, Type, DHandle, MFTick, MFGetstat,
- MFSetOpts, MFGetOpts}=ConData,
+con_loop(#state{kernel = Kernel, node = Node,
+ socket = Socket, handle = DHandle,
+ f_getstat = MFGetstat, f_setopts = MFSetOpts,
+ f_getopts = MFGetOpts} = ConData,
Tick) ->
receive
{tcp_closed, Socket} ->
@@ -534,14 +548,13 @@ con_loop({Kernel, Node, Socket, Type, DHandle, MFTick, MFGetstat,
{Kernel, aux_tick} ->
case getstat(DHandle, Socket, MFGetstat) of
{ok, _, _, PendWrite} ->
- send_aux_tick(Type, Socket, PendWrite, MFTick);
+ send_aux_tick(ConData, PendWrite);
_ ->
ignore_it
end,
con_loop(ConData, Tick);
{Kernel, tick} ->
- case send_tick(DHandle, Socket, Tick, Type,
- MFTick, MFGetstat) of
+ case send_tick(ConData, Tick) of
{ok, NewTick} ->
con_loop(ConData, NewTick);
{error, not_responding} ->
@@ -887,13 +900,16 @@ send_status(#hs_data{socket = Socket, other_node = Node,
%% A HIDDEN node is always ticked if we haven't read anything
%% as a (primitive) hidden node only ticks when it receives a TICK !!
-send_tick(DHandle, Socket, Tick, Type, MFTick, MFGetstat) ->
+send_tick(#state{handle = DHandle, socket = Socket,
+ tick_intensity = TickIntensity,
+ publish_type = Type, f_tick = MFTick,
+ f_getstat = MFGetstat}, Tick) ->
#tick{tick = T0,
read = Read,
write = Write,
ticked = Ticked0} = Tick,
T = T0 + 1,
- T1 = T rem 4,
+ T1 = T rem TickIntensity,
case getstat(DHandle, Socket, MFGetstat) of
{ok, Read, _, _} when Ticked0 =:= T ->
{error, not_responding};
@@ -931,9 +947,10 @@ need_to_tick(hidden, 0, _, _) -> % nothing read from hidden
need_to_tick(_, _, _, _) ->
false.
-send_aux_tick(normal, _, Pend, _) when Pend /= false, Pend /= 0 ->
+send_aux_tick(#state{publish_type = normal}, Pend) when Pend /= false,
+ Pend /= 0 ->
ok; %% Dont send tick if pending write.
-send_aux_tick(_Type, Socket, _Pend, MFTick) ->
+send_aux_tick(#state{socket = Socket, f_tick = MFTick}, _Pend) ->
MFTick(Socket).
%% ------------------------------------------------------------
diff --git a/lib/kernel/src/erl_distribution.erl b/lib/kernel/src/erl_distribution.erl
index f07bd351eb..86c988e160 100644
--- a/lib/kernel/src/erl_distribution.erl
+++ b/lib/kernel/src/erl_distribution.erl
@@ -23,7 +23,7 @@
-include_lib("kernel/include/logger.hrl").
--export([start_link/0,start_link/2,init/1,start/1,stop/0]).
+-export([start_link/0,start_link/1,init/1,start/1,stop/0]).
-define(DBG,erlang:display([?MODULE,?LINE])).
@@ -35,8 +35,8 @@ start_link() ->
%% Called from net_kernel:start/1 to start distribution after the
%% system has already started.
-start(Args) ->
- C = {net_sup_dynamic, {?MODULE,start_link,[Args,false]}, permanent,
+start(Opts) ->
+ C = {net_sup_dynamic, {?MODULE,start_link,[Opts#{clean_halt => false}]}, permanent,
1000, supervisor, [erl_distribution]},
supervisor:start_child(kernel_sup, C).
@@ -62,8 +62,8 @@ stop() ->
%% Helper start function.
-start_link(Args, CleanHalt) ->
- supervisor:start_link({local,net_sup}, ?MODULE, [Args,CleanHalt]).
+start_link(Opts) ->
+ supervisor:start_link({local,net_sup}, ?MODULE, [Opts]).
init(NetArgs) ->
Epmd =
@@ -84,25 +84,17 @@ init(NetArgs) ->
do_start_link([{Arg,Flag}|T]) ->
case init:get_argument(Arg) of
{ok,[[Name]]} ->
- start_link([list_to_atom(Name),Flag|ticktime()], true);
+ start_link(#{name => list_to_atom(Name),
+ name_domain => Flag,
+ clean_halt => true});
{ok,[[Name]|_Rest]} ->
?LOG_WARNING("Multiple -~p given to erl, using the first, ~p",
[Arg, Name]),
- start_link([list_to_atom(Name),Flag|ticktime()], true);
+ start_link(#{name => list_to_atom(Name),
+ name_domain => Flag,
+ clean_halt => true});
_ ->
do_start_link(T)
end;
do_start_link([]) ->
ignore.
-
-ticktime() ->
- %% catch, in case the system was started with boot file start_old,
- %% i.e. running without the application_controller.
- %% Time is given in seconds. The net_kernel tick time is
- %% Time/4 milliseconds.
- case catch application:get_env(net_ticktime) of
- {ok, Value} when is_integer(Value), Value > 0 ->
- [Value * 250]; %% i.e. 1000 / 4 = 250 ms.
- _ ->
- []
- end.
diff --git a/lib/kernel/src/kernel.app.src b/lib/kernel/src/kernel.app.src
index 234d71f745..53a16a2f74 100644
--- a/lib/kernel/src/kernel.app.src
+++ b/lib/kernel/src/kernel.app.src
@@ -147,6 +147,8 @@
{applications, []},
{env, [{logger_level, notice},
{logger_sasl_compatible, false},
+ {net_tickintensity, 4},
+ {net_ticktime, 60},
{prevent_overlapping_partitions, false}
]},
{mod, {kernel, []}},
diff --git a/lib/kernel/src/net_kernel.erl b/lib/kernel/src/net_kernel.erl
index 4c8dd1e2b5..28f8b611ef 100644
--- a/lib/kernel/src/net_kernel.erl
+++ b/lib/kernel/src/net_kernel.erl
@@ -59,12 +59,13 @@
monitor_nodes/2,
setopts/2,
getopts/2,
+ start/2,
start/1,
stop/0]).
%% Exports for internal use.
--export([start_link/2,
+-export([start_link/1,
kernel_apply/3,
longnames/0,
protocol_childspecs/0,
@@ -143,14 +144,18 @@
node %% remote node name
}).
--record(tick, {ticker, %% ticker : pid()
- time %% Ticktime in milli seconds : integer()
- }).
+-record(tick,
+ {ticker :: pid(), %% ticker
+ time :: pos_integer(), %% net tick time (ms)
+ intensity :: 4..1000 %% ticks until timeout
+ }).
--record(tick_change, {ticker, %% Ticker : pid()
- time, %% Ticktime in milli seconds : integer()
- how %% What type of change : atom()
- }).
+-record(tick_change,
+ {ticker :: pid(), %% ticker
+ time :: pos_integer(), %% net tick time (ms)
+ intensity :: 4..1000, %% ticks until timeout
+ how :: 'longer' | 'shorter' %% What type of change
+ }).
%% Default connection setup timeout in milliseconds.
%% This timeout is set for every distributed action during
@@ -223,7 +228,7 @@ verbose(Level) when is_integer(Level) ->
| {ongoing_change_to, NewNetTicktime},
NewNetTicktime :: pos_integer().
set_net_ticktime(T, TP) when is_integer(T), T > 0, is_integer(TP), TP >= 0 ->
- ticktime_res(request({new_ticktime, T*250, TP*1000})).
+ ticktime_res(request({new_ticktime, T*1000, TP*1000})).
-spec set_net_ticktime(NetTicktime) -> Res when
NetTicktime :: pos_integer(),
@@ -268,8 +273,8 @@ monitor_nodes(Flag, Opts) ->
end.
%% ...
-ticktime_res({A, I}) when is_atom(A), is_integer(I) -> {A, I div 250};
-ticktime_res(I) when is_integer(I) -> I div 250;
+ticktime_res({A, I}) when is_atom(A), is_integer(I) -> {A, I div 1000};
+ticktime_res(I) when is_integer(I) -> I div 1000;
ticktime_res(A) when is_atom(A) -> A.
%% Called though BIF's
@@ -329,21 +334,69 @@ request(Req) ->
%% This function is used to dynamically start the
%% distribution.
-start(Args) ->
- erl_distribution:start(Args).
+-spec start(Name, Options) -> {ok, pid()} | {error, Reason} when
+ Options :: #{name_domain => NameDomain,
+ net_ticktime => NetTickTime,
+ net_tickintensity => NetTickIntensity},
+ Name :: atom(),
+ NameDomain :: shortnames | longnames,
+ NetTickTime :: pos_integer(),
+ NetTickIntensity :: 4..1000,
+ Reason :: {already_started, pid()} | term().
+
+start(Name, Options) when is_atom(Name), is_map(Options) ->
+ try
+ maps:fold(fun (name_domain, Val, _) when Val == shortnames;
+ Val == longnames ->
+ ok;
+ (net_ticktime, Val, _) when is_integer(Val),
+ Val > 0 ->
+ ok;
+ (net_tickintensity, Val, _) when is_integer(Val),
+ 4 =< Val,
+ Val =< 1000 ->
+ ok;
+ (Opt, Val, _) ->
+ error({invalid_option, Opt, Val})
+ end, ok, Options)
+ catch error:Reason ->
+ error(Reason, [Name, Options])
+ end,
+ erl_distribution:start(Options#{name => Name});
+start(Name, Options) when is_map(Options) ->
+ error(invalid_name, [Name, Options]);
+start(Name, Options) ->
+ error(invalid_options, [Name, Options]).
+
+-spec start(Options) -> {ok, pid()} | {error, Reason} when
+ Options :: nonempty_list(Name | NameDomain | TickTime),
+ Name :: atom(),
+ NameDomain :: shortnames | longnames,
+ TickTime :: pos_integer(),
+ Reason :: {already_started, pid()} | term().
+
+start([Name]) when is_atom(Name) ->
+ start([Name, longnames, 15000]);
+start([Name, NameDomain]) when is_atom(Name),
+ is_atom(NameDomain) ->
+ start([Name, NameDomain, 15000]);
+start([Name, NameDomain, TickTime]) when is_atom(Name),
+ is_atom(NameDomain),
+ is_integer(TickTime),
+ TickTime > 0 ->
+ %% NetTickTime is in seconds. TickTime is time in milliseconds
+ %% between ticks when net tick intensity is 4. We round upwards...
+ NetTickTime = ((TickTime*4-1) div 1000)+1,
+ start(Name, #{name_domain => NameDomain,
+ net_ticktime => NetTickTime,
+ net_tickintensity => 4}).
%% This is the main startup routine for net_kernel (only for internal
-%% use by the Kernel application.
-
-start_link([Name], CleanHalt) ->
- start_link([Name, longnames], CleanHalt);
-start_link([Name, LongOrShortNames], CleanHalt) ->
- start_link([Name, LongOrShortNames, 15000], CleanHalt);
+%% use) by the Kernel application.
-start_link([Name, LongOrShortNames, Ticktime], CleanHalt) ->
- Args = {Name, LongOrShortNames, Ticktime, CleanHalt},
+start_link(StartOpts) ->
case gen_server:start_link({local, net_kernel}, ?MODULE,
- Args, []) of
+ make_init_opts(StartOpts), []) of
{ok, Pid} ->
{ok, Pid};
{error, {already_started, Pid}} ->
@@ -352,17 +405,70 @@ start_link([Name, LongOrShortNames, Ticktime], CleanHalt) ->
exit(nodistribution)
end.
-init({Name, LongOrShortNames, TickT, CleanHalt}) ->
+make_init_opts(Opts) ->
+ %% Net tick time given in seconds, but kept in milliseconds...
+ NTT1 = case maps:find(net_ticktime, Opts) of
+ {ok, NTT0} ->
+ NTT0*1000;
+ error ->
+ case application:get_env(kernel, net_ticktime) of
+ {ok, NTT0} when is_integer(NTT0), NTT0 < 1 ->
+ 1000;
+ {ok, NTT0} when is_integer(NTT0) ->
+ NTT0*1000;
+ _ ->
+ 60000
+ end
+ end,
+
+ NTI = case maps:find(net_tickintensity, Opts) of
+ {ok, NTI0} ->
+ NTI0;
+ error ->
+ case application:get_env(kernel, net_tickintensity) of
+ {ok, NTI0} when is_integer(NTI0), NTI0 < 4 ->
+ 4;
+ {ok, NTI0} when is_integer(NTI0), NTI0 > 1000 ->
+ 1000;
+ {ok, NTI0} when is_integer(NTI0) ->
+ NTI0;
+ _ ->
+ 4
+ end
+ end,
+
+ %% Net tick time needs to be a multiple of net tick intensity;
+ %% round net tick time upwards if not...
+ NTT = if NTT1 rem NTI =:= 0 -> NTT1;
+ true -> ((NTT1 div NTI) + 1) * NTI
+ end,
+
+ ND = case maps:find(name_domain, Opts) of
+ {ok, ND0} ->
+ ND0;
+ error ->
+ longnames
+ end,
+
+ Opts#{net_ticktime => NTT, net_tickintensity => NTI, name_domain => ND}.
+
+init(#{name := Name,
+ name_domain := NameDomain,
+ net_ticktime := NetTicktime,
+ net_tickintensity := NetTickIntensity,
+ clean_halt := CleanHalt}) ->
process_flag(trap_exit,true),
- case init_node(Name, LongOrShortNames, CleanHalt) of
+ case init_node(Name, NameDomain, CleanHalt) of
{ok, Node, Listeners} ->
process_flag(priority, max),
- Ticktime = to_integer(TickT),
- Ticker = spawn_link(net_kernel, ticker, [self(), Ticktime]),
+ TickInterval = NetTicktime div NetTickIntensity,
+ Ticker = spawn_link(net_kernel, ticker, [self(), TickInterval]),
{ok, #state{name = Name,
node = Node,
- type = LongOrShortNames,
- tick = #tick{ticker = Ticker, time = Ticktime},
+ type = NameDomain,
+ tick = #tick{ticker = Ticker,
+ time = NetTicktime,
+ intensity = NetTickIntensity},
connecttime = connecttime(),
connections =
ets:new(sys_dist,[named_table,
@@ -598,8 +704,7 @@ handle_call({verbose, Level}, From, State) ->
%%
%% The tick field of the state contains either a #tick{} or a
-%% #tick_change{} record if the ticker process has been upgraded;
-%% otherwise, an integer or an atom.
+%% #tick_change{} record.
handle_call(ticktime, From, #state{tick = #tick{time = T}} = State) ->
async_reply({reply, T, State}, From);
@@ -611,22 +716,46 @@ handle_call({new_ticktime,T,_TP}, From, #state{tick = #tick{time = T}} = State)
async_reply({reply, unchanged, State}, From);
handle_call({new_ticktime,T,TP}, From, #state{tick = #tick{ticker = Tckr,
- time = OT}} = State) ->
+ time = OT,
+ intensity = I}} = State) ->
?tckr_dbg(initiating_tick_change),
- start_aux_ticker(T, OT, TP),
- How = case T > OT of
- true ->
- ?tckr_dbg(longer_ticktime),
- Tckr ! {new_ticktime,T},
- longer;
- false ->
- ?tckr_dbg(shorter_ticktime),
- shorter
- end,
- async_reply({reply, change_initiated,
- State#state{tick = #tick_change{ticker = Tckr,
- time = T,
- how = How}}}, From);
+ %% We need to preserve tick intensity and net tick time needs to be a
+ %% multiple of tick intensity...
+ {NT, NIntrvl} = case T < I of
+ true ->
+ %% Max 1 tick per millisecond implies that
+ %% minimum net tick time equals intensity...
+ {I, 1};
+ _ ->
+ NIntrvl0 = T div I,
+ case T rem I of
+ 0 ->
+ {T, NIntrvl0};
+ _ ->
+ %% Round net tick time upwards...
+ {(NIntrvl0+1)*I, NIntrvl0+1}
+ end
+ end,
+ case NT == OT of
+ true ->
+ async_reply({reply, unchanged, State}, From);
+ false ->
+ start_aux_ticker(NIntrvl, OT div I, TP),
+ How = case NT > OT of
+ true ->
+ ?tckr_dbg(longer_ticktime),
+ Tckr ! {new_ticktime, NIntrvl},
+ longer;
+ false ->
+ ?tckr_dbg(shorter_ticktime),
+ shorter
+ end,
+ async_reply({reply, change_initiated,
+ State#state{tick = #tick_change{ticker = Tckr,
+ time = NT,
+ intensity = I,
+ how = How}}}, From)
+ end;
handle_call({new_ticktime,_T,_TP},
From,
@@ -763,7 +892,8 @@ handle_info({dist_ctrlr, Ctrlr, Node, SetupPid} = Msg,
%%
%% A node has successfully been connected.
%%
-handle_info({SetupPid, {nodeup,Node,Address,Type,Immediate}}, State) ->
+handle_info({SetupPid, {nodeup,Node,Address,Type,Immediate}},
+ #state{tick = Tick} = State) ->
case {Immediate, ets:lookup(sys_dist, Node)} of
{true, [Conn]} when (Conn#connection.state =:= pending)
andalso (Conn#connection.owner =:= SetupPid)
@@ -772,7 +902,11 @@ handle_info({SetupPid, {nodeup,Node,Address,Type,Immediate}}, State) ->
address = Address,
waiting = [],
type = Type}),
- SetupPid ! {self(), inserted},
+ TickIntensity = case Tick of
+ #tick{intensity = TI} -> TI;
+ #tick_change{intensity = TI} -> TI
+ end,
+ SetupPid ! {self(), inserted, TickIntensity},
reply_waiting(Node,Conn#connection.waiting, true),
{noreply, State};
_ ->
@@ -898,13 +1032,20 @@ handle_info(aux_tick, State) ->
handle_info(transition_period_end,
#state{tick = #tick_change{ticker = Tckr,
time = T,
+ intensity = I,
how = How}} = State) ->
?tckr_dbg(transition_period_ended),
case How of
- shorter -> Tckr ! {new_ticktime, T}, done;
- _ -> done
+ shorter ->
+ Interval = T div I,
+ Tckr ! {new_ticktime, Interval},
+ ok;
+ _ ->
+ ok
end,
- {noreply,State#state{tick = #tick{ticker = Tckr, time = T}}};
+ {noreply,State#state{tick = #tick{ticker = Tckr,
+ time = T,
+ intensity = I}}};
handle_info(X, State) ->
error_msg("Net kernel got ~tw~n",[X]),
@@ -1245,12 +1386,6 @@ ticker(Kernel, Tick) when is_integer(Tick) ->
?tckr_dbg(ticker_started),
ticker_loop(Kernel, Tick).
-to_integer(T) when is_integer(T) -> T;
-to_integer(T) when is_atom(T) ->
- list_to_integer(atom_to_list(T));
-to_integer(T) when is_list(T) ->
- list_to_integer(T).
-
ticker_loop(Kernel, Tick) ->
receive
{new_ticktime, NewTick} ->
diff --git a/lib/kernel/test/erl_distribution_SUITE.erl b/lib/kernel/test/erl_distribution_SUITE.erl
index 2b84a68c52..310b94bddf 100644
--- a/lib/kernel/test/erl_distribution_SUITE.erl
+++ b/lib/kernel/test/erl_distribution_SUITE.erl
@@ -24,7 +24,7 @@
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
init_per_group/2,end_per_group/2]).
--export([tick/1, tick_change/1,
+-export([tick/1, tick_intensity/1, tick_change/1,
connect_node/1,
nodenames/1, hostnames/1,
illegal_nodenames/1, hidden_node/1,
@@ -41,16 +41,19 @@
monitor_nodes_combinations/1,
monitor_nodes_cleanup/1,
monitor_nodes_many/1,
- dist_ctrl_proc_smoke/1]).
+ dist_ctrl_proc_smoke/1,
+ net_kernel_start/1]).
%% Performs the test at another node.
-export([get_socket_priorities/0,
- tick_cli_test/1, tick_cli_test1/1,
+ tick_cli_test/3, tick_cli_test1/3,
tick_serv_test/2, tick_serv_test1/1,
run_remote_test/1,
setopts_do/2,
keep_conn/1, time_ping/1]).
+-export([net_kernel_start_do_test/1]).
+
-export([init_per_testcase/2, end_per_testcase/2]).
-export([dist_cntrlr_output_test/2]).
@@ -72,11 +75,12 @@ suite() ->
all() ->
[dist_ctrl_proc_smoke,
- tick, tick_change, nodenames, hostnames, illegal_nodenames,
- connect_node,
+ tick, tick_intensity, tick_change, nodenames, hostnames,
+ illegal_nodenames, connect_node,
hidden_node, setopts,
table_waste, net_setuptime, inet_dist_options_options,
- {group, monitor_nodes}].
+ {group, monitor_nodes},
+ net_kernel_start].
groups() ->
[{monitor_nodes, [],
@@ -120,26 +124,45 @@ connect_node(Config) when is_list(Config) ->
tick(Config) when is_list(Config) ->
run_dist_configs(fun tick/2, Config).
-tick(DCfg, _Config) ->
+tick(DCfg, Config) ->
+ tick_test(DCfg, Config, false).
+
+tick_intensity(Config) when is_list(Config) ->
+ run_dist_configs(fun tick_intensity/2, Config).
+
+tick_intensity(DCfg, Config) ->
+ tick_test(DCfg, Config, true).
+
+tick_test(DCfg, _Config, CheckIntensityArg) ->
%%
%% This test case use disabled "connect all" so that
%% global wont interfere...
%%
- %% First check that the normal case is OK!
[Name1, Name2] = get_nodenames(2, dist_test),
+
{ok, Node} = start_node(DCfg, Name1),
- rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [node()]),
- erlang:monitor_node(Node, true),
- receive
- {nodedown, Node} ->
- ct:fail("nodedown from other node")
- after 30000 ->
- erlang:monitor_node(Node, false),
- stop_node(Node)
+ case CheckIntensityArg of
+ true ->
+ %% Not for intensity test...
+ ok;
+ false ->
+ %% First check that the normal case is OK!
+ rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [node(), 8000, 16000]),
+
+ erlang:monitor_node(Node, true),
+ receive
+ {nodedown, Node} ->
+ ct:fail("nodedown from other node")
+ after 30000 ->
+ erlang:monitor_node(Node, false)
+ end,
+ ok
end,
+ stop_node(Node),
+
%% Now, set the net_ticktime for the other node to 12 secs.
%% After the sleep(2sec) and cast the other node shall destroy
%% the connection as it has not received anything on the connection.
@@ -156,9 +179,20 @@ tick(DCfg, _Config) ->
"-kernel net_ticktime 100 -connect_all false"),
rpc:call(ServNode, erl_distribution_SUITE, tick_serv_test, [Node, node()]),
+ %% We set min/max half a second lower/higher than expected since it
+ %% takes time to terminate the dist controller, deliver messages,
+ %% schedule the process receiving nodedown, etc...
+ {IArg, Min, Max} = case CheckIntensityArg of
+ false ->
+ {"", 7500, 16500};
+ true ->
+ {" -kernel net_tickintensity 24", 11000, 13000}
+ end,
+
{ok, Node} = start_node(DCfg, Name1,
- "-kernel net_ticktime 12 -connect_all false"),
- rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [ServNode]),
+ "-kernel net_ticktime 12 -connect_all false" ++ IArg),
+
+ rpc:call(Node, erl_distribution_SUITE, tick_cli_test, [ServNode, Min, Max]),
spawn_link(erl_distribution_SUITE, keep_conn, [Node]),
@@ -171,6 +205,7 @@ tick(DCfg, _Config) ->
{tick_test, T} when is_integer(T) ->
stop_node(ServNode),
stop_node(Node),
+ io:format("Result: ~p~n", [T]),
T;
{tick_test, Error} ->
stop_node(ServNode),
@@ -393,10 +428,10 @@ tick_serv_test1(Node) ->
end
end.
-tick_cli_test(Node) ->
- spawn(erl_distribution_SUITE, tick_cli_test1, [Node]).
+tick_cli_test(Node, Min, Max) -> %% Starts the client side of the tick test in a separate process.
+ spawn(erl_distribution_SUITE, tick_cli_test1, [Node, Min, Max]). %% Min/Max: inclusive bounds in milliseconds that tick_cli_test1/3 applies to the time it measures
-tick_cli_test1(Node) ->
+tick_cli_test1(Node, Min, Max) ->
register(tick_test, self()),
erlang:monitor_node(Node, true),
sleep(2),
@@ -410,11 +445,14 @@ tick_cli_test1(Node) ->
Diff = erlang:convert_time_unit(T2-T1, native,
millisecond),
case Diff of
- T when T > 8000, T < 16000 ->
+ T when Min =< T, T =< Max ->
From ! {tick_test, T};
T ->
From ! {tick_test,
- {"T not in interval 8000 < T < 16000",
+ {"T not in interval "
+ ++ integer_to_list(Min)
+ ++ " =< T =< "
+ ++ integer_to_list(Max),
T}}
end
end
@@ -1532,6 +1570,89 @@ dist_ctrl_proc_smoke(Config) when is_list(Config) ->
stop_node(Node2),
ok.
+net_kernel_start(Config) when is_list(Config) -> %% Test case: runs net_kernel_start_test/3 once with explicit tick options and once without.
+ MyName = net_kernel_start_tester,
+ register(MyName, self()), %% the started node replies to {MyName, node()}, so this process must be registered under MyName
+ net_kernel_start_test(MyName, 120, 8), %% explicit net_ticktime 120 and net_tickintensity 8
+ net_kernel_start_test(MyName, undefined, undefined). %% undefined: no tick options are put on the command line
+
+net_kernel_start_test(MyName, NetTickTime, NetTickIntesity) -> %% Starts a separate Erlang node through a port and checks the net tick time it reports back. NOTE(review): "NetTickIntesity" is misspelled (Intensity).
+ TestNameStr = "net_kernel_start_test_node-" %% node name made unique with the system time and a unique integer
+ ++ integer_to_list(erlang:system_time(seconds))
+ ++ "-" ++ integer_to_list(erlang:unique_integer([monotonic,positive])),
+ TestNode = list_to_atom(TestNameStr ++ "@" ++ atom_to_list(gethostname())), %% full node name the started node is expected to get
+ CmdLine = net_kernel_start_cmdline(MyName, list_to_atom(TestNameStr),
+ NetTickTime, NetTickIntesity),
+ io:format("Starting test node ~p: ~s~n", [TestNode, CmdLine]),
+ case open_port({spawn, CmdLine}, []) of
+ Port when is_port(Port) ->
+ receive %% NOTE(review): no 'after' clause; if the node never reports, this blocks until some outer timeout (e.g. a test case timetrap, if one is configured) fires
+ {i_am_alive, Pid, Node, NTT} = Msg -> %% sent by net_kernel_start_do_test/4; NTT is that node's net_kernel:get_net_ticktime() result
+ io:format("Response from ~p: ~p~n", [Node, Msg]),
+ rpc:cast(Node, erlang, halt, []), %% tell the started node to halt; cast, so no reply is awaited
+ catch erlang:port_close(Port), %% presumably the port may already be closed since the node is started with -detached; failures are ignored
+ TestNode = node(Pid), %% assert: the reply came from a process on the expected node
+ TestNode = Node, %% assert: the node reports the expected node name
+ case NetTickTime == undefined of
+ true ->
+ {ok, DefNTT} = application:get_env(kernel, net_ticktime), %% assumes net_ticktime is present in this node's kernel environment - TODO confirm
+ DefNTT = NTT; %% without an explicit option the reported tick time must equal this node's configured value
+ false ->
+ NetTickTime = NTT %% with an explicit option the reported tick time must equal the requested one
+ end %% NOTE(review): the tick intensity is only passed on to the started node; nothing in the reply verifies it
+ end,
+ ok;
+ Error ->
+ error({open_port_failed, TestNode, Error})
+ end.
+
+net_kernel_start_cmdline(TestName, Name, NetTickTime, NetTickIntensity) -> %% Builds the command line string that starts a node running net_kernel_start_do_test/1.
+ Pa = filename:dirname(code:which(?MODULE)), %% directory holding this module's object code, passed with -pa below
+ Prog = case catch init:get_argument(progname) of %% the started node uses the same program name as this node
+ {ok, [[Prg]]} -> Prg;
+ _ -> error(missing_progname)
+ end,
+ NameDomain = case net_kernel:longnames() of %% mirror this node's name domain
+ false -> "shortnames";
+ true -> "longnames"
+ end,
+ {ok, Pwd} = file:get_cwd(),
+ NameStr = atom_to_list(Name),
+ Prog ++ " -noinput -noshell -detached -pa " ++ Pa
+ ++ " -env ERL_CRASH_DUMP " ++ Pwd ++ "/erl_crash_dump." ++ NameStr %% per-node crash dump file in the current working directory
+ ++ " -setcookie " ++ atom_to_list(erlang:get_cookie()) %% same cookie as this node
+ ++ " -run " ++ atom_to_list(?MODULE) ++ " net_kernel_start_do_test "
+ ++ atom_to_list(TestName) ++ " " ++ atom_to_list(node()) ++ " " %% registered name and node of the tester process to report to
+ ++ NameStr ++ " " ++ NameDomain
+ ++ case NetTickTime == undefined of %% four arguments select the first clause of net_kernel_start_do_test/1, six the second
+ true ->
+ "";
+ false ->
+ " " ++ integer_to_list(NetTickTime) ++
+ " " ++ integer_to_list(NetTickIntensity)
+ end.
+
+net_kernel_start_do_test([TestName, TestNode, Name, NameDomain]) -> %% Entry point named on the command line built by net_kernel_start_cmdline/4; all arguments are strings. This clause: no tick options given.
+ net_kernel_start_do_test(TestName, TestNode, list_to_atom(Name),
+ #{name_domain => list_to_atom(NameDomain)}); %% only name_domain is set in the options map
+
+net_kernel_start_do_test([TestName, TestNode, Name, NameDomain, NetTickTime, NetTickIntensity]) -> %% This clause: explicit tick time and tick intensity given.
+ net_kernel_start_do_test(TestName, TestNode, list_to_atom(Name),
+ #{net_ticktime => list_to_integer(NetTickTime), %% numeric arguments arrive as strings and are converted here
+ name_domain => list_to_atom(NameDomain),
+ net_tickintensity => list_to_integer(NetTickIntensity)}).
+
+net_kernel_start_do_test(TestName, TestNode, Name, Options) -> %% Calls net_kernel:start/2 with Name and Options and reports the resulting net tick time to the tester.
+ case net_kernel:start(Name, Options) of
+ {ok, _Pid} ->
+ Tester = {list_to_atom(TestName), list_to_atom(TestNode)}, %% {RegisteredName, Node} destination of the tester process
+ Tester ! {i_am_alive, self(), node(), net_kernel:get_net_ticktime()},
+ receive after 60000 -> ok end, %% wait up to 60 s; net_kernel_start_test/3 normally halts this node earlier via rpc:cast
+ erlang:halt();
+ Error ->
+ erlang:halt(lists:flatten(io_lib:format("~p", [Error]))) %% halt with the formatted start error as the halt argument
+ end.
+
%% Misc. functions
run_dist_configs(Func, Config) ->
@@ -1602,7 +1723,7 @@ print_my_messages() ->
sleep(T) -> receive after T * 1000 -> ok end. %% Suspends the caller for T seconds (T * 1000 ms) and returns ok.
-start_node(DCfg, Name, Param, this) ->
+start_node(_DCfg, Name, Param, this) ->
NewParam = Param ++ " -pa " ++ filename:dirname(code:which(?MODULE)),
test_server:start_node(Name, peer, [{args, NewParam}, {erl, [this]}]);
start_node(DCfg, Name, Param, "this") ->