diff options
-rw-r--r-- | lib/kernel/src/dist_util.erl | 32 | ||||
-rw-r--r-- | lib/kernel/src/net_kernel.erl | 17 |
2 files changed, 39 insertions, 10 deletions
diff --git a/lib/kernel/src/dist_util.erl b/lib/kernel/src/dist_util.erl index 4958065027..7fe5edf80d 100644 --- a/lib/kernel/src/dist_util.erl +++ b/lib/kernel/src/dist_util.erl @@ -215,7 +215,7 @@ handshake_other_started(#hs_data{request_type=ReqType, require_flags=ReqFlgs}, check_dflags(HSData1, EDF), ?debug({"MD5 connection from ~p~n", [NodeOrHost]}), - HSData2 = mark_pending(HSData1), + {AcceptedPending, HSData2} = mark_pending(HSData1), Node = HSData2#hs_data.other_node, Cookie = auth:get_cookie(Node), ChallengeA = gen_challenge(), @@ -228,6 +228,10 @@ handshake_other_started(#hs_data{request_type=ReqType, HSData4 = HSData3#hs_data{this_flags = ChosenFlags, other_flags = ChosenFlags}, ChallengeB = recv_challenge_reply(HSData4, ChallengeA, Cookie), + case AcceptedPending of + up_pending -> wait_pending(HSData4); + _ -> continue + end, send_challenge_ack(HSData4, gen_digest(ChallengeB, Cookie)), ?debug({dist_util, self(), accept_connection, Node}), connection(HSData4); @@ -288,8 +292,10 @@ check_mandatory(Mandatory, OtherFlags, Missing) -> mark_pending(#hs_data{kernel_pid=Kernel, other_node=Node, this_node=MyNode}=HSData) -> - case do_mark_pending(Kernel, MyNode, Node, - HSData#hs_data.other_flags) of + KernelReply = do_mark_pending(Kernel, MyNode, Node, + HSData#hs_data.other_flags), + {KernelReply, + case KernelReply of ok -> send_status(HSData, ok), reset_timer(HSData#hs_data.timer), @@ -319,8 +325,12 @@ mark_pending(#hs_data{kernel_pid=Kernel, %% This can happen if the other node goes down, %% and goes up again and contact us before we have - %% detected that the socket was closed. - wait_pending(Kernel), + %% detected that the socket was closed. + %% It can also happen if the old connection went down silently, + %% without us knowing, a lost TCP FIN or RST packet for example. + + %% Continue handshake to verify cookie and then wait for old + %% connection to die. 
reset_timer(HSData#hs_data.timer), HSData; @@ -328,15 +338,17 @@ mark_pending(#hs_data{kernel_pid=Kernel, %% FIXME: is this a case ? ?debug({dist_util,self(),mark_pending,already_pending,Node}), ?shutdown(Node) - end. + end + }. + %% -%% Marking pending and negotiating away -%% simultaneous connection problems +%% Tell net_kernel we are waiting for old connection to die. %% - -wait_pending(Kernel) -> +wait_pending(#hs_data{kernel_pid=Kernel, + other_node=Node}) -> + Kernel ! {self(), {wait_pending, Node}}, receive {Kernel, pending} -> ?trace("wait_pending returned for pid ~p.~n", diff --git a/lib/kernel/src/net_kernel.erl b/lib/kernel/src/net_kernel.erl index 715bb2da58..40abf24f57 100644 --- a/lib/kernel/src/net_kernel.erl +++ b/lib/kernel/src/net_kernel.erl @@ -1047,6 +1047,23 @@ handle_info({SetupPid, {is_pending, Node}}, State) -> SetupPid ! {self(), {is_pending, Reply}}, {noreply, State}; +handle_info({AcceptPid, {wait_pending, Node}}, State) -> + case get_conn(Node) of + {ok, #connection{state = up_pending, + ctrlr = OldCtrlr, + pending_owner = AcceptPid}} -> + %% Kill old controller to make sure new connection setup + %% does not get stuck. + ?debug({net_kernel, wait_pending, kill, OldCtrlr, new, AcceptPid}), + exit(OldCtrlr, wait_pending); + _ -> + %% Old connection maybe already gone + ignore + end, + %% Exiting controller will trigger {Kernel,pending} reply + %% in up_pending_nodedown() + {noreply, State}; + %% %% Handle different types of process terminations. %% |