diff options
-rw-r--r-- | python/ovs/jsonrpc.py | 5 | ||||
-rw-r--r-- | python/ovs/reconnect.py | 21 | ||||
-rw-r--r-- | tests/reconnect.at | 43 | ||||
-rw-r--r-- | tests/test-reconnect.c | 14 | ||||
-rw-r--r-- | tests/test-reconnect.py | 13 |
5 files changed, 85 insertions, 11 deletions
diff --git a/python/ovs/jsonrpc.py b/python/ovs/jsonrpc.py index 240e8ccd4..bf32f8c87 100644 --- a/python/ovs/jsonrpc.py +++ b/python/ovs/jsonrpc.py @@ -570,13 +570,16 @@ class Session(object): if self.rpc is not None: received_bytes = self.rpc.get_received_bytes() error, msg = self.rpc.recv() + + now = ovs.timeval.msec() + self.reconnect.receive_attempted(now) if received_bytes != self.rpc.get_received_bytes(): # Data was successfully received. # # Previously we only counted receiving a full message as # activity, but with large messages or a slow connection that # policy could time out the session mid-message. - self.reconnect.activity(ovs.timeval.msec()) + self.reconnect.activity(now) if not error: if msg.type == Message.T_REQUEST and msg.method == "echo": diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py index 574db7fdd..c4c6c87e9 100644 --- a/python/ovs/reconnect.py +++ b/python/ovs/reconnect.py @@ -95,7 +95,10 @@ class Reconnect(object): def deadline(fsm): if fsm.probe_interval: base = max(fsm.last_activity, fsm.state_entered) - return base + fsm.probe_interval + expiration = base + fsm.probe_interval + if (fsm.last_receive_attempt is None or + fsm.last_receive_attempt >= expiration): + return expiration return None @staticmethod @@ -113,7 +116,10 @@ class Reconnect(object): @staticmethod def deadline(fsm): if fsm.probe_interval: - return fsm.state_entered + fsm.probe_interval + expiration = fsm.state_entered + fsm.probe_interval + if (fsm.last_receive_attempt is None or + fsm.last_receive_attempt >= expiration): + return expiration return None @staticmethod @@ -153,6 +159,7 @@ class Reconnect(object): self.last_activity = now self.last_connected = None self.last_disconnected = None + self.last_receive_attempt = now self.max_tries = None self.backoff_free_tries = 0 @@ -472,6 +479,16 @@ class Reconnect(object): self._transition(now, Reconnect.Active) self.last_activity = now + def receive_attempted(self, now): + """Tell 'fsm' that some attempt to receive data on the connection was + made at 'now'. The FSM only allows probe interval timer to expire when + some attempt to receive data on the connection was received after the + time when it should have expired. This helps in the case where there's + a long delay in the poll loop and then reconnect_run() executes before + the code to try to receive anything from the remote runs. (To disable + this feature, pass None for 'now'.)""" + self.last_receive_attempt = now + def _transition(self, now, state): if self.state == Reconnect.ConnectInProgress: self.n_attempted_connections += 1 diff --git a/tests/reconnect.at b/tests/reconnect.at index 2631658be..0f74709f5 100644 --- a/tests/reconnect.at +++ b/tests/reconnect.at @@ -38,7 +38,12 @@ RECONNECT_CHECK([quick connect, idle disconnect], run connected -# Send inactivity probe. +# Try timeout without noting that we tried to receive. +# (This does nothing since we never timeout in this case.) +timeout + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX timeout run @@ -61,7 +66,13 @@ connected connected last connected 0 ms ago, connected 0 ms total -# Send inactivity probe. +# Try timeout without noting that we tried to receive. +# (This does nothing since we never timeout in this case.) +timeout + no timeout + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX timeout advance 5000 ms @@ -99,7 +110,12 @@ advance 500 run connected -# Send inactivity probe. +# Try timeout without noting that we tried to receive. +# (This does nothing since we never timeout in this case.) +timeout + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX timeout run @@ -131,7 +147,13 @@ connected connected last connected 0 ms ago, connected 0 ms total -# Send inactivity probe. +# Try timeout without noting that we tried to receive. +# (This does nothing since we never timeout in this case.) +timeout + no timeout + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX timeout advance 5000 ms @@ -357,7 +379,8 @@ connect-failed ###################################################################### RECONNECT_CHECK([connections with no data preserve backoff], - [enable + [receive-attempted LLONG_MAX +enable # First connect, then idle timeout kills connection. run @@ -397,6 +420,7 @@ disconnected # Back off for 4000 ms. timeout ], [### t=1000 ### +receive-attempted LLONG_MAX enable in BACKOFF for 0 ms (0 ms backoff) @@ -1083,7 +1107,8 @@ timeout ###################################################################### RECONNECT_CHECK([max-tries of 1 honored], - [set-max-tries 1 + [receive-attempted LLONG_MAX +set-max-tries 1 enable # Connection succeeds. @@ -1100,6 +1125,7 @@ run disconnected ], [### t=1000 ### +receive-attempted LLONG_MAX set-max-tries 1 1 tries left enable @@ -1185,6 +1211,8 @@ activity # Connection times out. timeout +receive-attempted LLONG_MAX +timeout run timeout run @@ -1244,6 +1272,9 @@ activity # Connection times out. timeout + no timeout +receive-attempted LLONG_MAX +timeout advance 5000 ms ### t=8000 ### diff --git a/tests/test-reconnect.c b/tests/test-reconnect.c index bf0463e25..c84bb1cdb 100644 --- a/tests/test-reconnect.c +++ b/tests/test-reconnect.c @@ -48,7 +48,6 @@ test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) now = 1000; reconnect = reconnect_create(now); - reconnect_receive_attempted(reconnect, LLONG_MAX); reconnect_set_name(reconnect, "remote"); reconnect_get_stats(reconnect, now, &prev); printf("### t=%d ###\n", now); @@ -278,6 +277,18 @@ do_listen_error(struct ovs_cmdl_context *ctx) reconnect_listen_error(reconnect, now, atoi(ctx->argv[1])); } +static void +do_receive_attempted(struct ovs_cmdl_context *ctx OVS_UNUSED) +{ + if (!strcmp(ctx->argv[1], "now")) { + reconnect_receive_attempted(reconnect, now); + } else if (!strcmp(ctx->argv[1], "LLONG_MAX")) { + reconnect_receive_attempted(reconnect, LLONG_MAX); + } else { + ovs_fatal(0, "%s: bad argument %s", ctx->argv[0], ctx->argv[1]); + } +} + static const struct ovs_cmdl_command all_commands[] = { { "enable", NULL, 0, 0, do_enable, OVS_RO }, { "disable", NULL, 0, 0, do_disable, OVS_RO }, @@ -296,6 +307,7 @@ static const struct ovs_cmdl_command all_commands[] = { { "passive", NULL, 0, 0, do_set_passive, OVS_RO }, { "listening", NULL, 0, 0, do_listening, OVS_RO }, { "listen-error", NULL, 1, 1, do_listen_error, OVS_RO }, + { "receive-attempted", NULL, 1, 1, do_receive_attempted, OVS_RO }, { NULL, NULL, 0, 0, NULL, OVS_RO }, }; diff --git a/tests/test-reconnect.py b/tests/test-reconnect.py index 6cd052878..f0ad9f979 100644 --- a/tests/test-reconnect.py +++ b/tests/test-reconnect.py @@ -163,6 +163,16 @@ def do_listen_error(arg): r.listen_error(now, int(arg)) +def do_receive_attempted(arg): + if arg == "now": + r.receive_attempted(now) + elif arg == "LLONG_MAX": + r.receive_attempted(None) + else: + sys.stderr.write("receive-attempted: bad argument %s\n" % arg) + sys.exit(1) + + def main(): commands = { "enable": do_enable, @@ -180,7 +190,8 @@ def main(): "set-backoff-free-tries": do_set_backoff_free_tries, "passive": do_set_passive, "listening": do_listening, - "listen-error": do_listen_error + "listen-error": do_listen_error, + "receive-attempted": do_receive_attempted } global now |