summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Pfaff <blp@nicira.com>2012-08-06 15:03:32 -0700
committerJustin Pettit <jpettit@nicira.com>2012-08-13 15:31:16 -0700
commitd2155157b90966c6c16cf093b3fc724812f8313b (patch)
tree8edf23ec7d3caef69e9f5b29e60f8a9c7f708924
parent1c60dc3c8a7d61f094e3aff506a22d0c0075efe9 (diff)
downloadopenvswitch-d2155157b90966c6c16cf093b3fc724812f8313b.tar.gz
rconn: Treat draining a message from the send queue as activity.
Until now, the rconn module has used messages received from the controller as the sole means to determine that the connection is up. This can interact badly with the OVS connection manager in ofproto, which stops reading and processing messages from the receive queue when there is a backlog in the send queue for a given connection (because reading and processes messages is the main cause of messages getting pushed onto the send queue). So, if a send queue backlog lasts more than twice the inactivity probe interval, then the connection drops, whether the controller is sending messages or not. Dumping a large flow table can trigger this behavior if the controller becomes temporarily busy or if the network between OVS and a controller is slow. The problem can easily repeat itself, since upon reconnection the controller will generally dump the flow table. This commit fixes the problem by expanding the definition of "activity" to include successfully sending an OpenFlow message that was previously queued. Bug #12789. Reported-by: Natasha Gude <natasha@nicira.com> Signed-off-by: Ben Pfaff <blp@nicira.com>
-rw-r--r--lib/rconn.c23
1 files changed, 14 insertions, 9 deletions
diff --git a/lib/rconn.c b/lib/rconn.c
index b25a99a82..603175bdb 100644
--- a/lib/rconn.c
+++ b/lib/rconn.c
@@ -79,7 +79,6 @@ struct rconn {
int backoff;
int max_backoff;
time_t backoff_deadline;
- time_t last_received;
time_t last_connected;
time_t last_disconnected;
unsigned int packets_sent;
@@ -104,11 +103,15 @@ struct rconn {
time_t creation_time;
unsigned long int total_time_connected;
- /* Throughout this file, "probe" is shorthand for "inactivity probe".
- * When nothing has been received from the peer for a while, we send out
- * an echo request as an inactivity probe packet. We should receive back
- * a response. */
+ /* Throughout this file, "probe" is shorthand for "inactivity probe". When
+ * no activity has been observed from the peer for a while, we send out an
+ * echo request as an inactivity probe packet. We should receive back a
+ * response.
+ *
+ * "Activity" is defined as either receiving an OpenFlow message from the
+ * peer or successfully sending a message that had been in 'txq'. */
int probe_interval; /* Secs of inactivity before sending probe. */
+ time_t last_activity; /* Last time we saw some activity. */
/* When we create a vconn we obtain these values, to save them past the end
* of the vconn's lifetime. Otherwise, in-band control will only allow
@@ -178,7 +181,6 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp)
rc->backoff = 0;
rc->max_backoff = max_backoff ? max_backoff : 8;
rc->backoff_deadline = TIME_MIN;
- rc->last_received = time_now();
rc->last_connected = TIME_MIN;
rc->last_disconnected = TIME_MIN;
rc->seqno = 0;
@@ -194,6 +196,8 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp)
rc->creation_time = time_now();
rc->total_time_connected = 0;
+ rc->last_activity = time_now();
+
rconn_set_probe_interval(rc, probe_interval);
rconn_set_dscp(rc, dscp);
@@ -412,6 +416,7 @@ do_tx_work(struct rconn *rc)
if (error) {
break;
}
+ rc->last_activity = time_now();
}
if (list_is_empty(&rc->txq)) {
poll_immediate_wake();
@@ -422,7 +427,7 @@ static unsigned int
timeout_ACTIVE(const struct rconn *rc)
{
if (rc->probe_interval) {
- unsigned int base = MAX(rc->last_received, rc->state_entered);
+ unsigned int base = MAX(rc->last_activity, rc->state_entered);
unsigned int arg = base + rc->probe_interval - rc->state_entered;
return arg;
}
@@ -433,7 +438,7 @@ static void
run_ACTIVE(struct rconn *rc)
{
if (timed_out(rc)) {
- unsigned int base = MAX(rc->last_received, rc->state_entered);
+ unsigned int base = MAX(rc->last_activity, rc->state_entered);
VLOG_DBG("%s: idle %u seconds, sending inactivity probe",
rc->name, (unsigned int) (time_now() - base));
@@ -536,7 +541,7 @@ rconn_recv(struct rconn *rc)
rc->probably_admitted = true;
rc->last_admitted = time_now();
}
- rc->last_received = time_now();
+ rc->last_activity = time_now();
rc->packets_received++;
if (rc->state == S_IDLE) {
state_transition(rc, S_ACTIVE);