summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFabrice Bellet <fabrice@bellet.info>2016-06-21 21:47:42 +0200
committerOlivier CrĂȘte <olivier.crete@collabora.com>2017-06-21 16:05:57 -0400
commit07366a5bca7e4818b8df29d9c7c220da8f752547 (patch)
treee9630b490bab2360feba14876b509edfe5c3bbde
parent95f8805eb7b77755337e28daf1f134587d42b35f (diff)
downloadlibnice-07366a5bca7e4818b8df29d9c7c220da8f752547.tar.gz
conncheck: fix the component failed transition
This patch fixes the transition of a component from connecting to failed, that previously occured due to the propagation of the keep_timer_going variable, and to the final call to function priv_update_check_list_failed_components(), after the global agent timer was stopped. Previously, the code almost never entered to failed state, because the timer was going one, as long as the number of nominated pair was not enough, and as long as there were valid pairs not yet nominated. Even if all pair timers were over. The definition of the Failed state of a conncheck list is somewhat contradictory in the spec, depending on weather you read : * sect 5.7.4. "Computing States", "Failed: In this state, the ICE checks have not completed successfully for this media stream." or * sect 7.1.3.3. "Check List and Timer State Updates", "If all of the pairs in the check list are now either in the Failed or Succeeded state: If there is not a pair in the valid list for each component of the media stream, the state of the check list is set to Failed." Our understanding of the ICE spec is that, the proper way to enter failed state instead in when all connchecks have no longer in-progress pairs. All pairs are either in state succeeded, discovered, or failed. No timer is still running, and we have no hope that the conncheck list changes again, except if an unexpected STUN packet arrives later. All pairs in frozen state is a special case, that is handled separately (sect 7.1.3.3). A special grace delay is added before declaring a component in state Failed. This delay is not part of the RFC, and it is aimed to limit the cases when a conncheck list is reactivated just after it's been declared failed, causing a user visible transition from connecting to failed, and back from failed to connecting again. This is also required by the test suite, that counts exactly the number of time each state is entered, and doesn't expect these transcient failed states to happen (frequent due to the nature of the testsuite, less frequent in real life). Differential Revision: https://phabricator.freedesktop.org/D1111
-rw-r--r--agent/agent-priv.h14
-rw-r--r--agent/conncheck.c71
2 files changed, 71 insertions, 14 deletions
diff --git a/agent/agent-priv.h b/agent/agent-priv.h
index 3384180..714ecff 100644
--- a/agent/agent-priv.h
+++ b/agent/agent-priv.h
@@ -122,6 +122,18 @@ nice_input_message_iter_compare (const NiceInputMessageIter *a,
((obj)->compatibility == NICE_COMPATIBILITY_RFC5245 || \
(obj)->compatibility == NICE_COMPATIBILITY_OC2007R2)
+/* A grace period before declaring a component as failed, in msecs. This
+ * delay is added to reduce the chance to see the agent receiving new
+ * stun activity just after the conncheck list has been declared failed,
+ * reactiviting conncheck activity, and causing a (valid) state
+ * transitions like that: connecting -> failed -> connecting ->
+ * connected -> ready.
+ * Such transitions are not buggy per-se, but may break the
+ * test-suite, that counts precisely the number of time each state
+ * has been set, and doesnt expect these transcient failed states.
+ */
+#define NICE_AGENT_MAX_TIMER_GRACE_PERIOD 1000
+
struct _NiceAgent
{
GObject parent; /* gobject pointer */
@@ -176,6 +188,8 @@ struct _NiceAgent
guint16 rfc4571_expecting_length;
gboolean use_ice_udp;
gboolean use_ice_tcp;
+
+ guint conncheck_timer_grace_period; /* ongoing delay before timer stop */
/* XXX: add pointer to internal data struct for ABI-safe extensions */
};
diff --git a/agent/conncheck.c b/agent/conncheck.c
index b0e2222..63db471 100644
--- a/agent/conncheck.c
+++ b/agent/conncheck.c
@@ -709,7 +709,7 @@ timer_timeout:
}
}
}
- }
+ }
/* step: perform an ordinary check, ICE spec, 5.8 "Scheduling Checks"
* note: This code is executed when the triggered checks list is
@@ -795,11 +795,8 @@ priv_conn_check_tick_stream_nominate (NiceStream *stream, NiceAgent *agent)
if (s_inprogress)
keep_timer_going = TRUE;
- /* note: if some components have established connectivity,
- * but yet no nominated pair, keep timer going */
if (s_nominated < stream->n_components &&
s_waiting_for_nomination) {
- keep_timer_going = TRUE;
if (NICE_AGENT_IS_COMPATIBLE_WITH_RFC5245_OR_OC2007R2 (agent)) {
if (agent->nomination_mode == NICE_NOMINATION_MODE_REGULAR &&
agent->controlling_mode) {
@@ -888,6 +885,7 @@ priv_conn_check_tick_stream_nominate (NiceStream *stream, NiceAgent *agent)
p->recheck_on_timeout = FALSE;
p->use_candidate_on_next_check = TRUE;
priv_add_pair_to_triggered_check_queue (agent, p);
+ keep_timer_going = TRUE;
break; /* move to the next component */
}
}
@@ -911,6 +909,7 @@ priv_conn_check_tick_stream_nominate (NiceStream *stream, NiceAgent *agent)
p->recheck_on_timeout = FALSE;
priv_update_selected_pair (agent, component, p);
priv_add_pair_to_triggered_check_queue (agent, p);
+ keep_timer_going = TRUE;
break; /* move to the next component */
}
}
@@ -937,6 +936,7 @@ conn_check_stop (NiceAgent *agent)
g_source_destroy (agent->conncheck_timer_source);
g_source_unref (agent->conncheck_timer_source);
agent->conncheck_timer_source = NULL;
+ agent->conncheck_timer_grace_period = 0;
}
@@ -1005,9 +1005,39 @@ static gboolean priv_conn_check_tick_unlocked (NiceAgent *agent)
keep_timer_going = TRUE;
}
+ /* step: if no work left and a conncheck list of a stream is still
+ * frozen, set the pairs to waiting, according to ICE SPEC, sect
+ * 7.1.3.3. "Check List and Timer State Updates"
+ */
+ if (!keep_timer_going) {
+ for (i = agent->streams; i ; i = i->next) {
+ NiceStream *stream = i->data;
+ if (priv_is_checklist_frozen (stream)) {
+ nice_debug ("Agent %p : stream %d conncheck list is still "
+ "frozen, while other lists are completed. Unfreeze it.",
+ agent, stream->id);
+ keep_timer_going = priv_conn_check_unfreeze_next (agent, stream);
+ }
+ }
+ }
+
+ /* note: we provide a grace period before declaring a component as
+ * failed. Components marked connected, and then ready follow another
+ * code path, and are not concerned by this grace period.
+ */
+ if (!keep_timer_going && agent->conncheck_timer_grace_period == 0)
+ nice_debug ("Agent %p : waiting %d msecs before checking "
+ "for failed components.", agent, NICE_AGENT_MAX_TIMER_GRACE_PERIOD);
+
+ if (keep_timer_going)
+ agent->conncheck_timer_grace_period = 0;
+ else
+ agent->conncheck_timer_grace_period += agent->timer_ta;
+
/* step: stop timer if no work left */
- if (keep_timer_going != TRUE) {
- nice_debug ("Agent %p : %s: stopping conncheck timer", agent, G_STRFUNC);
+ if (!keep_timer_going &&
+ agent->conncheck_timer_grace_period >= NICE_AGENT_MAX_TIMER_GRACE_PERIOD) {
+ nice_debug ("Agent %p : checking for failed components now.", agent);
for (i = agent->streams; i; i = i->next) {
NiceStream *stream = i->data;
priv_update_check_list_failed_components (agent, stream);
@@ -1017,6 +1047,7 @@ static gboolean priv_conn_check_tick_unlocked (NiceAgent *agent)
}
}
+ nice_debug ("Agent %p : %s: stopping conncheck timer", agent, G_STRFUNC);
priv_print_conn_check_lists (agent, G_STRFUNC,
", conncheck timer stopped");
@@ -1027,9 +1058,10 @@ static gboolean priv_conn_check_tick_unlocked (NiceAgent *agent)
/* XXX: what to signal, is all processing now really done? */
nice_debug ("Agent %p : changing conncheck state to COMPLETED.", agent);
+ return FALSE;
}
- return keep_timer_going;
+ return TRUE;
}
static gboolean priv_conn_check_tick (gpointer pointer)
@@ -1810,15 +1842,18 @@ static gboolean priv_update_selected_pair (NiceAgent *agent, NiceComponent *comp
* Updates the check list state.
*
* Implements parts of the algorithm described in
- * ICE sect 8.1.2. "Updating States" (ID-19): if for any
+ * ICE sect 8.1.2. "Updating States" (RFC 5245): if for any
* component, all checks have been completed and have
- * failed, mark that component's state to NICE_CHECK_FAILED.
+ * failed to produce a nominated pair, mark that component's
+ * state to NICE_CHECK_FAILED.
*
* Sends a component state changesignal via 'agent'.
*/
static void priv_update_check_list_failed_components (NiceAgent *agent, NiceStream *stream)
{
GSList *i;
+ gboolean completed;
+ guint nominated;
/* note: emitting a signal might cause the client
* to remove the stream, thus the component count
* must be fetched before entering the loop*/
@@ -1842,6 +1877,8 @@ static void priv_update_check_list_failed_components (NiceAgent *agent, NiceStre
if (!agent_find_component (agent, stream->id, c+1, NULL, &comp))
continue;
+ nominated = 0;
+ completed = TRUE;
for (i = stream->conncheck_list; i; i = i->next) {
CandidateCheckPair *p = i->data;
@@ -1849,16 +1886,22 @@ static void priv_update_check_list_failed_components (NiceAgent *agent, NiceStre
g_assert (p->stream_id == stream->id);
if (p->component_id == (c + 1)) {
- if (p->state != NICE_CHECK_FAILED)
- break;
+ if (p->nominated)
+ ++nominated;
+ if (p->state != NICE_CHECK_FAILED &&
+ p->state != NICE_CHECK_SUCCEEDED &&
+ p->state != NICE_CHECK_DISCOVERED)
+ completed = FALSE;
}
}
- /* note: all checks have failed
+ /* note: all pairs are either failed or succeeded, and the component
+ * has not produced a nominated pair.
* Set the component to FAILED only if it actually had remote candidates
* that failed.. */
- if (i == NULL && comp != NULL && comp->remote_candidates != NULL)
- agent_signal_component_state_change (agent,
+ if (completed && nominated == 0 &&
+ comp != NULL && comp->remote_candidates != NULL)
+ agent_signal_component_state_change (agent,
stream->id,
(c + 1), /* component-id */
NICE_COMPONENT_STATE_FAILED);