diff options
author | Fabrice Bellet <fabrice@bellet.info> | 2016-06-21 21:47:42 +0200 |
---|---|---|
committer | Olivier CrĂȘte <olivier.crete@collabora.com> | 2017-06-21 16:05:57 -0400 |
commit | 07366a5bca7e4818b8df29d9c7c220da8f752547 (patch) | |
tree | e9630b490bab2360feba14876b509edfe5c3bbde /agent | |
parent | 95f8805eb7b77755337e28daf1f134587d42b35f (diff) | |
download | libnice-07366a5bca7e4818b8df29d9c7c220da8f752547.tar.gz |
conncheck: fix the component failed transition
This patch fixes the transition of a component from connecting to
failed, that previously occured due to the propagation of the
keep_timer_going variable, and to the final call to function
priv_update_check_list_failed_components(), after the global agent
timer was stopped.
Previously, the code almost never entered to failed state, because the
timer was going one, as long as the number of nominated pair was not
enough, and as long as there were valid pairs not yet nominated. Even
if all pair timers were over.
The definition of the Failed state of a conncheck list is somewhat
contradictory in the spec, depending on weather you read :
* sect 5.7.4. "Computing States",
"Failed: In this state, the ICE checks have not completed successfully
for this media stream."
or
* sect 7.1.3.3. "Check List and Timer State Updates",
"If all of the pairs in the check list are now either in the Failed or
Succeeded state: If there is not a pair in the valid list for each
component of the media stream, the state of the check list is set to
Failed."
Our understanding of the ICE spec is that, the proper way to enter failed
state instead in when all connchecks have no longer in-progress pairs.
All pairs are either in state succeeded, discovered, or failed. No timer
is still running, and we have no hope that the conncheck list changes
again, except if an unexpected STUN packet arrives later. All pairs in
frozen state is a special case, that is handled separately (sect
7.1.3.3).
A special grace delay is added before declaring a component in state
Failed. This delay is not part of the RFC, and it is aimed to limit the
cases when a conncheck list is reactivated just after it's been declared
failed, causing a user visible transition from connecting to failed, and
back from failed to connecting again. This is also required by the test
suite, that counts exactly the number of time each state is entered, and
doesn't expect these transcient failed states to happen (frequent due to
the nature of the testsuite, less frequent in real life).
Differential Revision: https://phabricator.freedesktop.org/D1111
Diffstat (limited to 'agent')
-rw-r--r-- | agent/agent-priv.h | 14 | ||||
-rw-r--r-- | agent/conncheck.c | 71 |
2 files changed, 71 insertions, 14 deletions
diff --git a/agent/agent-priv.h b/agent/agent-priv.h index 3384180..714ecff 100644 --- a/agent/agent-priv.h +++ b/agent/agent-priv.h @@ -122,6 +122,18 @@ nice_input_message_iter_compare (const NiceInputMessageIter *a, ((obj)->compatibility == NICE_COMPATIBILITY_RFC5245 || \ (obj)->compatibility == NICE_COMPATIBILITY_OC2007R2) +/* A grace period before declaring a component as failed, in msecs. This + * delay is added to reduce the chance to see the agent receiving new + * stun activity just after the conncheck list has been declared failed, + * reactiviting conncheck activity, and causing a (valid) state + * transitions like that: connecting -> failed -> connecting -> + * connected -> ready. + * Such transitions are not buggy per-se, but may break the + * test-suite, that counts precisely the number of time each state + * has been set, and doesnt expect these transcient failed states. + */ +#define NICE_AGENT_MAX_TIMER_GRACE_PERIOD 1000 + struct _NiceAgent { GObject parent; /* gobject pointer */ @@ -176,6 +188,8 @@ struct _NiceAgent guint16 rfc4571_expecting_length; gboolean use_ice_udp; gboolean use_ice_tcp; + + guint conncheck_timer_grace_period; /* ongoing delay before timer stop */ /* XXX: add pointer to internal data struct for ABI-safe extensions */ }; diff --git a/agent/conncheck.c b/agent/conncheck.c index b0e2222..63db471 100644 --- a/agent/conncheck.c +++ b/agent/conncheck.c @@ -709,7 +709,7 @@ timer_timeout: } } } - } + } /* step: perform an ordinary check, ICE spec, 5.8 "Scheduling Checks" * note: This code is executed when the triggered checks list is @@ -795,11 +795,8 @@ priv_conn_check_tick_stream_nominate (NiceStream *stream, NiceAgent *agent) if (s_inprogress) keep_timer_going = TRUE; - /* note: if some components have established connectivity, - * but yet no nominated pair, keep timer going */ if (s_nominated < stream->n_components && s_waiting_for_nomination) { - keep_timer_going = TRUE; if (NICE_AGENT_IS_COMPATIBLE_WITH_RFC5245_OR_OC2007R2 (agent)) { if (agent->nomination_mode == NICE_NOMINATION_MODE_REGULAR && agent->controlling_mode) { @@ -888,6 +885,7 @@ priv_conn_check_tick_stream_nominate (NiceStream *stream, NiceAgent *agent) p->recheck_on_timeout = FALSE; p->use_candidate_on_next_check = TRUE; priv_add_pair_to_triggered_check_queue (agent, p); + keep_timer_going = TRUE; break; /* move to the next component */ } } @@ -911,6 +909,7 @@ priv_conn_check_tick_stream_nominate (NiceStream *stream, NiceAgent *agent) p->recheck_on_timeout = FALSE; priv_update_selected_pair (agent, component, p); priv_add_pair_to_triggered_check_queue (agent, p); + keep_timer_going = TRUE; break; /* move to the next component */ } } @@ -937,6 +936,7 @@ conn_check_stop (NiceAgent *agent) g_source_destroy (agent->conncheck_timer_source); g_source_unref (agent->conncheck_timer_source); agent->conncheck_timer_source = NULL; + agent->conncheck_timer_grace_period = 0; } @@ -1005,9 +1005,39 @@ static gboolean priv_conn_check_tick_unlocked (NiceAgent *agent) keep_timer_going = TRUE; } + /* step: if no work left and a conncheck list of a stream is still + * frozen, set the pairs to waiting, according to ICE SPEC, sect + * 7.1.3.3. "Check List and Timer State Updates" + */ + if (!keep_timer_going) { + for (i = agent->streams; i ; i = i->next) { + NiceStream *stream = i->data; + if (priv_is_checklist_frozen (stream)) { + nice_debug ("Agent %p : stream %d conncheck list is still " + "frozen, while other lists are completed. Unfreeze it.", + agent, stream->id); + keep_timer_going = priv_conn_check_unfreeze_next (agent, stream); + } + } + } + + /* note: we provide a grace period before declaring a component as + * failed. Components marked connected, and then ready follow another + * code path, and are not concerned by this grace period. + */ + if (!keep_timer_going && agent->conncheck_timer_grace_period == 0) + nice_debug ("Agent %p : waiting %d msecs before checking " + "for failed components.", agent, NICE_AGENT_MAX_TIMER_GRACE_PERIOD); + + if (keep_timer_going) + agent->conncheck_timer_grace_period = 0; + else + agent->conncheck_timer_grace_period += agent->timer_ta; + /* step: stop timer if no work left */ - if (keep_timer_going != TRUE) { - nice_debug ("Agent %p : %s: stopping conncheck timer", agent, G_STRFUNC); + if (!keep_timer_going && + agent->conncheck_timer_grace_period >= NICE_AGENT_MAX_TIMER_GRACE_PERIOD) { + nice_debug ("Agent %p : checking for failed components now.", agent); for (i = agent->streams; i; i = i->next) { NiceStream *stream = i->data; priv_update_check_list_failed_components (agent, stream); @@ -1017,6 +1047,7 @@ static gboolean priv_conn_check_tick_unlocked (NiceAgent *agent) } } + nice_debug ("Agent %p : %s: stopping conncheck timer", agent, G_STRFUNC); priv_print_conn_check_lists (agent, G_STRFUNC, ", conncheck timer stopped"); @@ -1027,9 +1058,10 @@ static gboolean priv_conn_check_tick_unlocked (NiceAgent *agent) /* XXX: what to signal, is all processing now really done? */ nice_debug ("Agent %p : changing conncheck state to COMPLETED.", agent); + return FALSE; } - return keep_timer_going; + return TRUE; } static gboolean priv_conn_check_tick (gpointer pointer) @@ -1810,15 +1842,18 @@ static gboolean priv_update_selected_pair (NiceAgent *agent, NiceComponent *comp * Updates the check list state. * * Implements parts of the algorithm described in - * ICE sect 8.1.2. "Updating States" (ID-19): if for any + * ICE sect 8.1.2. "Updating States" (RFC 5245): if for any * component, all checks have been completed and have - * failed, mark that component's state to NICE_CHECK_FAILED. + * failed to produce a nominated pair, mark that component's + * state to NICE_CHECK_FAILED. * * Sends a component state changesignal via 'agent'. */ static void priv_update_check_list_failed_components (NiceAgent *agent, NiceStream *stream) { GSList *i; + gboolean completed; + guint nominated; /* note: emitting a signal might cause the client * to remove the stream, thus the component count * must be fetched before entering the loop*/ @@ -1842,6 +1877,8 @@ static void priv_update_check_list_failed_components (NiceAgent *agent, NiceStre if (!agent_find_component (agent, stream->id, c+1, NULL, &comp)) continue; + nominated = 0; + completed = TRUE; for (i = stream->conncheck_list; i; i = i->next) { CandidateCheckPair *p = i->data; @@ -1849,16 +1886,22 @@ static void priv_update_check_list_failed_components (NiceAgent *agent, NiceStre g_assert (p->stream_id == stream->id); if (p->component_id == (c + 1)) { - if (p->state != NICE_CHECK_FAILED) - break; + if (p->nominated) + ++nominated; + if (p->state != NICE_CHECK_FAILED && + p->state != NICE_CHECK_SUCCEEDED && + p->state != NICE_CHECK_DISCOVERED) + completed = FALSE; } } - /* note: all checks have failed + /* note: all pairs are either failed or succeeded, and the component + * has not produced a nominated pair. * Set the component to FAILED only if it actually had remote candidates * that failed.. */ - if (i == NULL && comp != NULL && comp->remote_candidates != NULL) - agent_signal_component_state_change (agent, + if (completed && nominated == 0 && + comp != NULL && comp->remote_candidates != NULL) + agent_signal_component_state_change (agent, stream->id, (c + 1), /* component-id */ NICE_COMPONENT_STATE_FAILED); |