diff options
author | Thomas Haller <thaller@redhat.com> | 2021-01-22 17:53:23 +0100 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2021-01-27 10:18:15 +0100 |
commit | 5ba2f81d2dc5a32148ae6e724fd51094cc408da3 (patch) | |
tree | d980b73f3a9190af228c37a82f8bec1b202d2f25 | |
parent | 6dad4a315fae9577fba7e346df25776f5e337b69 (diff) | |
download | NetworkManager-5ba2f81d2dc5a32148ae6e724fd51094cc408da3.tar.gz |
ndisc: rework sending router solicitations to follow RFC7559
RFC4861 describes how to solicit routers, but this is later extended
by RFC7559 (Packet-Loss Resiliency for Router Solicitations).
Rework the scheduling of router solicitations to follow RFC7559.
Differences from RFC7559:
- the initial random delay before sending the first RS is only
up to 250 milliseconds (instead of 1 second).
- we never completely stop sending RS, even after we receive a valid
RA. We will still send RS every hour.
We no longer honor the sysctl variables
- /proc/sys/net/ipv6/conf/$IFNAME/router_solicitations
- /proc/sys/net/ipv6/conf/$IFNAME/router_solicitation_interval
I don't think having the autoconf algorithm configurable is useful.
At least not, if the configuration happens via sysctl variables.
https://tools.ietf.org/html/rfc4861#section-6.3.7
https://tools.ietf.org/html/rfc7559#section-2.1
-rw-r--r-- | src/ndisc/nm-ndisc.c | 192 | ||||
-rw-r--r-- | src/ndisc/tests/test-ndisc-fake.c | 44 |
2 files changed, 131 insertions, 105 deletions
diff --git a/src/ndisc/nm-ndisc.c b/src/ndisc/nm-ndisc.c index 8b813010ef..c8e4c7f9c3 100644 --- a/src/ndisc/nm-ndisc.c +++ b/src/ndisc/nm-ndisc.c @@ -20,6 +20,9 @@ #define _NMLOG_PREFIX_NAME "ndisc" +#define RFC7559_IRT ((gint32) 4) /* RFC7559, Initial Retransmission Time, in seconds */ +#define RFC7559_MRT ((gint32) 3600) /* RFC7559, Maximum Retransmission Time, in seconds */ + /*****************************************************************************/ struct _NMNDiscPrivate { @@ -29,18 +32,13 @@ struct _NMNDiscPrivate { char * last_error; GSource *ra_timeout_source; - union { - gint32 solicitations_left; - gint32 announcements_left; - }; - union { - guint send_rs_id; - guint send_ra_id; - }; - union { - gint32 last_rs; - gint32 last_ra; - }; + gint32 announcements_left; + guint send_ra_id; + gint32 last_ra; + + gint32 solicit_retransmit_time_msec; + + GSource *solicit_timer_source; GSource *timeout_expire_source; @@ -773,62 +771,111 @@ nm_ndisc_add_dns_domain(NMNDisc *ndisc, const NMNDiscDNSDomain *new_item, gint64 } \ G_STMT_END -static gboolean -send_rs_timeout(NMNDisc *ndisc) +static gint32 +solicit_retransmit_time_jitter(gint32 solicit_retransmit_time_msec) { - nm_auto_pop_netns NMPNetns *netns = NULL; - NMNDiscClass * klass = NM_NDISC_GET_CLASS(ndisc); - NMNDiscPrivate * priv = NM_NDISC_GET_PRIVATE(ndisc); - GError * error = NULL; + gint32 ten_percent; - priv->send_rs_id = 0; + nm_assert(solicit_retransmit_time_msec > 0); + nm_assert(solicit_retransmit_time_msec < 3 * RFC7559_MRT * 1000); - if (!nm_ndisc_netns_push(ndisc, &netns)) - return G_SOURCE_REMOVE; + /* Add a ±10% jitter. + * + * This is the "RAND" parameter from https://tools.ietf.org/html/rfc3315#section-14 + * as requested by RFC7559. */ + + ten_percent = NM_MAX(1, solicit_retransmit_time_msec / 10); + + return solicit_retransmit_time_msec - ten_percent + + ((gint32)(g_random_int() % (2u * ((guint32) ten_percent)))); +} - if (klass->send_rs(ndisc, &error)) { - _LOGD("router solicitation sent"); - priv->solicitations_left--; +static gboolean +solicit_timer_cb(gpointer user_data) +{ + const gint32 TIMEOUT_APPROX_THRESHOLD_SEC = 10000; + nm_auto_pop_netns NMPNetns *netns = NULL; + NMNDisc * ndisc = user_data; + NMNDiscClass * klass = NM_NDISC_GET_CLASS(ndisc); + NMNDiscPrivate * priv = NM_NDISC_GET_PRIVATE(ndisc); + gs_free_error GError *error = NULL; + gint32 timeout_msec; + + if (!nm_ndisc_netns_push(ndisc, &netns)) { + nm_utils_error_set(&error, + NM_UTILS_ERROR_UNKNOWN, + "failure to switch netns for soliciting routers"); + } else + klass->send_rs(ndisc, &error); + + if (error) + _MAYBE_WARN("solicit: failure sending router solicitation: %s", error->message); + else { + _LOGT("solicit: router solicitation sent"); nm_clear_g_free(&priv->last_error); - } else { - _MAYBE_WARN("failure sending router solicitation: %s", error->message); - g_clear_error(&error); } - priv->last_rs = nm_utils_get_monotonic_timestamp_sec(); - if (priv->solicitations_left > 0) { - _LOGD("scheduling router solicitation retry in %d seconds.", - (int) priv->router_solicitation_interval); - priv->send_rs_id = g_timeout_add_seconds(priv->router_solicitation_interval, - (GSourceFunc) send_rs_timeout, - ndisc); + /* https://tools.ietf.org/html/rfc4861#section-6.3.7 describes how to send solicitations: + * + * > ... a host SHOULD transmit up to MAX_RTR_SOLICITATIONS Router Solicitation messages, + * > each separated by at least RTR_SOLICITATION_INTERVAL seconds. + * + * but this was extended by https://tools.ietf.org/html/rfc7559#section-2 to send continuously + * and with exponential backoff (detailed the algorithm in https://tools.ietf.org/html/rfc3315#section-14). + * We do RFC7559. + */ + if (priv->solicit_retransmit_time_msec == 0) { + G_STATIC_ASSERT(RFC7559_IRT == NM_NDISC_RFC4861_RTR_SOLICITATION_INTERVAL); + priv->solicit_retransmit_time_msec = solicit_retransmit_time_jitter(RFC7559_IRT * 1000); + timeout_msec = priv->solicit_retransmit_time_msec; } else { - _LOGD("did not receive a router advertisement after %d solicitations.", - (int) priv->router_solicitations); + priv->solicit_retransmit_time_msec += + solicit_retransmit_time_jitter(priv->solicit_retransmit_time_msec); + timeout_msec = priv->solicit_retransmit_time_msec; + if (priv->solicit_retransmit_time_msec > RFC7559_MRT * 1000) { + priv->solicit_retransmit_time_msec = RFC7559_MRT * 1000; + timeout_msec = solicit_retransmit_time_jitter(priv->solicit_retransmit_time_msec); + } } - return G_SOURCE_REMOVE; + _LOGD("solicit: schedule sending next solicitation in%s %.3f seconds", + timeout_msec / 1000 >= TIMEOUT_APPROX_THRESHOLD_SEC ? " about" : "", + ((double) timeout_msec) / 1000); + + nm_clear_g_source_inst(&priv->solicit_timer_source); + priv->solicit_timer_source = nm_g_timeout_add_source_approx(timeout_msec, + TIMEOUT_APPROX_THRESHOLD_SEC, + solicit_timer_cb, + ndisc); + return G_SOURCE_CONTINUE; } static void -solicit_routers(NMNDisc *ndisc) +solicit_timer_start(NMNDisc *ndisc) { NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc); - gint32 now, next; - gint64 t; + gint32 delay_msec; - if (priv->send_rs_id) - return; + /* rfc4861, Section 6.3.7: + * + * We should randomly wait up to NM_NDISC_RFC4861_MAX_RTR_SOLICITATION_DELAY (1 second) + * before sending the first RS. RFC4861 is from 2007, I don't think 1 second is + * a suitable delay in 2021. Wait only up to 250 msec instead. */ + + delay_msec = + g_random_int() % ((guint32)(NM_NDISC_RFC4861_MAX_RTR_SOLICITATION_DELAY * 1000 / 4)); - now = nm_utils_get_monotonic_timestamp_sec(); - priv->solicitations_left = priv->router_solicitations; + _LOGD("solicit: schedule sending first solicitation (of %d) in %.3f seconds", + priv->router_solicitations, + ((double) delay_msec) / 1000); - t = (((gint64) priv->last_rs) + priv->router_solicitation_interval) - now; - next = CLAMP(t, 0, G_MAXINT32); - _LOGD("scheduling explicit router solicitation request in %" G_GINT32_FORMAT " seconds.", next); - priv->send_rs_id = g_timeout_add_seconds((guint32) next, (GSourceFunc) send_rs_timeout, ndisc); + priv->solicit_retransmit_time_msec = 0; + nm_clear_g_source_inst(&priv->solicit_timer_source); + priv->solicit_timer_source = nm_g_timeout_add_source(delay_msec, solicit_timer_cb, ndisc); } +/*****************************************************************************/ + static gboolean announce_router(NMNDisc *ndisc) { @@ -988,7 +1035,7 @@ nm_ndisc_set_iid(NMNDisc *ndisc, const NMUtilsIPv6IfaceId iid) _LOGD("IPv6 interface identifier changed, flushing addresses"); g_array_remove_range(rdata->addresses, 0, rdata->addresses->len); nm_ndisc_emit_config_change(ndisc, NM_NDISC_CONFIG_ADDRESSES); - solicit_routers(ndisc); + solicit_timer_start(ndisc); } return TRUE; } @@ -1050,7 +1097,7 @@ nm_ndisc_start(NMNDisc *ndisc) g_source_attach(priv->ra_timeout_source, NULL); } - solicit_routers(ndisc); + solicit_timer_start(ndisc); return; } @@ -1088,20 +1135,16 @@ nm_ndisc_stop(NMNDisc *ndisc) g_array_set_size(rdata->dns_domains, 0); priv->rdata.public.hop_limit = 64; - /* Start at very low number so that last_rs - router_solicitation_interval - * is much lower than nm_utils_get_monotonic_timestamp_sec() at startup. - */ - priv->last_rs = G_MININT32; nm_clear_g_source_inst(&priv->ra_timeout_source); - nm_clear_g_source(&priv->send_rs_id); nm_clear_g_source(&priv->send_ra_id); nm_clear_g_free(&priv->last_error); nm_clear_g_source_inst(&priv->timeout_expire_source); - priv->solicitations_left = 0; + priv->solicit_retransmit_time_msec = 0; + nm_clear_g_source_inst(&priv->solicit_timer_source); + priv->announcements_left = 0; - priv->last_rs = G_MININT32; priv->last_ra = G_MININT32; } @@ -1422,6 +1465,33 @@ check_timestamps(NMNDisc *ndisc, gint64 now_msec, NMNDiscConfigMap changed) ndisc); } + /* When we receive an RA, we don't disable solicitations entirely. Instead, + * we set the interval the maximum (RFC7559_MRT). + * + * This contradicts https://tools.ietf.org/html/rfc7559#section-2.1, which says + * that we SHOULD stop sending RS if we receive an RA -- but only on a multicast + * capable link and if the RA has a valid router lifetime. + * + * But we really want to recover from a dead router on the network, so we + * don't want to cease sending RS entirely. + * + * But we only re-schedule the timer if the current interval is not already + * "RFC7559_MRT * 1000". Otherwise, we already have a slow interval counter + * pending. */ + if (priv->solicit_retransmit_time_msec != RFC7559_MRT * 1000) { + gint32 timeout_msec; + + priv->solicit_retransmit_time_msec = RFC7559_MRT * 1000; + timeout_msec = solicit_retransmit_time_jitter(priv->solicit_retransmit_time_msec); + + _LOGD("solicit: schedule sending next (slow) solicitation in about %.3f seconds", + ((double) timeout_msec) / 1000); + + nm_clear_g_source_inst(&priv->solicit_timer_source); + priv->solicit_timer_source = + nm_g_timeout_add_source_approx(timeout_msec, 0, solicit_timer_cb, ndisc); + } + if (changed != NM_NDISC_CONFIG_NONE) nm_ndisc_emit_config_change(ndisc, changed); } @@ -1439,7 +1509,6 @@ nm_ndisc_ra_received(NMNDisc *ndisc, gint64 now_msec, NMNDiscConfigMap changed) NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc); nm_clear_g_source_inst(&priv->ra_timeout_source); - nm_clear_g_source(&priv->send_rs_id); nm_clear_g_free(&priv->last_error); check_timestamps(ndisc, now_msec, changed); } @@ -1551,11 +1620,6 @@ nm_ndisc_init(NMNDisc *ndisc) rdata->dns_domains = g_array_new(FALSE, FALSE, sizeof(NMNDiscDNSDomain)); g_array_set_clear_func(rdata->dns_domains, dns_domain_free); priv->rdata.public.hop_limit = 64; - - /* Start at very low number so that last_rs - router_solicitation_interval - * is much lower than nm_utils_get_monotonic_timestamp_sec() at startup. - */ - priv->last_rs = G_MININT32; } static void @@ -1565,7 +1629,7 @@ dispose(GObject *object) NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc); nm_clear_g_source_inst(&priv->ra_timeout_source); - nm_clear_g_source(&priv->send_rs_id); + nm_clear_g_source_inst(&priv->solicit_timer_source); nm_clear_g_source(&priv->send_ra_id); nm_clear_g_free(&priv->last_error); diff --git a/src/ndisc/tests/test-ndisc-fake.c b/src/ndisc/tests/test-ndisc-fake.c index 408fc420e0..3c4bc1f2f7 100644 --- a/src/ndisc/tests/test-ndisc-fake.c +++ b/src/ndisc/tests/test-ndisc-fake.c @@ -584,46 +584,9 @@ _test_dns_solicit_loop_changed(NMNDisc * ndisc, data->counter++; } -static gboolean -success_timeout(TestData *data) -{ - data->timeout_id = 0; - g_main_loop_quit(data->loop); - return G_SOURCE_REMOVE; -} - static void _test_dns_solicit_loop_rs_sent(NMFakeNDisc *ndisc, TestData *data) { - const gint64 now_msec = nm_utils_get_monotonic_timestamp_msec(); - guint id; - - if (data->rs_counter > 0 && data->rs_counter < 6) { - if (data->rs_counter == 1) { - data->first_solicit_msec = now_msec; - /* Kill the test after 10 seconds if it hasn't failed yet */ - data->timeout_id = g_timeout_add_seconds(10, (GSourceFunc) success_timeout, data); - } - - /* On all but the first solicitation, which should be triggered by the - * DNS servers reaching 1/2 lifetime, emit a new RA without the DNS - * servers again. - */ - id = nm_fake_ndisc_add_ra(ndisc, 0, NM_NDISC_DHCP_LEVEL_NONE, 4, 1500); - g_assert(id); - nm_fake_ndisc_add_gateway(ndisc, - id, - "fe80::1", - now_msec + 10000, - NM_ICMPV6_ROUTER_PREF_MEDIUM); - - nm_fake_ndisc_emit_new_ras(ndisc); - } else if (data->rs_counter >= 6) { - /* Fail if we've sent too many solicitations in the past 4 seconds */ - g_assert_cmpint(now_msec - data->first_solicit_msec, >, 4000); - g_source_remove(data->timeout_id); - g_main_loop_quit(data->loop); - } data->rs_counter++; } @@ -639,9 +602,6 @@ test_dns_solicit_loop(void) }; guint id; - g_test_skip("The solicitation behavior is wrong and need fixing. This test is not working too"); - return; - /* Ensure that no solicitation loop happens when DNS servers or domains * stop being sent in advertisements. This can happen if two routers * send RAs, but the one sending DNS info stops responding, or if one @@ -664,8 +624,10 @@ test_dns_solicit_loop(void) &data); nm_ndisc_start(NM_NDISC(ndisc)); - nmtst_main_loop_run_assert(data.loop, 20000); + if (nmtst_main_loop_run(data.loop, 10000)) + g_error("we expect to run the loop until timeout. What is wrong?"); g_assert_cmpint(data.counter, ==, 3); + g_assert_cmpint(data.rs_counter, ==, 1); } /*****************************************************************************/ |