l3cfg: merge branch 'th/l3cfg-acd-crash'

https://bugzilla.redhat.com/show_bug.cgi?id=2047788 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1077
author: Thomas Haller <thaller@redhat.com> 2022-02-02 11:10:36 +0100
committer: Thomas Haller <thaller@redhat.com> 2022-02-02 11:10:36 +0100
commit: dc1092e22b2500233c22ec197191114e5d885256 (patch)
tree: 4d8530abd258e73db742585d26a1c7b1ee8426ee
parent: 49fe45b155f3ce458ee329a595cb417f4fc3ff3c (diff)
parent: db0d84f13a4c5a73403474f8b4af4b905fa60d4b (diff)
download: NetworkManager-dc1092e22b2500233c22ec197191114e5d885256.tar.gz
1 files changed, 76 insertions, 74 deletions
diff --git a/src/core/nm-l3cfg.c b/src/core/nm-l3cfg.c
index b42ff8cb75..aed4cf05c9 100644
--- a/src/core/nm-l3cfg.c
+++ b/src/core/nm-l3cfg.c
@@ -1334,6 +1334,15 @@ _acd_data_collect_tracks_data(const AcdData     *acd_data,
     guint             n                     = 0;
     guint             i;
 
+    /* We do a simple search over all track-infos for the best, which determines
+     * our ACD state. That is, we prefer ACD disabled, and otherwise the
+     * shortest configured timeout.
+     *
+     * This linear search is probably fast enough, because we expect that each
+     * address/acd_data has few trackers.
+     * The alternative would be caching the best result, but that is more complicated,
+     * so not done. */
+
     for (i = 0; i < acd_data->info.n_track_infos; i++) {
         const NML3AcdAddrTrackInfo *acd_track = &acd_data->info.track_infos[i];
 
@@ -1517,12 +1526,7 @@ _l3_acd_nacd_event(int fd, GIOCondition condition, gpointer user_data)
                       "in %u msec)",
                       timeout_msec);
                 self->priv.p->nacd_event_down_source =
-                    nm_g_timeout_source_new(timeout_msec,
-                                            G_PRIORITY_DEFAULT,
-                                            _l3_acd_nacd_event_down_timeout_cb,
-                                            self,
-                                            NULL);
-                g_source_attach(self->priv.p->nacd_event_down_source, NULL);
+                    nm_g_timeout_add_source(timeout_msec, _l3_acd_nacd_event_down_timeout_cb, self);
             }
             break;
         default:
@@ -1583,12 +1587,9 @@ _l3_acd_nacd_instance_reset(NML3Cfg *self, NMTernary start_timer, gboolean acd_d
         break;
     case NM_TERNARY_TRUE:
         self->priv.p->nacd_instance_ensure_retry =
-            nm_g_timeout_source_new_seconds(ACD_ENSURE_RATELIMIT_MSEC / 1000u,
-                                            G_PRIORITY_DEFAULT,
+            nm_g_timeout_add_seconds_source(ACD_ENSURE_RATELIMIT_MSEC / 1000u,
                                             _l3_acd_nacd_instance_ensure_retry_cb,
-                                            self,
-                                            NULL);
-        g_source_attach(self->priv.p->nacd_instance_ensure_retry, NULL);
+                                            self);
         break;
     case NM_TERNARY_DEFAULT:
         break;
@@ -1937,14 +1938,24 @@ _l3_acd_data_timeout_cb(gpointer user_data)
 static void
 _l3_acd_data_timeout_schedule(AcdData *acd_data, gint64 timeout_msec)
 {
+    /* in _l3_acd_data_state_set_full() we clear the timer. At the same time,
+     * in _l3_acd_data_state_change(ACD_STATE_CHANGE_MODE_TIMEOUT) we only
+     * expect timeouts in certain states.
+     *
+     * That means, scheduling a timeout is only correct if we are in a certain
+     * state, which allows to handle timeouts. This assert checks for that to
+     * ensure we don't call a timeout in an unexpected state. */
+    nm_assert(NM_IN_SET(acd_data->info.state,
+                        NM_L3_ACD_ADDR_STATE_PROBING,
+                        NM_L3_ACD_ADDR_STATE_USED,
+                        NM_L3_ACD_ADDR_STATE_DEFENDING,
+                        NM_L3_ACD_ADDR_STATE_CONFLICT));
+
     nm_clear_g_source_inst(&acd_data->acd_data_timeout_source);
     acd_data->acd_data_timeout_source =
-        nm_g_timeout_source_new(NM_CLAMP((gint64) 0, timeout_msec, (gint64) G_MAXUINT),
-                                G_PRIORITY_DEFAULT,
+        nm_g_timeout_add_source(NM_CLAMP((gint64) 0, timeout_msec, (gint64) G_MAXUINT),
                                 _l3_acd_data_timeout_cb,
-                                acd_data,
-                                NULL);
-    g_source_attach(acd_data->acd_data_timeout_source, NULL);
+                                acd_data);
 }
 
 static void
@@ -2071,17 +2082,19 @@ _nm_printf(5, 6) static void _l3_acd_data_state_set_full(NML3Cfg         *self,
     else
         changed = FALSE;
 
-    if (format) {
-        gs_free char *msg = NULL;
-        va_list       args;
+    if (_LOGT_ENABLED()) {
+        if (format) {
+            gs_free char *msg = NULL;
+            va_list       args;
 
-        va_start(args, format);
-        msg = g_strdup_vprintf(format, args);
-        va_end(args);
+            va_start(args, format);
+            msg = g_strdup_vprintf(format, args);
+            va_end(args);
 
-        _LOGT_acd(acd_data, "set state to %s (%s)", _l3_acd_addr_state_to_string(state), msg);
-    } else
-        _LOGT_acd(acd_data, "set state to %s", _l3_acd_addr_state_to_string(state));
+            _LOGT_acd(acd_data, "set state to %s (%s)", _l3_acd_addr_state_to_string(state), msg);
+        } else
+            _LOGT_acd(acd_data, "set state to %s", _l3_acd_addr_state_to_string(state));
+    }
 
     if (changed && allow_commit) {
         /* The availability of an address just changed (and we are instructed to
@@ -2338,22 +2351,7 @@ handle_init:
                 /* we are already probing. There is nothing to do for this timeout. */
                 return;
             }
-
-            nm_utils_get_monotonic_timestamp_msec_cached(p_now_msec);
-
-            if (acd_data->probing_timestamp_msec + ACD_WAIT_PROBING_EXTRA_TIME_MSEC
-                    + ACD_WAIT_PROBING_EXTRA_TIME2_MSEC
-                >= (*p_now_msec)) {
-                /* hm. We failed to create a new probe too long. Something is really wrong
-                 * internally, but let's ignore the issue and assume the address is good. What
-                 * else would we do? Assume the address is USED? */
-                _LOGT_acd(acd_data,
-                          "probe-good (waiting for creating probe timed out. Assume good)");
-                goto handle_start_defending;
-            }
-
-            log_reason = "retry probing on timeout";
-            goto handle_start_probing;
+            /* fall-through */
 
         case NM_L3_ACD_ADDR_STATE_USED:
         case NM_L3_ACD_ADDR_STATE_CONFLICT:
@@ -2373,7 +2371,10 @@ handle_init:
             acd_data->acd_defend_type_desired = acd_defend_type;
 
             if (acd_timeout_msec <= 0) {
-                log_reason = "acd disabled by configuration (restart after previous conflict)";
+                if (acd_data->info.state == NM_L3_ACD_ADDR_STATE_PROBING)
+                    log_reason = "acd disabled by configuration (timeout during probing)";
+                else
+                    log_reason = "acd disabled by configuration (restart after previous conflict)";
                 goto handle_probing_done;
             }
 
@@ -2383,6 +2384,23 @@ handle_init:
             }
 
             nm_utils_get_monotonic_timestamp_msec_cached(p_now_msec);
+
+            if (acd_data->info.state == NM_L3_ACD_ADDR_STATE_PROBING) {
+                if (acd_data->probing_timestamp_msec + ACD_WAIT_PROBING_EXTRA_TIME_MSEC
+                        + ACD_WAIT_PROBING_EXTRA_TIME2_MSEC
+                    >= (*p_now_msec)) {
+                    /* hm. We failed to create a new probe too long. Something is really wrong
+                     * internally, but let's ignore the issue and assume the address is good. What
+                     * else would we do? Assume the address is USED? */
+                    _LOGT_acd(acd_data,
+                              "probe-good (waiting for creating probe timed out. Assume good)");
+                    goto handle_start_defending;
+                }
+
+                log_reason = "retry probing on timeout";
+                goto handle_start_probing;
+            }
+
             acd_data->probing_timestamp_msec = (*p_now_msec);
             acd_data->probing_timeout_msec   = acd_timeout_msec;
             if (acd_data->info.state == NM_L3_ACD_ADDR_STATE_USED)
@@ -2603,39 +2621,23 @@ handle_init:
         return;
 
     case ACD_STATE_CHANGE_MODE_INSTANCE_RESET:
+        nm_assert(NM_IN_SET(acd_data->info.state,
+                            NM_L3_ACD_ADDR_STATE_PROBING,
+                            NM_L3_ACD_ADDR_STATE_DEFENDING,
+                            NM_L3_ACD_ADDR_STATE_READY,
+                            NM_L3_ACD_ADDR_STATE_USED,
+                            NM_L3_ACD_ADDR_STATE_CONFLICT,
+                            NM_L3_ACD_ADDR_STATE_EXTERNAL_REMOVED));
 
-        switch (acd_data->info.state) {
-        case NM_L3_ACD_ADDR_STATE_INIT:
-            nm_assert_not_reached();
-            return;
-        case NM_L3_ACD_ADDR_STATE_PROBING:
-        case NM_L3_ACD_ADDR_STATE_DEFENDING:
-        case NM_L3_ACD_ADDR_STATE_READY:
-            if (!acd_data->nacd_probe) {
-                /* we failed starting to probe before and have a timer running to
-                 * restart. We don't do anything now, but let the timer handle it.
-                 * This also implements some rate limiting for us. */
-                _LOGT_acd(acd_data,
-                          "n-acd instance reset. Ignore event while in state %s",
-                          _l3_acd_addr_state_to_string(acd_data->info.state));
-                return;
-            }
-
-            _LOGT_acd(acd_data,
-                      "n-acd instance reset. Trigger a restart while in state %s",
-                      _l3_acd_addr_state_to_string(acd_data->info.state));
-            acd_data->nacd_probe = n_acd_probe_free(acd_data->nacd_probe);
-            _l3_acd_data_timeout_schedule(acd_data, 0);
-            return;
-        case NM_L3_ACD_ADDR_STATE_USED:
-        case NM_L3_ACD_ADDR_STATE_CONFLICT:
-        case NM_L3_ACD_ADDR_STATE_EXTERNAL_REMOVED:
-            /* as we have no probe, we are already handling this (e.g. by having
-             * a retry timer). Nothing to do. */
-            nm_assert(!acd_data->nacd_probe);
-            return;
-        }
-        nm_assert_not_reached();
+        /* an instance-reset is a dramatic event. We start over with probing. */
+        _LOGT_acd(acd_data,
+                  "n-acd instance reset. Reset to probing while in state %s",
+                  _l3_acd_addr_state_to_string(acd_data->info.state));
+        acd_data->nacd_probe                         = n_acd_probe_free(acd_data->nacd_probe);
+        acd_data->last_defendconflict_timestamp_msec = 0;
+        acd_data->probing_timestamp_msec = nm_utils_get_monotonic_timestamp_msec_cached(p_now_msec);
+        _l3_acd_data_state_set(self, acd_data, NM_L3_ACD_ADDR_STATE_PROBING, FALSE);
+        _l3_acd_data_timeout_schedule(acd_data, 0);
         return;
     }
author	Thomas Haller <thaller@redhat.com>	2022-02-02 11:10:36 +0100
committer	Thomas Haller <thaller@redhat.com>	2022-02-02 11:10:36 +0100
commit	dc1092e22b2500233c22ec197191114e5d885256 (patch)
tree	4d8530abd258e73db742585d26a1c7b1ee8426ee
parent	49fe45b155f3ce458ee329a595cb417f4fc3ff3c (diff)
parent	db0d84f13a4c5a73403474f8b4af4b905fa60d4b (diff)
download	NetworkManager-dc1092e22b2500233c22ec197191114e5d885256.tar.gz