diff options
author | Beniamino Galvani <bgalvani@redhat.com> | 2020-04-16 22:37:27 +0200 |
---|---|---|
committer | Beniamino Galvani <bgalvani@redhat.com> | 2020-04-21 08:54:44 +0200 |
commit | eefe5dacaa90e8a4e63f34168fafb1a29c8a2c64 (patch) | |
tree | 07b23c6457262279bd25d5c9eab15aac6bdd668f | |
parent | a5338affb5e64c48fcdb90f1b7bbf9a93cae4342 (diff) | |
download | NetworkManager-eefe5dacaa90e8a4e63f34168fafb1a29c8a2c64.tar.gz |
dhcp-helper: retry in case of failure connecting to D-Bus unix socket
Connecting to the unix socket can fail with EAGAIN if there are too
many pending connections and the server can't accept them in time
before reaching backlog capacity. Ideally the server should increase
the backlog length, but GLib doesn't provide a way to change it for a
GDBus server. Retry for up to 5 seconds in case of failure.
https://bugzilla.redhat.com/show_bug.cgi?id=1821594
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/471
-rw-r--r-- | src/dhcp/nm-dhcp-helper.c | 45 |
1 file changed, 36 insertions, 9 deletions
diff --git a/src/dhcp/nm-dhcp-helper.c b/src/dhcp/nm-dhcp-helper.c index 17f9db7f3b..365e32e384 100644 --- a/src/dhcp/nm-dhcp-helper.c +++ b/src/dhcp/nm-dhcp-helper.c @@ -115,19 +115,42 @@ main (int argc, char *argv[]) gs_unref_variant GVariant *parameters = NULL; gs_unref_variant GVariant *result = NULL; gboolean success = FALSE; - guint try_count = 0; + guint try_count; + gint64 time_start; gint64 time_end; - /* FIXME: g_dbus_connection_new_for_address_sync() tries to connect to the socket in - * non-blocking mode, which can easily fail with EAGAIN, causing the creation of the - * socket to fail with "Could not connect: Resource temporarily unavailable". - * - * We should instead create the GIOStream ourself and block on connecting to - * the socket. */ + /* Connecting to the unix socket can fail with EAGAIN if there are too + * many pending connections and the server can't accept them in time + * before reaching backlog capacity. Ideally the server should increase + * the backlog length, but GLib doesn't provide a way to change it for a + * GDBus server. Retry for up to 5 seconds in case of failure. 
*/ + time_start = g_get_monotonic_time (); + time_end = time_start + (5000 * 1000L); + try_count = 0; + +do_connect: + try_count++; connection = g_dbus_connection_new_for_address_sync ("unix:path=" NMRUNDIR "/private-dhcp", G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT, NULL, NULL, &error); if (!connection) { + if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) { + gint64 time_remaining = time_end - g_get_monotonic_time (); + gint64 interval; + + if (time_remaining > 0) { + _LOGi ("failure to connect: %s (retry %u, waited %lld ms)", + error->message, try_count, + (long long) (time_end - time_remaining - time_start) / 1000); + interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))), + 5000, + 100000); + g_usleep (NM_MIN (interval, time_remaining)); + g_clear_error (&error); + goto do_connect; + } + } + g_dbus_error_strip_remote_error (error); _LOGE ("could not connect to NetworkManager D-Bus socket: %s", error->message); @@ -135,8 +158,8 @@ main (int argc, char *argv[]) } parameters = build_signal_parameters (); - time_end = g_get_monotonic_time () + (200 * 1000L); /* retry for at most 200 milliseconds */ + try_count = 0; do_notify: try_count++; @@ -158,6 +181,7 @@ do_notify: s_err = g_dbus_error_get_remote_error (error); if (NM_IN_STRSET (s_err, "org.freedesktop.DBus.Error.UnknownMethod")) { gint64 remaining_time = time_end - g_get_monotonic_time (); + gint64 interval; /* I am not sure that a race can actually happen, as we register the object * on the server side during GDBusServer:new-connection signal. @@ -166,7 +190,10 @@ do_notify: * do some retry. */ if (remaining_time > 0) { _LOGi ("failure to call notify: %s (retry %u)", error->message, try_count); - g_usleep (NM_MIN (NM_CLAMP ((gint64) (100L * (1L << try_count)), 5000, 25000), remaining_time)); + interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))), + 5000, + 25000); + g_usleep (NM_MIN (interval, remaining_time)); g_clear_error (&error); goto do_notify; } |