summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Beniamino Galvani <bgalvani@redhat.com> 2020-04-16 22:37:27 +0200
committer: Beniamino Galvani <bgalvani@redhat.com> 2020-04-21 08:54:44 +0200
commit: eefe5dacaa90e8a4e63f34168fafb1a29c8a2c64 (patch)
tree: 07b23c6457262279bd25d5c9eab15aac6bdd668f
parent: a5338affb5e64c48fcdb90f1b7bbf9a93cae4342 (diff)
download: NetworkManager-eefe5dacaa90e8a4e63f34168fafb1a29c8a2c64.tar.gz
dhcp-helper: retry in case of failure connecting to D-Bus unix socket
Connecting to the unix socket can fail with EAGAIN if there are too many pending connections and the server can't accept them in time before reaching backlog capacity. Ideally the server should increase the backlog length, but GLib doesn't provide a way to change it for a GDBus server. Retry for up to 5 seconds in case of failure.

https://bugzilla.redhat.com/show_bug.cgi?id=1821594
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/471
-rw-r--r--  src/dhcp/nm-dhcp-helper.c  45
1 files changed, 36 insertions, 9 deletions
diff --git a/src/dhcp/nm-dhcp-helper.c b/src/dhcp/nm-dhcp-helper.c
index 17f9db7f3b..365e32e384 100644
--- a/src/dhcp/nm-dhcp-helper.c
+++ b/src/dhcp/nm-dhcp-helper.c
@@ -115,19 +115,42 @@ main (int argc, char *argv[])
gs_unref_variant GVariant *parameters = NULL;
gs_unref_variant GVariant *result = NULL;
gboolean success = FALSE;
- guint try_count = 0;
+ guint try_count;
+ gint64 time_start;
gint64 time_end;
- /* FIXME: g_dbus_connection_new_for_address_sync() tries to connect to the socket in
- * non-blocking mode, which can easily fail with EAGAIN, causing the creation of the
- * socket to fail with "Could not connect: Resource temporarily unavailable".
- *
- * We should instead create the GIOStream ourself and block on connecting to
- * the socket. */
+ /* Connecting to the unix socket can fail with EAGAIN if there are too
+ * many pending connections and the server can't accept them in time
+ * before reaching backlog capacity. Ideally the server should increase
+ * the backlog length, but GLib doesn't provide a way to change it for a
+ * GDBus server. Retry for up to 5 seconds in case of failure. */
+ time_start = g_get_monotonic_time ();
+ time_end = time_start + (5000 * 1000L);
+ try_count = 0;
+
+do_connect:
+ try_count++;
connection = g_dbus_connection_new_for_address_sync ("unix:path=" NMRUNDIR "/private-dhcp",
G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT,
NULL, NULL, &error);
if (!connection) {
+ if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) {
+ gint64 time_remaining = time_end - g_get_monotonic_time ();
+ gint64 interval;
+
+ if (time_remaining > 0) {
+ _LOGi ("failure to connect: %s (retry %u, waited %lld ms)",
+ error->message, try_count,
+ (long long) (time_end - time_remaining - time_start) / 1000);
+ interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))),
+ 5000,
+ 100000);
+ g_usleep (NM_MIN (interval, time_remaining));
+ g_clear_error (&error);
+ goto do_connect;
+ }
+ }
+
g_dbus_error_strip_remote_error (error);
_LOGE ("could not connect to NetworkManager D-Bus socket: %s",
error->message);
@@ -135,8 +158,8 @@ main (int argc, char *argv[])
}
parameters = build_signal_parameters ();
-
time_end = g_get_monotonic_time () + (200 * 1000L); /* retry for at most 200 milliseconds */
+ try_count = 0;
do_notify:
try_count++;
@@ -158,6 +181,7 @@ do_notify:
s_err = g_dbus_error_get_remote_error (error);
if (NM_IN_STRSET (s_err, "org.freedesktop.DBus.Error.UnknownMethod")) {
gint64 remaining_time = time_end - g_get_monotonic_time ();
+ gint64 interval;
/* I am not sure that a race can actually happen, as we register the object
* on the server side during GDBusServer:new-connection signal.
@@ -166,7 +190,10 @@ do_notify:
* do some retry. */
if (remaining_time > 0) {
_LOGi ("failure to call notify: %s (retry %u)", error->message, try_count);
- g_usleep (NM_MIN (NM_CLAMP ((gint64) (100L * (1L << try_count)), 5000, 25000), remaining_time));
+ interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))),
+ 5000,
+ 25000);
+ g_usleep (NM_MIN (interval, remaining_time));
g_clear_error (&error);
goto do_notify;
}