diff options
author | Thomas Haller <thaller@redhat.com> | 2021-08-03 12:58:29 +0200 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2021-08-04 09:41:10 +0200 |
commit | 4fe20e4cbeeead68ff9d7480242b2309aff6662b (patch) | |
tree | 4c20afdf78d7e17943eceb421a870211d3cd90dd | |
parent | d127b1fb79c2693f7c62aa964afb74a74e31925b (diff) | |
download | NetworkManager-4fe20e4cbeeead68ff9d7480242b2309aff6662b.tar.gz |
dispatcher: fix race for exit-on-idle
- exit-on-idle needs to be done correctly. Fix the race, by first
notifying systemd (STOPPING=1), releasing the name, and all the
while continue processing requests.
- don't use g_bus_own_name_on_connection(). That one also listens
to NameLost and NameAcquired signals, but we don't care about those.
systemd will take care to only spawn one process at a time. And
anyway, the well-known name is only important to be reachable, we
don't require it to be functional. We can get the first request
before RequestName completed and we can continue getting requests
after releasing the name.
-rw-r--r-- | data/NetworkManager-dispatcher.service.in | 1 | ||||
-rw-r--r-- | src/nm-dispatcher/nm-dispatcher.c | 238 |
2 files changed, 156 insertions, 83 deletions
diff --git a/data/NetworkManager-dispatcher.service.in b/data/NetworkManager-dispatcher.service.in index c450478bac..b192d0b0c4 100644 --- a/data/NetworkManager-dispatcher.service.in +++ b/data/NetworkManager-dispatcher.service.in @@ -5,6 +5,7 @@ Description=Network Manager Script Dispatcher Service Type=dbus BusName=org.freedesktop.nm_dispatcher ExecStart=@libexecdir@/nm-dispatcher +NotifyAccess=main # We want to allow scripts to spawn long-running daemons, so tell # systemd to not clean up when nm-dispatcher exits diff --git a/src/nm-dispatcher/nm-dispatcher.c b/src/nm-dispatcher/nm-dispatcher.c index 4ee7153afb..c76b9c368f 100644 --- a/src/nm-dispatcher/nm-dispatcher.c +++ b/src/nm-dispatcher/nm-dispatcher.c @@ -19,6 +19,7 @@ #include "libnm-core-aux-extern/nm-dispatcher-api.h" #include "libnm-glib-aux/nm-dbus-aux.h" +#include "libnm-glib-aux/nm-io-utils.h" #include "nm-dispatcher-utils.h" /*****************************************************************************/ @@ -32,8 +33,11 @@ static struct { gboolean persist; GSource * quit_source; guint request_id_counter; - gboolean ever_acquired_name; - bool exit_with_failure; + guint dbus_regist_id; + + bool name_requested; + + bool exit_with_failure; bool shutdown_timeout; bool shutdown_quitting; @@ -201,13 +205,17 @@ quit_timeout_cb(gpointer user_data) static void quit_timeout_reschedule(void) { + nm_clear_g_source_inst(&gl.quit_source); + if (gl.persist) return; if (gl.shutdown_quitting) return; - nm_clear_g_source_inst(&gl.quit_source); + if (gl.num_requests_pending > 0) + return; + gl.quit_source = nm_g_timeout_add_source(10000, quit_timeout_cb, NULL); } @@ -294,7 +302,7 @@ complete_request(Request *request) request_free(request); - g_assert_cmpuint(gl.num_requests_pending, >, 0); + nm_assert(gl.num_requests_pending > 0); if (--gl.num_requests_pending <= 0) { nm_assert(!gl.current_request && !g_queue_peek_head(gl.requests_waiting)); quit_timeout_reschedule(); @@ -791,9 +799,9 @@ _method_call_action(GDBusMethodInvocation *invocation, GVariant *parameters) return; } - nm_clear_g_source_inst(&gl.quit_source); - gl.num_requests_pending++; + gl.shutdown_timeout = FALSE; + nm_clear_g_source_inst(&gl.quit_source); for (i = 0; i < request->scripts->len; i++) { ScriptInfo *s = g_ptr_array_index(request->scripts, i); @@ -839,31 +847,6 @@ _method_call_action(GDBusMethodInvocation *invocation, GVariant *parameters) } static void -on_name_acquired(GDBusConnection *connection, const char *name, gpointer user_data) -{ - gl.ever_acquired_name = TRUE; -} - -static void -on_name_lost(GDBusConnection *connection, const char *name, gpointer user_data) -{ - if (!connection) { - if (!gl.ever_acquired_name) { - _LOG_X_W("Could not get the system bus. Make sure the message bus daemon is running!"); - gl.exit_with_failure = TRUE; - } else { - _LOG_X_I("System bus stopped. Exiting"); - } - } else if (!gl.ever_acquired_name) { - _LOG_X_W("Could not acquire the " NM_DISPATCHER_DBUS_SERVICE " service."); - gl.exit_with_failure = TRUE; - } else - _LOG_X_I("Lost the " NM_DISPATCHER_DBUS_SERVICE " name. Exiting"); - - gl.shutdown_quitting = TRUE; -} - -static void _method_call(GDBusConnection * connection, const char * sender, const char * object_path, @@ -910,9 +893,75 @@ static GDBusInterfaceInfo *const interface_info = NM_DEFINE_GDBUS_INTERFACE_INFO .out_args = NM_DEFINE_GDBUS_ARG_INFOS(NM_DEFINE_GDBUS_ARG_INFO("results", "a(sus)"), ), ), ), ); -static const GDBusInterfaceVTable interface_vtable = { - .method_call = _method_call, -}; +static gboolean +_bus_register_service(void) +{ + static const GDBusInterfaceVTable interface_vtable = { + .method_call = _method_call, + }; + gs_free_error GError * error = NULL; + NMDBusConnectionCallBlockingData data = { + .result = NULL, + }; + gs_unref_variant GVariant *ret = NULL; + guint32 ret_val; + + gl.dbus_regist_id = + g_dbus_connection_register_object(gl.dbus_connection, + NM_DISPATCHER_DBUS_PATH, + interface_info, + NM_UNCONST_PTR(GDBusInterfaceVTable, &interface_vtable), + NULL, + NULL, + &error); + if (gl.dbus_regist_id == 0) { + _LOG_X_W("dbus: could not export dispatcher D-Bus interface %s: %s", + NM_DISPATCHER_DBUS_PATH, + error->message); + return FALSE; + } + + _LOG_X_D("dbus: dispatcher D-Bus interface %s registered", NM_DISPATCHER_DBUS_PATH); + + gl.name_requested = TRUE; + + nm_dbus_connection_call_request_name(gl.dbus_connection, + NM_DISPATCHER_DBUS_SERVICE, + DBUS_NAME_FLAG_ALLOW_REPLACEMENT + | DBUS_NAME_FLAG_REPLACE_EXISTING, + 10000, + gl.quit_cancellable, + nm_dbus_connection_call_blocking_callback, + &data); + + /* Note that with D-Bus activation, the first request will already hit us before RequestName + * completes. So when we start iterating the main context, the first request may already come + * in. */ + + ret = nm_dbus_connection_call_blocking(&data, &error); + + if (nm_utils_error_is_cancelled(error)) + return FALSE; + + if (error) { + _LOG_X_W("d-bus: failed to request name %s: %s", + NM_DISPATCHER_DBUS_SERVICE, + error->message); + return FALSE; + } + + g_variant_get(ret, "(u)", &ret_val); + + if (ret_val != DBUS_REQUEST_NAME_REPLY_PRIMARY_OWNER) { + _LOG_X_W("dbus: request name for %s failed to take name (response %u)", + NM_DISPATCHER_DBUS_SERVICE, + ret_val); + return FALSE; + } + + _LOG_X_D("dbus: request name for %s succeeded", NM_DISPATCHER_DBUS_SERVICE); + return TRUE; +} /*****************************************************************************/ @@ -973,6 +1022,14 @@ signal_handler(gpointer user_data) return G_SOURCE_CONTINUE; } +static void +_bus_release_name_cb(GObject *source, GAsyncResult *result, gpointer user_data) +{ + nm_assert(gl.num_requests_pending > 0); + gl.num_requests_pending--; + g_main_context_wakeup(NULL); +} + static gboolean parse_command_line(int *p_argc, char ***p_argv, GError **error) { @@ -997,11 +1054,9 @@ parse_command_line(int *p_argc, char ***p_argv, GError **error) int main(int argc, char **argv) { - gs_free_error GError *error = NULL; - GSource * source_term = NULL; - GSource * source_int = NULL; - guint dbus_regist_id = 0; - guint dbus_own_name_id = 0; + gs_free_error GError *error = NULL; + GSource * source_term = NULL; + GSource * source_int = NULL; signal(SIGPIPE, SIG_IGN); source_term = nm_g_unix_signal_add_source(SIGTERM, signal_handler, GINT_TO_POINTER(SIGTERM)); @@ -1045,67 +1100,84 @@ main(int argc, char **argv) quit_timeout_reschedule(); - dbus_regist_id = - g_dbus_connection_register_object(gl.dbus_connection, - NM_DISPATCHER_DBUS_PATH, - interface_info, - NM_UNCONST_PTR(GDBusInterfaceVTable, &interface_vtable), - NULL, - NULL, - &error); - if (dbus_regist_id == 0) { - _LOG_X_W("Could not export Dispatcher D-Bus interface: %s", error->message); - gl.exit_with_failure = 1; - goto done; + if (!_bus_register_service()) { + /* we failed to start the D-Bus service, and will shut down. However, + * first see whether there are any requests that we should process. + * Even if RequestName fails, we might already have requests pending. */ + if (!g_cancellable_is_cancelled(gl.quit_cancellable)) + gl.exit_with_failure = TRUE; + gl.shutdown_quitting = TRUE; } - dbus_own_name_id = g_bus_own_name_on_connection(gl.dbus_connection, - NM_DISPATCHER_DBUS_SERVICE, - G_BUS_NAME_OWNER_FLAGS_NONE, - on_name_acquired, - on_name_lost, - NULL, - NULL); - while (TRUE) { - if (gl.shutdown_timeout || gl.shutdown_quitting) + if (gl.num_requests_pending > 0) { + /* while we have requests pending, we cannot stop processing them... */ + } else if (gl.shutdown_timeout || gl.shutdown_quitting) { + if (gl.name_requested) { + int r; + + /* We already requested a name. To exit-on-idle without race, we need to dance. + * See https://lists.freedesktop.org/archives/dbus/2015-May/016671.html . */ + + gl.name_requested = FALSE; + gl.shutdown_quitting = TRUE; + + _LOG_X_T("shutdown: release-name"); + + /* we create a fake pending request. */ + gl.num_requests_pending++; + nm_clear_g_source_inst(&gl.quit_source); + + r = nm_sd_notify("STOPPING=1"); + if (r < 0) + _LOG_X_W("shutdown: sd_notifiy(STOPPING=1) failed: %s", nm_strerror_native(-r)); + else + _LOG_X_T("shutdown: sd_notifiy(STOPPING=1) succeeded"); + + g_dbus_connection_call(gl.dbus_connection, + DBUS_SERVICE_DBUS, + DBUS_PATH_DBUS, + DBUS_INTERFACE_DBUS, + "ReleaseName", + g_variant_new("(s)", NM_DISPATCHER_DBUS_SERVICE), + G_VARIANT_TYPE("(u)"), + G_DBUS_CALL_FLAGS_NONE, + 10000, + NULL, + _bus_release_name_cb, + NULL); + continue; + } + break; + } + g_main_context_iteration(NULL, TRUE); } done: + nm_g_main_context_iterate_ready(NULL); gl.shutdown_quitting = TRUE; g_cancellable_cancel(gl.quit_cancellable); - /* FIXME: nm-dispatcher does not exit-on-idle in a racefree manner. - * See https://lists.freedesktop.org/archives/dbus/2015-May/016671.html */ + nm_assert(gl.num_requests_pending == 0); - if (gl.num_requests_pending > 0) { - /* this only happens when we quit due to SIGTERM (not due to the idle timer). - * - * Log a warning about pending scripts. - * - * Maybe we should notify NetworkManager that these scripts are left in an unknown state. - * But this is either a bug of a dispatcher script (not terminating in time). - * - * FIXME(shutdown): Also, currently NetworkManager behaves wrongly on shutdown. - * Note that systemd would not terminate NetworkManager-dispatcher before NetworkManager. - * It's NetworkManager's responsibility to keep running long enough so that all requests - * can complete (with a watchdog timer, and a warning that user provided scripts hang). */ - _LOG_X_W("exiting but there are still %u requests pending", gl.num_requests_pending); - } - - if (dbus_own_name_id != 0) - g_bus_unown_name(nm_steal_int(&dbus_own_name_id)); - - if (dbus_regist_id != 0) - g_dbus_connection_unregister_object(gl.dbus_connection, nm_steal_int(&dbus_regist_id)); + if (gl.dbus_regist_id != 0) + g_dbus_connection_unregister_object(gl.dbus_connection, nm_steal_int(&gl.dbus_regist_id)); nm_clear_pointer(&gl.requests_waiting, g_queue_free); nm_clear_g_source_inst(&gl.quit_source); - g_clear_object(&gl.dbus_connection); + + if (gl.dbus_connection) { + g_dbus_connection_flush_sync(gl.dbus_connection, NULL, NULL); + g_clear_object(&gl.dbus_connection); + } + + nm_g_main_context_iterate_ready(NULL); + + _LOG_X_T("shutdown: exiting with %s", gl.exit_with_failure ? "failure" : "success"); if (!gl.debug) logging_shutdown(); |