From cb731294837736e957ee595ce11ab115277dbb36 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 17 Jan 2022 11:46:04 -0500 Subject: deploy: Add a 5s max timeout on global filesystem `sync()` https://bugzilla.redhat.com/show_bug.cgi?id=2003532 Basically there's a systemd bug where it's losing the `_netdev` aspect of Ceph filesystem mounts. This means the network is taken down before Ceph is unmounted. In turn, our invocation of `sync()` blocks on Ceph, which won't succeed. And this in turn manifests as a failure to transition to the new deployment. I initially did this patch to just rip out the global `sync()`. I am pretty sure we don't need it anymore. We've been doing individual `syncfs()` on `/sysroot` and `/boot` for a while now, and those are the only filesystems we should be touching. But *proving* that is a whole other thing of course. To be conservative, let's instead just add a timeout of 5s on our invocation of `sync()`. It doesn't return any information on success/error anyways. To allow testing without the `sync()` invocation, we also support a new `OSTREE_SYSROOT_OPT_SKIP_SYNC=1` environment variable. For staged deployments, this needs to be injected via e.g. systemd unit overrides into `ostree-finalize-staged.service`. Implementing this is a bit hairy - we need to spawn a thread. I debated blocking in arecursive mainloop, but I think `g_cond_wait_until()` is also fine here. --- src/libostree/ostree-sysroot-deploy.c | 68 +++++++++++++++++++++++++++++++++- src/libostree/ostree-sysroot-private.h | 6 +++ src/libostree/ostree-sysroot.c | 5 +++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c index c4ae86d5..6ee62410 100644 --- a/src/libostree/ostree-sysroot-deploy.c +++ b/src/libostree/ostree-sysroot-deploy.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1557,6 +1558,39 @@ typedef struct { guint64 extra_syncfs_msec; } SyncStats; +typedef struct { + int ref_count; /* atomic */ + bool success; + GMutex mutex; + GCond cond; +} SyncData; + +static void +sync_data_unref (SyncData *data) +{ + if (!g_atomic_int_dec_and_test (&data->ref_count)) + return; + g_mutex_clear (&data->mutex); + g_cond_clear (&data->cond); + g_free (data); +} + +// An asynchronous sync() call. See below. +static void * +sync_in_thread (void *ptr) +{ + SyncData *syncdata = ptr; + // Ensure that the caller is blocked waiting + g_mutex_lock (&syncdata->mutex); + sync (); + // Signal success + syncdata->success = true; + g_cond_broadcast (&syncdata->cond); + g_mutex_unlock (&syncdata->mutex); + sync_data_unref (syncdata); + return NULL; +} + /* First, sync the root directory as well as /var and /boot which may * be separate mount points. Then *in addition*, do a global * `sync()`. @@ -1589,9 +1623,41 @@ full_system_sync (OstreeSysroot *self, * actually get error codes out of that API, and we more clearly * delineate what we actually want to sync in the future when this * global sync call is removed. + * + * Due to https://bugzilla.redhat.com/show_bug.cgi?id=2003532 which is + * a bug in the interaction between Ceph and systemd, we have a capped + * timeout of 5s on the sync here. In general, we don't actually want + * to "own" flushing data on *all* mounted filesystems as part of + * our process. Again, we're only keeping the `sync` here out of + * conservatisim. We could likely just remove it and e.g. systemd + * would take care of sync as part of unmounting individual filesystems. */ start_msec = g_get_monotonic_time () / 1000; - sync (); + if (!(self->opt_flags & OSTREE_SYSROOT_GLOBAL_OPT_SKIP_SYNC)) + { + SyncData *syncdata = g_new (SyncData, 1); + syncdata->ref_count = 2; // One for us, one for thread + syncdata->success = FALSE; + g_mutex_init (&syncdata->mutex); + g_cond_init (&syncdata->cond); + g_mutex_lock (&syncdata->mutex); + GThread *worker = g_thread_new ("ostree sync", sync_in_thread, syncdata); + // Wait up to 5 seconds for sync() to finish + gint64 end_time = g_get_monotonic_time () + (5 * G_TIME_SPAN_SECOND); + while (!syncdata->success) + { + if (!g_cond_wait_until (&syncdata->cond, &syncdata->mutex, end_time)) + break; + } + g_mutex_unlock (&syncdata->mutex); + sync_data_unref (syncdata); + // We can't join the thread here, for two reasons. First, the whole + // point here is to avoid blocking on `sync`. The other reason is + // today there is no return value on success from `sync`, so there's + // no point to joining the thread even if we had a nonblocking mechanism + // to do so. + g_thread_unref (worker); + } end_msec = g_get_monotonic_time () / 1000; out_stats->extra_syncfs_msec = (end_msec - start_msec); diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h index 9cc4023f..cb34eeb3 100644 --- a/src/libostree/ostree-sysroot-private.h +++ b/src/libostree/ostree-sysroot-private.h @@ -38,6 +38,11 @@ typedef enum { OSTREE_SYSROOT_DEBUG_TEST_NO_DTB = 1 << 3, /* https://github.com/ostreedev/ostree/issues/2154 */ } OstreeSysrootDebugFlags; +typedef enum { + /* Skip invoking `sync()` */ + OSTREE_SYSROOT_GLOBAL_OPT_SKIP_SYNC = 1 << 0, +} OstreeSysrootGlobalOptFlags; + typedef enum { OSTREE_SYSROOT_LOAD_STATE_NONE, /* ostree_sysroot_new() was called */ OSTREE_SYSROOT_LOAD_STATE_INIT, /* We've loaded basic sysroot state and have an fd */ @@ -75,6 +80,7 @@ struct OstreeSysroot { /* Only access through ostree_sysroot_[_get]repo() */ OstreeRepo *repo; + OstreeSysrootGlobalOptFlags opt_flags; OstreeSysrootDebugFlags debug_flags; }; diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 9cb40e66..266a2975 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -184,6 +184,9 @@ ostree_sysroot_class_init (OstreeSysrootClass *klass) static void ostree_sysroot_init (OstreeSysroot *self) { + const GDebugKey globalopt_keys[] = { + { "skip-sync", OSTREE_SYSROOT_GLOBAL_OPT_SKIP_SYNC }, + }; const GDebugKey keys[] = { { "mutable-deployments", OSTREE_SYSROOT_DEBUG_MUTABLE_DEPLOYMENTS }, { "test-fifreeze", OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE }, @@ -191,6 +194,8 @@ ostree_sysroot_init (OstreeSysroot *self) { "no-dtb", OSTREE_SYSROOT_DEBUG_TEST_NO_DTB }, }; + self->opt_flags = g_parse_debug_string (g_getenv ("OSTREE_SYSROOT_OPTS"), + globalopt_keys, G_N_ELEMENTS (globalopt_keys)); self->debug_flags = g_parse_debug_string (g_getenv ("OSTREE_SYSROOT_DEBUG"), keys, G_N_ELEMENTS (keys)); -- cgit v1.2.1