summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorColin Walters <walters@verbum.org>2022-01-17 11:46:04 -0500
committerColin Walters <walters@verbum.org>2022-01-18 09:19:20 -0500
commitcb731294837736e957ee595ce11ab115277dbb36 (patch)
treea77115c6b1f6b1883f0361b4324d0d01aa01653d
parent0095f7c472e237a10befeb02f300127f28880354 (diff)
downloadostree-cb731294837736e957ee595ce11ab115277dbb36.tar.gz
deploy: Add a 5s max timeout on global filesystem `sync()`
https://bugzilla.redhat.com/show_bug.cgi?id=2003532 Basically there's a systemd bug where it's losing the `_netdev` aspect of Ceph filesystem mounts. This means the network is taken down before Ceph is unmounted. In turn, our invocation of `sync()` blocks on Ceph, which won't succeed. And this in turn manifests as a failure to transition to the new deployment. I initially did this patch to just rip out the global `sync()`. I am pretty sure we don't need it anymore. We've been doing individual `syncfs()` on `/sysroot` and `/boot` for a while now, and those are the only filesystems we should be touching. But *proving* that is a whole other thing of course. To be conservative, let's instead just add a timeout of 5s on our invocation of `sync()`. It doesn't return any information on success/error anyways. To allow testing without the `sync()` invocation, we also support a new `OSTREE_SYSROOT_OPT_SKIP_SYNC=1` environment variable. For staged deployments, this needs to be injected via e.g. systemd unit overrides into `ostree-finalize-staged.service`. Implementing this is a bit hairy - we need to spawn a thread. I debated blocking in arecursive mainloop, but I think `g_cond_wait_until()` is also fine here.
-rw-r--r--src/libostree/ostree-sysroot-deploy.c68
-rw-r--r--src/libostree/ostree-sysroot-private.h6
-rw-r--r--src/libostree/ostree-sysroot.c5
3 files changed, 78 insertions, 1 deletions
diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c
index c4ae86d5..6ee62410 100644
--- a/src/libostree/ostree-sysroot-deploy.c
+++ b/src/libostree/ostree-sysroot-deploy.c
@@ -26,6 +26,7 @@
#include <sys/statvfs.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
+#include <stdbool.h>
#include <sys/poll.h>
#include <linux/fs.h>
#include <err.h>
@@ -1557,6 +1558,39 @@ typedef struct {
guint64 extra_syncfs_msec;
} SyncStats;
+typedef struct {
+ int ref_count; /* atomic */
+ bool success;
+ GMutex mutex;
+ GCond cond;
+} SyncData;
+
+static void
+sync_data_unref (SyncData *data)
+{
+ if (!g_atomic_int_dec_and_test (&data->ref_count))
+ return;
+ g_mutex_clear (&data->mutex);
+ g_cond_clear (&data->cond);
+ g_free (data);
+}
+
+// An asynchronous sync() call. See below.
+static void *
+sync_in_thread (void *ptr)
+{
+ SyncData *syncdata = ptr;
+ // Ensure that the caller is blocked waiting
+ g_mutex_lock (&syncdata->mutex);
+ sync ();
+ // Signal success
+ syncdata->success = true;
+ g_cond_broadcast (&syncdata->cond);
+ g_mutex_unlock (&syncdata->mutex);
+ sync_data_unref (syncdata);
+ return NULL;
+}
+
/* First, sync the root directory as well as /var and /boot which may
* be separate mount points. Then *in addition*, do a global
* `sync()`.
@@ -1589,9 +1623,41 @@ full_system_sync (OstreeSysroot *self,
* actually get error codes out of that API, and we more clearly
* delineate what we actually want to sync in the future when this
* global sync call is removed.
+ *
+ * Due to https://bugzilla.redhat.com/show_bug.cgi?id=2003532 which is
+ * a bug in the interaction between Ceph and systemd, we have a capped
+ * timeout of 5s on the sync here. In general, we don't actually want
+ * to "own" flushing data on *all* mounted filesystems as part of
+ * our process. Again, we're only keeping the `sync` here out of
+ * conservatisim. We could likely just remove it and e.g. systemd
+ * would take care of sync as part of unmounting individual filesystems.
*/
start_msec = g_get_monotonic_time () / 1000;
- sync ();
+ if (!(self->opt_flags & OSTREE_SYSROOT_GLOBAL_OPT_SKIP_SYNC))
+ {
+ SyncData *syncdata = g_new (SyncData, 1);
+ syncdata->ref_count = 2; // One for us, one for thread
+ syncdata->success = FALSE;
+ g_mutex_init (&syncdata->mutex);
+ g_cond_init (&syncdata->cond);
+ g_mutex_lock (&syncdata->mutex);
+ GThread *worker = g_thread_new ("ostree sync", sync_in_thread, syncdata);
+ // Wait up to 5 seconds for sync() to finish
+ gint64 end_time = g_get_monotonic_time () + (5 * G_TIME_SPAN_SECOND);
+ while (!syncdata->success)
+ {
+ if (!g_cond_wait_until (&syncdata->cond, &syncdata->mutex, end_time))
+ break;
+ }
+ g_mutex_unlock (&syncdata->mutex);
+ sync_data_unref (syncdata);
+ // We can't join the thread here, for two reasons. First, the whole
+ // point here is to avoid blocking on `sync`. The other reason is
+ // today there is no return value on success from `sync`, so there's
+ // no point to joining the thread even if we had a nonblocking mechanism
+ // to do so.
+ g_thread_unref (worker);
+ }
end_msec = g_get_monotonic_time () / 1000;
out_stats->extra_syncfs_msec = (end_msec - start_msec);
diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h
index 9cc4023f..cb34eeb3 100644
--- a/src/libostree/ostree-sysroot-private.h
+++ b/src/libostree/ostree-sysroot-private.h
@@ -39,6 +39,11 @@ typedef enum {
} OstreeSysrootDebugFlags;
typedef enum {
+ /* Skip invoking `sync()` */
+ OSTREE_SYSROOT_GLOBAL_OPT_SKIP_SYNC = 1 << 0,
+} OstreeSysrootGlobalOptFlags;
+
+typedef enum {
OSTREE_SYSROOT_LOAD_STATE_NONE, /* ostree_sysroot_new() was called */
OSTREE_SYSROOT_LOAD_STATE_INIT, /* We've loaded basic sysroot state and have an fd */
OSTREE_SYSROOT_LOAD_STATE_LOADED, /* We've loaded all of the deployments */
@@ -75,6 +80,7 @@ struct OstreeSysroot {
/* Only access through ostree_sysroot_[_get]repo() */
OstreeRepo *repo;
+ OstreeSysrootGlobalOptFlags opt_flags;
OstreeSysrootDebugFlags debug_flags;
};
diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c
index 9cb40e66..266a2975 100644
--- a/src/libostree/ostree-sysroot.c
+++ b/src/libostree/ostree-sysroot.c
@@ -184,6 +184,9 @@ ostree_sysroot_class_init (OstreeSysrootClass *klass)
static void
ostree_sysroot_init (OstreeSysroot *self)
{
+ const GDebugKey globalopt_keys[] = {
+ { "skip-sync", OSTREE_SYSROOT_GLOBAL_OPT_SKIP_SYNC },
+ };
const GDebugKey keys[] = {
{ "mutable-deployments", OSTREE_SYSROOT_DEBUG_MUTABLE_DEPLOYMENTS },
{ "test-fifreeze", OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE },
@@ -191,6 +194,8 @@ ostree_sysroot_init (OstreeSysroot *self)
{ "no-dtb", OSTREE_SYSROOT_DEBUG_TEST_NO_DTB },
};
+ self->opt_flags = g_parse_debug_string (g_getenv ("OSTREE_SYSROOT_OPTS"),
+ globalopt_keys, G_N_ELEMENTS (globalopt_keys));
self->debug_flags = g_parse_debug_string (g_getenv ("OSTREE_SYSROOT_DEBUG"),
keys, G_N_ELEMENTS (keys));