summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorColin Walters <walters@verbum.org>2017-08-02 22:07:26 -0400
committerAtomic Bot <atomic-devel@projectatomic.io>2017-08-08 16:09:04 +0000
commit8642ef5ab3fec3ac8eb8f193054852f83a8bc4d0 (patch)
tree4105371c58a3e719836e0d19a70b72508a22919d
parent9f8f351cd45e5dd0219c3177558b497ab10c58e9 (diff)
downloadostree-8642ef5ab3fec3ac8eb8f193054852f83a8bc4d0.tar.gz
lib/deploy: Use a FIFREEZE/FITHAW cycle for /boot
See: http://marc.info/?l=linux-fsdevel&m=149520244919284&w=2 XFS doesn't flush the journal on `syncfs()`. GRUB doesn't know how to follow the XFS journal, so if the filesystem is in a dirty state (possible with xfs `/boot`, extremely likely with `/`, if the journaled data includes content for `/boot`, the system may be unbootable if a system crash occurs. Fix this by doing a `FIFREEZE`+`FITHAW` cycle. Now, most people probably would have replaced the `syncfs()` invocation with those two ioctls. But this would have become (I believe) the *only* place in libostree where we weren't safe against interruption. The failure mode would be ugly; nothing else would be able to write to the filesystem until manual intervention. The real fix here I think is to land an atomic `FIFREEZETHAW` ioctl in the kernel. I might try a patch. In the meantime though, let's jump through some hoops and set up a "watchdog" child process that acts as a fallback unfreezer. Closes: https://github.com/ostreedev/ostree/issues/876 Closes: #1049 Approved by: jlebon
-rw-r--r--src/libostree/ostree-sysroot-deploy.c137
-rw-r--r--src/libostree/ostree-sysroot-private.h3
-rw-r--r--src/libostree/ostree-sysroot.c1
-rw-r--r--tests/admin-test.sh8
-rwxr-xr-xtests/test-admin-deploy-grub2.sh2
-rwxr-xr-xtests/test-admin-deploy-syslinux.sh2
-rwxr-xr-xtests/test-admin-deploy-uboot.sh2
7 files changed, 141 insertions, 14 deletions
diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c
index 29fcfeed..d87ba1a0 100644
--- a/src/libostree/ostree-sysroot-deploy.c
+++ b/src/libostree/ostree-sysroot-deploy.c
@@ -22,8 +22,14 @@
#include <gio/gunixinputstream.h>
#include <gio/gunixoutputstream.h>
+#include <glib-unix.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <linux/fs.h>
+#include <err.h>
#ifdef HAVE_LIBMOUNT
#include <libmount.h>
@@ -973,18 +979,126 @@ checksum_from_kernel_src (const char *name,
return TRUE;
}
+/* We used to syncfs(), but that doesn't flush the journal on XFS,
+ * and since GRUB2 can't read the XFS journal, the system
+ * could fail to boot.
+ *
+ * http://marc.info/?l=linux-fsdevel&m=149520244919284&w=2
+ * https://github.com/ostreedev/ostree/pull/1049
+ */
static gboolean
-syncfs_dir_at (int dfd,
- const char *path,
- GCancellable *cancellable,
- GError **error)
+fsfreeze_thaw_cycle (OstreeSysroot *self,
+ int rootfs_dfd,
+ GCancellable *cancellable,
+ GError **error)
{
- glnx_fd_close int child_dfd = -1;
- if (!glnx_opendirat (dfd, path, TRUE, &child_dfd, error))
- return FALSE;
- if (syncfs (child_dfd) != 0)
- return glnx_throw_errno_prefix (error, "syncfs(%s)", path);
+ GLNX_AUTO_PREFIX_ERROR ("During fsfreeze-thaw", error);
+ int sockpair[2];
+ if (socketpair (AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sockpair) < 0)
+ return glnx_throw_errno_prefix (error, "socketpair");
+ glnx_fd_close int sock_parent = sockpair[0];
+ glnx_fd_close int sock_watchdog = sockpair[1];
+
+ pid_t pid = fork ();
+ if (pid < 0)
+ return glnx_throw_errno_prefix (error, "fork");
+
+ const gboolean debug_fifreeze = (self->debug_flags & OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE)>0;
+ char c = '!';
+ if (pid == 0) /* Child watchdog/unfreezer process. */
+ {
+ (void) close (glnx_steal_fd (&sock_parent));
+ /* Daemonize, and mask SIGINT/SIGTERM, so we're likely to survive e.g.
+ * someone doing a `systemctl restart rpm-ostreed` or a Ctrl-C of
+ * `ostree admin upgrade`.
+ */
+ if (daemon (0, debug_fifreeze ? 1 : 0) < 0)
+ err (1, "daemon");
+ int sigs[] = { SIGINT, SIGTERM };
+ for (guint i = 0; i < G_N_ELEMENTS (sigs); i++)
+ {
+ if (signal (sigs[i], SIG_IGN) == SIG_ERR)
+ err (1, "signal");
+ }
+ /* Tell the parent we're ready */
+ if (write (sock_watchdog, &c, sizeof (c)) != 1)
+ err (1, "write");
+ /* Wait for the parent to say it's going to freeze. */
+ ssize_t bytes_read = TEMP_FAILURE_RETRY (read (sock_watchdog, &c, sizeof (c)));
+ if (bytes_read < 0)
+ err (1, "read");
+ if (bytes_read != 1)
+ errx (1, "failed to read from parent");
+ /* Now we wait for the second message from the parent saying the freeze is
+ * complete. We have a 30 second timeout; if somehow the parent hasn't
+ * signaled completion, go ahead and unfreeze. But for debugging, just 1
+ * second to avoid exessively lengthining the test suite.
+ */
+ const int timeout_ms = debug_fifreeze ? 1000 : 30000;
+ struct pollfd pfds[1];
+ pfds[0].fd = sock_watchdog;
+ pfds[0].events = POLLIN | POLLHUP;
+ int r = TEMP_FAILURE_RETRY (poll (pfds, 1, timeout_ms));
+ /* Do a thaw if we hit an error, or if the poll timed out */
+ if (r <= 0)
+ {
+ if (TEMP_FAILURE_RETRY (ioctl (rootfs_dfd, FITHAW, 0)) != 0)
+ {
+ if (errno == EPERM)
+ ; /* Ignore this for the test suite */
+ else
+ err (1, "FITHAW");
+ }
+ /* But if we got an error from poll, let's log it */
+ if (r < 0)
+ err (1, "poll");
+ }
+ if (debug_fifreeze)
+ g_printerr ("fifreeze watchdog was run\n");
+ exit (EXIT_SUCCESS);
+ }
+ else /* Parent process. */
+ {
+ (void) close (glnx_steal_fd (&sock_watchdog));
+ /* Wait for the watchdog to say it's set up; mainly that it's
+ * masked SIGTERM successfully.
+ */
+ ssize_t bytes_read = TEMP_FAILURE_RETRY (read (sock_parent, &c, sizeof (c)));
+ if (bytes_read < 0)
+ return glnx_throw_errno_prefix (error, "read(watchdog init)");
+ if (bytes_read != 1)
+ return glnx_throw (error, "read(watchdog init)");
+ /* And tell the watchdog that we're ready to start */
+ if (write (sock_parent, &c, sizeof (c)) != sizeof (c))
+ return glnx_throw_errno_prefix (error, "write(watchdog start)");
+ /* Testing infrastructure */
+ if (debug_fifreeze)
+ return glnx_throw (error, "aborting due to test-fifreeze");
+ /* Do a freeze/thaw cycle; TODO add a FIFREEZETHAW ioctl */
+ if (ioctl (rootfs_dfd, FIFREEZE, 0) != 0)
+ {
+ /* Not supported, or we're running in the unit tests (as non-root)?
+ * OK, let's just do a syncfs.
+ */
+ if (G_IN_SET (errno, EOPNOTSUPP, EPERM))
+ {
+ if (TEMP_FAILURE_RETRY (syncfs (rootfs_dfd)) != 0)
+ return glnx_throw_errno_prefix (error, "syncfs");
+ /* Write the completion, and return */
+ if (write (sock_parent, &c, sizeof (c)) != sizeof (c))
+ return glnx_throw_errno_prefix (error, "write(watchdog syncfs complete)");
+ return TRUE;
+ }
+ else
+ return glnx_throw_errno_prefix (error, "ioctl(FIFREEZE)");
+ }
+ /* And finally thaw, then signal our completion to the watchdog */
+ if (TEMP_FAILURE_RETRY (ioctl (rootfs_dfd, FITHAW, 0)) != 0)
+ return glnx_throw_errno_prefix (error, "ioctl(FITHAW)");
+ if (write (sock_parent, &c, sizeof (c)) != sizeof (c))
+ return glnx_throw_errno_prefix (error, "write(watchdog FITHAW complete)");
+ }
return TRUE;
}
@@ -1012,7 +1126,10 @@ full_system_sync (OstreeSysroot *self,
out_stats->root_syncfs_msec = (end_msec - start_msec);
start_msec = g_get_monotonic_time () / 1000;
- if (!syncfs_dir_at (self->sysroot_fd, "boot", cancellable, error))
+ glnx_fd_close int boot_dfd = -1;
+ if (!glnx_opendirat (self->sysroot_fd, "boot", TRUE, &boot_dfd, error))
+ return FALSE;
+ if (!fsfreeze_thaw_cycle (self, boot_dfd, cancellable, error))
return FALSE;
end_msec = g_get_monotonic_time () / 1000;
out_stats->boot_syncfs_msec = (end_msec - start_msec);
diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h
index 82abc8e7..07c4bf6e 100644
--- a/src/libostree/ostree-sysroot-private.h
+++ b/src/libostree/ostree-sysroot-private.h
@@ -33,7 +33,8 @@ typedef enum {
OSTREE_SYSROOT_DEBUG_MUTABLE_DEPLOYMENTS = 1 << 0,
/* See https://github.com/ostreedev/ostree/pull/759 */
OSTREE_SYSROOT_DEBUG_NO_XATTRS = 1 << 1,
-
+ /* https://github.com/ostreedev/ostree/pull/1049 */
+ OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE = 1 << 2,
} OstreeSysrootDebugFlags;
/**
diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c
index cb09db7b..20539e4d 100644
--- a/src/libostree/ostree-sysroot.c
+++ b/src/libostree/ostree-sysroot.c
@@ -166,6 +166,7 @@ ostree_sysroot_init (OstreeSysroot *self)
{
const GDebugKey keys[] = {
{ "mutable-deployments", OSTREE_SYSROOT_DEBUG_MUTABLE_DEPLOYMENTS },
+ { "test-fifreeze", OSTREE_SYSROOT_DEBUG_TEST_FIFREEZE },
{ "no-xattrs", OSTREE_SYSROOT_DEBUG_NO_XATTRS },
};
diff --git a/tests/admin-test.sh b/tests/admin-test.sh
index 6001ceea..55de7235 100644
--- a/tests/admin-test.sh
+++ b/tests/admin-test.sh
@@ -249,3 +249,11 @@ ${CMD_PREFIX} ostree --sysroot=${deployment} remote add --set=gpg-verify=false r
assert_not_file_has_content sysroot/ostree/repo/config remote-test-nonphysical
assert_file_has_content ${deployment}/etc/ostree/remotes.d/remote-test-nonphysical.conf testos-repo
echo "ok remote add nonphysical sysroot"
+
+if env OSTREE_SYSROOT_DEBUG="${OSTREE_SYSROOT_DEBUG},test-fifreeze" \
+ ${CMD_PREFIX} ostree admin deploy --os=testos testos:testos/buildmaster/x86_64-runtime 2>err.txt; then
+ fatal "fifreeze-test exited successfully?"
+fi
+assert_file_has_content err.txt "fifreeze watchdog was run"
+assert_file_has_content err.txt "During fsfreeze-thaw: aborting due to test-fifreeze"
+echo "ok fifreeze test"
diff --git a/tests/test-admin-deploy-grub2.sh b/tests/test-admin-deploy-grub2.sh
index d7c1c6db..6f785df5 100755
--- a/tests/test-admin-deploy-grub2.sh
+++ b/tests/test-admin-deploy-grub2.sh
@@ -19,7 +19,7 @@
set -euo pipefail
-echo "1..18"
+echo "1..19"
. $(dirname $0)/libtest.sh
diff --git a/tests/test-admin-deploy-syslinux.sh b/tests/test-admin-deploy-syslinux.sh
index 797836f0..e03e211b 100755
--- a/tests/test-admin-deploy-syslinux.sh
+++ b/tests/test-admin-deploy-syslinux.sh
@@ -19,7 +19,7 @@
set -euo pipefail
-echo "1..18"
+echo "1..19"
. $(dirname $0)/libtest.sh
diff --git a/tests/test-admin-deploy-uboot.sh b/tests/test-admin-deploy-uboot.sh
index d9104f8c..5262b48a 100755
--- a/tests/test-admin-deploy-uboot.sh
+++ b/tests/test-admin-deploy-uboot.sh
@@ -20,7 +20,7 @@
set -euo pipefail
-echo "1..19"
+echo "1..20"
. $(dirname $0)/libtest.sh