summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2023-05-03 20:12:20 +0200
committer GitHub <noreply@github.com> 2023-05-03 20:12:20 +0200
commit 5d63c7eb83740efd13a65fe0915d1a9c5c0839fb (patch)
tree 0423d8d7469062c371a32afcb772972a1bac0058
parent ef25552e8b3b63e655f94106570fd5978fb3aa87 (diff)
parent 1408912282021d5dfbad4cedc166e5b06685e0b6 (diff)
download systemd-5d63c7eb83740efd13a65fe0915d1a9c5c0839fb.tar.gz
Merge pull request #27450 from poettering/switch-root-modernize
pid1: modernize switch root logic a bit
-rw-r--r-- TODO 4
-rw-r--r-- src/core/main.c 2
-rw-r--r-- src/shared/base-filesystem.c 58
-rw-r--r-- src/shared/base-filesystem.h 1
-rw-r--r-- src/shared/switch-root.c 89
-rw-r--r-- src/shared/switch-root.h 2
-rw-r--r-- src/shutdown/shutdown.c 2
7 files changed, 81 insertions, 77 deletions
diff --git a/TODO b/TODO
index b7d5813f17..f1c73fefb3 100644
--- a/TODO
+++ b/TODO
@@ -1224,10 +1224,6 @@ Features:
* Add service setting to run a service within the specified VRF. i.e. do the
equivalent of "ip vrf exec".
-* change SwitchRoot() implementation in PID 1 to use pivot_root(".", "."), as
- documented in the pivot_root(2) man page, so that we can drop the /oldroot
- temporary dir.
-
* special case some calls of chase() to use openat2() internally, so
that the kernel does what we otherwise do.
diff --git a/src/core/main.c b/src/core/main.c
index 932ea64e45..af88fe34bb 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1797,7 +1797,7 @@ static int do_reexecute(
broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
/* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
- r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
+ r = switch_root(switch_root_dir, /* old_root_after= */ NULL, MS_MOVE);
if (r < 0)
log_error_errno(r, "Failed to switch root, trying to continue: %m");
}
diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c
index f2161041c8..8a50cc6ebb 100644
--- a/src/shared/base-filesystem.c
+++ b/src/shared/base-filesystem.c
@@ -11,6 +11,7 @@
#include "alloc-util.h"
#include "architecture.h"
#include "base-filesystem.h"
+#include "errno-util.h"
#include "fd-util.h"
#include "log.h"
#include "macro.h"
@@ -130,19 +131,19 @@ static const BaseFilesystem table[] = {
# pragma message "Please add an entry above specifying whether your architecture uses /lib64/, /lib32/, or no such links."
#endif
-int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
- _cleanup_close_ int fd = -EBADF;
+int base_filesystem_create_fd(int fd, const char *root, uid_t uid, gid_t gid) {
int r;
- fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
- if (fd < 0)
- return log_error_errno(errno, "Failed to open root file system: %m");
+ assert(fd >= 0);
+ assert(root);
+
+ /* The "root" parameter is decoration only – it's only used as part of log messages */
for (size_t i = 0; i < ELEMENTSOF(table); i++) {
if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
continue;
- if (table[i].target) {
+ if (table[i].target) { /* Create as symlink? */
const char *target = NULL;
/* check if one of the targets exists */
@@ -169,39 +170,36 @@ int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
if (!target)
continue;
- if (symlinkat(target, fd, table[i].dir) < 0) {
- log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
- "Failed to create symlink at %s/%s: %m", root, table[i].dir);
-
- if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure)
- continue;
-
- return -errno;
- }
-
- if (uid_is_valid(uid) || gid_is_valid(gid))
- if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
- return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir);
-
- continue;
+ r = RET_NERRNO(symlinkat(target, fd, table[i].dir));
+ } else {
+ /* Create as directory. */
+ WITH_UMASK(0000)
+ r = RET_NERRNO(mkdirat(fd, table[i].dir, table[i].mode));
}
-
- WITH_UMASK(0000)
- r = mkdirat(fd, table[i].dir, table[i].mode);
if (r < 0) {
- log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
- "Failed to create directory at %s/%s: %m", root, table[i].dir);
-
- if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure)
+ bool ignore = IN_SET(r, -EEXIST, -EROFS) || table[i].ignore_failure;
+ log_full_errno(ignore ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to create %s/%s: %m", root, table[i].dir);
+ if (ignore)
continue;
- return -errno;
+ return r;
}
if (uid_is_valid(uid) || gid_is_valid(gid))
if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
- return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir);
+ return log_error_errno(errno, "Failed to chown %s/%s: %m", root, table[i].dir);
}
return 0;
}
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -EBADF;
+
+ fd = open(ASSERT_PTR(root), O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open root file system: %m");
+
+ return base_filesystem_create_fd(fd, root, uid, gid);
+}
diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h
index a33975f3b5..a1ccf451c4 100644
--- a/src/shared/base-filesystem.h
+++ b/src/shared/base-filesystem.h
@@ -3,4 +3,5 @@
#include <sys/types.h>
+int base_filesystem_create_fd(int fd, const char *root, uid_t uid, gid_t gid);
int base_filesystem_create(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
index 8dc93c2822..fa438b3c06 100644
--- a/src/shared/switch-root.c
+++ b/src/shared/switch-root.c
@@ -26,41 +26,45 @@
#include "user-util.h"
int switch_root(const char *new_root,
- const char *old_root_after, /* path below the new root, where to place the old root after the transition */
- bool unmount_old_root,
- unsigned long mount_flags) { /* MS_MOVE or MS_BIND */
+ const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */
+ unsigned long mount_flags) { /* MS_MOVE or MS_BIND used for /proc/, /dev/, /run/, /sys/ */
+ _cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF;
_cleanup_free_ char *resolved_old_root_after = NULL;
- _cleanup_close_ int old_root_fd = -EBADF;
- int r;
+ int r, istmp;
assert(new_root);
- assert(old_root_after);
+ assert(IN_SET(mount_flags, MS_MOVE, MS_BIND));
if (path_equal(new_root, "/"))
return 0;
/* Check if we shall remove the contents of the old root */
- old_root_fd = open("/", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+ old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC);
if (old_root_fd < 0)
return log_error_errno(errno, "Failed to open root directory: %m");
- r = fd_is_temporary_fs(old_root_fd);
- if (r < 0)
- return log_error_errno(r, "Failed to stat root directory: %m");
- if (r > 0)
+
+ istmp = fd_is_temporary_fs(old_root_fd);
+ if (istmp < 0)
+ return log_error_errno(istmp, "Failed to stat root directory: %m");
+ if (istmp > 0)
log_debug("Root directory is on tmpfs, will do cleanup later.");
- else
- old_root_fd = safe_close(old_root_fd);
-
- /* Determine where we shall place the old root after the transition */
- r = chase(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
- if (r == 0) /* Doesn't exist yet. Let's create it */
- (void) mkdir_p_label(resolved_old_root_after, 0755);
-
- /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence
- * remount them MS_PRIVATE here as a work-around.
+
+ new_root_fd = open(new_root, O_DIRECTORY|O_CLOEXEC);
+ if (new_root_fd < 0)
+ return log_error_errno(errno, "Failed to open target directory '%s': %m", new_root);
+
+ if (old_root_after) {
+ /* Determine where we shall place the old root after the transition */
+ r = chase(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
+ if (r == 0) /* Doesn't exist yet. Let's create it */
+ (void) mkdir_p_label(resolved_old_root_after, 0755);
+ }
+
+ /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted
+ * MS_SHARED. Hence remount them MS_PRIVATE here as a work-around.
*
* https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
@@ -90,37 +94,42 @@ int switch_root(const char *new_root,
/* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
* them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
* and switch_root() nevertheless. */
- (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID);
+ (void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID);
- if (chdir(new_root) < 0)
+ if (fchdir(new_root_fd) < 0)
return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
/* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
* that's not possible however, and hence we simply overmount root */
- if (pivot_root(new_root, resolved_old_root_after) >= 0) {
-
- /* Immediately get rid of the old root, if detach_oldroot is set.
- * Since we are running off it we need to do this lazily. */
- if (unmount_old_root) {
- r = umount_recursive(old_root_after, MNT_DETACH);
- if (r < 0)
- log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m");
+ if (resolved_old_root_after)
+ r = RET_NERRNO(pivot_root(".", resolved_old_root_after));
+ else {
+ r = RET_NERRNO(pivot_root(".", "."));
+ if (r >= 0) {
+ /* Now unmount the upper of the two stacked file systems */
+ if (umount2(".", MNT_DETACH) < 0)
+ return log_error_errno(errno, "Failed to unmount the old root: %m");
}
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Pivoting root file system failed, moving mounts instead: %m");
- } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0)
- return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
+ if (mount(".", "/", NULL, MS_MOVE, NULL) < 0)
+ return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
- if (chroot(".") < 0)
- return log_error_errno(errno, "Failed to change root: %m");
+ if (chroot(".") < 0)
+ return log_error_errno(errno, "Failed to change root: %m");
- if (chdir("/") < 0)
- return log_error_errno(errno, "Failed to change directory: %m");
+ if (chdir(".") < 0)
+ return log_error_errno(errno, "Failed to change directory: %m");
+ }
- if (old_root_fd >= 0) {
+ if (istmp) {
struct stat rb;
if (fstat(old_root_fd, &rb) < 0)
return log_error_errno(errno, "Failed to stat old root directory: %m");
+
(void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
}
diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h
index 4e04283e53..e3fabae3d9 100644
--- a/src/shared/switch-root.h
+++ b/src/shared/switch-root.h
@@ -3,4 +3,4 @@
#include <stdbool.h>
-int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags);
+int switch_root(const char *new_root, const char *old_root_after, unsigned long mount_flags);
diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c
index 802be44a79..cf0351cf78 100644
--- a/src/shutdown/shutdown.c
+++ b/src/shutdown/shutdown.c
@@ -169,7 +169,7 @@ static int switch_root_initramfs(void) {
* /run/initramfs/shutdown will take care of these.
* Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
*/
- return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
+ return switch_root("/run/initramfs", "/oldroot", MS_BIND);
}
/* Read the following fields from /proc/meminfo: