diff options
author | Christian Brauner <brauner@kernel.org> | 2022-11-24 10:45:24 +0100 |
---|---|---|
committer | Christian Brauner (Microsoft) <brauner@kernel.org> | 2022-12-05 18:34:25 +0100 |
commit | 57c10a5650f6bb7180f3bec31a3f24239a81be39 (patch) | |
tree | f21c1891d597e0a45a1c9b697fe79f6cfcb4a48e | |
parent | 2e776ed6c8649d5991de5d2a7c0334a77485456c (diff) | |
download | systemd-57c10a5650f6bb7180f3bec31a3f24239a81be39.tar.gz |
nspawn: support pivot_root()
In order to support pivot_root(), we need to move the mount propagation
changes to after the pivot_root() call. While MS_MOVE requires the source
mount to not be a shared mount, pivot_root() also requires the target mount
to not be a shared mount. This guarantees that pivot_root() doesn't leak
any mounts.
Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
-rw-r--r-- | src/core/namespace.c | 4 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 24 | ||||
-rw-r--r-- | src/shared/mount-util.c | 97 | ||||
-rw-r--r-- | src/shared/mount-util.h | 17 |
4 files changed, 96 insertions, 46 deletions
diff --git a/src/core/namespace.c b/src/core/namespace.c index c0d0cc9715..c9c2132b8a 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2486,7 +2486,7 @@ int setup_namespace( goto finish; /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ - r = mount_pivot_root(root); + r = mount_switch_root(root, MOUNT_ATTR_PROPAGATION_INHERIT); if (r == -EINVAL && root_directory) { /* If we are using root_directory and we don't have privileges (ie: user manager in a user * namespace) and the root_directory is already a mount point in the parent namespace, @@ -2496,7 +2496,7 @@ int setup_namespace( r = mount_nofollow_verbose(LOG_DEBUG, root, root, NULL, MS_BIND|MS_REC, NULL); if (r < 0) goto finish; - r = mount_pivot_root(root); + r = mount_switch_root(root, MOUNT_ATTR_PROPAGATION_INHERIT); } if (r < 0) { log_debug_errno(r, "Failed to mount root with MS_MOVE: %m"); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index d7b636209e..5844674d95 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3858,19 +3858,6 @@ static int outer_child( unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket); } - /* Mark everything as shared so our mounts get propagated down. This is required to make new bind - * mounts available in systemd services inside the container that create a new mount namespace. See - * https://github.com/systemd/systemd/issues/3860 Further submounts (such as /dev) done after this - * will inherit the shared propagation mode. - * - * IMPORTANT: Do not overmount the root directory anymore from now on to enable moving the root - * directory mount to root later on. 
- * https://github.com/systemd/systemd/issues/3847#issuecomment-562735251 - */ - r = mount_nofollow_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL); - if (r < 0) - return r; - r = recursive_chown(directory, arg_uid_shift, arg_uid_range); if (r < 0) return r; @@ -3974,7 +3961,16 @@ static int outer_child( return r; } - r = mount_move_root(directory); + /* Mark everything as shared so our mounts get propagated down. This is required to make new bind + * mounts available in systemd services inside the container that create a new mount namespace. See + * https://github.com/systemd/systemd/issues/3860 Further submounts (such as /dev) done after this + * will inherit the shared propagation mode. + * + * IMPORTANT: Do not overmount the root directory anymore from now on to enable moving the root + * directory mount to root later on. + * https://github.com/systemd/systemd/issues/3847#issuecomment-562735251 + */ + r = mount_switch_root(directory, MOUNT_ATTR_PROPAGATION_SHARED); if (r < 0) return log_error_errno(r, "Failed to move root directory: %m"); diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 681d698800..adb6b6dd27 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -36,6 +36,7 @@ #include "set.h" #include "stat-util.h" #include "stdio-util.h" +#include "string-table.h" #include "string-util.h" #include "strv.h" #include "tmpfile-util.h" @@ -475,47 +476,41 @@ int bind_remount_one_with_mountinfo( return 0; } -int mount_move_root(const char *path) { - assert(path); +static const char *const mount_attr_propagation_type_table[_MOUNT_ATTR_PROPAGATION_TYPE_MAX] = { + [MOUNT_ATTR_PROPAGATION_INHERIT] = "inherited", + [MOUNT_ATTR_PROPAGATION_PRIVATE] = "private", + [MOUNT_ATTR_PROPAGATION_DEPENDENT] = "dependent", + [MOUNT_ATTR_PROPAGATION_SHARED] = "shared", +}; - if (chdir(path) < 0) - return -errno; +DEFINE_STRING_TABLE_LOOKUP(mount_attr_propagation_type, MountAttrPropagationType); - if (mount(path, "/", NULL, MS_MOVE, 
NULL) < 0) - return -errno; - - if (chroot(".") < 0) - return -errno; - - return RET_NERRNO(chdir("/")); +unsigned int mount_attr_propagation_type_to_flag(MountAttrPropagationType t) { + switch (t) { + case MOUNT_ATTR_PROPAGATION_INHERIT: + return 0; + case MOUNT_ATTR_PROPAGATION_PRIVATE: + return MS_PRIVATE; + case MOUNT_ATTR_PROPAGATION_DEPENDENT: + return MS_SLAVE; + case MOUNT_ATTR_PROPAGATION_SHARED: + return MS_SHARED; + default: + assert_not_reached(); + } } -int mount_pivot_root(const char *path) { - _cleanup_close_ int fd_oldroot = -EBADF, fd_newroot = -EBADF; - - assert(path); - - /* pivot_root() isn't currently supported in the initramfs. */ - if (in_initrd()) - return mount_move_root(path); +static inline int mount_switch_root_pivot(const char *path, int fd_newroot) { + _cleanup_close_ int fd_oldroot = -EBADF; fd_oldroot = open("/", O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); if (fd_oldroot < 0) return log_debug_errno(errno, "Failed to open old rootfs"); - fd_newroot = open(path, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); - if (fd_newroot < 0) - return log_debug_errno(errno, "Failed to open new rootfs '%s': %m", path); - - /* Change into the new rootfs. */ - if (fchdir(fd_newroot) < 0) - return log_debug_errno(errno, "Failed to change into new rootfs '%s': %m", path); - /* Let the kernel tuck the new root under the old one. */ if (pivot_root(".", ".") < 0) return log_debug_errno(errno, "Failed to pivot root to new rootfs '%s': %m", path); - /* At this point the new root is tucked under the old root. If we want * to unmount it we cannot be fchdir()ed into it. So escape back to the * old root. 
*/ @@ -535,6 +530,52 @@ int mount_pivot_root(const char *path) { return 0; } +static inline int mount_switch_root_move(const char *path) { + if (mount(path, "/", NULL, MS_MOVE, NULL) < 0) + return log_debug_errno(errno, "Failed to move new rootfs '%s': %m", path); + + if (chroot(".") < 0) + return log_debug_errno(errno, "Failed to chroot to new rootfs '%s': %m", path); + + if (chdir("/")) + return log_debug_errno(errno, "Failed to chdir to new rootfs '%s': %m", path); + + return 0; +} + +int mount_switch_root(const char *path, MountAttrPropagationType type) { + int r; + _cleanup_close_ int fd_newroot = -EBADF; + unsigned int flags; + + assert(path); + + fd_newroot = open(path, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (fd_newroot < 0) + return log_debug_errno(errno, "Failed to open new rootfs '%s': %m", path); + + /* Change into the new rootfs. */ + if (fchdir(fd_newroot) < 0) + return log_debug_errno(errno, "Failed to change into new rootfs '%s': %m", path); + + r = mount_switch_root_pivot(path, fd_newroot); + if (r < 0) { + /* Failed to pivot_root() fallback to MS_MOVE. For example, this may happen if the + * rootfs is an initramfs in which case pivot_root() isn't supported. */ + log_debug_errno(r, "Failed to pivot into new rootfs '%s': %m", path); + r = mount_switch_root_move(path); + } + if (r < 0) + return log_debug_errno(r, "Failed to switch to new rootfs '%s': %m", path); + + /* Finally, let's establish the requested propagation type. 
*/ + flags = mount_attr_propagation_type_to_flag(type); + if ((flags != 0) && mount(NULL, ".", NULL, flags|MS_REC, 0) < 0) + return log_debug_errno(errno, "Failed to turn new rootfs '%s' into %s mount: %m", + mount_attr_propagation_type_to_string(type), path); + + return 0; +} int repeat_unmount(const char *path, int flags) { bool done = false; diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 29b9ed02f7..56b1c3669c 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -11,6 +11,20 @@ #include "errno-util.h" #include "macro.h" +typedef enum MountAttrPropagationType { + MOUNT_ATTR_PROPAGATION_INHERIT, /* no special MS_* propagation flags */ + MOUNT_ATTR_PROPAGATION_PRIVATE, /* MS_PRIVATE */ + MOUNT_ATTR_PROPAGATION_DEPENDENT, /* MS_SLAVE */ + MOUNT_ATTR_PROPAGATION_SHARED, /* MS_SHARE */ + + _MOUNT_ATTR_PROPAGATION_TYPE_MAX, + _MOUNT_ATTR_PROPAGATION_TYPE_INVALID = -EINVAL, +} MountAttrPropagationType; + +const char* mount_attr_propagation_type_to_string(MountAttrPropagationType t) _const_; +MountAttrPropagationType mount_attr_propagation_type_from_string(const char *s) _pure_; +unsigned int mount_attr_propagation_type_to_flag(MountAttrPropagationType t); + /* The limit used for /dev itself. 4MB should be enough since device nodes and symlinks don't * consume any space and udev isn't supposed to create regular file either. There's no limit on the * max number of inodes since such limit is hard to guess especially on large storage array @@ -54,8 +68,7 @@ static inline int bind_remount_recursive(const char *prefix, unsigned long new_f int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo); -int mount_move_root(const char *path); -int mount_pivot_root(const char *path); +int mount_switch_root(const char *path, MountAttrPropagationType type); DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(FILE*, endmntent, NULL); #define _cleanup_endmntent_ _cleanup_(endmntentp) |