diff options
author | Yu Watanabe <watanabe.yu+github@gmail.com> | 2021-10-25 23:15:10 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-25 23:15:10 +0900 |
commit | f8aa82e43fe9beb71955b5fe99169bb0eb427f30 (patch) | |
tree | aebff0c682aa3f4507ccb56773ac98f6bad658f7 | |
parent | 2db32618fe8e7305aa6f25db8b1d4a00aaafa264 (diff) | |
parent | 0289948eb4b22c01fe27e708056e6daaa719b9ae (diff) | |
download | systemd-f8aa82e43fe9beb71955b5fe99169bb0eb427f30.tar.gz |
Merge pull request #21077 from poettering/mount-setattr
optimize remounting with mount_setattr() syscall
-rw-r--r-- | src/basic/missing_syscall.h | 8 | ||||
-rw-r--r-- | src/shared/mount-util.c | 92 | ||||
-rw-r--r-- | src/shared/mount-util.h | 6 | ||||
-rw-r--r-- | src/test/test-mount-util.c | 3 |
4 files changed, 90 insertions, 19 deletions
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h index 5e80fa79fd..2f67adaec3 100644 --- a/src/basic/missing_syscall.h +++ b/src/basic/missing_syscall.h @@ -465,10 +465,18 @@ struct mount_attr; #define MOUNT_ATTR_IDMAP 0x00100000 #endif +#ifndef MOUNT_ATTR_NOSYMFOLLOW +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 +#endif + #ifndef AT_RECURSIVE #define AT_RECURSIVE 0x8000 #endif +#ifndef MOUNT_ATTR_SIZE_VER0 +#define MOUNT_ATTR_SIZE_VER0 32 +#endif + static inline int missing_mount_setattr( int dfd, const char *path, diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index a7217adfa1..813fdc1664 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -134,6 +134,31 @@ int umount_recursive(const char *prefix, int flags) { return n; } +#define MS_CONVERTIBLE_FLAGS (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_NOSYMFOLLOW) + +static uint64_t ms_flags_to_mount_attr(unsigned long a) { + uint64_t f = 0; + + if (FLAGS_SET(a, MS_RDONLY)) + f |= MOUNT_ATTR_RDONLY; + + if (FLAGS_SET(a, MS_NOSUID)) + f |= MOUNT_ATTR_NOSUID; + + if (FLAGS_SET(a, MS_NODEV)) + f |= MOUNT_ATTR_NODEV; + + if (FLAGS_SET(a, MS_NOEXEC)) + f |= MOUNT_ATTR_NOEXEC; + + if (FLAGS_SET(a, MS_NOSYMFOLLOW)) + f |= MOUNT_ATTR_NOSYMFOLLOW; + + return f; +} + +static bool skip_mount_set_attr = false; + /* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. 
*/ int bind_remount_recursive_with_mountinfo( @@ -143,12 +168,44 @@ int bind_remount_recursive_with_mountinfo( char **deny_list, FILE *proc_self_mountinfo) { + _cleanup_fclose_ FILE *proc_self_mountinfo_opened = NULL; _cleanup_set_free_ Set *done = NULL; unsigned n_tries = 0; int r; assert(prefix); - assert(proc_self_mountinfo); + + if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && strv_isempty(deny_list) && !skip_mount_set_attr) { + /* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */ + + if (mount_setattr(AT_FDCWD, prefix, AT_SYMLINK_NOFOLLOW|AT_RECURSIVE, + &(struct mount_attr) { + .attr_set = ms_flags_to_mount_attr(new_flags & flags_mask), + .attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask), + }, MOUNT_ATTR_SIZE_VER0) < 0) { + + log_debug_errno(errno, "mount_setattr() failed, falling back to classic remounting: %m"); + + /* We fall through to classic behaviour if not supported (i.e. kernel < 5.12). We + * also do this for all other kinds of errors since there are so many different ones, and + * mount_setattr() has no graceful mode where it continues despite seeing errors on + * some mounts, but we want that. Moreover mount_setattr() only works on the mount + * point inode itself, not a non-mount point inode, and we want to support arbitrary + * prefixes here. */ + + if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */ + skip_mount_set_attr = true; + } else + return 0; /* Nice, this worked! */ + } + + if (!proc_self_mountinfo) { + r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo_opened); + if (r < 0) + return r; + + proc_self_mountinfo = proc_self_mountinfo_opened; + } /* Recursively remount a directory (and all its submounts) with desired flags (MS_READONLY, * MS_NOSUID, MS_NOEXEC). 
If the directory is already mounted, we reuse the mount and simply mark it @@ -343,22 +400,6 @@ int bind_remount_recursive_with_mountinfo( } } -int bind_remount_recursive( - const char *prefix, - unsigned long new_flags, - unsigned long flags_mask, - char **deny_list) { - - _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; - int r; - - r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo); - if (r < 0) - return r; - - return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo); -} - int bind_remount_one_with_mountinfo( const char *path, unsigned long new_flags, @@ -374,6 +415,23 @@ int bind_remount_one_with_mountinfo( assert(path); assert(proc_self_mountinfo); + if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && !skip_mount_set_attr) { + /* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */ + + if (mount_setattr(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW, + &(struct mount_attr) { + .attr_set = ms_flags_to_mount_attr(new_flags & flags_mask), + .attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask), + }, MOUNT_ATTR_SIZE_VER0) < 0) { + + log_debug_errno(errno, "mount_setattr() didn't work, falling back to classic remounting: %m"); + + if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */ + skip_mount_set_attr = true; + } else + return 0; /* Nice, this worked! 
*/ + } + rewind(proc_self_mountinfo); table = mnt_new_table(); diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 36501c2c4a..d9fb801e61 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -40,8 +40,12 @@ int mount_nofollow(const char *source, const char *target, const char *filesyste int repeat_unmount(const char *path, int flags); int umount_recursive(const char *target, int flags); -int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list); + int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list, FILE *proc_self_mountinfo); +static inline int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list) { + return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, NULL); +} + int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo); int mount_move_root(const char *path); diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c index d3d004071b..fdf2a249ed 100644 --- a/src/test/test-mount-util.c +++ b/src/test/test-mount-util.c @@ -169,7 +169,7 @@ static void test_bind_remount_recursive(void) { assert_se(!FLAGS_SET(svfs.f_flag, ST_RDONLY)); /* Now mark the path we currently run for read-only */ - assert_se(bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, STRV_MAKE("/sys/kernel")) >= 0); + assert_se(bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, path_equal(p, "/sys") ? 
STRV_MAKE("/sys/kernel") : NULL) >= 0); /* Ensure that this worked on the top-level */ assert_se(statvfs(p, &svfs) >= 0); @@ -209,6 +209,7 @@ static void test_bind_remount_one(void) { assert_se(fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo) >= 0); assert_se(bind_remount_one_with_mountinfo("/run", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) >= 0); + assert_se(bind_remount_one_with_mountinfo("/run", MS_NOEXEC, MS_RDONLY|MS_NOEXEC, proc_self_mountinfo) >= 0); assert_se(bind_remount_one_with_mountinfo("/proc/idontexist", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) == -ENOENT); assert_se(bind_remount_one_with_mountinfo("/proc/self", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) == -EINVAL); assert_se(bind_remount_one_with_mountinfo("/", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) >= 0); |