summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yu Watanabe <watanabe.yu+github@gmail.com> 2021-10-25 23:15:10 +0900
committer: GitHub <noreply@github.com> 2021-10-25 23:15:10 +0900
commit: f8aa82e43fe9beb71955b5fe99169bb0eb427f30 (patch)
tree: aebff0c682aa3f4507ccb56773ac98f6bad658f7
parent: 2db32618fe8e7305aa6f25db8b1d4a00aaafa264 (diff)
parent: 0289948eb4b22c01fe27e708056e6daaa719b9ae (diff)
download: systemd-f8aa82e43fe9beb71955b5fe99169bb0eb427f30.tar.gz
Merge pull request #21077 from poettering/mount-setattr
optimize remounting with mount_setattr() syscall
-rw-r--r-- src/basic/missing_syscall.h 8
-rw-r--r-- src/shared/mount-util.c 92
-rw-r--r-- src/shared/mount-util.h 6
-rw-r--r-- src/test/test-mount-util.c 3
4 files changed, 90 insertions, 19 deletions
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
index 5e80fa79fd..2f67adaec3 100644
--- a/src/basic/missing_syscall.h
+++ b/src/basic/missing_syscall.h
@@ -465,10 +465,18 @@ struct mount_attr;
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif
+#ifndef MOUNT_ATTR_SIZE_VER0
+#define MOUNT_ATTR_SIZE_VER0 32
+#endif
+
static inline int missing_mount_setattr(
int dfd,
const char *path,
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
index a7217adfa1..813fdc1664 100644
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@@ -134,6 +134,31 @@ int umount_recursive(const char *prefix, int flags) {
return n;
}
+#define MS_CONVERTIBLE_FLAGS (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_NOSYMFOLLOW)
+
+static uint64_t ms_flags_to_mount_attr(unsigned long a) {
+ uint64_t f = 0;
+
+ if (FLAGS_SET(a, MS_RDONLY))
+ f |= MOUNT_ATTR_RDONLY;
+
+ if (FLAGS_SET(a, MS_NOSUID))
+ f |= MOUNT_ATTR_NOSUID;
+
+ if (FLAGS_SET(a, MS_NODEV))
+ f |= MOUNT_ATTR_NODEV;
+
+ if (FLAGS_SET(a, MS_NOEXEC))
+ f |= MOUNT_ATTR_NOEXEC;
+
+ if (FLAGS_SET(a, MS_NOSYMFOLLOW))
+ f |= MOUNT_ATTR_NOSYMFOLLOW;
+
+ return f;
+}
+
+static bool skip_mount_set_attr = false;
+
/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
* for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
int bind_remount_recursive_with_mountinfo(
@@ -143,12 +168,44 @@ int bind_remount_recursive_with_mountinfo(
char **deny_list,
FILE *proc_self_mountinfo) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo_opened = NULL;
_cleanup_set_free_ Set *done = NULL;
unsigned n_tries = 0;
int r;
assert(prefix);
- assert(proc_self_mountinfo);
+
+ if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && strv_isempty(deny_list) && !skip_mount_set_attr) {
+ /* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */
+
+ if (mount_setattr(AT_FDCWD, prefix, AT_SYMLINK_NOFOLLOW|AT_RECURSIVE,
+ &(struct mount_attr) {
+ .attr_set = ms_flags_to_mount_attr(new_flags & flags_mask),
+ .attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask),
+ }, MOUNT_ATTR_SIZE_VER0) < 0) {
+
+ log_debug_errno(errno, "mount_setattr() failed, falling back to classic remounting: %m");
+
+ /* We fall through to classic behaviour if not supported (i.e. kernel < 5.12). We
+ * also do this for all other kinds of errors since there are so many different ones, and
+ * mount_setattr() has no graceful mode where it continues despite seeing errors on
+ * some mounts, but we want that. Moreover mount_setattr() only works on the mount
+ * point inode itself, not a non-mount point inode, and we want to support arbitrary
+ * prefixes here. */
+
+ if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */
+ skip_mount_set_attr = true;
+ } else
+ return 0; /* Nice, this worked! */
+ }
+
+ if (!proc_self_mountinfo) {
+ r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo_opened);
+ if (r < 0)
+ return r;
+
+ proc_self_mountinfo = proc_self_mountinfo_opened;
+ }
/* Recursively remount a directory (and all its submounts) with desired flags (MS_RDONLY,
* MS_NOSUID, MS_NOEXEC). If the directory is already mounted, we reuse the mount and simply mark it
@@ -343,22 +400,6 @@ int bind_remount_recursive_with_mountinfo(
}
}
-int bind_remount_recursive(
- const char *prefix,
- unsigned long new_flags,
- unsigned long flags_mask,
- char **deny_list) {
-
- _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
- int r;
-
- r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
- if (r < 0)
- return r;
-
- return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo);
-}
-
int bind_remount_one_with_mountinfo(
const char *path,
unsigned long new_flags,
@@ -374,6 +415,23 @@ int bind_remount_one_with_mountinfo(
assert(path);
assert(proc_self_mountinfo);
+ if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && !skip_mount_set_attr) {
+ /* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */
+
+ if (mount_setattr(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW,
+ &(struct mount_attr) {
+ .attr_set = ms_flags_to_mount_attr(new_flags & flags_mask),
+ .attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask),
+ }, MOUNT_ATTR_SIZE_VER0) < 0) {
+
+ log_debug_errno(errno, "mount_setattr() didn't work, falling back to classic remounting: %m");
+
+ if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */
+ skip_mount_set_attr = true;
+ } else
+ return 0; /* Nice, this worked! */
+ }
+
rewind(proc_self_mountinfo);
table = mnt_new_table();
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
index 36501c2c4a..d9fb801e61 100644
--- a/src/shared/mount-util.h
+++ b/src/shared/mount-util.h
@@ -40,8 +40,12 @@ int mount_nofollow(const char *source, const char *target, const char *filesyste
int repeat_unmount(const char *path, int flags);
int umount_recursive(const char *target, int flags);
-int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list);
+
int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list, FILE *proc_self_mountinfo);
+static inline int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list) {
+ return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, NULL);
+}
+
int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo);
int mount_move_root(const char *path);
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
index d3d004071b..fdf2a249ed 100644
--- a/src/test/test-mount-util.c
+++ b/src/test/test-mount-util.c
@@ -169,7 +169,7 @@ static void test_bind_remount_recursive(void) {
assert_se(!FLAGS_SET(svfs.f_flag, ST_RDONLY));
/* Now mark the path we currently run for read-only */
- assert_se(bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, STRV_MAKE("/sys/kernel")) >= 0);
+ assert_se(bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, path_equal(p, "/sys") ? STRV_MAKE("/sys/kernel") : NULL) >= 0);
/* Ensure that this worked on the top-level */
assert_se(statvfs(p, &svfs) >= 0);
@@ -209,6 +209,7 @@ static void test_bind_remount_one(void) {
assert_se(fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo) >= 0);
assert_se(bind_remount_one_with_mountinfo("/run", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) >= 0);
+ assert_se(bind_remount_one_with_mountinfo("/run", MS_NOEXEC, MS_RDONLY|MS_NOEXEC, proc_self_mountinfo) >= 0);
assert_se(bind_remount_one_with_mountinfo("/proc/idontexist", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) == -ENOENT);
assert_se(bind_remount_one_with_mountinfo("/proc/self", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) == -EINVAL);
assert_se(bind_remount_one_with_mountinfo("/", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) >= 0);