diff options
Diffstat (limited to 'src/shared/mount-util.c')
-rw-r--r-- | src/shared/mount-util.c | 226 |
1 files changed, 226 insertions, 0 deletions
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index a6480b93a4..9d0d7c73df 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -14,15 +14,19 @@ #include "fs-util.h" #include "hashmap.h" #include "libmount-util.h" +#include "mkdir.h" #include "mount-util.h" #include "mountpoint-util.h" +#include "namespace-util.h" #include "parse-util.h" #include "path-util.h" +#include "process-util.h" #include "set.h" #include "stat-util.h" #include "stdio-util.h" #include "string-util.h" #include "strv.h" +#include "tmpfile-util.h" int mount_fd(const char *source, int target_fd, @@ -742,3 +746,225 @@ int mount_option_mangle( return 0; } + +int bind_mount_in_namespace( + pid_t target, + const char *propagate_path, + const char *incoming_path, + const char *src, + const char *dest, + bool read_only, + bool make_file_or_directory) { + + _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; + _cleanup_close_ int self_mntns_fd = -1, mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1; + char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p, + chased_src[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + bool mount_slave_created = false, mount_slave_mounted = false, + mount_tmp_created = false, mount_tmp_mounted = false, + mount_outside_created = false, mount_outside_mounted = false; + struct stat st, self_mntns_st; + pid_t child; + int r; + + assert(target > 0); + assert(propagate_path); + assert(incoming_path); + assert(src); + assert(dest); + + r = namespace_open(target, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd); + if (r < 0) + return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m"); + + if (fstat(mntns_fd, &st) < 0) + return log_debug_errno(errno, "Failed to fstat mount namespace FD of target process: %m"); + + r = namespace_open(0, NULL, &self_mntns_fd, NULL, NULL, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to retrieve FDs of systemd's namespace: %m"); + + if (fstat(self_mntns_fd, &self_mntns_st) < 0) + return log_debug_errno(errno, "Failed to fstat mount namespace FD of systemd: %m"); + + /* We can't add new mounts at runtime if the process wasn't started in a namespace */ + if (st.st_ino == self_mntns_st.st_ino && st.st_dev == self_mntns_st.st_dev) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace"); + + /* One day, when bind mounting /proc/self/fd/n works across + * namespace boundaries we should rework this logic to make + * use of it... */ + + p = strjoina(propagate_path, "/"); + r = laccess(p, F_OK); + if (r < 0) + return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points"); + + r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, NULL, &chased_src_fd); + if (r < 0) + return log_debug_errno(r, "Failed to resolve source path of %s: %m", src); + xsprintf(chased_src, "/proc/self/fd/%i", chased_src_fd); + + if (fstat(chased_src_fd, &st) < 0) + return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", src); + if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safeā¦ */ + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", src); + + /* Our goal is to install a new bind mount into the container, + possibly read-only. This is irritatingly complex + unfortunately, currently. + + First, we start by creating a private playground in /tmp, + that we can mount MS_SLAVE. (Which is necessary, since + MS_MOVE cannot be applied to mounts with MS_SHARED parent + mounts.) */ + + if (!mkdtemp(mount_slave)) + return log_debug_errno(errno, "Failed to create playground %s: %m", mount_slave); + + mount_slave_created = true; + + r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL); + if (r < 0) + goto finish; + + mount_slave_mounted = true; + + r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL); + if (r < 0) + goto finish; + + /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */ + mount_tmp = strjoina(mount_slave, "/mount"); + r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700); + if (r < 0) { + log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp); + goto finish; + } + + mount_tmp_created = true; + + r = mount_follow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL); + if (r < 0) + goto finish; + + mount_tmp_mounted = true; + + /* Third, we remount the new bind mount read-only if requested. */ + if (read_only) { + r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); + if (r < 0) + goto finish; + } + + /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only + * right-away. */ + + mount_outside = strjoina(propagate_path, "/XXXXXX"); + if (S_ISDIR(st.st_mode)) + r = mkdtemp(mount_outside) ? 0 : -errno; + else { + r = mkostemp_safe(mount_outside); + safe_close(r); + } + if (r < 0) { + log_debug_errno(r, "Cannot create propagation file or directory %s: %m", mount_outside); + goto finish; + } + + mount_outside_created = true; + + r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL); + if (r < 0) + goto finish; + + mount_outside_mounted = true; + mount_tmp_mounted = false; + + if (S_ISDIR(st.st_mode)) + (void) rmdir(mount_tmp); + else + (void) unlink(mount_tmp); + mount_tmp_created = false; + + (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW); + mount_slave_mounted = false; + + (void) rmdir(mount_slave); + mount_slave_created = false; + + if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) { + log_debug_errno(errno, "Failed to create pipe: %m"); + goto finish; + } + + r = namespace_fork("(sd-bindmnt)", "(sd-bindmnt-inner)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidns_fd, mntns_fd, -1, -1, root_fd, &child); + if (r < 0) + goto finish; + if (r == 0) { + const char *mount_inside; + + errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); + + if (make_file_or_directory) { + (void) mkdir_parents(dest, 0755); + (void) make_mount_point_inode_from_stat(&st, dest, 0700); + } + + /* Fifth, move the mount to the right place inside */ + mount_inside = strjoina(incoming_path, basename(mount_outside)); + r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL); + if (r < 0) + goto child_fail; + + _exit(EXIT_SUCCESS); + + child_fail: + (void) write(errno_pipe_fd[1], &r, sizeof(r)); + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + _exit(EXIT_FAILURE); + } + + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0); + if (r < 0) { + log_debug_errno(r, "Failed to wait for child: %m"); + goto finish; + } + if (r != EXIT_SUCCESS) { + if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r)) + log_debug_errno(r, "Failed to mount: %m"); + else + log_debug("Child failed."); + goto finish; + } + +finish: + if (mount_outside_mounted) + (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW); + if (mount_outside_created) { + if (S_ISDIR(st.st_mode)) + (void) rmdir(mount_outside); + else + (void) unlink(mount_outside); + } + + if (mount_tmp_mounted) + (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW); + if (mount_tmp_created) { + if (S_ISDIR(st.st_mode)) + (void) rmdir(mount_tmp); + else + (void) unlink(mount_tmp); + } + + if (mount_slave_mounted) + (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW); + if (mount_slave_created) + (void) rmdir(mount_slave); + + return r; +} |