diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/dbus-manager.c | 16 | ||||
-rw-r--r-- | src/core/dbus-service.c | 87 | ||||
-rw-r--r-- | src/core/dbus-service.h | 1 | ||||
-rw-r--r-- | src/core/dbus-unit.c | 32 | ||||
-rw-r--r-- | src/core/dbus-util.c | 33 | ||||
-rw-r--r-- | src/core/dbus-util.h | 1 | ||||
-rw-r--r-- | src/core/execute.c | 18 | ||||
-rw-r--r-- | src/core/execute.h | 2 | ||||
-rw-r--r-- | src/core/namespace.c | 58 | ||||
-rw-r--r-- | src/core/namespace.h | 2 | ||||
-rw-r--r-- | src/core/org.freedesktop.systemd1.conf | 8 | ||||
-rw-r--r-- | src/machine/machine-dbus.c | 218 | ||||
-rw-r--r-- | src/shared/mount-util.c | 226 | ||||
-rw-r--r-- | src/shared/mount-util.h | 2 | ||||
-rw-r--r-- | src/systemctl/systemctl-mount.c | 41 | ||||
-rw-r--r-- | src/systemctl/systemctl-mount.h | 4 | ||||
-rw-r--r-- | src/systemctl/systemctl.c | 20 | ||||
-rw-r--r-- | src/systemctl/systemctl.h | 2 | ||||
-rw-r--r-- | src/test/test-namespace.c | 2 | ||||
-rw-r--r-- | src/test/test-ns.c | 2 |
20 files changed, 527 insertions, 248 deletions
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 3e1d609aa3..4b88f0d9f0 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -16,6 +16,7 @@ #include "dbus-job.h" #include "dbus-manager.h" #include "dbus-scope.h" +#include "dbus-service.h" #include "dbus-unit.h" #include "dbus.h" #include "env-util.h" @@ -725,6 +726,11 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s return method_generic_unit_operation(message, userdata, error, bus_unit_method_set_properties, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED); } +static int method_bind_mount_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + /* Only add mounts on fully loaded units */ + return method_generic_unit_operation(message, userdata, error, bus_service_method_bind_mount, GENERIC_UNIT_VALIDATE_LOADED); +} + static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { /* Only allow reffing of fully loaded units, and make sure reffing a unit loads it. 
*/ return method_generic_unit_operation(message, userdata, error, bus_unit_method_ref, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED); @@ -2760,6 +2766,16 @@ const sd_bus_vtable bus_manager_vtable[] = { NULL,, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD_WITH_NAMES("BindMountUnit", + "sssbb", + SD_BUS_PARAM(name) + SD_BUS_PARAM(source) + SD_BUS_PARAM(destination) + SD_BUS_PARAM(read_only) + SD_BUS_PARAM(mkdir), + NULL,, + method_bind_mount_unit, + SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD_WITH_NAMES("RefUnit", "s", SD_BUS_PARAM(name), diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 64f9d4ab36..6df93e44a4 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -11,11 +11,15 @@ #include "dbus-manager.h" #include "dbus-service.h" #include "dbus-util.h" +#include "execute.h" #include "exit-status.h" #include "fd-util.h" #include "fileio.h" +#include "locale-util.h" +#include "mount-util.h" #include "parse-util.h" #include "path-util.h" +#include "selinux-access.h" #include "service.h" #include "signal-util.h" #include "string-util.h" @@ -91,6 +95,79 @@ static int property_get_exit_status_set( return sd_bus_message_close_container(reply); } +int bus_service_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) { + int read_only, make_file_or_directory; + const char *dest, *src, *propagate_directory; + Unit *u = userdata; + ExecContext *c; + pid_t unit_pid; + int r; + + assert(message); + assert(u); + + if (!MANAGER_IS_SYSTEM(u->manager)) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Adding bind mounts at runtime is only supported for system managers."); + + r = mac_selinux_unit_access_check(u, message, "start", error); + if (r < 0) + return r; + + r = sd_bus_message_read(message, "ssbb", &src, &dest, &read_only, &make_file_or_directory); + if (r < 0) + return r; + + if (!path_is_absolute(src) || !path_is_normalized(src)) + return sd_bus_error_setf(error, 
SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and normalized."); + + if (isempty(dest)) + dest = src; + else if (!path_is_absolute(dest) || !path_is_normalized(dest)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and normalized."); + + r = bus_verify_manage_units_async_full( + u, + "bind-mount", + CAP_SYS_ADMIN, + N_("Authentication is required to bind mount on '$(unit)'."), + true, + message, + error); + if (r < 0) + return r; + if (r == 0) + return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ + + if (u->type != UNIT_SERVICE) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not of type .service"); + + /* If it would be dropped at startup time, return an error. The context should always be available, but + * there's an assert in exec_needs_mount_namespace, so double-check just in case. */ + c = unit_get_exec_context(u); + if (!c) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot access unit execution context"); + if (path_startswith_strv(dest, c->inaccessible_paths)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s is not accessible to this unit", dest); + + /* Ensure that the unit was started in a private mount namespace */ + if (!exec_needs_mount_namespace(c, NULL, unit_get_exec_runtime(u))) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit not running in private mount namespace, cannot activate bind mount"); + + unit_pid = unit_main_pid(u); + if (unit_pid == 0 || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not running"); + + propagate_directory = strjoina("/run/systemd/propagate/", u->id); + r = bind_mount_in_namespace(unit_pid, + propagate_directory, + "/run/systemd/incoming/", + src, dest, read_only, make_file_or_directory); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to mount %s on 
%s in unit's namespace: %m", src, dest); + + return sd_bus_reply_method_return(message, NULL); +} + const sd_bus_vtable bus_service_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Service, type), SD_BUS_VTABLE_PROPERTY_CONST), @@ -146,6 +223,16 @@ const sd_bus_vtable bus_service_vtable[] = { BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStopPostEx", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_METHOD_WITH_NAMES("BindMount", + "ssbb", + SD_BUS_PARAM(source) + SD_BUS_PARAM(destination) + SD_BUS_PARAM(read_only) + SD_BUS_PARAM(mkdir), + NULL,, + bus_service_method_bind_mount, + SD_BUS_VTABLE_UNPRIVILEGED), + /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_ratelimit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_ratelimit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), diff --git a/src/core/dbus-service.h b/src/core/dbus-service.h index 69311675c9..5b7b7b757b 100644 --- a/src/core/dbus-service.h +++ b/src/core/dbus-service.h @@ -9,4 +9,5 @@ extern const sd_bus_vtable bus_service_vtable[]; int bus_service_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error); +int bus_service_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_service_commit_properties(Unit *u); diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 427152a757..67cc58ee9e 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -323,38 +323,6 @@ static int property_get_load_error( return 
sd_bus_message_append(reply, "(ss)", NULL, NULL); } -static int bus_verify_manage_units_async_full( - Unit *u, - const char *verb, - int capability, - const char *polkit_message, - bool interactive, - sd_bus_message *call, - sd_bus_error *error) { - - const char *details[9] = { - "unit", u->id, - "verb", verb, - }; - - if (polkit_message) { - details[4] = "polkit.message"; - details[5] = polkit_message; - details[6] = "polkit.gettext_domain"; - details[7] = GETTEXT_PACKAGE; - } - - return bus_verify_polkit_async( - call, - capability, - "org.freedesktop.systemd1.manage-units", - details, - interactive, - UID_INVALID, - &u->manager->polkit_registry, - error); -} - static const char *const polkit_message_for_job[_JOB_TYPE_MAX] = { [JOB_START] = N_("Authentication is required to start '$(unit)'."), [JOB_STOP] = N_("Authentication is required to stop '$(unit)'."), diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c index d6223db305..2d22bc699a 100644 --- a/src/core/dbus-util.c +++ b/src/core/dbus-util.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "bus-polkit.h" #include "bus-util.h" #include "dbus-util.h" #include "parse-util.h" @@ -153,3 +154,35 @@ int bus_set_transient_usec_internal( return 1; } + +int bus_verify_manage_units_async_full( + Unit *u, + const char *verb, + int capability, + const char *polkit_message, + bool interactive, + sd_bus_message *call, + sd_bus_error *error) { + + const char *details[9] = { + "unit", u->id, + "verb", verb, + }; + + if (polkit_message) { + details[4] = "polkit.message"; + details[5] = polkit_message; + details[6] = "polkit.gettext_domain"; + details[7] = GETTEXT_PACKAGE; + } + + return bus_verify_polkit_async( + call, + capability, + "org.freedesktop.systemd1.manage-units", + details, + interactive, + UID_INVALID, + &u->manager->polkit_registry, + error); +} diff --git a/src/core/dbus-util.h b/src/core/dbus-util.h index 4e7c68e843..e35c632d37 100644 --- a/src/core/dbus-util.h +++ 
b/src/core/dbus-util.h @@ -248,3 +248,4 @@ static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, s static inline int bus_set_transient_usec_fix_0(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) { return bus_set_transient_usec_internal(u, name, p, true, message, flags, error); } +int bus_verify_manage_units_async_full(Unit *u, const char *verb, int capability, const char *polkit_message, bool interactive, sd_bus_message *call, sd_bus_error *error); diff --git a/src/core/execute.c b/src/core/execute.c index ee5f082783..5f170db8d1 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1987,13 +1987,12 @@ static int build_pass_environment(const ExecContext *c, char ***ret) { return 0; } -static bool exec_needs_mount_namespace( +bool exec_needs_mount_namespace( const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime) { assert(context); - assert(params); if (context->root_image) return true; @@ -2035,7 +2034,7 @@ static bool exec_needs_mount_namespace( return true; for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) { - if (!params->prefix[t]) + if (params && !params->prefix[t]) continue; if (!strv_isempty(context->directories[t].paths)) @@ -3115,7 +3114,7 @@ static int apply_mount_namespace( _cleanup_strv_free_ char **empty_directories = NULL; const char *tmp_dir = NULL, *var_tmp_dir = NULL; const char *root_dir = NULL, *root_image = NULL; - _cleanup_free_ char *creds_path = NULL; + _cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL; NamespaceInfo ns_info; bool needs_sandboxing; BindMount *bind_mounts = NULL; @@ -3192,6 +3191,15 @@ static int apply_mount_namespace( } } + if (MANAGER_IS_SYSTEM(u->manager)) { + propagate_dir = path_join("/run/systemd/propagate/", u->id); + if (!propagate_dir) + return -ENOMEM; + incoming_dir = strdup("/run/systemd/incoming"); + if (!incoming_dir) + return -ENOMEM; + } + r = 
setup_namespace(root_dir, root_image, context->root_image_options, &ns_info, context->read_write_paths, needs_sandboxing ? context->read_only_paths : NULL, @@ -3211,6 +3219,8 @@ static int apply_mount_namespace( context->root_hash, context->root_hash_size, context->root_hash_path, context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path, context->root_verity, + propagate_dir, + incoming_dir, DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK, error_path); diff --git a/src/core/execute.h b/src/core/execute.h index da8d6ae272..2da4699df1 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -471,3 +471,5 @@ ExecDirectoryType exec_directory_type_from_string(const char *s) _pure_; const char* exec_resource_type_to_string(ExecDirectoryType i) _const_; ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_; + +bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime); diff --git a/src/core/namespace.c b/src/core/namespace.c index e32336a7ff..4b5519e11b 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -51,6 +51,7 @@ typedef enum MountMode { EMPTY_DIR, SYSFS, PROCFS, + RUN, READONLY, READWRITE, TMPFS, @@ -76,12 +77,13 @@ typedef struct MountEntry { LIST_HEAD(MountOptions, image_options); } MountEntry; -/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted +/* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into it, but only as a fallback if the user hasn't mounted * something there already. These mounts are hence overridden by any other explicitly configured mounts. 
*/ static const MountEntry apivfs_table[] = { { "/proc", PROCFS, false }, { "/dev", BIND_DEV, false }, { "/sys", SYSFS, false }, + { "/run", RUN, false, .options_const = "mode=755" TMPFS_LIMITS_RUN, .flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME }, }; /* ProtectKernelTunables= option and the related filesystem APIs */ @@ -945,6 +947,20 @@ static int mount_tmpfs(const MountEntry *m) { return 1; } +static int mount_run(const MountEntry *m) { + int r; + + assert(m); + + r = path_is_mount_point(mount_entry_path(m), NULL, 0); + if (r < 0 && r != -ENOENT) + return log_debug_errno(r, "Unable to determine whether /run is already mounted: %m"); + if (r > 0) /* make this a NOP if /run is already a mount point */ + return 0; + + return mount_tmpfs(m); +} + static int mount_images(const MountEntry *m) { _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; @@ -1170,6 +1186,9 @@ static int apply_mount( case PROCFS: return mount_procfs(m, ns_info); + case RUN: + return mount_run(m); + case MOUNT_IMAGES: return mount_images(m); @@ -1282,7 +1301,8 @@ static size_t namespace_calculate_mounts( const char* tmp_dir, const char* var_tmp_dir, const char *creds_path, - const char* log_namespace) { + const char* log_namespace, + bool setup_propagate) { size_t protect_home_cnt; size_t protect_system_cnt = @@ -1309,6 +1329,7 @@ static size_t namespace_calculate_mounts( n_bind_mounts + n_mount_images + n_temporary_filesystems + + (setup_propagate ? 1 : 0) + /* /run/systemd/incoming */ ns_info->private_dev + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + (ns_info->protect_kernel_modules ? 
ELEMENTSOF(protect_kernel_modules_table) : 0) + @@ -1468,6 +1489,8 @@ int setup_namespace( size_t root_hash_sig_size, const char *root_hash_sig_path, const char *verity_data_path, + const char *propagate_dir, + const char *incoming_dir, DissectImageFlags dissect_image_flags, char **error_path) { @@ -1476,13 +1499,16 @@ int setup_namespace( _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; MountEntry *m = NULL, *mounts = NULL; - bool require_prefix = false; + bool require_prefix = false, setup_propagate = false; const char *root; size_t n_mounts; int r; assert(ns_info); + if (!isempty(propagate_dir) && !isempty(incoming_dir)) + setup_propagate = true; + if (mount_flags == 0) mount_flags = MS_SHARED; @@ -1566,7 +1592,8 @@ int setup_namespace( n_mount_images, tmp_dir, var_tmp_dir, creds_path, - log_namespace); + log_namespace, + setup_propagate); if (n_mounts > 0) { m = mounts = new0(MountEntry, n_mounts); @@ -1735,6 +1762,15 @@ int setup_namespace( }; } + /* Will be used to add bind mounts at runtime */ + if (setup_propagate) + *(m++) = (MountEntry) { + .source_const = propagate_dir, + .path_const = incoming_dir, + .mode = BIND_MOUNT, + .read_only = true, + }; + assert(mounts + n_mounts == m); /* Prepend the root directory where that's necessary */ @@ -1759,6 +1795,10 @@ int setup_namespace( goto finish; } + /* Create the source directory to allow runtime propagation of mounts */ + if (setup_propagate) + (void) mkdir_p(propagate_dir, 0600); + /* Remount / as SLAVE so that nothing now mounted in the namespace * shows up in the parent */ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { @@ -1900,6 +1940,16 @@ int setup_namespace( goto finish; } + /* bind_mount_in_namespace() will MS_MOVE into that directory, and that's only + * supported for non-shared mounts. This needs to happen after remounting / or it will fail. 
*/ + if (setup_propagate) { + r = mount(NULL, incoming_dir, NULL, MS_SLAVE, NULL); + if (r < 0) { + log_error_errno(r, "Failed to remount %s with MS_SLAVE: %m", incoming_dir); + goto finish; + } + } + r = 0; finish: diff --git a/src/core/namespace.h b/src/core/namespace.h index da0861c406..91ee44cd51 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -127,6 +127,8 @@ int setup_namespace( size_t root_hash_sig_size, const char *root_hash_sig_path, const char *root_verity, + const char *propagate_dir, + const char *incoming_dir, DissectImageFlags dissected_image_flags, char **error_path); diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index 8b32379835..0cea4d2b02 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -224,6 +224,10 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="BindMountUnit"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="KillUnit"/> <allow send_destination="org.freedesktop.systemd1" @@ -392,6 +396,10 @@ send_interface="org.freedesktop.systemd1.Service" send_member="AttachProcesses"/> + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Service" + send_member="BindMount"/> + <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Scope interface --> <allow send_destination="org.freedesktop.systemd1" diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index 5d8d4276e2..7d6e1c7163 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -810,17 +810,9 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu } int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) { - _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; - char mount_slave[] = 
"/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p; - bool mount_slave_created = false, mount_slave_mounted = false, - mount_tmp_created = false, mount_tmp_mounted = false, - mount_outside_created = false, mount_outside_mounted = false; - _cleanup_free_ char *chased_src = NULL; int read_only, make_file_or_directory; - const char *dest, *src; + const char *dest, *src, *propagate_directory; Machine *m = userdata; - struct stat st; - pid_t child; uid_t uid; int r; @@ -835,12 +827,12 @@ int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bu return r; if (!path_is_absolute(src) || !path_is_normalized(src)) - return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and not contain ../."); + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and normalized."); if (isempty(dest)) dest = src; else if (!path_is_absolute(dest) || !path_is_normalized(dest)) - return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and not contain ../."); + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and normalized."); r = bus_verify_polkit_async( message, @@ -862,205 +854,15 @@ int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bu if (uid != 0) return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Can't bind mount on container with user namespacing applied."); - /* One day, when bind mounting /proc/self/fd/n works across - * namespace boundaries we should rework this logic to make - * use of it... 
*/ - - p = strjoina("/run/systemd/nspawn/propagate/", m->name, "/"); - if (laccess(p, F_OK) < 0) - return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Container does not allow propagation of mount points."); - - r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src, NULL); + propagate_directory = strjoina("/run/systemd/nspawn/propagate/", m->name); + r = bind_mount_in_namespace(m->leader, + propagate_directory, + "/run/host/incoming/", + src, dest, read_only, make_file_or_directory); if (r < 0) - return sd_bus_error_set_errnof(error, r, "Failed to resolve source path: %m"); - - if (lstat(chased_src, &st) < 0) - return sd_bus_error_set_errnof(error, errno, "Failed to stat() source path: %m"); - if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */ - return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Source directory can't be a symbolic link"); - - /* Our goal is to install a new bind mount into the container, - possibly read-only. This is irritatingly complex - unfortunately, currently. - - First, we start by creating a private playground in /tmp, - that we can mount MS_SLAVE. (Which is necessary, since - MS_MOVE cannot be applied to mounts with MS_SHARED parent - mounts.) 
*/ - - if (!mkdtemp(mount_slave)) - return sd_bus_error_set_errnof(error, errno, "Failed to create playground %s: %m", mount_slave); - - mount_slave_created = true; - - r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to make bind mount %s: %m", mount_slave); - goto finish; - } - - mount_slave_mounted = true; - - r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to remount slave %s: %m", mount_slave); - goto finish; - } - - /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */ - mount_tmp = strjoina(mount_slave, "/mount"); - r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to create temporary mount point %s: %m", mount_tmp); - goto finish; - } - - mount_tmp_created = true; - - r = mount_nofollow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to mount %s: %m", chased_src); - goto finish; - } - - mount_tmp_mounted = true; - - /* Third, we remount the new bind mount read-only if requested. */ - if (read_only) { - r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to remount read-only %s: %m", mount_tmp); - goto finish; - } - } - - /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only - * right-away. */ - - mount_outside = strjoina("/run/systemd/nspawn/propagate/", m->name, "/XXXXXX"); - if (S_ISDIR(st.st_mode)) - r = mkdtemp(mount_outside) ? 
0 : -errno; - else { - r = mkostemp_safe(mount_outside); - safe_close(r); - } - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Cannot create propagation file or directory %s: %m", mount_outside); - goto finish; - } - - mount_outside_created = true; - - r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to move %s to %s: %m", mount_tmp, mount_outside); - goto finish; - } - - mount_outside_mounted = true; - mount_tmp_mounted = false; - - if (S_ISDIR(st.st_mode)) - (void) rmdir(mount_tmp); - else - (void) unlink(mount_tmp); - mount_tmp_created = false; - - (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW); - mount_slave_mounted = false; - - (void) rmdir(mount_slave); - mount_slave_created = false; - - if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) { - r = sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m"); - goto finish; - } - - r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child); - if (r < 0) { - sd_bus_error_set_errnof(error, r, "Failed to fork(): %m"); - goto finish; - } - if (r == 0) { - const char *mount_inside, *q; - int mntfd; - - errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); - - q = procfs_file_alloca(m->leader, "ns/mnt"); - mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (mntfd < 0) { - r = log_error_errno(errno, "Failed to open mount namespace of leader: %m"); - goto child_fail; - } - - if (setns(mntfd, CLONE_NEWNS) < 0) { - r = log_error_errno(errno, "Failed to join namespace of leader: %m"); - goto child_fail; - } - - if (make_file_or_directory) { - (void) mkdir_parents(dest, 0755); - (void) make_mount_point_inode_from_stat(&st, dest, 0700); - } - - mount_inside = strjoina("/run/host/incoming/", basename(mount_outside)); - r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL); - if (r < 0) - goto child_fail; - - _exit(EXIT_SUCCESS); - - child_fail: - (void) write(errno_pipe_fd[1], &r, 
sizeof(r)); - errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); - - _exit(EXIT_FAILURE); - } - - errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + return sd_bus_error_set_errnof(error, r, "Failed to mount %s on %s in machine's namespace: %m", src, dest); - r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0); - if (r < 0) { - r = sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m"); - goto finish; - } - if (r != EXIT_SUCCESS) { - if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r)) - r = sd_bus_error_set_errnof(error, r, "Failed to mount: %m"); - else - r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child failed."); - goto finish; - } - - r = sd_bus_reply_method_return(message, NULL); - -finish: - if (mount_outside_mounted) - (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW); - if (mount_outside_created) { - if (S_ISDIR(st.st_mode)) - (void) rmdir(mount_outside); - else - (void) unlink(mount_outside); - } - - if (mount_tmp_mounted) - (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW); - if (mount_tmp_created) { - if (S_ISDIR(st.st_mode)) - (void) rmdir(mount_tmp); - else - (void) unlink(mount_tmp); - } - - if (mount_slave_mounted) - (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW); - if (mount_slave_created) - (void) rmdir(mount_slave); - - return r; + return sd_bus_reply_method_return(message, NULL); } int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error) { diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index a6480b93a4..9d0d7c73df 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -14,15 +14,19 @@ #include "fs-util.h" #include "hashmap.h" #include "libmount-util.h" +#include "mkdir.h" #include "mount-util.h" #include "mountpoint-util.h" +#include "namespace-util.h" #include "parse-util.h" #include "path-util.h" +#include "process-util.h" #include "set.h" #include "stat-util.h" #include "stdio-util.h" #include "string-util.h" 
#include "strv.h" +#include "tmpfile-util.h" int mount_fd(const char *source, int target_fd, @@ -742,3 +746,225 @@ int mount_option_mangle( return 0; } + +int bind_mount_in_namespace( + pid_t target, + const char *propagate_path, + const char *incoming_path, + const char *src, + const char *dest, + bool read_only, + bool make_file_or_directory) { + + _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 }; + _cleanup_close_ int self_mntns_fd = -1, mntns_fd = -1, root_fd = -1, pidns_fd = -1, chased_src_fd = -1; + char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p, + chased_src[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + bool mount_slave_created = false, mount_slave_mounted = false, + mount_tmp_created = false, mount_tmp_mounted = false, + mount_outside_created = false, mount_outside_mounted = false; + struct stat st, self_mntns_st; + pid_t child; + int r; + + assert(target > 0); + assert(propagate_path); + assert(incoming_path); + assert(src); + assert(dest); + + r = namespace_open(target, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd); + if (r < 0) + return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m"); + + if (fstat(mntns_fd, &st) < 0) + return log_debug_errno(errno, "Failed to fstat mount namespace FD of target process: %m"); + + r = namespace_open(0, NULL, &self_mntns_fd, NULL, NULL, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to retrieve FDs of systemd's namespace: %m"); + + if (fstat(self_mntns_fd, &self_mntns_st) < 0) + return log_debug_errno(errno, "Failed to fstat mount namespace FD of systemd: %m"); + + /* We can't add new mounts at runtime if the process wasn't started in a namespace */ + if (st.st_ino == self_mntns_st.st_ino && st.st_dev == self_mntns_st.st_dev) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace"); + + /* One day, when bind mounting /proc/self/fd/n works across + * namespace boundaries we 
should rework this logic to make + * use of it... */ + + p = strjoina(propagate_path, "/"); + r = laccess(p, F_OK); + if (r < 0) + return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points"); + + r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, NULL, &chased_src_fd); + if (r < 0) + return log_debug_errno(r, "Failed to resolve source path of %s: %m", src); + xsprintf(chased_src, "/proc/self/fd/%i", chased_src_fd); + + if (fstat(chased_src_fd, &st) < 0) + return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", src); + if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */ + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", src); + + /* Our goal is to install a new bind mount into the container, + possibly read-only. This is irritatingly complex + unfortunately, currently. + + First, we start by creating a private playground in /tmp, + that we can mount MS_SLAVE. (Which is necessary, since + MS_MOVE cannot be applied to mounts with MS_SHARED parent + mounts.) */ + + if (!mkdtemp(mount_slave)) + return log_debug_errno(errno, "Failed to create playground %s: %m", mount_slave); + + mount_slave_created = true; + + r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL); + if (r < 0) + goto finish; + + mount_slave_mounted = true; + + r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL); + if (r < 0) + goto finish; + + /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. 
*/ + mount_tmp = strjoina(mount_slave, "/mount"); + r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700); + if (r < 0) { + log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp); + goto finish; + } + + mount_tmp_created = true; + + r = mount_follow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL); + if (r < 0) + goto finish; + + mount_tmp_mounted = true; + + /* Third, we remount the new bind mount read-only if requested. */ + if (read_only) { + r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); + if (r < 0) + goto finish; + } + + /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only + * right-away. */ + + mount_outside = strjoina(propagate_path, "/XXXXXX"); + if (S_ISDIR(st.st_mode)) + r = mkdtemp(mount_outside) ? 0 : -errno; + else { + r = mkostemp_safe(mount_outside); + safe_close(r); + } + if (r < 0) { + log_debug_errno(r, "Cannot create propagation file or directory %s: %m", mount_outside); + goto finish; + } + + mount_outside_created = true; + + r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL); + if (r < 0) + goto finish; + + mount_outside_mounted = true; + mount_tmp_mounted = false; + + if (S_ISDIR(st.st_mode)) + (void) rmdir(mount_tmp); + else + (void) unlink(mount_tmp); + mount_tmp_created = false; + + (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW); + mount_slave_mounted = false; + + (void) rmdir(mount_slave); + mount_slave_created = false; + + if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) { + log_debug_errno(errno, "Failed to create pipe: %m"); + goto finish; + } + + r = namespace_fork("(sd-bindmnt)", "(sd-bindmnt-inner)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidns_fd, mntns_fd, -1, -1, root_fd, &child); + if (r < 0) + goto finish; + if (r == 0) { + const char *mount_inside; + + errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); + + if 
(make_file_or_directory) { + (void) mkdir_parents(dest, 0755); + (void) make_mount_point_inode_from_stat(&st, dest, 0700); + } + + /* Fifth, move the mount to the right place inside */ + mount_inside = strjoina(incoming_path, basename(mount_outside)); + r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL); + if (r < 0) + goto child_fail; + + _exit(EXIT_SUCCESS); + + child_fail: + (void) write(errno_pipe_fd[1], &r, sizeof(r)); + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + _exit(EXIT_FAILURE); + } + + errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]); + + r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0); + if (r < 0) { + log_debug_errno(r, "Failed to wait for child: %m"); + goto finish; + } + if (r != EXIT_SUCCESS) { + if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r)) + log_debug_errno(r, "Failed to mount: %m"); + else + log_debug("Child failed."); + goto finish; + } + +finish: + if (mount_outside_mounted) + (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW); + if (mount_outside_created) { + if (S_ISDIR(st.st_mode)) + (void) rmdir(mount_outside); + else + (void) unlink(mount_outside); + } + + if (mount_tmp_mounted) + (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW); + if (mount_tmp_created) { + if (S_ISDIR(st.st_mode)) + (void) rmdir(mount_tmp); + else + (void) unlink(mount_tmp); + } + + if (mount_slave_mounted) + (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW); + if (mount_slave_created) + (void) rmdir(mount_slave); + + return r; +} diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 089fd69e29..fa36dd7875 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -97,3 +97,5 @@ static inline char* umount_and_rmdir_and_free(char *p) { return mfree(p); } DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free); + +int bind_mount_in_namespace(pid_t target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool 
read_only, bool make_file_or_directory); diff --git a/src/systemctl/systemctl-mount.c b/src/systemctl/systemctl-mount.c new file mode 100644 index 0000000000..513a876f21 --- /dev/null +++ b/src/systemctl/systemctl-mount.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "bus-error.h" +#include "bus-locator.h" +#include "systemctl-mount.h" +#include "systemctl-util.h" +#include "systemctl.h" + +int mount_bind(int argc, char *argv[], void *userdata) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *n = NULL; + sd_bus *bus; + int r; + + r = acquire_bus(BUS_MANAGER, &bus); + if (r < 0) + return r; + + polkit_agent_open_maybe(); + + r = unit_name_mangle(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, &n); + if (r < 0) + return log_error_errno(r, "Failed to mangle unit name: %m"); + + r = bus_call_method( + bus, + bus_systemd_mgr, + "BindMountUnit", + &error, + NULL, + "sssbb", + n, + argv[2], + argv[3], + arg_read_only, + arg_mkdir); + if (r < 0) + return log_error_errno(r, "Failed to bind mount: %s", bus_error_message(&error, r)); + + return 0; +} diff --git a/src/systemctl/systemctl-mount.h b/src/systemctl/systemctl-mount.h new file mode 100644 index 0000000000..1f9b3879fb --- /dev/null +++ b/src/systemctl/systemctl-mount.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +int mount_bind(int argc, char *argv[], void *userdata); diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 7471fadd91..4726f65f97 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -38,6 +38,7 @@ #include "systemctl-list-units.h" #include "systemctl-log-setting.h" #include "systemctl-logind.h" +#include "systemctl-mount.h" #include "systemctl-preset-all.h" #include "systemctl-reset-failed.h" #include "systemctl-service-watchdogs.h" @@ -105,6 +106,8 @@ bool arg_jobs_before = false; bool arg_jobs_after = false; char **arg_clean_what = NULL; TimestampStyle 
arg_timestamp_style = TIMESTAMP_PRETTY; +bool arg_read_only = false; +bool arg_mkdir = false; STATIC_DESTRUCTOR_REGISTER(arg_wall, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_root, freep); @@ -157,6 +160,8 @@ static int systemctl_help(void) { " freeze PATTERN... Freeze execution of unit processes\n" " thaw PATTERN... Resume execution of a frozen unit\n" " set-property UNIT PROPERTY=VALUE... Sets one or more properties of a unit\n" + " bind UNIT PATH [PATH] Bind-mount a path from the host into a\n" + " unit's namespace\n" " service-log-level SERVICE [LEVEL] Get/set logging threshold for service\n" " service-log-target SERVICE [TARGET] Get/set logging target for service\n" " reset-failed [PATTERN...] Reset failed state for all, one, or more\n" @@ -286,6 +291,8 @@ static int systemctl_help(void) { " 'us': 'Day YYYY-MM-DD HH:MM:SS.UUUUUU TZ\n" " 'utc': 'Day YYYY-MM-DD HH:MM:SS UTC\n" " 'us+utc': 'Day YYYY-MM-DD HH:MM:SS.UUUUUU UTC\n" + " --read-only Create read-only bind mount\n" + " --mkdir Create directory before bind-mounting, if missing\n" "\nSee the %2$s for details.\n" , program_invocation_short_name , link @@ -401,6 +408,8 @@ static int systemctl_parse_argv(int argc, char *argv[]) { ARG_WHAT, ARG_REBOOT_ARG, ARG_TIMESTAMP_STYLE, + ARG_READ_ONLY, + ARG_MKDIR, }; static const struct option options[] = { @@ -457,6 +466,8 @@ static int systemctl_parse_argv(int argc, char *argv[]) { { "what", required_argument, NULL, ARG_WHAT }, { "reboot-argument", required_argument, NULL, ARG_REBOOT_ARG }, { "timestamp", required_argument, NULL, ARG_TIMESTAMP_STYLE }, + { "read-only", no_argument, NULL, ARG_READ_ONLY }, + { "mkdir", no_argument, NULL, ARG_MKDIR }, {} }; @@ -878,6 +889,14 @@ static int systemctl_parse_argv(int argc, char *argv[]) { break; + case ARG_READ_ONLY: + arg_read_only = true; + break; + + case ARG_MKDIR: + arg_mkdir = true; + break; + case '.': /* Output an error mimicking getopt, and print a hint afterwards */ log_error("%s: invalid option -- '.'", 
program_invocation_name); @@ -1045,6 +1064,7 @@ static int systemctl_main(int argc, char *argv[]) { { "add-wants", 3, VERB_ANY, 0, add_dependency }, { "add-requires", 3, VERB_ANY, 0, add_dependency }, { "edit", 2, VERB_ANY, VERB_ONLINE_ONLY, edit }, + { "bind", 3, 4, VERB_ONLINE_ONLY, mount_bind }, {} }; diff --git a/src/systemctl/systemctl.h b/src/systemctl/systemctl.h index cdf56c7220..34650ebb44 100644 --- a/src/systemctl/systemctl.h +++ b/src/systemctl/systemctl.h @@ -90,3 +90,5 @@ extern bool arg_jobs_before; extern bool arg_jobs_after; extern char **arg_clean_what; extern TimestampStyle arg_timestamp_style; +extern bool arg_read_only; +extern bool arg_mkdir; diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index e234f54dee..d92bcacfad 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -172,6 +172,8 @@ static void test_protect_kernel_logs(void) { 0, NULL, NULL, + NULL, + NULL, 0, NULL); assert_se(r == 0); diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 6ec1cff28a..88bdb437de 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -87,6 +87,8 @@ int main(int argc, char *argv[]) { 0, NULL, NULL, + NULL, + NULL, 0, NULL); if (r < 0) { |