diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2022-12-13 14:32:35 +0100 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2022-12-14 22:12:44 +0100 |
commit | ca6ce62d2a437432082b5c6e5d4275d56055510f (patch) | |
tree | 134e8f1a48de72a6c42f01dbf1a7cfc88b8c7973 /src/core/manager.c | |
parent | 61ef30515b44c478cf1aa1155d5b306f09c9dc5a (diff) | |
download | systemd-ca6ce62d2a437432082b5c6e5d4275d56055510f.tar.gz |
manager: execute generators in a mount namespace "sandbox"
When generators are executed during early boot, /tmp might not be available
yet. This causes problems with bash, because here-docs don't work. Even
non-shell code can often assume that /tmp is available. This limitation is
known to trip up people, and when the code is tested on a "normal" system,
everything works.
We can solve this nicely, and get another small benefit, by making most of the
file system read-only and "punching holes" for some dirs that should be
writable. The generator code runs with full privileges and can do anything it
wants by writing appropriate systemd units, so it doesn't make much sense to do
any significant sandboxing around generators. But making root read-only is nice
because it can catch stupid mistakes where the generator tries to write to a
wrong path or something like that. We effectively also get a "private /tmp" for
the generators, which protects them against existing files in /tmp.
The path does the following:
when executing generators, we fork, and the child unshares root and makes
it recursively read-only, with the exception of /sys and /run. Error handling
is permissive — if some of this setup fails, we're in the same state as
before the patch.
Fixes #24430.
Diffstat (limited to 'src/core/manager.c')
-rw-r--r-- | src/core/manager.c | 75 |
1 files changed, 53 insertions, 22 deletions
diff --git a/src/core/manager.c b/src/core/manager.c index e9b7b26160..292e82fd87 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -6,6 +6,7 @@ #include <sys/epoll.h> #include <sys/inotify.h> #include <sys/ioctl.h> +#include <sys/mount.h> #include <sys/reboot.h> #include <sys/timerfd.h> #include <sys/utsname.h> @@ -62,6 +63,7 @@ #include "manager-serialize.h" #include "memory-util.h" #include "mkdir-label.h" +#include "mount-util.h" #include "os-util.h" #include "parse-util.h" #include "path-lookup.h" @@ -3740,8 +3742,45 @@ static int build_generator_environment(Manager *m, char ***ret) { return 0; } +static int manager_execute_generators(Manager *m, char **paths, bool remount_ro) { + _cleanup_strv_free_ char **ge = NULL; + const char *argv[] = { + NULL, /* Leave this empty, execute_directory() will fill something in */ + m->lookup_paths.generator, + m->lookup_paths.generator_early, + m->lookup_paths.generator_late, + NULL, + }; + int r; + + r = build_generator_environment(m, &ge); + if (r < 0) + return log_error_errno(r, "Failed to build generator environment: %m"); + + if (remount_ro) { + /* Remount most of the filesystem tree read-only. We leave /sys/ as-is, because our code + * checks whether it is read-only to detect containerized execution environments. We leave + * /run/ as-is too, because that's where our output goes. We also leave /proc/ and /dev/shm/ + * because they're API, and /tmp/ that safe_fork() mounted for us. + */ + r = bind_remount_recursive("/", MS_RDONLY, MS_RDONLY, + STRV_MAKE("/sys", "/run", "/proc", "/dev/shm", "/tmp")); + if (r < 0) + log_warning_errno(r, "Read-only bind remount failed, ignoring: %m"); + } + + BLOCK_WITH_UMASK(0022); + return execute_directories( + (const char* const*) paths, + DEFAULT_TIMEOUT_USEC, + /* callbacks= */ NULL, /* callback_args= */ NULL, + (char**) argv, + ge, + EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS | EXEC_DIR_SET_SYSTEMD_EXEC_PID); +} + static int manager_run_generators(Manager *m) { - _cleanup_strv_free_ char **paths = NULL, **ge = NULL; + _cleanup_strv_free_ char **paths = NULL; int r; assert(m); @@ -3762,30 +3801,22 @@ static int manager_run_generators(Manager *m) { goto finish; } - const char *argv[] = { - NULL, /* Leave this empty, execute_directory() will fill something in */ - m->lookup_paths.generator, - m->lookup_paths.generator_early, - m->lookup_paths.generator_late, - NULL, - }; - - r = build_generator_environment(m, &ge); - if (r < 0) { - log_error_errno(r, "Failed to build generator environment: %m"); + /* If we are the system manager, we fork and invoke the generators in a sanitized mount namespace. If + * we are the user manager, let's just execute the generators directly. We might not have the + * necessary privileges, and the system manager has already mounted /tmp/ and everything else for us. + */ + if (MANAGER_IS_USER(m)) { + r = manager_execute_generators(m, paths, /* remount_ro= */ false); goto finish; } - WITH_UMASK(0022) - (void) execute_directories( - (const char* const*) paths, - DEFAULT_TIMEOUT_USEC, - /* callbacks= */ NULL, /* callback_args= */ NULL, - (char**) argv, - ge, - EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS | EXEC_DIR_SET_SYSTEMD_EXEC_PID); - - r = 0; + r = safe_fork("(sd-gens)", + FORK_RESET_SIGNALS | FORK_LOG | FORK_WAIT | FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE | FORK_PRIVATE_TMP, + NULL); + if (r == 0) { + r = manager_execute_generators(m, paths, /* remount_ro= */ true); + _exit(r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE); + } finish: lookup_paths_trim_generator(&m->lookup_paths); |