summaryrefslogtreecommitdiff
path: root/src/core/manager.c
diff options
context:
space:
mode:
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2022-12-13 14:32:35 +0100
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2022-12-14 22:12:44 +0100
commitca6ce62d2a437432082b5c6e5d4275d56055510f (patch)
tree134e8f1a48de72a6c42f01dbf1a7cfc88b8c7973 /src/core/manager.c
parent61ef30515b44c478cf1aa1155d5b306f09c9dc5a (diff)
downloadsystemd-ca6ce62d2a437432082b5c6e5d4275d56055510f.tar.gz
manager: execute generators in a mount namespace "sandbox"
When generators are executed during early boot, /tmp might not be available yet. This causes problems with bash, because here-docs don't work. Even non-shell code can often assume that /tmp is available. This limitation is known to trip up people, and when the code is tested on a "normal" system, everything works. We can solve this nicely, and get another small benefit, by making most of the file system read-only and "punching holes" for some dirs that should be writable. The generator code runs with full privileges and can do anything it wants by writing appropriate systemd units, so it doesn't make much sense to do any significant sandboxing around generators. But making root read-only is nice because it can catch stupid mistakes where the generator tries to write to a wrong path or something like that. We effectively also get a "private /tmp" for the generators, which protects them against existing files in /tmp. The path does the following: when executing generators, we fork, and the child unshares root and makes it recursively read-only, with the exception of /sys and /run. Error handling is permissive — if some of this setup fails, we're in the same state as before the patch. Fixes #24430.
Diffstat (limited to 'src/core/manager.c')
-rw-r--r--src/core/manager.c75
1 files changed, 53 insertions, 22 deletions
diff --git a/src/core/manager.c b/src/core/manager.c
index e9b7b26160..292e82fd87 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -6,6 +6,7 @@
#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/ioctl.h>
+#include <sys/mount.h>
#include <sys/reboot.h>
#include <sys/timerfd.h>
#include <sys/utsname.h>
@@ -62,6 +63,7 @@
#include "manager-serialize.h"
#include "memory-util.h"
#include "mkdir-label.h"
+#include "mount-util.h"
#include "os-util.h"
#include "parse-util.h"
#include "path-lookup.h"
@@ -3740,8 +3742,45 @@ static int build_generator_environment(Manager *m, char ***ret) {
return 0;
}
+static int manager_execute_generators(Manager *m, char **paths, bool remount_ro) {
+ _cleanup_strv_free_ char **ge = NULL;
+ const char *argv[] = {
+ NULL, /* Leave this empty, execute_directory() will fill something in */
+ m->lookup_paths.generator,
+ m->lookup_paths.generator_early,
+ m->lookup_paths.generator_late,
+ NULL,
+ };
+ int r;
+
+ r = build_generator_environment(m, &ge);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build generator environment: %m");
+
+ if (remount_ro) {
+ /* Remount most of the filesystem tree read-only. We leave /sys/ as-is, because our code
+ * checks whether it is read-only to detect containerized execution environments. We leave
+ * /run/ as-is too, because that's where our output goes. We also leave /proc/ and /dev/shm/
+ * because they're API, and /tmp/ that safe_fork() mounted for us.
+ */
+ r = bind_remount_recursive("/", MS_RDONLY, MS_RDONLY,
+ STRV_MAKE("/sys", "/run", "/proc", "/dev/shm", "/tmp"));
+ if (r < 0)
+ log_warning_errno(r, "Read-only bind remount failed, ignoring: %m");
+ }
+
+ BLOCK_WITH_UMASK(0022);
+ return execute_directories(
+ (const char* const*) paths,
+ DEFAULT_TIMEOUT_USEC,
+ /* callbacks= */ NULL, /* callback_args= */ NULL,
+ (char**) argv,
+ ge,
+ EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS | EXEC_DIR_SET_SYSTEMD_EXEC_PID);
+}
+
static int manager_run_generators(Manager *m) {
- _cleanup_strv_free_ char **paths = NULL, **ge = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
int r;
assert(m);
@@ -3762,30 +3801,22 @@ static int manager_run_generators(Manager *m) {
goto finish;
}
- const char *argv[] = {
- NULL, /* Leave this empty, execute_directory() will fill something in */
- m->lookup_paths.generator,
- m->lookup_paths.generator_early,
- m->lookup_paths.generator_late,
- NULL,
- };
-
- r = build_generator_environment(m, &ge);
- if (r < 0) {
- log_error_errno(r, "Failed to build generator environment: %m");
+ /* If we are the system manager, we fork and invoke the generators in a sanitized mount namespace. If
+ * we are the user manager, let's just execute the generators directly. We might not have the
+ * necessary privileges, and the system manager has already mounted /tmp/ and everything else for us.
+ */
+ if (MANAGER_IS_USER(m)) {
+ r = manager_execute_generators(m, paths, /* remount_ro= */ false);
goto finish;
}
- WITH_UMASK(0022)
- (void) execute_directories(
- (const char* const*) paths,
- DEFAULT_TIMEOUT_USEC,
- /* callbacks= */ NULL, /* callback_args= */ NULL,
- (char**) argv,
- ge,
- EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS | EXEC_DIR_SET_SYSTEMD_EXEC_PID);
-
- r = 0;
+ r = safe_fork("(sd-gens)",
+ FORK_RESET_SIGNALS | FORK_LOG | FORK_WAIT | FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE | FORK_PRIVATE_TMP,
+ NULL);
+ if (r == 0) {
+ r = manager_execute_generators(m, paths, /* remount_ro= */ true);
+ _exit(r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+ }
finish:
lookup_paths_trim_generator(&m->lookup_paths);