summaryrefslogtreecommitdiff
path: root/src/core/import-creds.c
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2022-04-08 00:18:55 +0200
committerLennart Poettering <lennart@poettering.net>2022-04-28 18:12:00 +0200
commit4b9a4b01793170b9b17467711195552ef1f25ab8 (patch)
tree37dd12c36b4a0667092f0a15c7ef390f610e4ba6 /src/core/import-creds.c
parent5c1d67af465ab6921beec3f864ffdf1670ca4e1e (diff)
downloadsystemd-4b9a4b01793170b9b17467711195552ef1f25ab8.tar.gz
pid1: import creds from sd-stub + qemu + kernel cmdline
Let's beef up our system credential game a bit, and explicitly import creds from sd-stub, from qemu fw_cfg and the kernel cmdline and expose them in the same way as those passed in from nspawn. Specifically, this will imprt such credentials to /run/credentials/@system (if the source can be trusted, as in the qemu/kernel cmdline case) and /run/credentials/@encrypted (otherwise, such as sd-stub provided ones). Once imported we'll set the $CREDENTIALS_PATH env var for PID 1, like it would be done by a container manager for the payload. (Conversely, we'll also creat a symlink from /run/credentials/@system to whatever is set in $CREDENTIALS_PATH in case we are invoked by a container manager, thus providing a fixed path where system credentials are found).
Diffstat (limited to 'src/core/import-creds.c')
-rw-r--r--src/core/import-creds.c551
1 files changed, 551 insertions, 0 deletions
diff --git a/src/core/import-creds.c b/src/core/import-creds.c
new file mode 100644
index 0000000000..8b87434683
--- /dev/null
+++ b/src/core/import-creds.c
@@ -0,0 +1,551 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "copy.h"
+#include "creds-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "import-creds.h"
+#include "io-util.h"
+#include "mkdir-label.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "recurse-dir.h"
+#include "strv.h"
+
+/* This imports credentials passed in from environments higher up (VM manager, boot loader, …) and rearranges
+ * them so that later code can access them using our regular credential protocol
+ * (i.e. $CREDENTIALS_DIRECTORY). It's supposed to be minimal glue to unify behaviour how PID 1 (and
+ * generators invoked by it) can acquire credentials from outside, to mimic how we support it for containers,
+ * but on VM/physical environments.
+ *
+ * This does three things:
+ *
+ * 1. It imports credentials picked up by sd-boot (and placed in the /.extra/credentials/ dir in the initrd)
+ * and puts them in /run/credentials/@encrypted/. Note that during the initrd→host transition the initrd root
+ * file system is cleaned out, thus it is essential we pick up these files before they are deleted. Note
+ * that these credentials originate from an untrusted source, i.e. the ESP and are not
+ * pre-authenticated. They still have to be authenticated before use.
+ *
+ * 2. It imports credentials from /proc/cmdline and puts them in /run/credentials/@system/. These come from a
+ * trusted environment (i.e. the boot loader), and are typically authenticated (if authentication is done
+ * at all). However, they are world-readable, which might be less than ideal. Hence only use this for data
+ * that doesn't require trust.
+ *
+ * 3. It imports credentials passed in through qemu's fw_cfg logic. Specifically, credential data passed in
+ * /sys/firmware/qemu_fw_cfg/by_name/opt/io.systemd.credentials/ is picked up and also placed in
+ * /run/credentials/@system/.
+ *
+ * If it picked up any credentials it will set the $CREDENTIALS_DIRECTORY and
+ * $ENCRYPTED_CREDENTIALS_DIRECTORY environment variables to point to these directories, so that processes
+ * can find them there later on. If "ramfs" is available $CREDENTIALS_DIRECTORY will be backed by it (but
+ * $ENCRYPTED_CREDENTIALS_DIRECTORY is just a regular tmpfs).
+ *
+ * Net result: the service manager can pick up trusted credentials from $CREDENTIALS_DIRECTORY afterwards,
+ * and untrusted ones from $ENCRYPTED_CREDENTIALS_DIRECTORY. */
+
+typedef struct ImportCredentialContext {
+ int target_dir_fd;
+ size_t size_sum;
+ unsigned n_credentials;
+} ImportCredentialContext;
+
+static void import_credentials_context_free(ImportCredentialContext *c) {
+ assert(c);
+
+ c->target_dir_fd = safe_close(c->target_dir_fd);
+}
+
+static int acquire_encrypted_credential_directory(ImportCredentialContext *c) {
+ int r;
+
+ assert(c);
+
+ if (c->target_dir_fd >= 0)
+ return c->target_dir_fd;
+
+ r = mkdir_safe_label(ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, 0700, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create " ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY ": %m");
+
+ c->target_dir_fd = open(ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (c->target_dir_fd < 0)
+ return log_error_errno(errno, "Failed to open " ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY ": %m");
+
+ return c->target_dir_fd;
+}
+
+static int open_credential_file_for_write(int target_dir_fd, const char *dir_name, const char *n) {
+ int fd;
+
+ assert(target_dir_fd >= 0);
+ assert(dir_name);
+ assert(n);
+
+ fd = openat(target_dir_fd, n, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL|O_NOFOLLOW, 0400);
+ if (fd < 0) {
+ if (errno == EEXIST) /* In case of EEXIST we'll only debug log! */
+ return log_debug_errno(errno, "Credential '%s' set twice, ignoring.", n);
+
+ return log_error_errno(errno, "Failed to create %s/%s: %m", dir_name, n);
+ }
+
+ return fd;
+}
+
+static bool credential_size_ok(ImportCredentialContext *c, const char *name, uint64_t size) {
+ assert(c);
+ assert(name);
+
+ if (size > CREDENTIAL_SIZE_MAX) {
+ log_warning("Credential '%s' is larger than allowed limit (%s > %s), skipping.", name, FORMAT_BYTES(size), FORMAT_BYTES(CREDENTIAL_SIZE_MAX));
+ return false;
+ }
+
+ if (size > CREDENTIALS_TOTAL_SIZE_MAX - c->size_sum) {
+ log_warning("Accumulated credential size would be above allowed limit (%s+%s > %s), skipping '%s'.",
+ FORMAT_BYTES(c->size_sum), FORMAT_BYTES(size), FORMAT_BYTES(CREDENTIALS_TOTAL_SIZE_MAX), name);
+ return false;
+ }
+
+ return true;
+}
+
+static int finalize_credentials_dir(const char *dir, const char *envvar) {
+ int r;
+
+ assert(dir);
+ assert(envvar);
+
+ /* Try to make the credentials directory read-only now */
+
+ r = make_mount_point(dir);
+ if (r < 0)
+ log_warning_errno(r, "Failed to make '%s' a mount point, ignoring: %m", dir);
+ else
+ (void) mount_nofollow_verbose(LOG_WARNING, NULL, dir, NULL, MS_BIND|MS_NODEV|MS_NOEXEC|MS_NOSUID|MS_RDONLY|MS_REMOUNT, NULL);
+
+ if (setenv(envvar, dir, /* overwrite= */ true) < 0)
+ return log_error_errno(errno, "Failed to set $%s environment variable: %m", envvar);
+
+ return 0;
+}
+
+static int import_credentials_boot(void) {
+ _cleanup_(import_credentials_context_free) ImportCredentialContext context = {
+ .target_dir_fd = -1,
+ };
+ int r;
+
+ /* systemd-stub will wrap sidecar *.cred files from the UEFI kernel image directory into initrd
+ * cpios, so that they unpack into /.extra/. We'll pick them up from there and copy them into /run/
+ * so that we can access them during the entire runtime (note that the initrd file system is erased
+ * during the initrd → host transition). Note that these credentials originate from an untrusted
+ * source (i.e. the ESP typically) and thus need to be authenticated later. We thus put them in a
+ * directory separate from the usual credentials which are from a trusted source. */
+
+ if (!in_initrd())
+ return 0;
+
+ FOREACH_STRING(p,
+ "/.extra/credentials/", /* specific to this boot menu */
+ "/.extra/global_credentials/") { /* boot partition wide */
+
+ _cleanup_free_ DirectoryEntries *de = NULL;
+ _cleanup_close_ int source_dir_fd = -1;
+
+ source_dir_fd = open(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (source_dir_fd < 0) {
+ if (errno == ENOENT) {
+ log_debug("No credentials passed via %s.", p);
+ continue;
+ }
+
+ log_warning_errno(errno, "Failed to open '%s', ignoring: %m", p);
+ continue;
+ }
+
+ r = readdir_all(source_dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read '%s' contents, ignoring: %m", p);
+ continue;
+ }
+
+ for (size_t i = 0; i < de->n_entries; i++) {
+ const struct dirent *d = de->entries[i];
+ _cleanup_close_ int cfd = -1, nfd = -1;
+ _cleanup_free_ char *n = NULL;
+ const char *e;
+ struct stat st;
+
+ e = endswith(d->d_name, ".cred");
+ if (!e)
+ continue;
+
+ /* drop .cred suffix (which we want in the ESP sidecar dir, but not for our internal
+ * processing) */
+ n = strndup(d->d_name, e - d->d_name);
+ if (!n)
+ return log_oom();
+
+ if (!credential_name_valid(n)) {
+ log_warning("Credential '%s' has invalid name, ignoring.", d->d_name);
+ continue;
+ }
+
+ cfd = openat(source_dir_fd, d->d_name, O_RDONLY|O_CLOEXEC);
+ if (cfd < 0) {
+ log_warning_errno(errno, "Failed to open %s, ignoring: %m", d->d_name);
+ continue;
+ }
+
+ if (fstat(cfd, &st) < 0) {
+ log_warning_errno(errno, "Failed to stat %s, ignoring: %m", d->d_name);
+ continue;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0) {
+ log_warning_errno(r, "Credential file %s is not a regular file, ignoring: %m", d->d_name);
+ continue;
+ }
+
+ if (!credential_size_ok(&context, n, st.st_size))
+ continue;
+
+ r = acquire_encrypted_credential_directory(&context);
+ if (r < 0)
+ return r;
+
+ nfd = open_credential_file_for_write(context.target_dir_fd, ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, n);
+ if (nfd == -EEXIST)
+ continue;
+ if (nfd < 0)
+ return r;
+
+ r = copy_bytes(cfd, nfd, st.st_size, 0);
+ if (r < 0) {
+ (void) unlinkat(context.target_dir_fd, n, 0);
+ return log_error_errno(r, "Failed to create credential '%s': %m", n);
+ }
+
+ context.size_sum += st.st_size;
+ context.n_credentials++;
+
+ log_debug("Successfully copied boot credential '%s'.", n);
+ }
+ }
+
+ if (context.n_credentials > 0) {
+ log_debug("Imported %u credentials from boot loader.", context.n_credentials);
+
+ r = finalize_credentials_dir(ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, "ENCRYPTED_CREDENTIALS_DIRECTORY");
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int acquire_credential_directory(ImportCredentialContext *c) {
+ int r;
+
+ assert(c);
+
+ if (c->target_dir_fd >= 0)
+ return c->target_dir_fd;
+
+ r = path_is_mount_point(SYSTEM_CREDENTIALS_DIRECTORY, NULL, 0);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to determine if " SYSTEM_CREDENTIALS_DIRECTORY " is a mount point: %m");
+
+ r = mkdir_safe_label(SYSTEM_CREDENTIALS_DIRECTORY, 0700, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create " SYSTEM_CREDENTIALS_DIRECTORY " mount point: %m");
+
+ r = 0; /* Now it exists and is not a mount point */
+ }
+ if (r == 0)
+ /* If not a mountpoint yet, try to mount a ramfs there (so that this stuff isn't swapped
+ * out), but if that doesn't work, let's just use the regular tmpfs it already is. */
+ (void) mount_nofollow_verbose(LOG_WARNING, "ramfs", SYSTEM_CREDENTIALS_DIRECTORY, "ramfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, "mode=0700");
+
+ c->target_dir_fd = open(SYSTEM_CREDENTIALS_DIRECTORY, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (c->target_dir_fd < 0)
+ return log_error_errno(errno, "Failed to open " SYSTEM_CREDENTIALS_DIRECTORY ": %m");
+
+ return c->target_dir_fd;
+}
+
+static int proc_cmdline_callback(const char *key, const char *value, void *data) {
+ ImportCredentialContext *c = ASSERT_PTR(data);
+ _cleanup_free_ char *n = NULL;
+ _cleanup_close_ int nfd = -1;
+ const char *colon;
+ size_t l;
+ int r;
+
+ assert(key);
+
+ if (!proc_cmdline_key_streq(key, "systemd.set_credential"))
+ return 0;
+
+ colon = value ? strchr(value, ':') : NULL;
+ if (!colon) {
+ log_warning("Credential assignment through kernel command line lacks ':' character, ignoring: %s", value);
+ return 0;
+ }
+
+ n = strndup(value, colon - value);
+ if (!n)
+ return log_oom();
+
+ if (!credential_name_valid(n)) {
+ log_warning("Credential name '%s' is invalid, ignoring.", n);
+ return 0;
+ }
+
+ colon++;
+ l = strlen(colon);
+
+ if (!credential_size_ok(c, n, l))
+ return 0;
+
+ r = acquire_credential_directory(c);
+ if (r < 0)
+ return r;
+
+ nfd = open_credential_file_for_write(c->target_dir_fd, SYSTEM_CREDENTIALS_DIRECTORY, n);
+ if (nfd == -EEXIST)
+ return 0;
+ if (nfd < 0)
+ return r;
+
+ r = loop_write(nfd, colon, l, /* do_poll= */ false);
+ if (r < 0) {
+ (void) unlinkat(c->target_dir_fd, n, 0);
+ return log_error_errno(r, "Failed to write credential: %m");
+ }
+
+ c->size_sum += l;
+ c->n_credentials++;
+
+ log_debug("Successfully processed kernel command line credential '%s'.", n);
+
+ return 0;
+}
+
+static int import_credentials_proc_cmdline(ImportCredentialContext *c) {
+ int r;
+
+ assert(c);
+
+ r = proc_cmdline_parse(proc_cmdline_callback, c, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /proc/cmdline: %m");
+
+ return 0;
+}
+
+#define QEMU_FWCFG_PATH "/sys/firmware/qemu_fw_cfg/by_name/opt/io.systemd.credentials"
+
+static int import_credentials_qemu(ImportCredentialContext *c) {
+ _cleanup_free_ DirectoryEntries *de = NULL;
+ _cleanup_close_ int source_dir_fd = -1;
+ int r;
+
+ assert(c);
+
+ source_dir_fd = open(QEMU_FWCFG_PATH, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (source_dir_fd < 0) {
+ if (errno == ENOENT) {
+ log_debug("No credentials passed via fw_cfg.");
+ return 0;
+ }
+
+ log_warning_errno(errno, "Failed to open '" QEMU_FWCFG_PATH "', ignoring: %m");
+ return 0;
+ }
+
+ r = readdir_all(source_dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read '" QEMU_FWCFG_PATH "' contents, ignoring: %m");
+ return 0;
+ }
+
+ for (size_t i = 0; i < de->n_entries; i++) {
+ const struct dirent *d = de->entries[i];
+ _cleanup_close_ int vfd = -1, rfd = -1, nfd = -1;
+ _cleanup_free_ char *szs = NULL;
+ uint64_t sz;
+
+ if (!credential_name_valid(d->d_name)) {
+ log_warning("Credential '%s' has invalid name, ignoring.", d->d_name);
+ continue;
+ }
+
+ vfd = openat(source_dir_fd, d->d_name, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (vfd < 0) {
+ log_warning_errno(errno, "Failed to open '" QEMU_FWCFG_PATH "'/%s/, ignoring: %m", d->d_name);
+ continue;
+ }
+
+ r = read_virtual_file_at(vfd, "size", LINE_MAX, &szs, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read '" QEMU_FWCFG_PATH "'/%s/size, ignoring: %m", d->d_name);
+ continue;
+ }
+
+ r = safe_atou64(strstrip(szs), &sz);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse size of credential '%s', ignoring: %s", d->d_name, szs);
+ continue;
+ }
+
+ if (!credential_size_ok(c, d->d_name, sz))
+ continue;
+
+ /* Ideally we'd just symlink the data here. Alas the kernel driver exports the raw file as
+ * having size zero, and we'd rather not have applications support such credential
+ * files. Let's hence copy the files to make them regular. */
+
+ rfd = openat(vfd, "raw", O_RDONLY|O_CLOEXEC);
+ if (rfd < 0) {
+ log_warning_errno(r, "Failed to open '" QEMU_FWCFG_PATH "'/%s/raw, ignoring: %m", d->d_name);
+ continue;
+ }
+
+ r = acquire_credential_directory(c);
+ if (r < 0)
+ return r;
+
+ nfd = open_credential_file_for_write(c->target_dir_fd, SYSTEM_CREDENTIALS_DIRECTORY, d->d_name);
+ if (nfd == -EEXIST)
+ continue;
+ if (nfd < 0)
+ return r;
+
+ r = copy_bytes(rfd, nfd, sz, 0);
+ if (r < 0) {
+ (void) unlinkat(c->target_dir_fd, d->d_name, 0);
+ return log_error_errno(r, "Failed to create credential '%s': %m", d->d_name);
+ }
+
+ c->size_sum += sz;
+ c->n_credentials++;
+
+ log_debug("Successfully copied qemu fw_cfg credential '%s'.", d->d_name);
+ }
+
+ return 0;
+}
+
+static int import_credentials_trusted(void) {
+ _cleanup_(import_credentials_context_free) ImportCredentialContext c = {
+ .target_dir_fd = -1,
+ };
+ int q, r;
+
+ r = import_credentials_qemu(&c);
+ q = import_credentials_proc_cmdline(&c);
+
+ if (c.n_credentials > 0) {
+ int z;
+
+ log_debug("Imported %u credentials from kernel command line/fw_cfg.", c.n_credentials);
+
+ z = finalize_credentials_dir(SYSTEM_CREDENTIALS_DIRECTORY, "CREDENTIALS_DIRECTORY");
+ if (z < 0)
+ return z;
+ }
+
+ return r < 0 ? r : q;
+}
+
+static int symlink_credential_dir(const char *envvar, const char *path, const char *where) {
+ int r;
+
+ assert(envvar);
+ assert(path);
+ assert(where);
+
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "String specified via $%s is not a valid absolute path, refusing: %s", envvar, path);
+
+ /* If the env var already points to where we intend to create the symlink, then most likely we
+ * already imported some creds earlier, and thus set the env var, and hence don't need to do
+ * anything. */
+ if (path_equal(path, where))
+ return 0;
+
+ r = symlink_idempotent(path, where, /* make_relative= */ true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to link $%s to %s: %m", envvar, where);
+
+ return 0;
+}
+
+int import_credentials(void) {
+ const char *received_creds_dir = NULL, *received_encrypted_creds_dir = NULL;
+ bool envvar_set = false;
+ int r, q;
+
+ r = get_credentials_dir(&received_creds_dir);
+ if (r < 0 && r != -ENXIO) /* ENXIO → env var not set yet */
+ log_warning_errno(r, "Failed to determine credentials directory, ignoring: %m");
+
+ envvar_set = r >= 0;
+
+ r = get_encrypted_credentials_dir(&received_encrypted_creds_dir);
+ if (r < 0 && r != -ENXIO) /* ENXIO → env var not set yet */
+ log_warning_errno(r, "Failed to determine encrypted credentials directory, ignoring: %m");
+
+ envvar_set = envvar_set || r >= 0;
+
+ if (envvar_set) {
+ /* Maybe an earlier stage initrd already set this up? If so, don't try to import anything again. */
+ log_debug("Not importing credentials, $CREDENTIALS_DIRECTORY or $ENCRYPTED_CREDENTIALS_DIRECTORY already set.");
+
+ /* But, let's make sure the creds are available from our regular paths. */
+ if (received_creds_dir)
+ r = symlink_credential_dir("CREDENTIALS_DIRECTORY", received_creds_dir, SYSTEM_CREDENTIALS_DIRECTORY);
+ else
+ r = 0;
+
+ if (received_encrypted_creds_dir) {
+ q = symlink_credential_dir("ENCRYPTED_CREDENTIALS_DIRECTORY", received_encrypted_creds_dir, ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY);
+ if (r >= 0)
+ r = q;
+ }
+
+ } else {
+ _cleanup_free_ char *v = NULL;
+
+ r = proc_cmdline_get_key("systemd.import_credentials", PROC_CMDLINE_STRIP_RD_PREFIX, &v);
+ if (r < 0)
+ log_debug_errno(r, "Failed to check if 'systemd.import_credentials=' kernel command line option is set, ignoring: %m");
+ else if (r > 0) {
+ r = parse_boolean(v);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse 'systemd.import_credentials=' parameter, ignoring: %m");
+ else if (r == 0) {
+ log_notice("systemd.import_credentials=no is set, skipping importing of credentials.");
+ return 0;
+ }
+ }
+
+ r = import_credentials_boot();
+
+ q = import_credentials_trusted();
+ if (r >= 0)
+ r = q;
+ }
+
+ return r;
+}