diff options
author | Daan De Meyer <daan.j.demeyer@gmail.com> | 2021-09-09 16:12:55 +0100 |
---|---|---|
committer | Daan De Meyer <daan.j.demeyer@gmail.com> | 2021-09-20 13:53:11 +0100 |
commit | 064a5c1438690f9454ad9bff05ec4ec2acfa67d2 (patch) | |
tree | eb03352f75a79307dd38d86e913b3e1a52521eaf /src/oom | |
parent | f2ed82d5107cfe557470958a7c0857202952aac3 (diff) | |
download | systemd-064a5c1438690f9454ad9bff05ec4ec2acfa67d2.tar.gz |
oom: Add support for user unit ManagedOOM property updates
Compared to PID1 where systemd-oomd has to be the client to PID1
because PID1 is a more privileged process than systemd-oomd, systemd-oomd
is the more privileged process compared to a user manager so we have
user managers be the client whereas systemd-oomd is now the server.
The same varlink protocol is used between user managers and systemd-oomd
to deliver ManagedOOM property updates. systemd-oomd now sets up a varlink
server that user managers connect to to send ManagedOOM property updates.
We also add extra validation to make sure that non-root senders don't
send updates for cgroups they don't own.
The integration test was extended to repeat the chill/bloat test using
a user manager instead of PID1.
Diffstat (limited to 'src/oom')
-rw-r--r-- | src/oom/oomd-manager.c | 105 | ||||
-rw-r--r-- | src/oom/oomd-manager.h | 9 | ||||
-rw-r--r-- | src/oom/oomd.c | 9 |
3 files changed, 110 insertions, 13 deletions
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c index b026621905..d20c14f5aa 100644 --- a/src/oom/oomd-manager.c +++ b/src/oom/oomd-manager.c @@ -1,11 +1,14 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "sd-daemon.h" + #include "bus-log-control-api.h" #include "bus-util.h" #include "bus-polkit.h" #include "cgroup-util.h" #include "fd-util.h" #include "fileio.h" +#include "format-util.h" #include "memory-util.h" #include "oomd-manager-bus.h" #include "oomd-manager.h" @@ -40,7 +43,7 @@ static int managed_oom_mode(const char *name, JsonVariant *v, JsonDispatchFlags return 0; } -static int process_managed_oom_message(Manager *m, JsonVariant *parameters) { +static int process_managed_oom_message(Manager *m, uid_t uid, JsonVariant *parameters) { JsonVariant *c, *cgroups; int r; @@ -75,6 +78,23 @@ static int process_managed_oom_message(Manager *m, JsonVariant *parameters) { if (r < 0) continue; + if (uid != 0) { + uid_t cg_uid; + + r = cg_path_get_owner_uid(message.path, &cg_uid); + if (r < 0) { + log_debug("Failed to get cgroup %s owner uid: %m", message.path); + continue; + } + + /* Let's not be lenient for permission errors and skip processing if we receive an + * update for a cgroup that doesn't belong to the user. */ + if (uid != cg_uid) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "cgroup path owner UID does not match sender uid " + "(" UID_FMT " != " UID_FMT ")", uid, cg_uid); + } + monitor_hm = streq(message.property, "ManagedOOMSwap") ? m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts; @@ -112,6 +132,24 @@ static int process_managed_oom_message(Manager *m, JsonVariant *parameters) { return 0; } +static int process_managed_oom_request( + Varlink *link, + JsonVariant *parameters, + VarlinkMethodFlags flags, + void *userdata) { + Manager *m = userdata; + uid_t uid; + int r; + + assert(m); + + r = varlink_get_peer_uid(link, &uid); + if (r < 0) + return log_error_errno(r, "Failed to get varlink peer uid: %m"); + + return process_managed_oom_message(m, uid, parameters); +} + static int process_managed_oom_reply( Varlink *link, JsonVariant *parameters, @@ -119,6 +157,7 @@ static int process_managed_oom_reply( VarlinkReplyFlags flags, void *userdata) { Manager *m = userdata; + uid_t uid; int r; assert(m); @@ -129,11 +168,17 @@ static int process_managed_oom_reply( goto finish; } - r = process_managed_oom_message(m, parameters); + r = varlink_get_peer_uid(link, &uid); + if (r < 0) { + log_error_errno(r, "Failed to get varlink peer uid: %m"); + goto finish; + } + + r = process_managed_oom_message(m, uid, parameters); finish: if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) - m->varlink = varlink_close_unref(link); + m->varlink_client = varlink_close_unref(link); return r; } @@ -279,9 +324,9 @@ static int acquire_managed_oom_connect(Manager *m) { assert(m); assert(m->event); - r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM); + r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM); if (r < 0) - return log_error_errno(r, "Failed to connect to %s: %m", VARLINK_ADDR_PATH_MANAGED_OOM); + return log_error_errno(r, "Failed to connect to " VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM ": %m"); (void) varlink_set_userdata(link, m); (void) varlink_set_description(link, "oomd"); @@ -299,7 +344,7 @@ static int acquire_managed_oom_connect(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to observe varlink call: %m"); - m->varlink = TAKE_PTR(link); + m->varlink_client = TAKE_PTR(link); return 0; } @@ -321,7 +366,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void return log_error_errno(r, "Failed to set relative time for timer: %m"); /* Reconnect if our connection dropped */ - if (!m->varlink) { + if (!m->varlink_client) { r = acquire_managed_oom_connect(m); if (r < 0) return log_error_errno(r, "Failed to acquire varlink connection: %m"); @@ -411,7 +456,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t return log_error_errno(r, "Failed to set relative time for timer: %m"); /* Reconnect if our connection dropped */ - if (!m->varlink) { + if (!m->varlink_client) { r = acquire_managed_oom_connect(m); if (r < 0) return log_error_errno(r, "Failed to acquire varlink connection: %m"); @@ -568,7 +613,8 @@ static int monitor_memory_pressure_contexts(Manager *m) { Manager* manager_free(Manager *m) { assert(m); - varlink_close_unref(m->varlink); + varlink_server_unref(m->varlink_server); + varlink_close_unref(m->varlink_client); sd_event_source_unref(m->swap_context_event_source); sd_event_source_unref(m->mem_pressure_context_event_source); sd_event_unref(m->event); @@ -652,12 +698,47 @@ static int manager_connect_bus(Manager *m) { return 0; } +static int manager_varlink_init(Manager *m, int fd) { + _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL; + int r; + + assert(m); + assert(!m->varlink_server); + + r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA); + if (r < 0) + return log_error_errno(r, "Failed to allocate varlink server object: %m"); + + varlink_server_set_userdata(s, m); + + r = varlink_server_bind_method(s, "io.systemd.oom.ReportManagedOOMCGroups", process_managed_oom_request); + if (r < 0) + return log_error_errno(r, "Failed to register varlink method: %m"); + + if (fd < 0) + r = varlink_server_listen_address(s, VARLINK_ADDR_PATH_MANAGED_OOM_USER, 0666); + else + r = varlink_server_listen_fd(s, fd); + if (r < 0) + return log_error_errno(r, "Failed to bind to varlink socket: %m"); + + r = varlink_server_attach_event(s, m->event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return log_error_errno(r, "Failed to attach varlink connection to event loop: %m"); + + log_debug("Initialized systemd-oomd varlink server"); + + m->varlink_server = TAKE_PTR(s); + return 0; +} + int manager_start( Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, - usec_t mem_pressure_usec) { + usec_t mem_pressure_usec, + int fd) { unsigned long l, f; int r; @@ -692,6 +773,10 @@ int manager_start( if (r < 0) return r; + r = manager_varlink_init(m, fd); + if (r < 0) + return r; + r = monitor_memory_pressure_contexts(m); if (r < 0) return r; diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h index 3edc31d0b9..8f0dd412da 100644 --- a/src/oom/oomd-manager.h +++ b/src/oom/oomd-manager.h @@ -53,7 +53,12 @@ struct Manager { sd_event_source *swap_context_event_source; sd_event_source *mem_pressure_context_event_source; - Varlink *varlink; + /* This varlink object is used to manage the subscription from systemd-oomd to PID1 which it uses to + * listen for changes in ManagedOOM settings (oomd client - systemd server). */ + Varlink *varlink_client; + /* This varlink server object is used to manage systemd-oomd's varlink server which is used by user + * managers to report changes in ManagedOOM settings (oomd server - systemd client). */ + VarlinkServer *varlink_server; }; Manager* manager_free(Manager *m); @@ -61,7 +66,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free); int manager_new(Manager **ret); -int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec); +int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec, int fd); int manager_get_dump_string(Manager *m, char **ret); diff --git a/src/oom/oomd.c b/src/oom/oomd.c index 5b50833df7..8f9369983c 100644 --- a/src/oom/oomd.c +++ b/src/oom/oomd.c @@ -136,6 +136,12 @@ static int run(int argc, char *argv[]) { /* Do some basic requirement checks for running systemd-oomd. It's not exhaustive as some of the other * requirements do not have a reliable means to check for in code. */ + int n = sd_listen_fds(0); + if (n > 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Received too many file descriptors"); + + int fd = n == 1 ? SD_LISTEN_FDS_START : -1; + /* SwapTotal is always available in /proc/meminfo and defaults to 0, even on swap-disabled kernels. */ r = get_proc_field("/proc/meminfo", "SwapTotal", WHITESPACE, &swap); if (r < 0) @@ -175,7 +181,8 @@ static int run(int argc, char *argv[]) { arg_dry_run, arg_swap_used_limit_permyriad, arg_mem_pressure_limit_permyriad, - arg_mem_pressure_usec); + arg_mem_pressure_usec, + fd); if (r < 0) return log_error_errno(r, "Failed to start up daemon: %m"); |