summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaan De Meyer <daan.j.demeyer@gmail.com>2021-09-09 16:12:55 +0100
committerDaan De Meyer <daan.j.demeyer@gmail.com>2021-09-20 13:53:11 +0100
commit064a5c1438690f9454ad9bff05ec4ec2acfa67d2 (patch)
treeeb03352f75a79307dd38d86e913b3e1a52521eaf
parentf2ed82d5107cfe557470958a7c0857202952aac3 (diff)
downloadsystemd-064a5c1438690f9454ad9bff05ec4ec2acfa67d2.tar.gz
oom: Add support for user unit ManagedOOM property updates
Compared to PID1 where systemd-oomd has to be the client to PID1 because PID1 is a more privileged process than systemd-oomd, systemd-oomd is the more privileged process compared to a user manager so we have user managers be the client whereas systemd-oomd is now the server. The same varlink protocol is used between user managers and systemd-oomd to deliver ManagedOOM property updates. systemd-oomd now sets up a varlink server that user managers connect to to send ManagedOOM property updates. We also add extra validation to make sure that non-root senders don't send updates for cgroups they don't own. The integration test was extended to repeat the chill/bloat test using a user manager instead of PID1.
-rw-r--r--src/basic/def.h5
-rw-r--r--src/core/core-varlink.c129
-rw-r--r--src/core/manager.c2
-rw-r--r--src/core/manager.h7
-rw-r--r--src/oom/oomd-manager.c105
-rw-r--r--src/oom/oomd-manager.h9
-rw-r--r--src/oom/oomd.c9
-rw-r--r--test/test-functions3
-rw-r--r--test/units/testsuite-55.service2
-rwxr-xr-xtest/units/testsuite-55.sh33
-rw-r--r--units/meson.build1
-rw-r--r--units/systemd-oomd.service.in2
-rw-r--r--units/systemd-oomd.socket21
13 files changed, 299 insertions, 29 deletions
diff --git a/src/basic/def.h b/src/basic/def.h
index 2e60abb4f1..eccee3d3fa 100644
--- a/src/basic/def.h
+++ b/src/basic/def.h
@@ -64,4 +64,7 @@
.un.sun_path = "\0/org/freedesktop/plymouthd", \
}
-#define VARLINK_ADDR_PATH_MANAGED_OOM "/run/systemd/io.system.ManagedOOM"
+/* Path where PID1 listens for varlink subscriptions from systemd-oomd to notify of changes in ManagedOOM settings. */
+#define VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM "/run/systemd/io.system.ManagedOOM"
+/* Path where systemd-oomd listens for varlink connections from user managers to report changes in ManagedOOM settings. */
+#define VARLINK_ADDR_PATH_MANAGED_OOM_USER "/run/systemd/oom/io.system.ManagedOOM"
diff --git a/src/core/core-varlink.c b/src/core/core-varlink.c
index 29bc3367ea..04d9d96417 100644
--- a/src/core/core-varlink.c
+++ b/src/core/core-varlink.c
@@ -93,9 +93,22 @@ int manager_varlink_send_managed_oom_update(Unit *u) {
assert(u);
- if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager || !u->manager->managed_oom_varlink_request || !u->cgroup_path)
+ if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager || !u->cgroup_path)
return 0;
+ if (MANAGER_IS_SYSTEM(u->manager)) {
+ /* In system mode we can't send any notifications unless oomd connected back to us. In this
+ * mode oomd must initiate communication, not us. */
+ if (!u->manager->managed_oom_varlink)
+ return 0;
+ } else {
+ /* If we are in user mode, let's connect to oomd if we aren't connected yet. In this mode we
+ * must initiate communication to oomd, not the other way round. */
+ r = manager_varlink_init(u->manager);
+ if (r <= 0)
+ return r;
+ }
+
c = unit_get_cgroup_context(u);
if (!c)
return 0;
@@ -120,7 +133,16 @@ int manager_varlink_send_managed_oom_update(Unit *u) {
if (r < 0)
return r;
- return varlink_notify(u->manager->managed_oom_varlink_request, v);
+ if (MANAGER_IS_SYSTEM(u->manager))
+ /* in system mode, oomd is our client, thus send out notifications as replies to the
+ * initiating method call from them. */
+ r = varlink_notify(u->manager->managed_oom_varlink, v);
+ else
+ /* in user mode, we are oomd's client, thus send out notifications as method calls that do
+ * not expect a reply. */
+ r = varlink_send(u->manager->managed_oom_varlink, "io.systemd.oom.ReportManagedOOMCGroups", v);
+
+ return r;
}
static int build_managed_oom_cgroups_json(Manager *m, JsonVariant **ret) {
@@ -194,7 +216,7 @@ static int vl_method_subscribe_managed_oom_cgroups(
/* We only take one subscriber for this method so return an error if there's already an existing one.
* This shouldn't happen since systemd-oomd is the only client of this method. */
- if (FLAGS_SET(flags, VARLINK_METHOD_MORE) && m->managed_oom_varlink_request)
+ if (FLAGS_SET(flags, VARLINK_METHOD_MORE) && m->managed_oom_varlink)
return varlink_error(link, VARLINK_ERROR_SUBSCRIPTION_TAKEN, NULL);
r = build_managed_oom_cgroups_json(m, &v);
@@ -204,9 +226,27 @@ static int vl_method_subscribe_managed_oom_cgroups(
if (!FLAGS_SET(flags, VARLINK_METHOD_MORE))
return varlink_reply(link, v);
- assert(!m->managed_oom_varlink_request);
- m->managed_oom_varlink_request = varlink_ref(link);
- return varlink_notify(m->managed_oom_varlink_request, v);
+ assert(!m->managed_oom_varlink);
+ m->managed_oom_varlink = varlink_ref(link);
+ return varlink_notify(m->managed_oom_varlink, v);
+}
+
+static int manager_varlink_send_managed_oom_initial(Manager *m) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ assert(m->managed_oom_varlink);
+
+ r = build_managed_oom_cgroups_json(m, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_send(m->managed_oom_varlink, "io.systemd.oom.ReportManagedOOMCGroups", v);
}
static int vl_method_get_user_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
@@ -433,18 +473,18 @@ static void vl_disconnect(VarlinkServer *s, Varlink *link, void *userdata) {
assert(s);
assert(link);
- if (link == m->managed_oom_varlink_request)
- m->managed_oom_varlink_request = varlink_unref(link);
+ if (link == m->managed_oom_varlink)
+ m->managed_oom_varlink = varlink_unref(link);
}
-int manager_varlink_init(Manager *m) {
+static int manager_varlink_init_system(Manager *m) {
_cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
int r;
assert(m);
if (m->varlink_server)
- return 0;
+ return 1;
if (!MANAGER_IS_SYSTEM(m))
return 0;
@@ -475,7 +515,7 @@ int manager_varlink_init(Manager *m) {
if (r < 0)
return log_error_errno(r, "Failed to bind to varlink socket: %m");
- r = varlink_server_listen_address(s, VARLINK_ADDR_PATH_MANAGED_OOM, 0666);
+ r = varlink_server_listen_address(s, VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM, 0666);
if (r < 0)
return log_error_errno(r, "Failed to bind to varlink socket: %m");
}
@@ -485,9 +525,73 @@ int manager_varlink_init(Manager *m) {
return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
m->varlink_server = TAKE_PTR(s);
+ return 1;
+}
+
+static int vl_reply(Varlink *link, JsonVariant *parameters, const char *error_id, VarlinkReplyFlags flags, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+
+ if (error_id)
+ log_debug("varlink systemd-oomd client error: %s", error_id);
+
+ if (FLAGS_SET(flags, VARLINK_REPLY_ERROR) && FLAGS_SET(flags, VARLINK_REPLY_LOCAL)) {
+ /* Varlink connection was closed, likely because of systemd-oomd restart. Let's try to
+ * reconnect and send the initial ManagedOOM update again. */
+
+ m->managed_oom_varlink = varlink_unref(link);
+
+ log_debug("Reconnecting to %s", VARLINK_ADDR_PATH_MANAGED_OOM_USER);
+
+ r = manager_varlink_init(m);
+ if (r <= 0)
+ return r;
+ }
+
return 0;
}
+static int manager_varlink_init_user(Manager *m) {
+ _cleanup_(varlink_close_unrefp) Varlink *link = NULL;
+ int r;
+
+ assert(m);
+
+ if (m->managed_oom_varlink)
+ return 1;
+
+ r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM_USER);
+ if (r == -ENOENT || ERRNO_IS_DISCONNECT(r)) {
+ log_debug("systemd-oomd varlink unix socket not found, skipping user manager varlink setup");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to %s: %m", VARLINK_ADDR_PATH_MANAGED_OOM_USER);
+
+ varlink_set_userdata(link, m);
+
+ r = varlink_bind_reply(link, vl_reply);
+ if (r < 0)
+ return r;
+
+ r = varlink_attach_event(link, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ m->managed_oom_varlink = TAKE_PTR(link);
+
+ /* Queue the initial ManagedOOM update. */
+ (void) manager_varlink_send_managed_oom_initial(m);
+
+ return 1;
+}
+
+int manager_varlink_init(Manager *m) {
+ return MANAGER_IS_SYSTEM(m) ? manager_varlink_init_system(m) : manager_varlink_init_user(m);
+}
+
void manager_varlink_done(Manager *m) {
assert(m);
@@ -495,7 +599,8 @@ void manager_varlink_done(Manager *m) {
* the manager, and only then disconnect it — in two steps – so that we don't end up accidentally
* unreffing it twice. After all, closing the connection might cause the disconnect handler we
* installed (vl_disconnect() above) to be called, where we will unref it too. */
- varlink_close_unref(TAKE_PTR(m->managed_oom_varlink_request));
+ varlink_close_unref(TAKE_PTR(m->managed_oom_varlink));
m->varlink_server = varlink_server_unref(m->varlink_server);
+ m->managed_oom_varlink = varlink_close_unref(m->managed_oom_varlink);
}
diff --git a/src/core/manager.c b/src/core/manager.c
index b767b0ee6c..20a930101d 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1810,7 +1810,7 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo
r = manager_varlink_init(m);
if (r < 0)
- log_warning_errno(r, "Failed to set up Varlink server, ignoring: %m");
+ log_warning_errno(r, "Failed to set up Varlink, ignoring: %m");
/* Third, fire things up! */
manager_coldplug(m);
diff --git a/src/core/manager.h b/src/core/manager.h
index 1220c9fb16..c60f9fa9b2 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -446,8 +446,11 @@ struct Manager {
bool honor_device_enumeration;
VarlinkServer *varlink_server;
- /* Only systemd-oomd should be using this to subscribe to changes in ManagedOOM settings */
- Varlink *managed_oom_varlink_request;
+ /* When we're a system manager, this object manages the subscription from systemd-oomd to PID1 that's
+ * used to report changes in ManagedOOM settings (systemd server - oomd client). When
+ * we're a user manager, this object manages the client connection from the user manager to
+ * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
+ Varlink *managed_oom_varlink;
};
static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c
index b026621905..d20c14f5aa 100644
--- a/src/oom/oomd-manager.c
+++ b/src/oom/oomd-manager.c
@@ -1,11 +1,14 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "sd-daemon.h"
+
#include "bus-log-control-api.h"
#include "bus-util.h"
#include "bus-polkit.h"
#include "cgroup-util.h"
#include "fd-util.h"
#include "fileio.h"
+#include "format-util.h"
#include "memory-util.h"
#include "oomd-manager-bus.h"
#include "oomd-manager.h"
@@ -40,7 +43,7 @@ static int managed_oom_mode(const char *name, JsonVariant *v, JsonDispatchFlags
return 0;
}
-static int process_managed_oom_message(Manager *m, JsonVariant *parameters) {
+static int process_managed_oom_message(Manager *m, uid_t uid, JsonVariant *parameters) {
JsonVariant *c, *cgroups;
int r;
@@ -75,6 +78,23 @@ static int process_managed_oom_message(Manager *m, JsonVariant *parameters) {
if (r < 0)
continue;
+ if (uid != 0) {
+ uid_t cg_uid;
+
+ r = cg_path_get_owner_uid(message.path, &cg_uid);
+ if (r < 0) {
+ log_debug("Failed to get cgroup %s owner uid: %m", message.path);
+ continue;
+ }
+
+ /* Let's not be lenient for permission errors and skip processing if we receive an
+ * update for a cgroup that doesn't belong to the user. */
+ if (uid != cg_uid)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "cgroup path owner UID does not match sender uid "
+ "(" UID_FMT " != " UID_FMT ")", uid, cg_uid);
+ }
+
monitor_hm = streq(message.property, "ManagedOOMSwap") ?
m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts;
@@ -112,6 +132,24 @@ static int process_managed_oom_message(Manager *m, JsonVariant *parameters) {
return 0;
}
+static int process_managed_oom_request(
+ Varlink *link,
+ JsonVariant *parameters,
+ VarlinkMethodFlags flags,
+ void *userdata) {
+ Manager *m = userdata;
+ uid_t uid;
+ int r;
+
+ assert(m);
+
+ r = varlink_get_peer_uid(link, &uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get varlink peer uid: %m");
+
+ return process_managed_oom_message(m, uid, parameters);
+}
+
static int process_managed_oom_reply(
Varlink *link,
JsonVariant *parameters,
@@ -119,6 +157,7 @@ static int process_managed_oom_reply(
VarlinkReplyFlags flags,
void *userdata) {
Manager *m = userdata;
+ uid_t uid;
int r;
assert(m);
@@ -129,11 +168,17 @@ static int process_managed_oom_reply(
goto finish;
}
- r = process_managed_oom_message(m, parameters);
+ r = varlink_get_peer_uid(link, &uid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get varlink peer uid: %m");
+ goto finish;
+ }
+
+ r = process_managed_oom_message(m, uid, parameters);
finish:
if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
- m->varlink = varlink_close_unref(link);
+ m->varlink_client = varlink_close_unref(link);
return r;
}
@@ -279,9 +324,9 @@ static int acquire_managed_oom_connect(Manager *m) {
assert(m);
assert(m->event);
- r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM);
+ r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM);
if (r < 0)
- return log_error_errno(r, "Failed to connect to %s: %m", VARLINK_ADDR_PATH_MANAGED_OOM);
+ return log_error_errno(r, "Failed to connect to " VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM ": %m");
(void) varlink_set_userdata(link, m);
(void) varlink_set_description(link, "oomd");
@@ -299,7 +344,7 @@ static int acquire_managed_oom_connect(Manager *m) {
if (r < 0)
return log_error_errno(r, "Failed to observe varlink call: %m");
- m->varlink = TAKE_PTR(link);
+ m->varlink_client = TAKE_PTR(link);
return 0;
}
@@ -321,7 +366,7 @@ static int monitor_swap_contexts_handler(sd_event_source *s, uint64_t usec, void
return log_error_errno(r, "Failed to set relative time for timer: %m");
/* Reconnect if our connection dropped */
- if (!m->varlink) {
+ if (!m->varlink_client) {
r = acquire_managed_oom_connect(m);
if (r < 0)
return log_error_errno(r, "Failed to acquire varlink connection: %m");
@@ -411,7 +456,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
return log_error_errno(r, "Failed to set relative time for timer: %m");
/* Reconnect if our connection dropped */
- if (!m->varlink) {
+ if (!m->varlink_client) {
r = acquire_managed_oom_connect(m);
if (r < 0)
return log_error_errno(r, "Failed to acquire varlink connection: %m");
@@ -568,7 +613,8 @@ static int monitor_memory_pressure_contexts(Manager *m) {
Manager* manager_free(Manager *m) {
assert(m);
- varlink_close_unref(m->varlink);
+ varlink_server_unref(m->varlink_server);
+ varlink_close_unref(m->varlink_client);
sd_event_source_unref(m->swap_context_event_source);
sd_event_source_unref(m->mem_pressure_context_event_source);
sd_event_unref(m->event);
@@ -652,12 +698,47 @@ static int manager_connect_bus(Manager *m) {
return 0;
}
+static int manager_varlink_init(Manager *m, int fd) {
+ _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+ int r;
+
+ assert(m);
+ assert(!m->varlink_server);
+
+ r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate varlink server object: %m");
+
+ varlink_server_set_userdata(s, m);
+
+ r = varlink_server_bind_method(s, "io.systemd.oom.ReportManagedOOMCGroups", process_managed_oom_request);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink method: %m");
+
+ if (fd < 0)
+ r = varlink_server_listen_address(s, VARLINK_ADDR_PATH_MANAGED_OOM_USER, 0666);
+ else
+ r = varlink_server_listen_fd(s, fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind to varlink socket: %m");
+
+ r = varlink_server_attach_event(s, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ log_debug("Initialized systemd-oomd varlink server");
+
+ m->varlink_server = TAKE_PTR(s);
+ return 0;
+}
+
int manager_start(
Manager *m,
bool dry_run,
int swap_used_limit_permyriad,
int mem_pressure_limit_permyriad,
- usec_t mem_pressure_usec) {
+ usec_t mem_pressure_usec,
+ int fd) {
unsigned long l, f;
int r;
@@ -692,6 +773,10 @@ int manager_start(
if (r < 0)
return r;
+ r = manager_varlink_init(m, fd);
+ if (r < 0)
+ return r;
+
r = monitor_memory_pressure_contexts(m);
if (r < 0)
return r;
diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h
index 3edc31d0b9..8f0dd412da 100644
--- a/src/oom/oomd-manager.h
+++ b/src/oom/oomd-manager.h
@@ -53,7 +53,12 @@ struct Manager {
sd_event_source *swap_context_event_source;
sd_event_source *mem_pressure_context_event_source;
- Varlink *varlink;
+ /* This varlink object is used to manage the subscription from systemd-oomd to PID1 which it uses to
+ * listen for changes in ManagedOOM settings (oomd client - systemd server). */
+ Varlink *varlink_client;
+ /* This varlink server object is used to manage systemd-oomd's varlink server which is used by user
+ * managers to report changes in ManagedOOM settings (oomd server - systemd client). */
+ VarlinkServer *varlink_server;
};
Manager* manager_free(Manager *m);
@@ -61,7 +66,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
int manager_new(Manager **ret);
-int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
+int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec, int fd);
int manager_get_dump_string(Manager *m, char **ret);
diff --git a/src/oom/oomd.c b/src/oom/oomd.c
index 5b50833df7..8f9369983c 100644
--- a/src/oom/oomd.c
+++ b/src/oom/oomd.c
@@ -136,6 +136,12 @@ static int run(int argc, char *argv[]) {
/* Do some basic requirement checks for running systemd-oomd. It's not exhaustive as some of the other
* requirements do not have a reliable means to check for in code. */
+ int n = sd_listen_fds(0);
+ if (n > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Received too many file descriptors");
+
+ int fd = n == 1 ? SD_LISTEN_FDS_START : -1;
+
/* SwapTotal is always available in /proc/meminfo and defaults to 0, even on swap-disabled kernels. */
r = get_proc_field("/proc/meminfo", "SwapTotal", WHITESPACE, &swap);
if (r < 0)
@@ -175,7 +181,8 @@ static int run(int argc, char *argv[]) {
arg_dry_run,
arg_swap_used_limit_permyriad,
arg_mem_pressure_limit_permyriad,
- arg_mem_pressure_usec);
+ arg_mem_pressure_usec,
+ fd);
if (r < 0)
return log_error_errno(r, "Failed to start up daemon: %m");
diff --git a/test/test-functions b/test/test-functions
index c07073bf9d..821fa55294 100644
--- a/test/test-functions
+++ b/test/test-functions
@@ -1030,6 +1030,9 @@ install_systemd() {
echo LogLevel=debug >>"$initdir/etc/systemd/system.conf"
# store coredumps in journal
echo Storage=journal >>"$initdir/etc/systemd/coredump.conf"
+ # Propagate SYSTEMD_UNIT_PATH to user systemd managers
+ mkdir "$initdir/etc/systemd/system/user@.service.d/"
+ echo -e "[Service]\nPassEnvironment=SYSTEMD_UNIT_PATH\n" >"$initdir/etc/systemd/system/user@.service.d/override.conf"
}
get_ldpath() {
diff --git a/test/units/testsuite-55.service b/test/units/testsuite-55.service
index 592e9d5bec..65cc1e6815 100644
--- a/test/units/testsuite-55.service
+++ b/test/units/testsuite-55.service
@@ -1,5 +1,7 @@
[Unit]
Description=TESTSUITE-55-OOMD
+After=user@4711.service
+Wants=user@4711.service
[Service]
ExecStartPre=rm -f /failed /skipped /testok
diff --git a/test/units/testsuite-55.sh b/test/units/testsuite-55.sh
index e988dc5878..945e192420 100755
--- a/test/units/testsuite-55.sh
+++ b/test/units/testsuite-55.sh
@@ -57,6 +57,39 @@ done
if systemctl status testsuite-55-testbloat.service; then exit 42; fi
if ! systemctl status testsuite-55-testchill.service; then exit 24; fi
+# Make sure we also work correctly on user units.
+
+runas() {
+ declare userid=$1
+ shift
+ # shellcheck disable=SC2016
+ su "$userid" -s /bin/sh -c 'XDG_RUNTIME_DIR=/run/user/$UID exec "$@"' -- sh "$@"
+}
+
+runas testuser systemctl start --user testsuite-55-testchill.service
+runas testuser systemctl start --user testsuite-55-testbloat.service
+
+# Verify systemd-oomd is monitoring the expected units
+oomctl | grep -E "/user.slice.*/testsuite-55-workload.slice"
+oomctl | grep "20.00%"
+oomctl | grep "Default Memory Pressure Duration: 2s"
+
+runas testuser systemctl --user status testsuite-55-testchill.service
+
+# systemd-oomd watches for elevated pressure for 2 seconds before acting.
+# It can take time to build up pressure so either wait 2 minutes or for the service to fail.
+timeout="$(date -ud "2 minutes" +%s)"
+while [[ $(date -u +%s) -le $timeout ]]; do
+ if ! runas testuser systemctl --user status testsuite-55-testbloat.service; then
+ break
+ fi
+ sleep 2
+done
+
+# testbloat should be killed and testchill should be fine
+if runas testuser systemctl --user status testsuite-55-testbloat.service; then exit 42; fi
+if ! runas testuser systemctl --user status testsuite-55-testchill.service; then exit 24; fi
+
# only run this portion of the test if we can set xattrs
if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then
sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down
diff --git a/units/meson.build b/units/meson.build
index 9398df721e..b8cc33e597 100644
--- a/units/meson.build
+++ b/units/meson.build
@@ -163,6 +163,7 @@ units = [
['user.slice', ''],
['var-lib-machines.mount', 'ENABLE_MACHINED',
'remote-fs.target.wants/ machines.target.wants/'],
+ ['systemd-oomd.socket', 'ENABLE_OOMD'],
]
in_units = [
diff --git a/units/systemd-oomd.service.in b/units/systemd-oomd.service.in
index 44f71c9e36..9f248e2ba4 100644
--- a/units/systemd-oomd.service.in
+++ b/units/systemd-oomd.service.in
@@ -18,6 +18,8 @@ ConditionControlGroupController=memory
ConditionPathExists=/proc/pressure/cpu
ConditionPathExists=/proc/pressure/io
ConditionPathExists=/proc/pressure/memory
+Requires=systemd-oomd.socket
+After=systemd-oomd.socket
[Service]
AmbientCapabilities=CAP_KILL CAP_DAC_OVERRIDE
diff --git a/units/systemd-oomd.socket b/units/systemd-oomd.socket
new file mode 100644
index 0000000000..47cd0e7555
--- /dev/null
+++ b/units/systemd-oomd.socket
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=Userspace Out-Of-Memory (OOM) Killer Socket
+Documentation=man:systemd-oomd.service(8)
+DefaultDependencies=no
+Before=sockets.target
+
+[Socket]
+ListenStream=/run/systemd/oom/io.system.ManagedOOM
+SocketMode=0666
+
+[Install]
+WantedBy=sockets.target