summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2023-04-21 18:22:35 +0200
committer Lennart Poettering <lennart@poettering.net> 2023-04-27 12:18:32 +0200
commit a8b993dc11319292c54b301f3faffc4a05ab5ec1 (patch)
tree a5397671b291b436283716adee1ea3668c405714 /src
parent 1a56b0c05dc14fa91f0de24f230d9b9f35cc5b05 (diff)
download systemd-a8b993dc11319292c54b301f3faffc4a05ab5ec1.tar.gz
core: add DelegateSubgroup= setting
This implements a minimal subset of #24961, but in a much more restrictive way: we only allow one level of subcgroup (as that's enough to address the no-processes-in-inner-cgroups rule), and we do not change anything about threaded cgroup logic or similar, nor do we make any of this new behaviour mandatory. All this does is the following: all non-control processes we invoke for a unit will be invoked in a subgroup of the specified name. We'll later port all our current services that use cgroup delegation over to this, i.e. user@.service, systemd-nspawn@.service and systemd-udevd.service.
Diffstat (limited to 'src')
-rw-r--r-- src/core/cgroup.c 6
-rw-r--r-- src/core/cgroup.h 1
-rw-r--r-- src/core/dbus-cgroup.c 28
-rw-r--r-- src/core/execute.c 43
-rw-r--r-- src/core/load-fragment-gperf.gperf.in 1
-rw-r--r-- src/core/load-fragment.c 36
-rw-r--r-- src/core/load-fragment.h 1
-rw-r--r-- src/shared/bus-unit-util.c 3
8 files changed, 109 insertions, 10 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index cd48183f7a..4ec5dfa587 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -293,6 +293,8 @@ void cgroup_context_done(CGroupContext *c) {
cpu_set_reset(&c->startup_cpuset_cpus);
cpu_set_reset(&c->cpuset_mems);
cpu_set_reset(&c->startup_cpuset_mems);
+
+ c->delegate_subgroup = mfree(c->delegate_subgroup);
}
static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
@@ -570,6 +572,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
prefix, managed_oom_preference_to_string(c->moom_preference),
prefix, cgroup_pressure_watch_to_string(c->memory_pressure_watch));
+ if (c->delegate_subgroup)
+ fprintf(f, "%sDelegateSubgroup: %s\n",
+ prefix, c->delegate_subgroup);
+
if (c->memory_pressure_threshold_usec != USEC_INFINITY)
fprintf(f, "%sMemoryPressureThresholdSec: %s\n",
prefix, FORMAT_TIMESPAN(c->memory_pressure_threshold_usec, 1));
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index d445ea1e8d..bbbf9408cc 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -133,6 +133,7 @@ struct CGroupContext {
bool delegate;
CGroupMask delegate_controllers;
CGroupMask disable_controllers;
+ char *delegate_subgroup;
/* For unified hierarchy */
uint64_t cpu_weight;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 3a02fcbdb1..682ad5edd5 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -435,6 +435,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("DelegateControllers", "as", property_get_delegate_controllers, 0, 0),
+ SD_BUS_PROPERTY("DelegateSubgroup", "s", NULL, offsetof(CGroupContext, delegate_subgroup), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
@@ -536,6 +537,33 @@ static int bus_cgroup_set_transient_property(
return 1;
+ } else if (streq(name, "DelegateSubgroup")) {
+ const char *s;
+
+ if (!UNIT_VTABLE(u)->can_delegate)
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!isempty(s) && cg_needs_escape(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid control group name: %s", s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (isempty(s))
+ c->delegate_subgroup = mfree(c->delegate_subgroup);
+ else {
+ r = free_and_strdup_warn(&c->delegate_subgroup, s);
+ if (r < 0)
+ return r;
+ }
+
+ unit_write_settingf(u, flags, name, "DelegateSubgroup=%s", s);
+ }
+
+ return 1;
+
} else if (STR_IN_SET(name, "DelegateControllers", "DisableControllers")) {
CGroupMask mask = 0;
diff --git a/src/core/execute.c b/src/core/execute.c
index 60f7a6439c..da7a37c187 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -4178,8 +4178,12 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p
return 0;
}
-static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
- bool using_subcgroup;
+static int exec_parameters_get_cgroup_path(
+ const ExecParameters *params,
+ const CGroupContext *c,
+ char **ret) {
+
+ const char *subgroup = NULL;
char *p;
assert(params);
@@ -4197,16 +4201,22 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **
* this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
* flag, which is only passed for the former statements, not for the latter. */
- using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
- if (using_subcgroup)
- p = path_join(params->cgroup_path, ".control");
+ if (FLAGS_SET(params->flags, EXEC_CGROUP_DELEGATE) && (FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP) || c->delegate_subgroup)) {
+ if (FLAGS_SET(params->flags, EXEC_IS_CONTROL))
+ subgroup = ".control";
+ else
+ subgroup = c->delegate_subgroup;
+ }
+
+ if (subgroup)
+ p = path_join(params->cgroup_path, subgroup);
else
p = strdup(params->cgroup_path);
if (!p)
return -ENOMEM;
*ret = p;
- return using_subcgroup;
+ return !!subgroup;
}
static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
@@ -4705,7 +4715,7 @@ static int exec_child(
if (params->cgroup_path) {
_cleanup_free_ char *p = NULL;
- r = exec_parameters_get_cgroup_path(params, &p);
+ r = exec_parameters_get_cgroup_path(params, cgroup_context, &p);
if (r < 0) {
*exit_status = EXIT_CGROUP;
return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
@@ -4880,11 +4890,26 @@ static int exec_child(
* touch a single hierarchy too. */
if (params->flags & EXEC_CGROUP_DELEGATE) {
+ _cleanup_free_ char *p = NULL;
+
r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
if (r < 0) {
*exit_status = EXIT_CGROUP;
return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
}
+
+ r = exec_parameters_get_cgroup_path(params, cgroup_context, &p);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
+ }
+ if (r > 0) {
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, p, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to adjust control subgroup access: %m");
+ }
+ }
}
if (cgroup_context && cg_unified() > 0 && is_pressure_supported() > 0) {
@@ -5635,13 +5660,13 @@ int exec_spawn(Unit *unit,
log_command_line(unit, "About to execute", command->path, command->argv);
if (params->cgroup_path) {
- r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
+ r = exec_parameters_get_cgroup_path(params, cgroup_context, &subcgroup_path);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
if (r > 0) { /* We are using a child cgroup */
r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
if (r < 0)
- return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
+ return log_unit_error_errno(unit, r, "Failed to create subcgroup '%s': %m", subcgroup_path);
/* Normally we would not propagate the xattrs to children but since we created this
* sub-cgroup internally we should do it. */
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index 8a2823b075..110bccb7ad 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -237,6 +237,7 @@
{{type}}.TasksAccounting, config_parse_bool, 0, offsetof({{type}}, cgroup_context.tasks_accounting)
{{type}}.TasksMax, config_parse_tasks_max, 0, offsetof({{type}}, cgroup_context.tasks_max)
{{type}}.Delegate, config_parse_delegate, 0, offsetof({{type}}, cgroup_context)
+{{type}}.DelegateSubgroup, config_parse_delegate_subgroup , 0, offsetof({{type}}, cgroup_context)
{{type}}.DisableControllers, config_parse_disable_controllers, 0, offsetof({{type}}, cgroup_context)
{{type}}.IPAccounting, config_parse_bool, 0, offsetof({{type}}, cgroup_context.ip_accounting)
{{type}}.IPAddressAllow, config_parse_in_addr_prefixes, AF_UNSPEC, offsetof({{type}}, cgroup_context.ip_address_allow)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 581a051d46..ebfe98d7cc 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -4029,6 +4029,42 @@ int config_parse_delegate(
return 0;
}
+int config_parse_delegate_subgroup(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = ASSERT_PTR(data);
+ UnitType t;
+
+ t = unit_name_to_type(unit);
+ assert(t >= 0);
+
+ if (!unit_vtable[t]->can_delegate) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "DelegateSubgroup= setting not supported for this unit type, ignoring.");
+ return 0;
+ }
+
+ if (isempty(rvalue)) {
+ c->delegate_subgroup = mfree(c->delegate_subgroup);
+ return 0;
+ }
+
+ if (cg_needs_escape(rvalue)) { /* Insist that specified names don't need escaping */
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid control group name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_strdup_warn(&c->delegate_subgroup, rvalue);
+}
+
int config_parse_managed_oom_mode(
const char *unit,
const char *filename,
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index a38d697338..59f02a3207 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -83,6 +83,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_cpu_shares);
CONFIG_PARSER_PROTOTYPE(config_parse_memory_limit);
CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max);
CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
+CONFIG_PARSER_PROTOTYPE(config_parse_delegate_subgroup);
CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode);
CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit);
CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_preference);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index ebbd1f7f28..a321179609 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -461,7 +461,8 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
"ManagedOOMSwap",
"ManagedOOMMemoryPressure",
"ManagedOOMPreference",
- "MemoryPressureWatch"))
+ "MemoryPressureWatch",
+ "DelegateSubgroup"))
return bus_append_string(m, field, eq);
if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) {