diff options
author | Lennart Poettering <lennart@poettering.net> | 2023-04-21 18:22:35 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2023-04-27 12:18:32 +0200 |
commit | a8b993dc11319292c54b301f3faffc4a05ab5ec1 (patch) | |
tree | a5397671b291b436283716adee1ea3668c405714 /src | |
parent | 1a56b0c05dc14fa91f0de24f230d9b9f35cc5b05 (diff) | |
download | systemd-a8b993dc11319292c54b301f3faffc4a05ab5ec1.tar.gz |
core: add DelegateSubgroup= setting
This implements a minimal subset of #24961, but in a much more
restrictive way: we only allow one level of subcgroup (as that's enough
to address the no-processes-in-inner-cgroups rule), and we do not change
anything about threaded cgroup logic or similar, nor make any of this new
behaviour mandatory.
All this does is the following: all non-control processes we invoke for
a unit are invoked in a subgroup of the specified name.
We'll later port all our current services that use cgroup delegation
over to this, i.e. user@.service, systemd-nspawn@.service and
systemd-udevd.service.
Diffstat (limited to 'src')
-rw-r--r-- | src/core/cgroup.c | 6 | ||||
-rw-r--r-- | src/core/cgroup.h | 1 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 28 | ||||
-rw-r--r-- | src/core/execute.c | 43 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.in | 1 | ||||
-rw-r--r-- | src/core/load-fragment.c | 36 | ||||
-rw-r--r-- | src/core/load-fragment.h | 1 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 3 |
8 files changed, 109 insertions, 10 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index cd48183f7a..4ec5dfa587 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -293,6 +293,8 @@ void cgroup_context_done(CGroupContext *c) { cpu_set_reset(&c->startup_cpuset_cpus); cpu_set_reset(&c->cpuset_mems); cpu_set_reset(&c->startup_cpuset_mems); + + c->delegate_subgroup = mfree(c->delegate_subgroup); } static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) { @@ -570,6 +572,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { prefix, managed_oom_preference_to_string(c->moom_preference), prefix, cgroup_pressure_watch_to_string(c->memory_pressure_watch)); + if (c->delegate_subgroup) + fprintf(f, "%sDelegateSubgroup: %s\n", + prefix, c->delegate_subgroup); + if (c->memory_pressure_threshold_usec != USEC_INFINITY) fprintf(f, "%sMemoryPressureThresholdSec: %s\n", prefix, FORMAT_TIMESPAN(c->memory_pressure_threshold_usec, 1)); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index d445ea1e8d..bbbf9408cc 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -133,6 +133,7 @@ struct CGroupContext { bool delegate; CGroupMask delegate_controllers; CGroupMask disable_controllers; + char *delegate_subgroup; /* For unified hierarchy */ uint64_t cpu_weight; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 3a02fcbdb1..682ad5edd5 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -435,6 +435,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0), SD_BUS_PROPERTY("DelegateControllers", "as", property_get_delegate_controllers, 0, 0), + SD_BUS_PROPERTY("DelegateSubgroup", "s", NULL, offsetof(CGroupContext, delegate_subgroup), 0), SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0), SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0), 
SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0), @@ -536,6 +537,33 @@ static int bus_cgroup_set_transient_property( return 1; + } else if (streq(name, "DelegateSubgroup")) { + const char *s; + + if (!UNIT_VTABLE(u)->can_delegate) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type"); + + r = sd_bus_message_read(message, "s", &s); + if (r < 0) + return r; + + if (!isempty(s) && cg_needs_escape(s)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid control group name: %s", s); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + if (isempty(s)) + c->delegate_subgroup = mfree(c->delegate_subgroup); + else { + r = free_and_strdup_warn(&c->delegate_subgroup, s); + if (r < 0) + return r; + } + + unit_write_settingf(u, flags, name, "DelegateSubgroup=%s", s); + } + + return 1; + } else if (STR_IN_SET(name, "DelegateControllers", "DisableControllers")) { CGroupMask mask = 0; diff --git a/src/core/execute.c b/src/core/execute.c index 60f7a6439c..da7a37c187 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -4178,8 +4178,12 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p return 0; } -static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) { - bool using_subcgroup; +static int exec_parameters_get_cgroup_path( + const ExecParameters *params, + const CGroupContext *c, + char **ret) { + + const char *subgroup = NULL; char *p; assert(params); @@ -4197,16 +4201,22 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char ** * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP * flag, which is only passed for the former statements, not for the latter. 
*/ - using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL); - if (using_subcgroup) - p = path_join(params->cgroup_path, ".control"); + if (FLAGS_SET(params->flags, EXEC_CGROUP_DELEGATE) && (FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP) || c->delegate_subgroup)) { + if (FLAGS_SET(params->flags, EXEC_IS_CONTROL)) + subgroup = ".control"; + else + subgroup = c->delegate_subgroup; + } + + if (subgroup) + p = path_join(params->cgroup_path, subgroup); else p = strdup(params->cgroup_path); if (!p) return -ENOMEM; *ret = p; - return using_subcgroup; + return !!subgroup; } static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) { @@ -4705,7 +4715,7 @@ static int exec_child( if (params->cgroup_path) { _cleanup_free_ char *p = NULL; - r = exec_parameters_get_cgroup_path(params, &p); + r = exec_parameters_get_cgroup_path(params, cgroup_context, &p); if (r < 0) { *exit_status = EXIT_CGROUP; return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m"); @@ -4880,11 +4890,26 @@ static int exec_child( * touch a single hierarchy too. 
*/ if (params->flags & EXEC_CGROUP_DELEGATE) { + _cleanup_free_ char *p = NULL; + r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid); if (r < 0) { *exit_status = EXIT_CGROUP; return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m"); } + + r = exec_parameters_get_cgroup_path(params, cgroup_context, &p); + if (r < 0) { + *exit_status = EXIT_CGROUP; + return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m"); + } + if (r > 0) { + r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, p, uid, gid); + if (r < 0) { + *exit_status = EXIT_CGROUP; + return log_unit_error_errno(unit, r, "Failed to adjust control subgroup access: %m"); + } + } } if (cgroup_context && cg_unified() > 0 && is_pressure_supported() > 0) { @@ -5635,13 +5660,13 @@ int exec_spawn(Unit *unit, log_command_line(unit, "About to execute", command->path, command->argv); if (params->cgroup_path) { - r = exec_parameters_get_cgroup_path(params, &subcgroup_path); + r = exec_parameters_get_cgroup_path(params, cgroup_context, &subcgroup_path); if (r < 0) return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m"); if (r > 0) { /* We are using a child cgroup */ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path); if (r < 0) - return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path); + return log_unit_error_errno(unit, r, "Failed to create subcgroup '%s': %m", subcgroup_path); /* Normally we would not propagate the xattrs to children but since we created this * sub-cgroup internally we should do it. 
*/ diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 8a2823b075..110bccb7ad 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -237,6 +237,7 @@ {{type}}.TasksAccounting, config_parse_bool, 0, offsetof({{type}}, cgroup_context.tasks_accounting) {{type}}.TasksMax, config_parse_tasks_max, 0, offsetof({{type}}, cgroup_context.tasks_max) {{type}}.Delegate, config_parse_delegate, 0, offsetof({{type}}, cgroup_context) +{{type}}.DelegateSubgroup, config_parse_delegate_subgroup , 0, offsetof({{type}}, cgroup_context) {{type}}.DisableControllers, config_parse_disable_controllers, 0, offsetof({{type}}, cgroup_context) {{type}}.IPAccounting, config_parse_bool, 0, offsetof({{type}}, cgroup_context.ip_accounting) {{type}}.IPAddressAllow, config_parse_in_addr_prefixes, AF_UNSPEC, offsetof({{type}}, cgroup_context.ip_address_allow) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 581a051d46..ebfe98d7cc 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -4029,6 +4029,42 @@ int config_parse_delegate( return 0; } +int config_parse_delegate_subgroup( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + CGroupContext *c = ASSERT_PTR(data); + UnitType t; + + t = unit_name_to_type(unit); + assert(t >= 0); + + if (!unit_vtable[t]->can_delegate) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "DelegateSubgroup= setting not supported for this unit type, ignoring."); + return 0; + } + + if (isempty(rvalue)) { + c->delegate_subgroup = mfree(c->delegate_subgroup); + return 0; + } + + if (cg_needs_escape(rvalue)) { /* Insist that specified names don't need escaping */ + log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid control group name, ignoring: %s", rvalue); + return 0; + } + + return 
free_and_strdup_warn(&c->delegate_subgroup, rvalue); +} + int config_parse_managed_oom_mode( const char *unit, const char *filename, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index a38d697338..59f02a3207 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -83,6 +83,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_cpu_shares); CONFIG_PARSER_PROTOTYPE(config_parse_memory_limit); CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max); CONFIG_PARSER_PROTOTYPE(config_parse_delegate); +CONFIG_PARSER_PROTOTYPE(config_parse_delegate_subgroup); CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode); CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit); CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_preference); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index ebbd1f7f28..a321179609 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -461,7 +461,8 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons "ManagedOOMSwap", "ManagedOOMMemoryPressure", "ManagedOOMPreference", - "MemoryPressureWatch")) + "MemoryPressureWatch", + "DelegateSubgroup")) return bus_append_string(m, field, eq); if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) { |