summaryrefslogtreecommitdiff
path: root/src/shared
diff options
context:
space:
mode:
authorMichal Sekletar <msekleta@redhat.com>2019-03-12 18:58:26 +0100
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2019-06-24 16:58:54 +0200
commitb070c7c0e133362ab5e20875e7294908004266af (patch)
tree2aa0a0554298819e9c5fff3e7190e6261b43f92d /src/shared
parentc4556774496326b7288013008c0798540f88702c (diff)
downloadsystemd-b070c7c0e133362ab5e20875e7294908004266af.tar.gz
core: introduce NUMAPolicy and NUMAMask options
Make possible to set NUMA allocation policy for manager. Manager's policy is by default inherited to all forked off processes. However, it is possible to override the policy on per-service basis. Currently we support, these policies: default, prefer, bind, interleave, local. See man 2 set_mempolicy for details on each policy. Overall NUMA policy actually consists of two parts. Policy itself and bitmask representing NUMA nodes where is policy effective. Node mask can be specified using related option, NUMAMask. Default mask can be overwritten on per-service level.
Diffstat (limited to 'src/shared')
-rw-r--r--src/shared/bus-unit-util.c28
-rw-r--r--src/shared/cpu-set-util.c93
-rw-r--r--src/shared/cpu-set-util.h28
-rw-r--r--src/shared/exit-status.c3
-rw-r--r--src/shared/exit-status.h1
5 files changed, 153 insertions, 0 deletions
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index fd26b86359..bb30e8f151 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -1049,6 +1049,34 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
return bus_append_byte_array(m, field, array, allocated);
}
+ if (streq(field, "NUMAPolicy")) {
+ r = mpol_from_string(eq);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "i", (int32_t) r);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "NUMAMask")) {
+ _cleanup_(cpu_set_reset) CPUSet nodes = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ r = parse_cpu_set(eq, &nodes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = cpu_set_to_dbus(&nodes, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize NUMAMask: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
if (STR_IN_SET(field, "RestrictAddressFamilies", "SystemCallFilter")) {
int whitelist = 1;
const char *p = eq;
diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c
index b0036c7f61..f27543dfe2 100644
--- a/src/shared/cpu-set-util.c
+++ b/src/shared/cpu-set-util.c
@@ -7,12 +7,20 @@
#include "alloc-util.h"
#include "cpu-set-util.h"
+#include "dirent-util.h"
+#include "errno-util.h"
#include "extract-word.h"
+#include "fd-util.h"
#include "log.h"
#include "macro.h"
#include "memory-util.h"
+#include "missing_syscall.h"
#include "parse-util.h"
+#include "stat-util.h"
#include "string-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "util.h"
char* cpu_set_to_string(const CPUSet *a) {
_cleanup_free_ char *str = NULL;
@@ -287,3 +295,88 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
s = (CPUSet) {};
return 0;
}
+
+bool numa_policy_is_valid(const NUMAPolicy *policy) {
+ assert(policy);
+
+ if (!mpol_is_valid(numa_policy_get_type(policy)))
+ return false;
+
+ if (!policy->nodes.set &&
+ !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED))
+ return false;
+
+ if (policy->nodes.set &&
+ numa_policy_get_type(policy) == MPOL_PREFERRED &&
+ CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1)
+ return false;
+
+ return true;
+}
+
+static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) {
+ unsigned node, bits = 0, ulong_bits;
+ _cleanup_free_ unsigned long *out = NULL;
+
+ assert(policy);
+ assert(ret_maxnode);
+ assert(ret_nodes);
+
+ if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) ||
+ (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) {
+ *ret_nodes = NULL;
+ *ret_maxnode = 0;
+ return 0;
+ }
+
+ bits = policy->nodes.allocated * 8;
+ ulong_bits = sizeof(unsigned long) * 8;
+
+ out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long)));
+ if (!out)
+ return -ENOMEM;
+
+ /* We don't make any assumptions about internal type libc is using to store NUMA node mask.
+ Hence we need to convert the node mask to the representation expected by set_mempolicy() */
+ for (node = 0; node < bits; node++)
+ if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set))
+ out[node / ulong_bits] |= 1ul << (node % ulong_bits);
+
+ *ret_nodes = TAKE_PTR(out);
+ *ret_maxnode = bits + 1;
+ return 0;
+}
+
+int apply_numa_policy(const NUMAPolicy *policy) {
+ int r;
+ _cleanup_free_ unsigned long *nodes = NULL;
+ unsigned long maxnode;
+
+ assert(policy);
+
+ if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
+ return -EOPNOTSUPP;
+
+ if (!numa_policy_is_valid(policy))
+ return -EINVAL;
+
+ r = numa_policy_to_mempolicy(policy, &maxnode, &nodes);
+ if (r < 0)
+ return r;
+
+ r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static const char* const mpol_table[] = {
+ [MPOL_DEFAULT] = "default",
+ [MPOL_PREFERRED] = "preferred",
+ [MPOL_BIND] = "bind",
+ [MPOL_INTERLEAVE] = "interleave",
+ [MPOL_LOCAL] = "local",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mpol, int);
diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h
index fd6a15f446..27812dfd59 100644
--- a/src/shared/cpu-set-util.h
+++ b/src/shared/cpu-set-util.h
@@ -4,6 +4,7 @@
#include <sched.h>
#include "macro.h"
+#include "missing_syscall.h"
/* This wraps the libc interface with a variable to keep the allocated size. */
typedef struct CPUSet {
@@ -48,3 +49,30 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
int cpus_in_affinity_mask(void);
+
+static inline bool mpol_is_valid(int t) {
+ return t >= MPOL_DEFAULT && t <= MPOL_LOCAL;
+}
+
+typedef struct NUMAPolicy {
+ /* Always use numa_policy_get_type() to read the value */
+ int type;
+ CPUSet nodes;
+} NUMAPolicy;
+
+bool numa_policy_is_valid(const NUMAPolicy *p);
+
+static inline int numa_policy_get_type(const NUMAPolicy *p) {
+ return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type;
+}
+
+static inline void numa_policy_reset(NUMAPolicy *p) {
+ assert(p);
+ cpu_set_reset(&p->nodes);
+ p->type = -1;
+}
+
+int apply_numa_policy(const NUMAPolicy *policy);
+
+const char* mpol_to_string(int i) _const_;
+int mpol_from_string(const char *s) _pure_;
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
index 26b3060d9b..58ebc3ca4d 100644
--- a/src/shared/exit-status.c
+++ b/src/shared/exit-status.c
@@ -157,6 +157,9 @@ const char* exit_status_to_string(int status, ExitStatusLevel level) {
case EXIT_CONFIGURATION_DIRECTORY:
return "CONFIGURATION_DIRECTORY";
+ case EXIT_NUMA_POLICY:
+ return "NUMA_POLICY";
+
case EXIT_EXCEPTION:
return "EXCEPTION";
}
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
index 510eb319cf..5637e6aa04 100644
--- a/src/shared/exit-status.h
+++ b/src/shared/exit-status.h
@@ -69,6 +69,7 @@ enum {
EXIT_CACHE_DIRECTORY,
EXIT_LOGS_DIRECTORY, /* 240 */
EXIT_CONFIGURATION_DIRECTORY,
+ EXIT_NUMA_POLICY,
EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
};