summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2019-04-02 17:30:11 +0200
committerGitHub <noreply@github.com>2019-04-02 17:30:11 +0200
commit82c604607f30f6ab6bf9e55e88b41e5579983520 (patch)
tree7c93d0785d80c3fd6dfb86011a9145482b4264c8
parent42561fc99c8d7230470fe42cbebe24a5d7cbbb70 (diff)
parent6d85ba729949c00e4723b9517f804b9016ede6e9 (diff)
downloadsystemd-82c604607f30f6ab6bf9e55e88b41e5579983520.tar.gz
Merge pull request #12056 from poettering/seccomp-suid-sgid
Introduce RestrictSUIDSGID= for disabling SUID/SGID file creation
-rw-r--r--NEWS15
-rw-r--r--TODO3
-rw-r--r--docs/TRANSIENT-SETTINGS.md1
-rw-r--r--man/systemd.exec.xml49
-rw-r--r--src/analyze/analyze-security.c12
-rw-r--r--src/core/dbus-execute.c4
-rw-r--r--src/core/execute.c22
-rw-r--r--src/core/execute.h1
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/unit.c10
-rw-r--r--src/shared/bus-unit-util.c12
-rw-r--r--src/shared/seccomp-util.c140
-rw-r--r--src/shared/seccomp-util.h1
-rw-r--r--src/test/test-seccomp.c208
-rw-r--r--test/fuzz/fuzz-unit-file/directives.service1
-rw-r--r--units/systemd-coredump@.service.in1
-rw-r--r--units/systemd-hostnamed.service.in1
-rw-r--r--units/systemd-journal-gatewayd.service.in1
-rw-r--r--units/systemd-journal-remote.service.in1
-rw-r--r--units/systemd-journal-upload.service.in1
-rw-r--r--units/systemd-journald.service.in1
-rw-r--r--units/systemd-localed.service.in1
-rw-r--r--units/systemd-logind.service.in1
-rw-r--r--units/systemd-networkd.service.in1
-rw-r--r--units/systemd-resolved.service.in1
-rw-r--r--units/systemd-timedated.service.in1
-rw-r--r--units/systemd-timesyncd.service.in1
-rw-r--r--units/systemd-udevd.service.in3
28 files changed, 466 insertions, 30 deletions
diff --git a/NEWS b/NEWS
index e0829002b9..603aa63550 100644
--- a/NEWS
+++ b/NEWS
@@ -201,6 +201,21 @@ CHANGES WITH 242 in spe:
done anymore, and instead calling `systemctl preset-all` is
recommended after the first installation of systemd.
+ * A new boolean sandboxing option RestrictSUIDSGID= has been added that
+ is built on seccomp. When turned on creation of SUID/SGID files is
+ prohibited.
+
+ * The NoNewPrivileges= and the new RestrictSUIDSGID= options are now
+ implied if DynamicUser= is turned on for a service. This hardens
+ these services, so that they neither can benefit from nor create
+ SUID/SGID executables. This is a minor compatibility breakage, given
+ that when DynamicUser= was first introduced SUID/SGID behaviour was
+ unaffected. However, the security benefit of these two options is
+ substantial, and the setting is still relatively new, hence we opted
+ to make it mandatory for services with dynamic users.
+
+ …
+
CHANGES WITH 241:
* The default locale can now be configured at compile time. Otherwise,
diff --git a/TODO b/TODO
index 010c2ad41d..663e4fedd8 100644
--- a/TODO
+++ b/TODO
@@ -32,9 +32,6 @@ Features:
- Make sure resume= and resume_offset= on the kernel cmdline always take
precedence
-* maybe add a seccomp-based high-level filter that blocks creation of suid/sgid
- files.
-
* make MAINPID= message reception checks even stricter: if service uses User=,
then check sending UID and ignore message if it doesn't match the user or
root.
diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md
index 343df66754..f081fdb2ce 100644
--- a/docs/TRANSIENT-SETTINGS.md
+++ b/docs/TRANSIENT-SETTINGS.md
@@ -155,6 +155,7 @@ All execution-related settings are available for transient units.
✓ MemoryDenyWriteExecute=
✓ RestrictNamespaces=
✓ RestrictRealtime=
+✓ RestrictSUIDSGID=
✓ RestrictAddressFamilies=
✓ LockPersonality=
✓ LimitCPU=
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index f8c46a2995..688147ea32 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -252,7 +252,9 @@
of the service, and hence the lifetime of the dynamic user/group. Since <filename>/tmp</filename> and
<filename>/var/tmp</filename> are usually the only world-writable directories on a system this
ensures that a unit making use of dynamic user/group allocation cannot leave files around after unit
- termination. Moreover <varname>ProtectSystem=strict</varname> and
+ termination. Furthermore <varname>NoNewPrivileges=</varname> and <varname>RestrictSUIDSGID=</varname>
+ are implicitly enabled to ensure that processes invoked cannot take benefit or create SUID/SGID files
+ or directories. Moreover <varname>ProtectSystem=strict</varname> and
<varname>ProtectHome=read-only</varname> are implied, thus prohibiting the service to write to
arbitrary file system locations. In order to allow the service to write to certain directories, they
have to be whitelisted using <varname>ReadWritePaths=</varname>, but care must be taken so that
@@ -379,19 +381,21 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<varlistentry>
<term><varname>NoNewPrivileges=</varname></term>
- <listitem><para>Takes a boolean argument. If true, ensures that the service process and all its children can
- never gain new privileges through <function>execve()</function> (e.g. via setuid or setgid bits, or filesystem
- capabilities). This is the simplest and most effective way to ensure that a process and its children can never
- elevate privileges again. Defaults to false, but certain settings override this and ignore the value of this
- setting. This is the case when <varname>SystemCallFilter=</varname>,
- <varname>SystemCallArchitectures=</varname>, <varname>RestrictAddressFamilies=</varname>,
- <varname>RestrictNamespaces=</varname>, <varname>PrivateDevices=</varname>,
- <varname>ProtectKernelTunables=</varname>, <varname>ProtectKernelModules=</varname>,
- <varname>MemoryDenyWriteExecute=</varname>, <varname>RestrictRealtime=</varname>, or
- <varname>LockPersonality=</varname> are specified. Note that even if this setting is overridden by them,
- <command>systemctl show</command> shows the original value of this setting. Also see
- <ulink url="https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html">No New Privileges
- Flag</ulink>. </para></listitem>
+ <listitem><para>Takes a boolean argument. If true, ensures that the service process and all its
+ children can never gain new privileges through <function>execve()</function> (e.g. via setuid or
+ setgid bits, or filesystem capabilities). This is the simplest and most effective way to ensure that
+ a process and its children can never elevate privileges again. Defaults to false, but certain
+ settings override this and ignore the value of this setting. This is the case when
+ <varname>SystemCallFilter=</varname>, <varname>SystemCallArchitectures=</varname>,
+ <varname>RestrictAddressFamilies=</varname>, <varname>RestrictNamespaces=</varname>,
+ <varname>PrivateDevices=</varname>, <varname>ProtectKernelTunables=</varname>,
+ <varname>ProtectKernelModules=</varname>, <varname>MemoryDenyWriteExecute=</varname>,
+ <varname>RestrictRealtime=</varname>, <varname>RestrictSUIDSGID=</varname>,
+ <varname>DynamicUser=</varname> or <varname>LockPersonality=</varname> are specified. Note that even
+ if this setting is overridden by them, <command>systemctl show</command> shows the original value of
+ this setting. Also see <ulink
+ url="https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html">No New Privileges
+ Flag</ulink>.</para></listitem>
</varlistentry>
<varlistentry>
@@ -1393,6 +1397,23 @@ RestrictNamespaces=~cgroup net</programlisting>
</varlistentry>
<varlistentry>
+ <term><varname>RestrictSUIDSGID=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If set, any attempts to set the set-user-ID (SUID) or
+ set-group-ID (SGID) bits on files or directories will be denied (for details on these bits see
+ <citerefentry
+ project='man-pages'><refentrytitle>inode</refentrytitle><manvolnum>7</manvolnum></citerefentry>). If
+ running in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant>
+ capability (e.g. setting <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is
+ implied. As the SUID/SGID bits are mechanisms to elevate privileges, and allows users to acquire the
+ identity of other users, it is recommended to restrict creation of SUID/SGID files to the few
+ programs that actually require them. Note that this restricts marking of any type of file system
+ object with these bits, including both regular files and directories (where the SGID is a different
+ meaning than for files, see documentation). This option is implied if <varname>DynamicUser=</varname>
+ is enabled. Defaults to off.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>RemoveIPC=</varname></term>
<listitem><para>Takes a boolean parameter. If set, all System V and POSIX IPC objects owned by the user and
diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c
index a978ed6da8..b1b88855af 100644
--- a/src/analyze/analyze-security.c
+++ b/src/analyze/analyze-security.c
@@ -75,6 +75,7 @@ struct security_info {
uint64_t restrict_namespaces;
bool restrict_realtime;
+ bool restrict_suid_sgid;
char *root_directory;
char *root_image;
@@ -1149,6 +1150,16 @@ static const struct security_assessor security_assessor_table[] = {
.offset = offsetof(struct security_info, restrict_realtime),
},
{
+ .id = "RestrictSUIDSGID=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictSUIDSGID=",
+ .description_good = "SUID/SGID file creation by service is restricted",
+ .description_bad = "Service may create SUID/SGID files",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_suid_sgid),
+ },
+ {
.id = "RestrictNamespaces=~CLONE_NEWUSER",
.url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
.description_good = "Service cannot create user namespaces",
@@ -1881,6 +1892,7 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
{ "RestrictAddressFamilies", "(bas)", property_read_restrict_address_families, 0 },
{ "RestrictNamespaces", "t", NULL, offsetof(struct security_info, restrict_namespaces) },
{ "RestrictRealtime", "b", NULL, offsetof(struct security_info, restrict_realtime) },
+ { "RestrictSUIDSGID", "b", NULL, offsetof(struct security_info, restrict_suid_sgid) },
{ "RootDirectory", "s", NULL, offsetof(struct security_info, root_directory) },
{ "RootImage", "s", NULL, offsetof(struct security_info, root_image) },
{ "SupplementaryGroups", "as", NULL, offsetof(struct security_info, supplementary_groups) },
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index d894dbdd7b..653847acad 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -771,6 +771,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("ConfigurationDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_CONFIGURATION].paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictSUIDSGID", "b", bus_property_get_bool, offsetof(ExecContext, restrict_suid_sgid), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1128,6 +1129,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "RestrictRealtime"))
return bus_set_transient_bool(u, name, &c->restrict_realtime, message, flags, error);
+ if (streq(name, "RestrictSUIDSGID"))
+ return bus_set_transient_bool(u, name, &c->restrict_suid_sgid, message, flags, error);
+
if (streq(name, "DynamicUser"))
return bus_set_transient_bool(u, name, &c->dynamic_user, message, flags, error);
diff --git a/src/core/execute.c b/src/core/execute.c
index a74967c4d3..5e1a74d0bc 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1404,6 +1404,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
return context_has_address_families(c) ||
c->memory_deny_write_execute ||
c->restrict_realtime ||
+ c->restrict_suid_sgid ||
exec_context_restrict_namespaces_set(c) ||
c->protect_kernel_tunables ||
c->protect_kernel_modules ||
@@ -1509,6 +1510,19 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
return seccomp_restrict_realtime();
}
+static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->restrict_suid_sgid)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
+ return 0;
+
+ return seccomp_restrict_suid_sgid();
+}
+
static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
assert(u);
assert(c);
@@ -3567,6 +3581,12 @@ static int exec_child(
return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
}
+ r = apply_restrict_suid_sgid(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
+ }
+
r = apply_restrict_namespaces(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
@@ -4218,6 +4238,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
"%sIgnoreSIGPIPE: %s\n"
"%sMemoryDenyWriteExecute: %s\n"
"%sRestrictRealtime: %s\n"
+ "%sRestrictSUIDSGID: %s\n"
"%sKeyringMode: %s\n"
"%sProtectHostname: %s\n",
prefix, c->umask,
@@ -4237,6 +4258,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->ignore_sigpipe),
prefix, yes_no(c->memory_deny_write_execute),
prefix, yes_no(c->restrict_realtime),
+ prefix, yes_no(c->restrict_suid_sgid),
prefix, exec_keyring_mode_to_string(c->keyring_mode),
prefix, yes_no(c->protect_hostname));
diff --git a/src/core/execute.h b/src/core/execute.h
index b612a10e2e..23bf3b546a 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -261,6 +261,7 @@ struct ExecContext {
bool memory_deny_write_execute;
bool restrict_realtime;
+ bool restrict_suid_sgid;
bool lock_personality;
unsigned long personality;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index a2ed7f2abe..73b368e702 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -74,6 +74,7 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0,
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
+$1.RestrictSUIDSGID, config_parse_bool, 0, offsetof($1, exec_context.restrict_suid_sgid)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
@@ -82,6 +83,7 @@ $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CO
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictSUIDSGID, config_parse_warn_compat, DISABLED_CONFIGURATION 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
diff --git a/src/core/unit.c b/src/core/unit.c
index 35c268cd3b..2cde494a7e 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -4088,14 +4088,20 @@ int unit_patch_contexts(Unit *u) {
return -ENOMEM;
}
- /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID
- * around in the file system or on IPC objects. Hence enforce a strict sandbox. */
+ /* If the dynamic user option is on, let's make sure that the unit can't leave its
+ * UID/GID around in the file system or on IPC objects. Hence enforce a strict
+ * sandbox. */
ec->private_tmp = true;
ec->remove_ipc = true;
ec->protect_system = PROTECT_SYSTEM_STRICT;
if (ec->protect_home == PROTECT_HOME_NO)
ec->protect_home = PROTECT_HOME_READ_ONLY;
+
+ /* Make sure this service can neither benefit from SUID/SGID binaries nor create
+ * them. */
+ ec->no_new_privileges = true;
+ ec->restrict_suid_sgid = true;
}
}
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index b27227aeb6..b42a9e5c5b 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -746,12 +746,12 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
return bus_append_string(m, field, eq);
if (STR_IN_SET(field,
- "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
- "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
- "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix",
- "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
- "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups",
- "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality", "ProtectHostname"))
+ "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "PrivateTmp",
+ "PrivateDevices", "PrivateNetwork", "PrivateUsers", "PrivateMounts",
+ "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime",
+ "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectKernelModules",
+ "ProtectControlGroups", "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality",
+ "ProtectHostname", "RestrictSUIDSGID"))
return bus_append_parse_boolean(m, field, eq);
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index ba3f433106..7a179998bd 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -1,12 +1,14 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
+#include <fcntl.h>
#include <linux/seccomp.h>
#include <seccomp.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/shm.h>
+#include <sys/stat.h>
#include "af-list.h"
#include "alloc-util.h"
@@ -1776,16 +1778,20 @@ int seccomp_protect_hostname(void) {
SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(sethostname),
0);
- if (r < 0)
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
continue;
+ }
r = seccomp_rule_add_exact(
seccomp,
SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(setdomainname),
0);
- if (r < 0)
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
continue;
+ }
r = seccomp_load(seccomp);
if (IN_SET(r, -EPERM, -EACCES))
@@ -1796,3 +1802,133 @@ int seccomp_protect_hostname(void) {
return 0;
}
+
+int seccomp_restrict_suid_sgid(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ /* Checks the mode_t parameter of the following system calls:
+ *
+ * → chmod() + fchmod() + fchmodat()
+ * → open() + creat() + openat()
+ * → mkdir() + mkdirat()
+ * → mknod() + mknodat()
+ */
+
+ for (unsigned bit = 0; bit < 2; bit ++) {
+ /* Block S_ISUID in the first iteration, S_ISGID in the second */
+ mode_t m = bit == 0 ? S_ISUID : S_ISGID;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(chmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdir),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdirat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(open),
+ 2,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(openat),
+ 2,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A3(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(creat),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add suid/sgid rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 477400237b..31c6c211fd 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -86,6 +86,7 @@ int seccomp_restrict_realtime(void);
int seccomp_memory_deny_write_execute(void);
int seccomp_lock_personality(unsigned long personality);
int seccomp_protect_hostname(void);
+int seccomp_restrict_suid_sgid(void);
extern const uint32_t seccomp_local_archs[];
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
index 14b37eed2c..8efbecbeff 100644
--- a/src/test/test-seccomp.c
+++ b/src/test/test-seccomp.c
@@ -19,10 +19,12 @@
#include "nulstr-util.h"
#include "process-util.h"
#include "raw-clone.h"
+#include "rm-rf.h"
#include "seccomp-util.h"
#include "set.h"
#include "string-util.h"
#include "tests.h"
+#include "tmpfile-util.h"
#include "virt.h"
#if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__)
@@ -759,6 +761,211 @@ static void test_lock_personality(void) {
assert_se(wait_for_terminate_and_check("lockpersonalityseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
}
+static int real_open(const char *path, int flags, mode_t mode) {
+ /* glibc internally calls openat() when open() is requested. Let's hence define our own wrapper for
+ * testing purposes that calls the real syscall. */
+
+ return (int) syscall(SYS_open, path, flags, mode);
+}
+
+static void test_restrict_suid_sgid(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ char path[] = "/tmp/suidsgidXXXXXX", dir[] = "/tmp/suidsgiddirXXXXXX";
+ int fd = -1, k = -1;
+ const char *z;
+
+ fd = mkostemp_safe(path);
+ assert_se(fd >= 0);
+
+ assert_se(mkdtemp(dir));
+ z = strjoina(dir, "/test");
+
+ assert_se(chmod(path, 0755 | S_ISUID) >= 0);
+ assert_se(chmod(path, 0755 | S_ISGID) >= 0);
+ assert_se(chmod(path, 0755 | S_ISGID | S_ISUID) >= 0);
+ assert_se(chmod(path, 0755) >= 0);
+
+ assert_se(fchmod(fd, 0755 | S_ISUID) >= 0);
+ assert_se(fchmod(fd, 0755 | S_ISGID) >= 0);
+ assert_se(fchmod(fd, 0755 | S_ISGID | S_ISUID) >= 0);
+ assert_se(fchmod(fd, 0755) >= 0);
+
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISUID, 0) >= 0);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID, 0) >= 0);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID | S_ISUID, 0) >= 0);
+ assert_se(fchmodat(AT_FDCWD, path, 0755, 0) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644 | S_ISUID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644 | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644 | S_ISUID | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mkdir(z, 0755 | S_ISUID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdir(z, 0755 | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdir(z, 0755 | S_ISUID | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdir(z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdirat(AT_FDCWD, z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknod(z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(seccomp_restrict_suid_sgid() >= 0);
+
+ assert_se(chmod(path, 0775 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(chmod(path, 0775 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(chmod(path, 0775 | S_ISGID | S_ISUID) < 0 && errno == EPERM);
+ assert_se(chmod(path, 0775) >= 0);
+
+ assert_se(fchmod(fd, 0775 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(fchmod(fd, 0775 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(fchmod(fd, 0775 | S_ISGID | S_ISUID) < 0 && errno == EPERM);
+ assert_se(fchmod(fd, 0775) >= 0);
+
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(fchmodat(AT_FDCWD, path, 0755, 0) >= 0);
+
+ assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(creat(z, 0644 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(creat(z, 0644 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(creat(z, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ k = creat(z, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mkdir(z, 0755 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(mkdir(z, 0755 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdir(z, 0755 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdir(z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdirat(AT_FDCWD, z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknod(z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(unlink(path) >= 0);
+ assert_se(rm_rf(dir, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("suidsgidseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG);
@@ -776,6 +983,7 @@ int main(int argc, char *argv[]) {
test_restrict_archs();
test_load_syscall_filter_set_raw();
test_lock_personality();
+ test_restrict_suid_sgid();
return 0;
}
diff --git a/test/fuzz/fuzz-unit-file/directives.service b/test/fuzz/fuzz-unit-file/directives.service
index 2fdfd51d60..86e59184d7 100644
--- a/test/fuzz/fuzz-unit-file/directives.service
+++ b/test/fuzz/fuzz-unit-file/directives.service
@@ -851,6 +851,7 @@ ReserveVT=
RestrictAddressFamilies=
RestrictNamespaces=
RestrictRealtime=
+RestrictSUIDSGID=
RuntimeDirectory=
RuntimeDirectoryMode=
RuntimeDirectoryPreserve=
diff --git a/units/systemd-coredump@.service.in b/units/systemd-coredump@.service.in
index f6166fa11c..afb2ab9d17 100644
--- a/units/systemd-coredump@.service.in
+++ b/units/systemd-coredump@.service.in
@@ -36,6 +36,7 @@ ProtectSystem=strict
RestrictAddressFamilies=AF_UNIX
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
RuntimeMaxSec=5min
StateDirectory=systemd/coredump
SystemCallArchitectures=native
diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in
index 9c925e80d9..b4f606cf78 100644
--- a/units/systemd-hostnamed.service.in
+++ b/units/systemd-hostnamed.service.in
@@ -32,6 +32,7 @@ ReadWritePaths=/etc
RestrictAddressFamilies=AF_UNIX
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
SystemCallArchitectures=native
SystemCallErrorNumber=EPERM
SystemCallFilter=@system-service sethostname
diff --git a/units/systemd-journal-gatewayd.service.in b/units/systemd-journal-gatewayd.service.in
index 0f16ae4ccb..50f774512b 100644
--- a/units/systemd-journal-gatewayd.service.in
+++ b/units/systemd-journal-gatewayd.service.in
@@ -17,7 +17,6 @@ DynamicUser=yes
ExecStart=@rootlibexecdir@/systemd-journal-gatewayd
LockPersonality=yes
MemoryDenyWriteExecute=yes
-NoNewPrivileges=yes
PrivateDevices=yes
PrivateNetwork=yes
ProtectControlGroups=yes
diff --git a/units/systemd-journal-remote.service.in b/units/systemd-journal-remote.service.in
index 71727295c3..dd6322e62c 100644
--- a/units/systemd-journal-remote.service.in
+++ b/units/systemd-journal-remote.service.in
@@ -30,6 +30,7 @@ ProtectSystem=strict
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
SystemCallArchitectures=native
User=systemd-journal-remote
WatchdogSec=3min
diff --git a/units/systemd-journal-upload.service.in b/units/systemd-journal-upload.service.in
index 10e4d657d3..e3800473ec 100644
--- a/units/systemd-journal-upload.service.in
+++ b/units/systemd-journal-upload.service.in
@@ -18,7 +18,6 @@ DynamicUser=yes
ExecStart=@rootlibexecdir@/systemd-journal-upload --save-state
LockPersonality=yes
MemoryDenyWriteExecute=yes
-NoNewPrivileges=yes
PrivateDevices=yes
ProtectControlGroups=yes
ProtectHome=yes
diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in
index 4684f095c0..fab405502a 100644
--- a/units/systemd-journald.service.in
+++ b/units/systemd-journald.service.in
@@ -28,6 +28,7 @@ RestartSec=0
RestrictAddressFamilies=AF_UNIX AF_NETLINK
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
Sockets=systemd-journald.socket systemd-journald-dev-log.socket systemd-journald-audit.socket
StandardOutput=null
SystemCallArchitectures=native
diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in
index a64e7e79a8..7bca34409a 100644
--- a/units/systemd-localed.service.in
+++ b/units/systemd-localed.service.in
@@ -33,6 +33,7 @@ ReadWritePaths=/etc
RestrictAddressFamilies=AF_UNIX
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
SystemCallArchitectures=native
SystemCallErrorNumber=EPERM
SystemCallFilter=@system-service
diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in
index 9c8938ec4a..3eef95c661 100644
--- a/units/systemd-logind.service.in
+++ b/units/systemd-logind.service.in
@@ -40,6 +40,7 @@ RestartSec=0
RestrictAddressFamilies=AF_UNIX AF_NETLINK
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
RuntimeDirectory=systemd/sessions systemd/seats systemd/users systemd/inhibit systemd/shutdown
RuntimeDirectoryPreserve=yes
SystemCallArchitectures=native
diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in
index 472ef045de..2c74da6f1e 100644
--- a/units/systemd-networkd.service.in
+++ b/units/systemd-networkd.service.in
@@ -34,6 +34,7 @@ RestartSec=0
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 AF_PACKET
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
RuntimeDirectory=systemd/netif
RuntimeDirectoryPreserve=yes
SystemCallArchitectures=native
diff --git a/units/systemd-resolved.service.in b/units/systemd-resolved.service.in
index 3144b70063..eee5d5ea8f 100644
--- a/units/systemd-resolved.service.in
+++ b/units/systemd-resolved.service.in
@@ -38,6 +38,7 @@ RestartSec=0
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
RuntimeDirectory=systemd/resolve
RuntimeDirectoryPreserve=yes
SystemCallArchitectures=native
diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in
index 46ee8c894d..df546f471f 100644
--- a/units/systemd-timedated.service.in
+++ b/units/systemd-timedated.service.in
@@ -31,6 +31,7 @@ ReadWritePaths=/etc
RestrictAddressFamilies=AF_UNIX
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
SystemCallArchitectures=native
SystemCallErrorNumber=EPERM
SystemCallFilter=@system-service @clock
diff --git a/units/systemd-timesyncd.service.in b/units/systemd-timesyncd.service.in
index 5313a90c30..6512531e1c 100644
--- a/units/systemd-timesyncd.service.in
+++ b/units/systemd-timesyncd.service.in
@@ -38,6 +38,7 @@ RestartSec=0
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=yes
RestrictRealtime=yes
+RestrictSUIDSGID=yes
RuntimeDirectory=systemd/timesync
StateDirectory=systemd/timesync
SystemCallArchitectures=native
diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in
index fb98ca4d43..e8a76cc018 100644
--- a/units/systemd-udevd.service.in
+++ b/units/systemd-udevd.service.in
@@ -28,8 +28,9 @@ TasksMax=infinity
PrivateMounts=yes
ProtectHostname=yes
MemoryDenyWriteExecute=yes
-RestrictRealtime=yes
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
+RestrictRealtime=yes
+RestrictSUIDSGID=yes
SystemCallFilter=@system-service @module @raw-io
SystemCallErrorNumber=EPERM
SystemCallArchitectures=native