summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--man/system-or-user-ns.xml8
-rw-r--r--src/core/execute.c49
-rw-r--r--src/test/test-execute.c59
-rwxr-xr-xtest/units/testsuite-43.sh34
4 files changed, 98 insertions, 52 deletions
diff --git a/man/system-or-user-ns.xml b/man/system-or-user-ns.xml
index 7a302d5980..532c1ef64e 100644
--- a/man/system-or-user-ns.xml
+++ b/man/system-or-user-ns.xml
@@ -8,9 +8,13 @@
<refsect1>
<para id="singular">This option is only available for system services, or for services running in per-user
- instances of the service manager when <varname>PrivateUsers=</varname> is enabled.</para>
+ instances of the service manager, in which case <varname>PrivateUsers=</varname> is implicitly enabled
+ (requires unprivileged user namespace support to be enabled in the kernel via the
+ <literal>kernel.unprivileged_userns_clone=</literal> sysctl).</para>
<para id="plural">These options are only available for system services, or for services running in per-user
- instances of the service manager when <varname>PrivateUsers=</varname> is enabled.</para>
+ instances of the service manager, in which case <varname>PrivateUsers=</varname> is implicitly enabled
+ (requires unprivileged user namespace support to be enabled in the kernel via the
+ <literal>kernel.unprivileged_userns_clone=</literal> sysctl).</para>
</refsect1>
diff --git a/src/core/execute.c b/src/core/execute.c
index 879f1345b0..60f7a6439c 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -4400,6 +4400,44 @@ static void log_command_line(Unit *unit, const char *msg, const char *executable
LOG_UNIT_INVOCATION_ID(unit));
}
+static bool exec_context_need_unprivileged_private_users(const ExecContext *context, const Manager *manager) {
+ assert(context);
+ assert(manager);
+
+ /* Returns true if PrivateUsers= has to be applied first: the options checked below need a user
+ * namespace when used in user units, as we lack the permission to enable them when not running as root.
+ * If we have effective CAP_SYS_ADMIN (system manager) then we have privileges and don't need this. */
+ if (MANAGER_IS_SYSTEM(manager))
+ return false;
+
+ return context->private_users ||
+ context->private_tmp ||
+ context->private_devices ||
+ context->private_network ||
+ context->network_namespace_path ||
+ context->private_ipc ||
+ context->ipc_namespace_path ||
+ context->private_mounts ||
+ context->mount_apivfs ||
+ context->n_bind_mounts > 0 ||
+ context->n_temporary_filesystems > 0 ||
+ context->root_directory ||
+ !strv_isempty(context->extension_directories) ||
+ context->protect_system != PROTECT_SYSTEM_NO ||
+ context->protect_home != PROTECT_HOME_NO ||
+ context->protect_kernel_tunables ||
+ context->protect_kernel_modules ||
+ context->protect_kernel_logs ||
+ context->protect_control_groups ||
+ context->protect_clock ||
+ context->protect_hostname ||
+ !strv_isempty(context->read_write_paths) ||
+ !strv_isempty(context->read_only_paths) ||
+ !strv_isempty(context->inaccessible_paths) ||
+ !strv_isempty(context->exec_paths) ||
+ !strv_isempty(context->no_exec_paths);
+}
+
static int exec_child(
Unit *unit,
const ExecCommand *command,
@@ -5032,17 +5070,22 @@ static int exec_child(
}
}
- if (needs_sandboxing && context->private_users && have_effective_cap(CAP_SYS_ADMIN) <= 0) {
+ if (needs_sandboxing && exec_context_need_unprivileged_private_users(context, unit->manager)) {
/* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
* Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
* set up the all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
- userns_set_up = true;
r = setup_private_users(saved_uid, saved_gid, uid, gid);
- if (r < 0) {
+ /* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let
+ * the actual requested operations fail (or silently continue). */
+ if (r < 0 && context->private_users) {
*exit_status = EXIT_USER;
return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
}
+ if (r < 0)
+ log_unit_info_errno(unit, r, "Failed to set up user namespacing for unprivileged user, ignoring: %m");
+ else
+ userns_set_up = true;
}
if (exec_needs_network_namespace(context) && runtime && runtime->shared && runtime->shared->netns_storage_socket[0] >= 0) {
diff --git a/src/test/test-execute.c b/src/test/test-execute.c
index fc6eba9fca..ae6227c492 100644
--- a/src/test/test-execute.c
+++ b/src/test/test-execute.c
@@ -401,9 +401,9 @@ static void test_exec_ignoresigpipe(Manager *m) {
static void test_exec_privatetmp(Manager *m) {
assert_se(touch("/tmp/test-exec_privatetmp") >= 0);
- test(m, "exec-privatetmp-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatetmp-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
test(m, "exec-privatetmp-no.service", 0, CLD_EXITED);
- test(m, "exec-privatetmp-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatetmp-disabled-by-prefix.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
unlink("/tmp/test-exec_privatetmp");
}
@@ -420,10 +420,10 @@ static void test_exec_privatedevices(Manager *m) {
return;
}
- test(m, "exec-privatedevices-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatedevices-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
test(m, "exec-privatedevices-no.service", 0, CLD_EXITED);
- test(m, "exec-privatedevices-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
- test(m, "exec-privatedevices-yes-with-group.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_GROUP, CLD_EXITED);
+ test(m, "exec-privatedevices-disabled-by-prefix.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-privatedevices-yes-with-group.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
/* We use capsh to test if the capabilities are
* properly set, so be sure that it exists */
@@ -433,10 +433,10 @@ static void test_exec_privatedevices(Manager *m) {
return;
}
- test(m, "exec-privatedevices-yes-capability-mknod.service", 0, CLD_EXITED);
- test(m, "exec-privatedevices-no-capability-mknod.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED);
- test(m, "exec-privatedevices-yes-capability-sys-rawio.service", 0, CLD_EXITED);
- test(m, "exec-privatedevices-no-capability-sys-rawio.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatedevices-yes-capability-mknod.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-yes-capability-sys-rawio.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-privatedevices-no-capability-sys-rawio.service", 0, CLD_EXITED);
}
static void test_exec_protecthome(Manager *m) {
@@ -466,23 +466,23 @@ static void test_exec_protectkernelmodules(Manager *m) {
return;
}
- test(m, "exec-protectkernelmodules-no-capabilities.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED);
- test(m, "exec-protectkernelmodules-yes-capabilities.service", 0, CLD_EXITED);
- test(m, "exec-protectkernelmodules-yes-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-no-capabilities.service", 0, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-yes-capabilities.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-yes-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_readonlypaths(Manager *m) {
- test(m, "exec-readonlypaths-simple.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-readonlypaths-simple.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
if (path_is_read_only_fs("/var") > 0) {
log_notice("Directory /var is readonly, skipping remaining tests in %s", __func__);
return;
}
- test(m, "exec-readonlypaths.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-readonlypaths.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
test(m, "exec-readonlypaths-with-bindpaths.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
- test(m, "exec-readonlypaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-readonlypaths-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_readwritepaths(Manager *m) {
@@ -492,7 +492,7 @@ static void test_exec_readwritepaths(Manager *m) {
return;
}
- test(m, "exec-readwritepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-readwritepaths-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_inaccessiblepaths(Manager *m) {
@@ -502,14 +502,14 @@ static void test_exec_inaccessiblepaths(Manager *m) {
return;
}
- test(m, "exec-inaccessiblepaths-sys.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-inaccessiblepaths-sys.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
if (path_is_read_only_fs("/") > 0) {
log_notice("Root directory is readonly, skipping remaining tests in %s", __func__);
return;
}
- test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
}
static int on_spawn_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
@@ -687,14 +687,14 @@ static void test_exec_mount_apivfs(Manager *m) {
assert_se(mkdir_p("/tmp/test-exec-mount-apivfs-no/root", 0755) >= 0);
- test(m, "exec-mount-apivfs-no.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-mount-apivfs-no.service", can_unshare || !MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
(void) rm_rf("/tmp/test-exec-mount-apivfs-no/root", REMOVE_ROOT|REMOVE_PHYSICAL);
}
static void test_exec_noexecpaths(Manager *m) {
- test(m, "exec-noexecpaths-simple.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-noexecpaths-simple.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_temporaryfilesystem(Manager *m) {
@@ -964,8 +964,8 @@ static void test_exec_passenvironment(Manager *m) {
}
static void test_exec_umask(Manager *m) {
- test(m, "exec-umask-default.service", 0, CLD_EXITED);
- test(m, "exec-umask-0177.service", 0, CLD_EXITED);
+ test(m, "exec-umask-default.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-umask-0177.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_runtimedirectory(Manager *m) {
@@ -1012,7 +1012,7 @@ static void test_exec_capabilityboundingset(Manager *m) {
}
static void test_exec_basic(Manager *m) {
- test(m, "exec-basic.service", 0, CLD_EXITED);
+ test(m, "exec-basic.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_ambientcapabilities(Manager *m) {
@@ -1052,7 +1052,7 @@ static void test_exec_ambientcapabilities(Manager *m) {
}
static void test_exec_privatenetwork(Manager *m) {
- int r, status;
+ int r;
r = find_executable("ip", NULL);
if (r < 0) {
@@ -1060,9 +1060,8 @@ static void test_exec_privatenetwork(Manager *m) {
return;
}
- status = can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE;
- test(m, "exec-privatenetwork-yes-privatemounts-no.service", status, CLD_EXITED);
- test(m, "exec-privatenetwork-yes-privatemounts-yes.service", status, CLD_EXITED);
+ test(m, "exec-privatenetwork-yes-privatemounts-no.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatenetwork-yes-privatemounts-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_networknamespacepath(Manager *m) {
@@ -1075,7 +1074,7 @@ static void test_exec_networknamespacepath(Manager *m) {
}
test(m, "exec-networknamespacepath-privatemounts-no.service", MANAGER_IS_SYSTEM(m) ? EXIT_SUCCESS : EXIT_FAILURE, CLD_EXITED);
- test(m, "exec-networknamespacepath-privatemounts-yes.service", can_unshare ? EXIT_SUCCESS : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-networknamespacepath-privatemounts-yes.service", can_unshare ? EXIT_SUCCESS : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED);
}
static void test_exec_oomscoreadjust(Manager *m) {
@@ -1105,12 +1104,12 @@ static void test_exec_unsetenvironment(Manager *m) {
}
static void test_exec_specifier(Manager *m) {
- test(m, "exec-specifier.service", 0, CLD_EXITED);
+ test(m, "exec-specifier.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED);
if (MANAGER_IS_SYSTEM(m))
test(m, "exec-specifier-system.service", 0, CLD_EXITED);
else
test(m, "exec-specifier-user.service", 0, CLD_EXITED);
- test(m, "exec-specifier@foo-bar.service", 0, CLD_EXITED);
+ test(m, "exec-specifier@foo-bar.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED);
test(m, "exec-specifier-interpolation.service", 0, CLD_EXITED);
}
diff --git a/test/units/testsuite-43.sh b/test/units/testsuite-43.sh
index fe47de26f0..90cb71e7a3 100755
--- a/test/units/testsuite-43.sh
+++ b/test/units/testsuite-43.sh
@@ -17,7 +17,7 @@ runas testuser systemd-run --wait --user --unit=test-private-users \
runas testuser systemctl --user log-level debug
runas testuser systemd-run --wait --user --unit=test-private-tmp-innerfile \
- -p PrivateUsers=yes -p PrivateTmp=yes \
+ -p PrivateTmp=yes \
-P touch /tmp/innerfile.txt
# File should not exist outside the job's tmp directory.
test ! -e /tmp/innerfile.txt
@@ -25,7 +25,7 @@ test ! -e /tmp/innerfile.txt
touch /tmp/outerfile.txt
# File should not appear in unit's private tmp.
runas testuser systemd-run --wait --user --unit=test-private-tmp-outerfile \
- -p PrivateUsers=yes -p PrivateTmp=yes \
+ -p PrivateTmp=yes \
-P test ! -e /tmp/outerfile.txt
# Confirm that creating a file in home works
@@ -35,7 +35,7 @@ test -e /home/testuser/works.txt
# Confirm that creating a file in home is blocked under read-only
runas testuser systemd-run --wait --user --unit=test-protect-home-read-only \
- -p PrivateUsers=yes -p ProtectHome=read-only \
+ -p ProtectHome=read-only \
-P bash -c '
test -e /home/testuser/works.txt || exit 10
touch /home/testuser/blocked.txt && exit 11
@@ -45,13 +45,13 @@ test ! -e /home/testuser/blocked.txt
# Check that tmpfs hides the whole directory
runas testuser systemd-run --wait --user --unit=test-protect-home-tmpfs \
- -p PrivateUsers=yes -p ProtectHome=tmpfs \
+ -p ProtectHome=tmpfs \
-P test ! -e /home/testuser
# Confirm that home, /root, and /run/user are inaccessible under "yes"
# shellcheck disable=SC2016
runas testuser systemd-run --wait --user --unit=test-protect-home-yes \
- -p PrivateUsers=yes -p ProtectHome=yes \
+ -p ProtectHome=yes \
-P bash -c '
test "$(stat -c %a /home)" = "0"
test "$(stat -c %a /root)" = "0"
@@ -70,11 +70,11 @@ runas testuser systemd-run --wait --user --unit=test-group-fail \
# Check that with a new user namespace we can bind mount
# files and use a different root directory
runas testuser systemd-run --wait --user --unit=test-bind-mount \
- -p PrivateUsers=yes -p BindPaths=/dev/null:/etc/os-release \
+ -p BindPaths=/dev/null:/etc/os-release \
test ! -s /etc/os-release
runas testuser systemd-run --wait --user --unit=test-read-write \
- -p PrivateUsers=yes -p ReadOnlyPaths=/ \
+ -p ReadOnlyPaths=/ \
-p ReadWritePaths="/var /run /tmp" \
-p NoExecPaths=/ -p ExecPaths=/usr \
test ! -w /etc/os-release
@@ -85,50 +85,50 @@ runas testuser systemd-run --wait --user --unit=test-caps \
test -s /etc/os-release
runas testuser systemd-run --wait --user --unit=test-devices \
- -p PrivateUsers=yes -p PrivateDevices=yes -p PrivateIPC=yes \
+ -p PrivateDevices=yes -p PrivateIPC=yes \
sh -c "ls -1 /dev/ | wc -l | grep -q -F 18"
# Same check as test/test-execute/exec-privatenetwork-yes.service
runas testuser systemd-run --wait --user --unit=test-network \
- -p PrivateUsers=yes -p PrivateNetwork=yes \
+ -p PrivateNetwork=yes \
/bin/sh -x -c '! ip link | grep -E "^[0-9]+: " | grep -Ev ": (lo|(erspan|gre|gretap|ip_vti|ip6_vti|ip6gre|ip6tnl|sit|tunl)0@.*):"'
runas testuser systemd-run --wait --user --unit=test-hostname \
- -p PrivateUsers=yes -p ProtectHostname=yes \
+ -p ProtectHostname=yes \
hostnamectl hostname foo \
&& { echo 'unexpected success'; exit 1; }
runas testuser systemd-run --wait --user --unit=test-clock \
- -p PrivateUsers=yes -p ProtectClock=yes \
+ -p ProtectClock=yes \
timedatectl set-time "2012-10-30 18:17:16" \
&& { echo 'unexpected success'; exit 1; }
runas testuser systemd-run --wait --user --unit=test-kernel-tunable \
- -p PrivateUsers=yes -p ProtectKernelTunables=yes \
+ -p ProtectKernelTunables=yes \
sh -c "echo 0 >/proc/sys/user/max_user_namespaces" \
&& { echo 'unexpected success'; exit 1; }
runas testuser systemd-run --wait --user --unit=test-kernel-mod \
- -p PrivateUsers=yes -p ProtectKernelModules=yes \
+ -p ProtectKernelModules=yes \
sh -c "modprobe -r overlay && modprobe overlay" \
&& { echo 'unexpected success'; exit 1; }
if sysctl kernel.dmesg_restrict=0; then
runas testuser systemd-run --wait --user --unit=test-kernel-log \
- -p PrivateUsers=yes -p ProtectKernelLogs=yes -p LogNamespace=yes \
+ -p ProtectKernelLogs=yes -p LogNamespace=yes \
dmesg \
&& { echo 'unexpected success'; exit 1; }
fi
unsquashfs -no-xattrs -d /tmp/img /usr/share/minimal_0.raw
runas testuser systemd-run --wait --user --unit=test-root-dir \
- -p PrivateUsers=yes -p RootDirectory=/tmp/img \
+ -p RootDirectory=/tmp/img \
grep MARKER=1 /etc/os-release
mkdir /tmp/img_bind
mount --bind /tmp/img /tmp/img_bind
runas testuser systemd-run --wait --user --unit=test-root-dir-bind \
- -p PrivateUsers=yes -p RootDirectory=/tmp/img_bind -p MountFlags=private \
+ -p RootDirectory=/tmp/img_bind -p MountFlags=private \
grep MARKER=1 /etc/os-release
umount /tmp/img_bind
@@ -137,7 +137,7 @@ mkdir -p /tmp/a /tmp/b /tmp/c
if unshare --mount --user --map-root-user mount -t overlay overlay /tmp/c -o lowerdir=/tmp/a:/tmp/b; then
unsquashfs -no-xattrs -d /tmp/app2 /usr/share/app1.raw
runas testuser systemd-run --wait --user --unit=test-extension-dir \
- -p PrivateUsers=yes -p ExtensionDirectories=/tmp/app2 \
+ -p ExtensionDirectories=/tmp/app2 \
-p TemporaryFileSystem=/run -p RootDirectory=/tmp/img \
-p MountAPIVFS=yes \
grep PORTABLE_PREFIXES=app1 /usr/lib/extension-release.d/extension-release.app2