diff options
-rw-r--r-- | man/system-or-user-ns.xml | 8 | ||||
-rw-r--r-- | src/core/execute.c | 49 | ||||
-rw-r--r-- | src/test/test-execute.c | 59 | ||||
-rwxr-xr-x | test/units/testsuite-43.sh | 34 |
4 files changed, 98 insertions, 52 deletions
diff --git a/man/system-or-user-ns.xml b/man/system-or-user-ns.xml index 7a302d5980..532c1ef64e 100644 --- a/man/system-or-user-ns.xml +++ b/man/system-or-user-ns.xml @@ -8,9 +8,13 @@ <refsect1> <para id="singular">This option is only available for system services, or for services running in per-user - instances of the service manager when <varname>PrivateUsers=</varname> is enabled.</para> + instances of the service manager in which case <varname>PrivateUsers=</varname> is implicitly enabled + (requires unprivileged user namespaces support to be enabled in the kernel via the + <literal>kernel.unprivileged_userns_clone=</literal> sysctl).</para> <para id="plural">These options are only available for system services, or for services running in per-user - instances of the service manager when <varname>PrivateUsers=</varname> is enabled.</para> + instances of the service manager in which case <varname>PrivateUsers=</varname> is implicitly enabled + (requires unprivileged user namespaces support to be enabled in the kernel via the + <literal>kernel.unprivileged_userns_clone=</literal> sysctl).</para> </refsect1> diff --git a/src/core/execute.c b/src/core/execute.c index 879f1345b0..60f7a6439c 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -4400,6 +4400,44 @@ static void log_command_line(Unit *unit, const char *msg, const char *executable LOG_UNIT_INVOCATION_ID(unit)); } +static bool exec_context_need_unprivileged_private_users(const ExecContext *context, const Manager *manager) { + assert(context); + assert(manager); + + /* These options require PrivateUsers= when used in user units, as we need to be in a user namespace + * to have permission to enable them when not running as root. If we have effective CAP_SYS_ADMIN + * (system manager) then we have privileges and don't need this. */ + if (MANAGER_IS_SYSTEM(manager)) + return false; + + return context->private_users || + context->private_tmp || + context->private_devices || + context->private_network || + context->network_namespace_path || + context->private_ipc || + context->ipc_namespace_path || + context->private_mounts || + context->mount_apivfs || + context->n_bind_mounts > 0 || + context->n_temporary_filesystems > 0 || + context->root_directory || + !strv_isempty(context->extension_directories) || + context->protect_system != PROTECT_SYSTEM_NO || + context->protect_home != PROTECT_HOME_NO || + context->protect_kernel_tunables || + context->protect_kernel_modules || + context->protect_kernel_logs || + context->protect_control_groups || + context->protect_clock || + context->protect_hostname || + !strv_isempty(context->read_write_paths) || + !strv_isempty(context->read_only_paths) || + !strv_isempty(context->inaccessible_paths) || + !strv_isempty(context->exec_paths) || + !strv_isempty(context->no_exec_paths); +} + static int exec_child( Unit *unit, const ExecCommand *command, @@ -5032,17 +5070,22 @@ static int exec_child( } } - if (needs_sandboxing && context->private_users && have_effective_cap(CAP_SYS_ADMIN) <= 0) { + if (needs_sandboxing && exec_context_need_unprivileged_private_users(context, unit->manager)) { /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces. * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to * set up the all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */ - userns_set_up = true; r = setup_private_users(saved_uid, saved_gid, uid, gid); - if (r < 0) { + /* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let + * the actual requested operations fail (or silently continue). */ + if (r < 0 && context->private_users) { *exit_status = EXIT_USER; return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m"); } + if (r < 0) + log_unit_info_errno(unit, r, "Failed to set up user namespacing for unprivileged user, ignoring: %m"); + else + userns_set_up = true; } if (exec_needs_network_namespace(context) && runtime && runtime->shared && runtime->shared->netns_storage_socket[0] >= 0) { diff --git a/src/test/test-execute.c b/src/test/test-execute.c index fc6eba9fca..ae6227c492 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -401,9 +401,9 @@ static void test_exec_ignoresigpipe(Manager *m) { static void test_exec_privatetmp(Manager *m) { assert_se(touch("/tmp/test-exec_privatetmp") >= 0); - test(m, "exec-privatetmp-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-privatetmp-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); test(m, "exec-privatetmp-no.service", 0, CLD_EXITED); - test(m, "exec-privatetmp-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-privatetmp-disabled-by-prefix.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); unlink("/tmp/test-exec_privatetmp"); } @@ -420,10 +420,10 @@ static void test_exec_privatedevices(Manager *m) { return; } - test(m, "exec-privatedevices-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-privatedevices-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); test(m, "exec-privatedevices-no.service", 0, CLD_EXITED); - test(m, "exec-privatedevices-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); - test(m, "exec-privatedevices-yes-with-group.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_GROUP, CLD_EXITED); + test(m, "exec-privatedevices-disabled-by-prefix.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); + test(m, "exec-privatedevices-yes-with-group.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); /* We use capsh to test if the capabilities are * properly set, so be sure that it exists */ @@ -433,10 +433,10 @@ static void test_exec_privatedevices(Manager *m) { return; } - test(m, "exec-privatedevices-yes-capability-mknod.service", 0, CLD_EXITED); - test(m, "exec-privatedevices-no-capability-mknod.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED); - test(m, "exec-privatedevices-yes-capability-sys-rawio.service", 0, CLD_EXITED); - test(m, "exec-privatedevices-no-capability-sys-rawio.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-privatedevices-yes-capability-mknod.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); + test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED); + test(m, "exec-privatedevices-yes-capability-sys-rawio.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); + test(m, "exec-privatedevices-no-capability-sys-rawio.service", 0, CLD_EXITED); } static void test_exec_protecthome(Manager *m) { @@ -466,23 +466,23 @@ static void test_exec_protectkernelmodules(Manager *m) { return; } - test(m, "exec-protectkernelmodules-no-capabilities.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED); - test(m, "exec-protectkernelmodules-yes-capabilities.service", 0, CLD_EXITED); - test(m, "exec-protectkernelmodules-yes-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-protectkernelmodules-no-capabilities.service", 0, CLD_EXITED); + test(m, "exec-protectkernelmodules-yes-capabilities.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); + test(m, "exec-protectkernelmodules-yes-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_readonlypaths(Manager *m) { - test(m, "exec-readonlypaths-simple.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-readonlypaths-simple.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); if (path_is_read_only_fs("/var") > 0) { log_notice("Directory /var is readonly, skipping remaining tests in %s", __func__); return; } - test(m, "exec-readonlypaths.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-readonlypaths.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); test(m, "exec-readonlypaths-with-bindpaths.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED); - test(m, "exec-readonlypaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-readonlypaths-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_readwritepaths(Manager *m) { @@ -492,7 +492,7 @@ static void test_exec_readwritepaths(Manager *m) { return; } - test(m, "exec-readwritepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-readwritepaths-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_inaccessiblepaths(Manager *m) { @@ -502,14 +502,14 @@ static void test_exec_inaccessiblepaths(Manager *m) { return; } - test(m, "exec-inaccessiblepaths-sys.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-inaccessiblepaths-sys.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); if (path_is_read_only_fs("/") > 0) { log_notice("Root directory is readonly, skipping remaining tests in %s", __func__); return; } - test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); } static int on_spawn_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) { @@ -687,14 +687,14 @@ static void test_exec_mount_apivfs(Manager *m) { assert_se(mkdir_p("/tmp/test-exec-mount-apivfs-no/root", 0755) >= 0); - test(m, "exec-mount-apivfs-no.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED); + test(m, "exec-mount-apivfs-no.service", can_unshare || !MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); (void) rm_rf("/tmp/test-exec-mount-apivfs-no/root", REMOVE_ROOT|REMOVE_PHYSICAL); } static void test_exec_noexecpaths(Manager *m) { - test(m, "exec-noexecpaths-simple.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-noexecpaths-simple.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_temporaryfilesystem(Manager *m) { @@ -964,8 +964,8 @@ static void test_exec_passenvironment(Manager *m) { } static void test_exec_umask(Manager *m) { - test(m, "exec-umask-default.service", 0, CLD_EXITED); - test(m, "exec-umask-0177.service", 0, CLD_EXITED); + test(m, "exec-umask-default.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); + test(m, "exec-umask-0177.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_runtimedirectory(Manager *m) { @@ -1012,7 +1012,7 @@ static void test_exec_capabilityboundingset(Manager *m) { } static void test_exec_basic(Manager *m) { - test(m, "exec-basic.service", 0, CLD_EXITED); + test(m, "exec-basic.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_ambientcapabilities(Manager *m) { @@ -1052,7 +1052,7 @@ static void test_exec_ambientcapabilities(Manager *m) { } static void test_exec_privatenetwork(Manager *m) { - int r, status; + int r; r = find_executable("ip", NULL); if (r < 0) { @@ -1060,9 +1060,8 @@ static void test_exec_privatenetwork(Manager *m) { return; } - status = can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE; - test(m, "exec-privatenetwork-yes-privatemounts-no.service", status, CLD_EXITED); - test(m, "exec-privatenetwork-yes-privatemounts-yes.service", status, CLD_EXITED); + test(m, "exec-privatenetwork-yes-privatemounts-no.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-privatenetwork-yes-privatemounts-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_networknamespacepath(Manager *m) { @@ -1075,7 +1074,7 @@ static void test_exec_networknamespacepath(Manager *m) { } test(m, "exec-networknamespacepath-privatemounts-no.service", MANAGER_IS_SYSTEM(m) ? EXIT_SUCCESS : EXIT_FAILURE, CLD_EXITED); - test(m, "exec-networknamespacepath-privatemounts-yes.service", can_unshare ? EXIT_SUCCESS : EXIT_FAILURE, CLD_EXITED); + test(m, "exec-networknamespacepath-privatemounts-yes.service", can_unshare ? EXIT_SUCCESS : MANAGER_IS_SYSTEM(m) ? EXIT_FAILURE : EXIT_NAMESPACE, CLD_EXITED); } static void test_exec_oomscoreadjust(Manager *m) { @@ -1105,12 +1104,12 @@ static void test_exec_unsetenvironment(Manager *m) { } static void test_exec_specifier(Manager *m) { - test(m, "exec-specifier.service", 0, CLD_EXITED); + test(m, "exec-specifier.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED); if (MANAGER_IS_SYSTEM(m)) test(m, "exec-specifier-system.service", 0, CLD_EXITED); else test(m, "exec-specifier-user.service", 0, CLD_EXITED); - test(m, "exec-specifier@foo-bar.service", 0, CLD_EXITED); + test(m, "exec-specifier@foo-bar.service", can_unshare || MANAGER_IS_SYSTEM(m) ? 0 : EXIT_FAILURE, CLD_EXITED); test(m, "exec-specifier-interpolation.service", 0, CLD_EXITED); } diff --git a/test/units/testsuite-43.sh b/test/units/testsuite-43.sh index fe47de26f0..90cb71e7a3 100755 --- a/test/units/testsuite-43.sh +++ b/test/units/testsuite-43.sh @@ -17,7 +17,7 @@ runas testuser systemd-run --wait --user --unit=test-private-users \ runas testuser systemctl --user log-level debug runas testuser systemd-run --wait --user --unit=test-private-tmp-innerfile \ - -p PrivateUsers=yes -p PrivateTmp=yes \ + -p PrivateTmp=yes \ -P touch /tmp/innerfile.txt # File should not exist outside the job's tmp directory. test ! -e /tmp/innerfile.txt @@ -25,7 +25,7 @@ test ! -e /tmp/innerfile.txt touch /tmp/outerfile.txt # File should not appear in unit's private tmp. runas testuser systemd-run --wait --user --unit=test-private-tmp-outerfile \ - -p PrivateUsers=yes -p PrivateTmp=yes \ + -p PrivateTmp=yes \ -P test ! -e /tmp/outerfile.txt # Confirm that creating a file in home works @@ -35,7 +35,7 @@ test -e /home/testuser/works.txt # Confirm that creating a file in home is blocked under read-only runas testuser systemd-run --wait --user --unit=test-protect-home-read-only \ - -p PrivateUsers=yes -p ProtectHome=read-only \ + -p ProtectHome=read-only \ -P bash -c ' test -e /home/testuser/works.txt || exit 10 touch /home/testuser/blocked.txt && exit 11 @@ -45,13 +45,13 @@ test ! -e /home/testuser/blocked.txt # Check that tmpfs hides the whole directory runas testuser systemd-run --wait --user --unit=test-protect-home-tmpfs \ - -p PrivateUsers=yes -p ProtectHome=tmpfs \ + -p ProtectHome=tmpfs \ -P test ! -e /home/testuser # Confirm that home, /root, and /run/user are inaccessible under "yes" # shellcheck disable=SC2016 runas testuser systemd-run --wait --user --unit=test-protect-home-yes \ - -p PrivateUsers=yes -p ProtectHome=yes \ + -p ProtectHome=yes \ -P bash -c ' test "$(stat -c %a /home)" = "0" test "$(stat -c %a /root)" = "0" @@ -70,11 +70,11 @@ runas testuser systemd-run --wait --user --unit=test-group-fail \ # Check that with a new user namespace we can bind mount # files and use a different root directory runas testuser systemd-run --wait --user --unit=test-bind-mount \ - -p PrivateUsers=yes -p BindPaths=/dev/null:/etc/os-release \ + -p BindPaths=/dev/null:/etc/os-release \ test ! -s /etc/os-release runas testuser systemd-run --wait --user --unit=test-read-write \ - -p PrivateUsers=yes -p ReadOnlyPaths=/ \ + -p ReadOnlyPaths=/ \ -p ReadWritePaths="/var /run /tmp" \ -p NoExecPaths=/ -p ExecPaths=/usr \ test ! -w /etc/os-release @@ -85,50 +85,50 @@ runas testuser systemd-run --wait --user --unit=test-caps \ test -s /etc/os-release runas testuser systemd-run --wait --user --unit=test-devices \ - -p PrivateUsers=yes -p PrivateDevices=yes -p PrivateIPC=yes \ + -p PrivateDevices=yes -p PrivateIPC=yes \ sh -c "ls -1 /dev/ | wc -l | grep -q -F 18" # Same check as test/test-execute/exec-privatenetwork-yes.service runas testuser systemd-run --wait --user --unit=test-network \ - -p PrivateUsers=yes -p PrivateNetwork=yes \ + -p PrivateNetwork=yes \ /bin/sh -x -c '! ip link | grep -E "^[0-9]+: " | grep -Ev ": (lo|(erspan|gre|gretap|ip_vti|ip6_vti|ip6gre|ip6tnl|sit|tunl)0@.*):"' runas testuser systemd-run --wait --user --unit=test-hostname \ - -p PrivateUsers=yes -p ProtectHostname=yes \ + -p ProtectHostname=yes \ hostnamectl hostname foo \ && { echo 'unexpected success'; exit 1; } runas testuser systemd-run --wait --user --unit=test-clock \ - -p PrivateUsers=yes -p ProtectClock=yes \ + -p ProtectClock=yes \ timedatectl set-time "2012-10-30 18:17:16" \ && { echo 'unexpected success'; exit 1; } runas testuser systemd-run --wait --user --unit=test-kernel-tunable \ - -p PrivateUsers=yes -p ProtectKernelTunables=yes \ + -p ProtectKernelTunables=yes \ sh -c "echo 0 >/proc/sys/user/max_user_namespaces" \ && { echo 'unexpected success'; exit 1; } runas testuser systemd-run --wait --user --unit=test-kernel-mod \ - -p PrivateUsers=yes -p ProtectKernelModules=yes \ + -p ProtectKernelModules=yes \ sh -c "modprobe -r overlay && modprobe overlay" \ && { echo 'unexpected success'; exit 1; } if sysctl kernel.dmesg_restrict=0; then runas testuser systemd-run --wait --user --unit=test-kernel-log \ - -p PrivateUsers=yes -p ProtectKernelLogs=yes -p LogNamespace=yes \ + -p ProtectKernelLogs=yes -p LogNamespace=yes \ dmesg \ && { echo 'unexpected success'; exit 1; } fi unsquashfs -no-xattrs -d /tmp/img /usr/share/minimal_0.raw runas testuser systemd-run --wait --user --unit=test-root-dir \ - -p PrivateUsers=yes -p RootDirectory=/tmp/img \ + -p RootDirectory=/tmp/img \ grep MARKER=1 /etc/os-release mkdir /tmp/img_bind mount --bind /tmp/img /tmp/img_bind runas testuser systemd-run --wait --user --unit=test-root-dir-bind \ - -p PrivateUsers=yes -p RootDirectory=/tmp/img_bind -p MountFlags=private \ + -p RootDirectory=/tmp/img_bind -p MountFlags=private \ grep MARKER=1 /etc/os-release umount /tmp/img_bind @@ -137,7 +137,7 @@ mkdir -p /tmp/a /tmp/b /tmp/c if unshare --mount --user --map-root-user mount -t overlay overlay /tmp/c -o lowerdir=/tmp/a:/tmp/b; then unsquashfs -no-xattrs -d /tmp/app2 /usr/share/app1.raw runas testuser systemd-run --wait --user --unit=test-extension-dir \ - -p PrivateUsers=yes -p ExtensionDirectories=/tmp/app2 \ + -p ExtensionDirectories=/tmp/app2 \ -p TemporaryFileSystem=/run -p RootDirectory=/tmp/img \ -p MountAPIVFS=yes \ grep PORTABLE_PREFIXES=app1 /usr/lib/extension-release.d/extension-release.app2 |