summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2018-06-01 11:10:49 +0200
committerLennart Poettering <lennart@poettering.net>2018-06-12 16:12:10 +0200
commit228af36fff15430825dddb64d6dc6eeb47491aae (patch)
tree1cb41c3dd24c594c9f56df28ae107d37788343a6
parentf4ea7552c109942b49cc1a3c37e959716fb8c453 (diff)
downloadsystemd-228af36fff15430825dddb64d6dc6eeb47491aae.tar.gz
core: add new PrivateMounts= unit setting
This new setting is supposed to be useful in most cases where "MountFlags=slave" is currently used, i.e. as an explicit way to run a service in its own mount namespace and decouple propagation from all mounts of the new mount namespace towards the host. The effect of MountFlags=slave and PrivateMounts=yes is mostly the same, as both cause a CLONE_NEWNS namespace to be opened, and both will result in all mounts within it being mounted MS_SLAVE. The difference is mostly on the conceptual/philosophical level: configuring the propagation mode is nothing people should have to think about, in particular as the matter is not exactly easy to grok. Moreover, MountFlags= allows configuration of "private" and "shared" modes, which don't really make much sense to use in real life and are quite confusing. In particular, MountFlags=private means mounts made on the host stay pinned for good by the service, which is particularly nasty for removable media mounts. And MountFlags=shared is in most ways a NOP when used alone... The main technical difference between setting only MountFlags=slave or only PrivateMounts=yes in a unit file is that the former remounts all mounts to MS_SLAVE and leaves them there, while the latter remounts them to MS_SHARED again right after. The latter is generally a nicer approach, since it disables propagation while MS_SHARED is in effect afterwards, which is really nice as that means further namespacing down the tree will get MS_SHARED logic by default, and we unify how applications see our mounts, as we always pass them as MS_SHARED regardless of whether any mount namespacing is used or not. The effect of PrivateMounts=yes was implied already by all the other mount namespacing options. With this new option we add an explicit knob for it, to request it without any other option used as well. See: #4393
-rw-r--r--src/core/dbus-execute.c4
-rw-r--r--src/core/execute.c24
-rw-r--r--src/core/execute.h1
-rw-r--r--src/core/load-fragment-gperf.gperf.m41
-rw-r--r--src/core/namespace.c5
-rw-r--r--src/core/namespace.h1
-rw-r--r--src/shared/bus-unit-util.c2
7 files changed, 24 insertions, 14 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 8c752ceaa6..747b9d8eeb 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -744,6 +744,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_bool, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1110,6 +1111,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
+ if (streq(name, "PrivateMounts"))
+ return bus_set_transient_bool(u, name, &c->private_mounts, message, flags, error);
+
if (streq(name, "PrivateNetwork"))
return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
diff --git a/src/core/execute.c b/src/core/execute.c
index 2c64e08176..6aa4ec9c78 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1780,6 +1780,7 @@ static bool exec_needs_mount_namespace(
return true;
if (context->private_devices ||
+ context->private_mounts ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables ||
@@ -2312,7 +2313,7 @@ static int apply_mount_namespace(
_cleanup_strv_free_ char **empty_directories = NULL;
char *tmp = NULL, *var = NULL;
const char *root_dir = NULL, *root_image = NULL;
- NamespaceInfo ns_info = {};
+ NamespaceInfo ns_info;
bool needs_sandboxing;
BindMount *bind_mounts = NULL;
size_t n_bind_mounts = 0;
@@ -2342,16 +2343,7 @@ static int apply_mount_namespace(
if (r < 0)
return r;
- /*
- * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
- * sandbox info, otherwise enforce it, don't ignore protected paths and
- * fail if we are enable to apply the sandbox inside the mount namespace.
- */
- if (!context->dynamic_user && root_dir)
- ns_info.ignore_protect_paths = true;
-
needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
-
if (needs_sandboxing)
ns_info = (NamespaceInfo) {
.ignore_protect_paths = false,
@@ -2360,7 +2352,19 @@ static int apply_mount_namespace(
.protect_kernel_tunables = context->protect_kernel_tunables,
.protect_kernel_modules = context->protect_kernel_modules,
.mount_apivfs = context->mount_apivfs,
+ .private_mounts = context->private_mounts,
};
+ else if (!context->dynamic_user && root_dir)
+ /*
+ * If DynamicUser=no and RootDirectory= is set then let's pass a relaxed
+ * sandbox info, otherwise enforce it, don't ignore protected paths and
+ * fail if we are unable to apply the sandbox inside the mount namespace.
+ */
+ ns_info = (NamespaceInfo) {
+ .ignore_protect_paths = true,
+ };
+ else
+ ns_info = (NamespaceInfo) {};
r = setup_namespace(root_dir, root_image,
&ns_info, context->read_write_paths,
diff --git a/src/core/execute.h b/src/core/execute.h
index ace94338e7..f434d10e7e 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -228,6 +228,7 @@ struct ExecContext {
bool private_network;
bool private_devices;
bool private_users;
+ bool private_mounts;
ProtectSystem protect_system;
ProtectHome protect_home;
bool protect_kernel_tunables;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 44c9978c54..15fb47838c 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -114,6 +114,7 @@ $1.ProtectKernelModules, config_parse_bool, 0,
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
+$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)
$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context.protect_system)
$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context.protect_home)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context.mount_flags)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 24da3b8a64..2523c2a47f 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -1133,9 +1133,9 @@ int setup_namespace(
_cleanup_free_ void *root_hash = NULL;
MountEntry *m, *mounts = NULL;
size_t root_hash_size = 0;
- bool make_slave = false;
const char *root;
size_t n_mounts;
+ bool make_slave;
bool require_prefix = false;
int r = 0;
@@ -1200,8 +1200,7 @@ int setup_namespace(
protect_home, protect_system);
/* Set mount slave mode */
- if (root || n_mounts > 0)
- make_slave = true;
+ make_slave = root || n_mounts > 0 || ns_info->private_mounts;
if (n_mounts > 0) {
m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 705eb4e13a..e0e8e09e0f 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -50,6 +50,7 @@ typedef enum ProtectSystem {
struct NamespaceInfo {
bool ignore_protect_paths:1;
bool private_dev:1;
+ bool private_mounts:1;
bool protect_control_groups:1;
bool protect_kernel_tunables:1;
bool protect_kernel_modules:1;
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 64b7ac8d69..01d820349a 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -699,7 +699,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
if (STR_IN_SET(field,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
- "NoNewPrivileges", "SyslogLevelPrefix",
+ "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix",
"MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
"ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups",
"MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality"))