summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTopi Miettinen <toiwoton@gmail.com>2021-01-16 13:49:32 +0200
committerTopi Miettinen <topimiettinen@users.noreply.github.com>2021-01-29 12:40:52 +0000
commitddc155b2fd7807cda088c437dc836eebbcf79cea (patch)
tree512024b3042da520bffd77e1b7e0e64e0405df68 /src
parent78dff3f3d72c62357543fe1716da3886cff54a10 (diff)
downloadsystemd-ddc155b2fd7807cda088c437dc836eebbcf79cea.tar.gz
New directives NoExecPaths= ExecPaths=
Implement directives `NoExecPaths=` and `ExecPaths=` to control `MS_NOEXEC` mount flag for the file system tree. This can be used to implement file system W^X policies, and for example with allow-listing mode (NoExecPaths=/) a compromised service would not be able to execute a shell, if that was not explicitly allowed. Example: [Service] NoExecPaths=/ ExecPaths=/usr/bin/daemon /usr/lib64 /usr/lib Closes: #17942.
Diffstat (limited to 'src')
-rw-r--r--src/core/dbus-execute.c8
-rw-r--r--src/core/execute.c48
-rw-r--r--src/core/execute.h2
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/namespace.c94
-rw-r--r--src/core/namespace.h2
-rw-r--r--src/shared/bus-unit-util.c2
-rw-r--r--src/shared/mount-util.c15
-rw-r--r--src/test/test-execute.c6
-rw-r--r--src/test/test-namespace.c2
-rw-r--r--src/test/test-ns.c15
11 files changed, 161 insertions, 35 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 0fbf0b167c..1f0e27a141 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -1094,6 +1094,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ExecPaths", "as", NULL, offsetof(ExecContext, exec_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NoExecPaths", "as", NULL, offsetof(ExecContext, no_exec_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -2981,7 +2983,7 @@ int bus_exec_context_set_transient_property(
return 1;
} else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
- "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) {
+ "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths", "ExecPaths", "NoExecPaths")) {
_cleanup_strv_free_ char **l = NULL;
char ***dirs;
char **p;
@@ -3007,6 +3009,10 @@ int bus_exec_context_set_transient_property(
dirs = &c->read_write_paths;
else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths"))
dirs = &c->read_only_paths;
+ else if (streq(name, "ExecPaths"))
+ dirs = &c->exec_paths;
+ else if (streq(name, "NoExecPaths"))
+ dirs = &c->no_exec_paths;
else /* "InaccessiblePaths" */
dirs = &c->inaccessible_paths;
diff --git a/src/core/execute.c b/src/core/execute.c
index 1a679da435..b7d78f2197 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1999,7 +1999,9 @@ bool exec_needs_mount_namespace(
if (!strv_isempty(context->read_write_paths) ||
!strv_isempty(context->read_only_paths) ||
- !strv_isempty(context->inaccessible_paths))
+ !strv_isempty(context->inaccessible_paths) ||
+ !strv_isempty(context->exec_paths) ||
+ !strv_isempty(context->no_exec_paths))
return true;
if (context->n_bind_mounts > 0)
@@ -3206,6 +3208,8 @@ static int apply_mount_namespace(
&ns_info, context->read_write_paths,
needs_sandboxing ? context->read_only_paths : NULL,
needs_sandboxing ? context->inaccessible_paths : NULL,
+ needs_sandboxing ? context->exec_paths : NULL,
+ needs_sandboxing ? context->no_exec_paths : NULL,
empty_directories,
bind_mounts,
n_bind_mounts,
@@ -4815,6 +4819,8 @@ void exec_context_done(ExecContext *c) {
c->read_only_paths = strv_free(c->read_only_paths);
c->read_write_paths = strv_free(c->read_write_paths);
c->inaccessible_paths = strv_free(c->inaccessible_paths);
+ c->exec_paths = strv_free(c->exec_paths);
+ c->no_exec_paths = strv_free(c->no_exec_paths);
bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
c->bind_mounts = NULL;
@@ -5162,6 +5168,18 @@ static void strv_fprintf(FILE *f, char **l) {
fprintf(f, " %s", *g);
}
+/* Writes one line of the form "<prefix><name>: <item> <item> ..." to f, terminated by a newline.
+ * Nothing at all is written when strv is empty, so unset list settings do not show up in the dump. */
+static void strv_dump(FILE* f, const char *prefix, const char *name, char **strv) {
+        assert(f);
+        assert(prefix);
+        assert(name);
+
+        if (strv_isempty(strv))
+                return;
+
+        /* The prefix goes first, then the setting name, exactly like the open-coded
+         * fprintf(f, "%sReadWritePaths:", prefix) style calls this helper replaces. */
+        fprintf(f, "%s%s:", prefix, name);
+        strv_fprintf(f, strv);
+        fputs("\n", f);
+}
+
void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
int r;
@@ -5474,32 +5492,16 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
- if (!strv_isempty(c->supplementary_groups)) {
- fprintf(f, "%sSupplementaryGroups:", prefix);
- strv_fprintf(f, c->supplementary_groups);
- fputs("\n", f);
- }
+ strv_dump(f, prefix, "SupplementaryGroups", c->supplementary_groups);
if (c->pam_name)
fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
- if (!strv_isempty(c->read_write_paths)) {
- fprintf(f, "%sReadWritePaths:", prefix);
- strv_fprintf(f, c->read_write_paths);
- fputs("\n", f);
- }
-
- if (!strv_isempty(c->read_only_paths)) {
- fprintf(f, "%sReadOnlyPaths:", prefix);
- strv_fprintf(f, c->read_only_paths);
- fputs("\n", f);
- }
-
- if (!strv_isempty(c->inaccessible_paths)) {
- fprintf(f, "%sInaccessiblePaths:", prefix);
- strv_fprintf(f, c->inaccessible_paths);
- fputs("\n", f);
- }
+ strv_dump(f, prefix, "ReadWritePaths", c->read_write_paths);
+ strv_dump(f, prefix, "ReadOnlyPaths", c->read_only_paths);
+ strv_dump(f, prefix, "InaccessiblePaths", c->inaccessible_paths);
+ strv_dump(f, prefix, "ExecPaths", c->exec_paths);
+ strv_dump(f, prefix, "NoExecPaths", c->no_exec_paths);
for (size_t i = 0; i < c->n_bind_mounts; i++)
fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
diff --git a/src/core/execute.h b/src/core/execute.h
index f8231ba773..d615af5109 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -243,7 +243,7 @@ struct ExecContext {
char *apparmor_profile;
char *smack_process_label;
- char **read_write_paths, **read_only_paths, **inaccessible_paths;
+ char **read_write_paths, **read_only_paths, **inaccessible_paths, **exec_paths, **no_exec_paths;
unsigned long mount_flags;
BindMount *bind_mounts;
size_t n_bind_mounts;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 6e92fd80fd..6bf22c336a 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -119,6 +119,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv,
$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.ExecPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.exec_paths)
+$1.NoExecPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.no_exec_paths)
$1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
$1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
$1.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof($1, exec_context)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index e8306a8d55..4ed0991b56 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -54,6 +54,8 @@ typedef enum MountMode {
RUN,
READONLY,
READWRITE,
+ NOEXEC,
+ EXEC,
TMPFS,
READWRITE_IMPLICIT, /* Should have the lowest priority. */
_MOUNT_MODE_MAX,
@@ -66,6 +68,8 @@ typedef struct MountEntry {
bool has_prefix:1; /* Already is prefixed by the root dir? */
bool read_only:1; /* Shall this mount point be read-only? */
bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
+ bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */
+ bool exec:1; /* Shall clear MS_NOEXEC on the mount itself */
bool applied:1; /* Already applied */
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
const char *source_const; /* The source path, for bind mounts or images */
@@ -212,6 +216,8 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
[TMPFS] = "tmpfs",
[MOUNT_IMAGES] = "mount-images",
[READWRITE_IMPLICIT] = "rw-implicit",
+ [EXEC] = "exec",
+ [NOEXEC] = "noexec",
};
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(mount_mode, MountMode);
@@ -231,6 +237,18 @@ static bool mount_entry_read_only(const MountEntry *p) {
return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE, PRIVATE_TMP_READONLY);
}
+/* Tells whether MS_NOEXEC shall be set for this entry: either the entry carries the explicit noexec
+ * flag, or its mode is one of NOEXEC, INACCESSIBLE, SYSFS or PROCFS. */
+static bool mount_entry_noexec(const MountEntry *p) {
+        assert(p);
+
+        if (p->noexec)
+                return true;
+
+        return IN_SET(p->mode, NOEXEC, INACCESSIBLE, SYSFS, PROCFS);
+}
+
+/* Tells whether MS_NOEXEC shall be cleared for this entry: either the entry carries the explicit exec
+ * flag, or its mode is EXEC. */
+static bool mount_entry_exec(const MountEntry *p) {
+        assert(p);
+
+        if (p->mode == EXEC)
+                return true;
+
+        return p->exec;
+}
+
static const char *mount_entry_source(const MountEntry *p) {
assert(p);
@@ -497,7 +515,10 @@ static void drop_duplicates(MountEntry *m, size_t *n) {
path_equal(mount_entry_path(f), mount_entry_path(previous)) &&
!f->applied && !previous->applied) {
log_debug("%s (%s) is duplicate.", mount_entry_path(f), mount_mode_to_string(f->mode));
- previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */
+ /* Propagate the flags to the remaining entry */
+ previous->read_only = previous->read_only || mount_entry_read_only(f);
+ previous->noexec = previous->noexec || mount_entry_noexec(f);
+ previous->exec = previous->exec || mount_entry_exec(f);
mount_entry_done(f);
continue;
}
@@ -1057,6 +1078,8 @@ static int apply_mount(
case READONLY:
case READWRITE:
case READWRITE_IMPLICIT:
+ case EXEC:
+ case NOEXEC:
r = path_is_mount_point(mount_entry_path(m), root_directory, 0);
if (r == -ENOENT && m->ignore)
return 0;
@@ -1064,7 +1087,7 @@ static int apply_mount(
return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m",
mount_entry_path(m));
if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY
- * bit for the mount point if needed. */
+ * and MS_NOEXEC bits for the mount point if needed. */
return 0;
/* This isn't a mount point yet, let's make it one. */
what = mount_entry_path(m);
@@ -1195,7 +1218,7 @@ static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self
else
r = bind_remount_one_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, proc_self_mountinfo);
- /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked
+ /* Note that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked
* read-only already stays this way. This improves compatibility with container managers, where we
* won't attempt to undo read-only mounts already applied. */
@@ -1207,6 +1230,40 @@ static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self
return 0;
}
+/* Applies the MS_NOEXEC policy of a single mount entry by remounting it.
+ *
+ * If mount_entry_noexec() holds, MS_NOEXEC is requested to be set. Otherwise, if mount_entry_exec()
+ * holds, MS_NOEXEC is put into the flags mask but not into the new flags, i.e. it is requested to be
+ * cleared. If neither holds the entry is left alone.
+ *
+ * Entries whose mode is EMPTY_DIR or TMPFS are remounted on their own, everything else is remounted
+ * together with its submounts, passing deny_list along to the recursive helper.
+ *
+ * Returns 0 on success, when there was nothing to do, or when the remount failed with -ENOENT and the
+ * entry is marked as ignorable. Otherwise logs at debug level and returns the negative error. */
+static int make_noexec(const MountEntry *m, char **deny_list, FILE *proc_self_mountinfo) {
+        unsigned long new_flags, flags_mask;
+        bool submounts;
+        int r;
+
+        assert(m);
+        assert(proc_self_mountinfo);
+
+        if (mount_entry_noexec(m)) {
+                /* noexec takes precedence if both are requested */
+                new_flags = MS_NOEXEC;
+                flags_mask = MS_NOEXEC;
+        } else if (mount_entry_exec(m)) {
+                new_flags = 0;
+                flags_mask = MS_NOEXEC;
+        } else
+                return 0; /* No change? */
+
+        submounts = !IN_SET(m->mode, EMPTY_DIR, TMPFS);
+
+        r = submounts
+                ? bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo)
+                : bind_remount_one_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, proc_self_mountinfo);
+        if (r >= 0)
+                return 0;
+
+        if (r == -ENOENT && m->ignore)
+                return 0;
+
+        return log_debug_errno(r, "Failed to re-mount '%s'%s: %m", mount_entry_path(m),
+                               submounts ? " and its submounts" : "");
+}
+
static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) {
assert(ns_info);
@@ -1228,6 +1285,8 @@ static size_t namespace_calculate_mounts(
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
+ char** exec_paths,
+ char** no_exec_paths,
char** empty_directories,
size_t n_bind_mounts,
size_t n_temporary_filesystems,
@@ -1260,6 +1319,8 @@ static size_t namespace_calculate_mounts(
strv_length(read_write_paths) +
strv_length(read_only_paths) +
strv_length(inaccessible_paths) +
+ strv_length(exec_paths) +
+ strv_length(no_exec_paths) +
strv_length(empty_directories) +
n_bind_mounts +
n_mount_images +
@@ -1406,6 +1467,8 @@ int setup_namespace(
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
+ char** exec_paths,
+ char** no_exec_paths,
char** empty_directories,
const BindMount *bind_mounts,
size_t n_bind_mounts,
@@ -1523,6 +1586,8 @@ int setup_namespace(
read_write_paths,
read_only_paths,
inaccessible_paths,
+ exec_paths,
+ no_exec_paths,
empty_directories,
n_bind_mounts,
n_temporary_filesystems,
@@ -1550,6 +1615,14 @@ int setup_namespace(
if (r < 0)
goto finish;
+ r = append_access_mounts(&m, exec_paths, EXEC, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_access_mounts(&m, no_exec_paths, NOEXEC, require_prefix);
+ if (r < 0)
+ goto finish;
+
r = append_empty_dir_mounts(&m, empty_directories);
if (r < 0)
goto finish;
@@ -1869,6 +1942,21 @@ int setup_namespace(
goto finish;
}
}
+
+ /* Third round, flip the noexec bits with a simplified deny list. */
+ for (m = mounts, j = 0; m < mounts + n_mounts; ++m)
+ if (IN_SET(m->mode, EXEC, NOEXEC))
+ deny_list[j++] = (char*) mount_entry_path(m);
+ deny_list[j] = NULL;
+
+ for (m = mounts; m < mounts + n_mounts; ++m) {
+ r = make_noexec(m, deny_list, proc_self_mountinfo);
+ if (r < 0) {
+ if (error_path && mount_entry_path(m))
+ *error_path = strdup(mount_entry_path(m));
+ goto finish;
+ }
+ }
}
/* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 8e07dd37bc..b2ea4bd76b 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -108,6 +108,8 @@ int setup_namespace(
char **read_write_paths,
char **read_only_paths,
char **inaccessible_paths,
+ char **exec_paths,
+ char **no_exec_paths,
char **empty_directories,
const BindMount *bind_mounts,
size_t n_bind_mounts,
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 07f936dc6c..8fd2f89adc 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -904,6 +904,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"ReadWritePaths",
"ReadOnlyPaths",
"InaccessiblePaths",
+ "ExecPaths",
+ "NoExecPaths",
"RuntimeDirectory",
"StateDirectory",
"CacheDirectory",
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
index 4df391949b..183a686706 100644
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@@ -210,13 +210,14 @@ int bind_remount_recursive_with_mountinfo(
assert(prefix);
assert(proc_self_mountinfo);
- /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
- * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
- * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
- * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
- * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
- * do not have any effect on future submounts that might get propagated, they might be writable. This includes
- * future submounts that have been triggered via autofs.
+ /* Recursively remount a directory (and all its submounts) with desired flags (MS_RDONLY,
+ * MS_NOSUID, MS_NOEXEC). If the directory is already mounted, we reuse the mount and simply mark it
+ * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write operation), ditto for other flags. If it
+ * isn't we first make it one. Afterwards we apply (or remove) the flags to all submounts we can
+ * access, too. When mounts are stacked on the same mount point we only care for each individual
+ * "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We do
+ * not have any effect on future submounts that might get propagated, they might be writable
+ * etc. This includes future submounts that have been triggered via autofs.
*
* If the "deny_list" parameter is specified it may contain a list of subtrees to exclude from the
* remount operation. Note that we'll ignore the deny list for the top-level path. */
diff --git a/src/test/test-execute.c b/src/test/test-execute.c
index 01e2443777..c0e046b5e2 100644
--- a/src/test/test-execute.c
+++ b/src/test/test-execute.c
@@ -408,6 +408,11 @@ static void test_exec_inaccessiblepaths(Manager *m) {
test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
}
+/* Runs the exec-noexecpaths-simple.service test unit. It is expected to exit cleanly with status 0
+ * when can_unshare is set, and with EXIT_FAILURE otherwise. */
+static void test_exec_noexecpaths(Manager *m) {
+        int expected_status = can_unshare ? 0 : EXIT_FAILURE;
+
+        test(m, "exec-noexecpaths-simple.service", expected_status, CLD_EXITED);
+}
+
static void test_exec_temporaryfilesystem(Manager *m) {
test(m, "exec-temporaryfilesystem-options.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
@@ -865,6 +870,7 @@ int main(int argc, char *argv[]) {
entry(test_exec_ignoresigpipe),
entry(test_exec_inaccessiblepaths),
entry(test_exec_ioschedulingclass),
+ entry(test_exec_noexecpaths),
entry(test_exec_oomscoreadjust),
entry(test_exec_passenvironment),
entry(test_exec_personality),
diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c
index 461dde5fa9..b4db78492e 100644
--- a/src/test/test-namespace.c
+++ b/src/test/test-namespace.c
@@ -157,6 +157,8 @@ static void test_protect_kernel_logs(void) {
NULL,
NULL,
NULL,
+ NULL,
+ NULL,
NULL, 0,
NULL, 0,
NULL, 0,
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 3b5836e980..71ccfb88f4 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -26,6 +26,19 @@ int main(int argc, char *argv[]) {
NULL
};
+ const char * const exec[] = {
+ "/lib",
+ "/usr",
+ "-/lib64",
+ "-/usr/lib64",
+ NULL
+ };
+
+ const char * const no_exec[] = {
+ "/var",
+ NULL
+ };
+
const char *inaccessible[] = {
"/home/lennart/projects",
NULL
@@ -70,6 +83,8 @@ int main(int argc, char *argv[]) {
(char **) writable,
(char **) readonly,
(char **) inaccessible,
+ (char **) exec,
+ (char **) no_exec,
NULL,
&(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,
&(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1,