summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Larsson <alexl@redhat.com>2021-09-20 10:59:16 +0200
committerAlexander Larsson <alexl@redhat.com>2021-09-20 11:41:59 +0200
commit53475bc69f4acb1f1cd5a393190f8267fb0663c4 (patch)
tree6024295a4e5fa179915c16c289bde9a575a511e5
parent47fa284aba23bc28adb3c40ac8f2072dbb581293 (diff)
downloadbubblewrap-disable-userns.tar.gz
Add --disable-userns switchdisable-userns
Some use cases of bubblewrap want to ensure that the subprocess can't further re-arrange the filesystem namespace, or do other more complex namespace modification. This can be limited by --disable-userns, which makes the kernel unable to create any new user namespaces for the process hierarchy. This is done by making a cover of the original root, but running the process with the original root as root anyway. This "non-standard" root means the kernel will not allow creating new user namespaces. This is more typically done using chroot("/theroot"), which would also mean the root of the namespace ("/") differs from the process's current root ("/theroot"). However, we want to avoid this, as in this case symlinks in /proc/$pid/fd would have a "/theroot" prefix when seen outside the namespace, which is something that e.g. flatpak doesn't want. Note, there is a slight cost to this as the covering bind mount duplicates all the regular mounts in the namespace. However, they all refer to the same mounts so no actual files are duplicated.
-rw-r--r--bubblewrap.c44
1 files changed, 42 insertions, 2 deletions
diff --git a/bubblewrap.c b/bubblewrap.c
index 2e13fd0..b9d6fb9 100644
--- a/bubblewrap.c
+++ b/bubblewrap.c
@@ -66,6 +66,7 @@ static const char *opt_file_label = NULL;
static bool opt_as_pid_1;
const char *opt_chdir_path = NULL;
+bool opt_disable_userns = FALSE;
bool opt_unshare_user = FALSE;
bool opt_unshare_user_try = FALSE;
bool opt_unshare_pid = FALSE;
@@ -240,6 +241,7 @@ usage (int ecode, FILE *out)
" --unshare-cgroup-try Create new cgroup namespace if possible else continue by skipping it\n"
" --userns FD Use this user namespace (cannot combine with --unshare-user)\n"
" --userns2 FD After setup switch to this user namespace, only useful with --userns\n"
+ " --disable-userns Disable further use of user namespaces inside sandbox\n"
" --pidns FD Use this user namespace (as parent namespace if using --unshare-pid)\n"
" --uid UID Custom uid in the sandbox (requires --unshare-user or --userns)\n"
" --gid GID Custom gid in the sandbox (requires --unshare-user or --userns)\n"
@@ -2068,6 +2070,10 @@ parse_args_recurse (int *argcp,
argv += 1;
argc -= 1;
}
+ else if (strcmp (arg, "--disable-userns") == 0)
+ {
+ opt_disable_userns = TRUE;
+ }
else if (strcmp (arg, "--userns2") == 0)
{
int the_fd;
@@ -2420,6 +2426,7 @@ main (int argc,
struct sock_fprog seccomp_prog;
cleanup_free char *args_data = NULL;
int intermediate_pids_sockets[2] = {-1, -1};
+ bool using_userns2 = FALSE;
/* Handle --version early on before we try to acquire/drop
* any capabilities so it works in a build environment;
@@ -2947,8 +2954,12 @@ main (int argc,
die_with_error ("chdir /");
}
- if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
- die_with_error ("Setting userns2 failed");
+ if (opt_userns2_fd > 0)
+ {
+ if (setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
+ die_with_error ("Setting userns2 failed");
+ using_userns2 = TRUE;
+ }
if (opt_unshare_user &&
(ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) &&
@@ -2961,6 +2972,8 @@ main (int argc,
if (unshare (CLONE_NEWUSER))
die_with_error ("unshare user ns");
+ using_userns2 = TRUE;
+
/* We're in a new user namespace, we got back the bounding set, clear it again */
drop_cap_bounding_set (FALSE);
@@ -2969,6 +2982,33 @@ main (int argc,
-1, FALSE, FALSE);
}
+ if (opt_disable_userns)
+ {
+ if (using_userns2)
+ {
+ /* If we're not in the main userns, then we don't own the
+ current fs namespace and are not allowed to mount, so
+ create a new NS */
+ if (unshare (CLONE_NEWNS))
+ die_with_error ("unshare fs ns");
+ }
+
+ /* Mount a bind cover of the root fs. This will trigger
+ * current_chrooted() in create_user_ns() in the kernel at:
+ * https://elixir.bootlin.com/linux/v5.14.4/source/kernel/user_namespace.c#L92
+ * making it impossible for the process to create new user namespaces.
+ *
+ * What happens is that the path "/" in the namespace now
+ * resolves to the covering bindmount, but the container process
+ * tree root is still the lower mount. Note that it is still
+ * possible for the container to reach the covering bind mount
+ * (as e.g. "/.."), but since it's just a copy of the regular
+ * hierarchy it works identically to it.
+ */
+ if (mount ("/", "/", NULL, MS_SILENT | MS_MGC_VAL | MS_BIND | MS_REC, NULL) < 0)
+ die_with_error ("setting up root cover bind");
+ }
+
/* All privileged ops are done now, so drop caps we don't need */
drop_privs (!is_privileged, TRUE);