summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon McVittie <smcv@collabora.com>2022-03-22 17:12:33 +0000
committerAlexander Larsson <alexander.larsson@gmail.com>2023-01-03 11:04:09 +0100
commitb33c333bcb88557ad23a9bc5be0d619d537984e9 (patch)
treeea61579dd28016bd8039bcf32a6735dbb0ed9eb6
parentbb7ac1348f98ee48f1e2e38bdf93abca2e4f6d06 (diff)
downloadbubblewrap-b33c333bcb88557ad23a9bc5be0d619d537984e9.tar.gz
Add an option to disable nested user namespaces by setting limit to 1
Some use-cases of bubblewrap want to ensure that the subprocess can't further re-arrange the filesystem namespace, or do other more complex namespace modification. For example, Flatpak wants to prevent sandboxed processes from altering their /proc/$pid/root/.flatpak-info, so that /.flatpak-info can safely be used as an indicator that a process is part of a Flatpak app. This approach was suggested by lukts30 on containers/bubblewrap#452. The sysctl-controlled maximum numbers of namespaces are themselves namespaced, so we can disable nested user namespaces by setting the limit to 1 and then entering a new, nested user namespace. The resulting process loses its privileges in the namespace where the limit was set to 1, so it is unable to move the limit back up. Co-authored-by: Alexander Larsson <alexl@redhat.com> Signed-off-by: Simon McVittie <smcv@collabora.com>
-rw-r--r--bubblewrap.c54
-rw-r--r--bwrap.xml14
-rw-r--r--completions/bash/bwrap1
-rw-r--r--completions/zsh/_bwrap1
-rwxr-xr-xtests/test-run.sh10
5 files changed, 73 insertions, 7 deletions
diff --git a/bubblewrap.c b/bubblewrap.c
index eece9d8..bcfbe9d 100644
--- a/bubblewrap.c
+++ b/bubblewrap.c
@@ -73,6 +73,7 @@ static const char *opt_file_label = NULL;
static bool opt_as_pid_1;
const char *opt_chdir_path = NULL;
+bool opt_disable_userns = FALSE;
bool opt_unshare_user = FALSE;
bool opt_unshare_user_try = FALSE;
bool opt_unshare_pid = FALSE;
@@ -311,6 +312,7 @@ usage (int ecode, FILE *out)
" --unshare-cgroup-try Create new cgroup namespace if possible else continue by skipping it\n"
" --userns FD Use this user namespace (cannot combine with --unshare-user)\n"
" --userns2 FD After setup switch to this user namespace, only useful with --userns\n"
+ " --disable-userns Disable further use of user namespaces inside sandbox\n"
" --pidns FD Use this pid namespace (as parent namespace if using --unshare-pid)\n"
" --uid UID Custom uid in the sandbox (requires --unshare-user or --userns)\n"
" --gid GID Custom gid in the sandbox (requires --unshare-user or --userns)\n"
@@ -1777,6 +1779,10 @@ parse_args_recurse (int *argcp,
argv++;
argc--;
}
+ else if (strcmp (arg, "--disable-userns") == 0)
+ {
+ opt_disable_userns = TRUE;
+ }
else if (strcmp (arg, "--remount-ro") == 0)
{
if (argc < 2)
@@ -2677,6 +2683,12 @@ main (int argc,
if (opt_userns_fd != -1 && opt_unshare_user_try)
die ("--userns not compatible --unshare-user-try");
+ if (opt_disable_userns && !opt_unshare_user)
+ die ("--disable-userns requires --unshare-user");
+
+ if (opt_disable_userns && opt_userns_block_fd != -1)
+ die ("--disable-userns is not compatible with --userns-block-fd");
+
/* Technically using setns() is probably safe even in the privileged
* case, because we got passed in a file descriptor to the
* namespace, and that can only be gotten if you have ptrace
@@ -3155,13 +3167,34 @@ main (int argc,
if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
die_with_error ("Setting userns2 failed");
- if (opt_unshare_user &&
- (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) &&
- opt_userns_block_fd == -1)
+ if (opt_unshare_user && opt_userns_block_fd == -1 &&
+ (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid ||
+ opt_disable_userns))
{
- /* Now that devpts is mounted and we've no need for mount
- permissions we can create a new userspace and map our uid
- 1:1 */
+ /* Here we create a second level userns inside the first one. This is
+ used for one or more of these reasons:
+
+ * The 1st level namespace has a different uid/gid than the
+ requested one, because we need to be root in the first
+ level in order to mount devpts (opt_needs_devpts).
+
+ * To disable user namespaces we set max_user_namespaces and then
+ create the second namespace so that the sandbox cannot undo this
+ change.
+ */
+
+ if (opt_disable_userns)
+ {
+ cleanup_fd int sysctl_fd = -1;
+
+ sysctl_fd = openat (proc_fd, "sys/user/max_user_namespaces", O_WRONLY);
+
+ if (sysctl_fd < 0)
+ die_with_error ("cannot open /proc/sys/user/max_user_namespaces");
+
+ if (write_to_fd (sysctl_fd, "1", 1) < 0)
+ die_with_error ("sysctl user.max_user_namespaces = 1");
+ }
if (unshare (CLONE_NEWUSER))
die_with_error ("unshare user ns");
@@ -3169,6 +3202,15 @@ main (int argc,
/* We're in a new user namespace, we got back the bounding set, clear it again */
drop_cap_bounding_set (FALSE);
+ if (opt_disable_userns)
+ {
+ /* Verify that we can't make a new userns again */
+ res = unshare (CLONE_NEWUSER);
+
+ if (res == 0)
+ die ("unable to disable creation of new user namespaces");
+ }
+
write_uid_gid_map (opt_sandbox_uid, ns_uid,
opt_sandbox_gid, ns_gid,
-1, FALSE, FALSE);
diff --git a/bwrap.xml b/bwrap.xml
index 46e2478..8690d64 100644
--- a/bwrap.xml
+++ b/bwrap.xml
@@ -145,6 +145,20 @@
<para>This is useful because sometimes bubblewrap itself creates nested user namespaces (to work around some kernel issues) and --userns2 can be used to enter these.</para></listitem>
</varlistentry>
<varlistentry>
+ <term><option>--disable-userns</option></term>
+ <listitem><para>
+ Prevent the process in the sandbox from creating further user namespaces,
+ so that it cannot rearrange the filesystem namespace or do other more
+ complex namespace modification.
+ This is currently implemented by setting the
+ <literal>user.max_user_namespaces</literal> sysctl to 1, and then
+ entering a nested user namespace which is unable to raise that limit
+ in the outer namespace.
+ This option requires <option>--unshare-user</option>, and doesn't work
+ in the setuid version of bubblewrap.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>--pidns <arg choice="plain">FD</arg></option></term>
<listitem><para>Use an existing pid namespace instead of creating one. This is often used with --userns, because the pid namespace must be owned by the same user namespace that bwrap uses. </para>
<para>Note that this can be combined with --unshare-pid, and in that case it means that the sandbox will be in its own pid namespace, which is a child of the passed in one.</para></listitem>
diff --git a/completions/bash/bwrap b/completions/bash/bwrap
index e796be3..962d04c 100644
--- a/completions/bash/bwrap
+++ b/completions/bash/bwrap
@@ -11,6 +11,7 @@ _bwrap() {
local boolean_options="
--as-pid-1
--clearenv
+ --disable-userns
--help
--new-session
--unshare-all
diff --git a/completions/zsh/_bwrap b/completions/zsh/_bwrap
index f81ffaf..7488727 100644
--- a/completions/zsh/_bwrap
+++ b/completions/zsh/_bwrap
@@ -41,6 +41,7 @@ _bwrap_args=(
'--dev-bind[Bind mount the host path SRC on DEST, allowing device access]:source:_files:destination:_files'
'--dev[Mount new dev on DEST]:mount point for /dev:_files -/'
"--die-with-parent[Kills with SIGKILL child process (COMMAND) when bwrap or bwrap's parent dies.]"
+ '--disable-userns[Disable further use of user namespaces inside sandbox]'
'--exec-label[Exec label for the sandbox]:SELinux label:_selinux_contexts'
'--file-label[File label for temporary sandbox content]:SELinux label:_selinux_contexts'
'--gid[Custom gid in the sandbox (requires --unshare-user or --userns)]: :_guard "[0-9]#" "numeric group ID"'
diff --git a/tests/test-run.sh b/tests/test-run.sh
index a08998b..f1506bb 100755
--- a/tests/test-run.sh
+++ b/tests/test-run.sh
@@ -8,7 +8,7 @@ srcd=$(cd $(dirname "$0") && pwd)
bn=$(basename "$0")
-echo "1..57"
+echo "1..58"
# Test help
${BWRAP} --help > help.txt
@@ -112,6 +112,7 @@ echo "ok exec failure doesn't include exit-code in json-status"
if test -n "${bwrap_is_suid:-}"; then
echo "ok - # SKIP no --cap-add support"
echo "ok - # SKIP no --cap-add support"
+ echo "ok - # SKIP no --disable-userns"
else
BWRAP_RECURSE="$BWRAP --unshare-user --uid 0 --gid 0 --cap-add ALL --bind / / --bind /proc /proc"
@@ -123,6 +124,13 @@ else
$BWRAP_RECURSE -- /proc/self/exe --unshare-all ${BWRAP_RO_HOST_ARGS} findmnt > recursive-newroot.txt
assert_file_has_content recursive-newroot.txt "/usr"
echo "ok - can pivot to new rootfs recursively"
+
+ $BWRAP --dev-bind / / -- true
+ $BWRAP --unshare-user --disable-userns --dev-bind / / -- true
+ ! $BWRAP --unshare-user --disable-userns --dev-bind / / -- $BWRAP --dev-bind / / -- true
+ $BWRAP --unshare-user --disable-userns --dev-bind / / -- sh -c "echo 2 > /proc/sys/user/max_user_namespaces || true; ! $BWRAP --dev-bind / / -- true"
+ $BWRAP --unshare-user --disable-userns --dev-bind / / -- sh -c "echo 100 > /proc/sys/user/max_user_namespaces || true; ! $BWRAP --dev-bind / / -- true"
+ echo "ok - can disable nested userns"
fi
# Test error prefixing