diff options
author | Alexander Larsson <alexl@redhat.com> | 2019-11-21 16:56:15 +0100 |
---|---|---|
committer | Alexander Larsson <alexl@redhat.com> | 2019-11-26 09:28:51 +0100 |
commit | 46c7f1cca5f9a8ff985976a9d0477cf5ecb0162f (patch) | |
tree | a8b455bc378ca2170c9a468caefdf20e938f9415 | |
parent | 4a7ecc630fb67cfec8fb7ad8782c08577c297e55 (diff) | |
download | bubblewrap-46c7f1cca5f9a8ff985976a9d0477cf5ecb0162f.tar.gz |
Add support for --pidns
This allows a sandbox to share a pid namespace with another sandbox.
For this to work the namespace passed in must be owned by the user
namespace that bwrap is using, which implies that you either pass in
--userns pointing to that user namespace, or already run under it. In the
former case you'd typically take the userns from a running bwrap
--unshare-user instance, whereas the second case happens when using
bwrap in the setuid mode without user namespaces.
If both --unshare-pid and --pidns are specified then we first
switch to the pid namespace, and then unshare from there. This is
useful if you want a pid-isolated sandbox that is visible to another
sandbox.
The implementation is a bit tricky, as it needs to fork() in order
to activate the setns():ed pid namespaces, which means we have to
pass through the final pid via a socket to make the kernel translate
the pid to the initial pid namespace for us to waitpid() on it.
-rw-r--r-- | bubblewrap.c | 64 | ||||
-rw-r--r-- | bwrap.xml | 5 |
2 files changed, 67 insertions, 2 deletions
diff --git a/bubblewrap.c b/bubblewrap.c index 8cdc10d..fe54478 100644 --- a/bubblewrap.c +++ b/bubblewrap.c @@ -88,6 +88,7 @@ const char *opt_sandbox_hostname = NULL; char *opt_args_data = NULL; /* owned */ int opt_userns_fd = -1; int opt_userns2_fd = -1; +int opt_pidns_fd = -1; #define CAP_TO_MASK_0(x) (1L << ((x) & 31)) #define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32) @@ -234,6 +235,7 @@ usage (int ecode, FILE *out) " --unshare-cgroup-try Create new cgroup namespace if possible else continue by skipping it\n" " --userns FD Use this user namespace (cannot combine with --unshare-user)\n" " --userns2 FD After setup switch to this user namspace, only useful with --userns\n" + " --pidns FD Use this user namespace (as parent namespace if using --unshare-pid)\n" " --uid UID Custom uid in the sandbox (requires --unshare-user)\n" " --gid GID Custom gid in the sandbox (requires --unshare-user)\n" " --hostname NAME Custom hostname in the sandbox (requires --unshare-uts)\n" @@ -1105,7 +1107,7 @@ setup_newroot (bool unshare_pid, if (ensure_dir (dest, 0755) != 0) die_with_error ("Can't mkdir %s", op->dest); - if (unshare_pid) + if (unshare_pid || opt_pidns_fd != -1) { /* Our own procfs */ privileged_op (privileged_op_socket, @@ -1927,6 +1929,23 @@ parse_args_recurse (int *argcp, argv += 1; argc -= 1; } + else if (strcmp (arg, "--pidns") == 0) + { + int the_fd; + char *endptr; + + if (argc < 2) + die ("--pidns takes an argument"); + + the_fd = strtol (argv[1], &endptr, 10); + if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0) + die ("Invalid fd: %s", argv[1]); + + opt_pidns_fd = the_fd; + + argv += 1; + argc -= 1; + } else if (strcmp (arg, "--setenv") == 0) { if (argc < 3) @@ -2195,6 +2214,7 @@ main (int argc, size_t seccomp_len; struct sock_fprog seccomp_prog; cleanup_free char *args_data = NULL; + int intermediate_pids_sockets[2] = {-1, -1}; /* Handle --version early on before we try to acquire/drop * any capabilities so it works in a build environment; @@ -2362,7 +2382,7 @@ 
main (int argc, clone_flags = SIGCHLD | CLONE_NEWNS; if (opt_unshare_user) clone_flags |= CLONE_NEWUSER; - if (opt_unshare_pid) + if (opt_unshare_pid && opt_pidns_fd == -1) clone_flags |= CLONE_NEWPID; if (opt_unshare_net) clone_flags |= CLONE_NEWNET; @@ -2409,6 +2429,14 @@ main (int argc, die_with_error ("Joining specified user namespace failed"); } + /* Sometimes we have uninteresting intermidate pids during the setup, set up code to pass the real pid down */ + if (opt_pidns_fd != -1) + { + /* Mark us as a subreaper, this way we can get exit status from grandchildren */ + prctl (PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); + create_pid_socketpair (intermediate_pids_sockets); + } + pid = raw_clone (clone_flags, NULL); if (pid == -1) { @@ -2430,6 +2458,13 @@ main (int argc, { /* Parent, outside sandbox, privileged (initially) */ + if (intermediate_pids_sockets[0] != -1) + { + close (intermediate_pids_sockets[1]); + pid = read_pid_from_socket (intermediate_pids_sockets[0]); + close (intermediate_pids_sockets[0]); + } + /* Discover namespace ids before we drop privileges */ namespace_ids_read (pid); @@ -2491,6 +2526,31 @@ main (int argc, return monitor_child (event_fd, pid, setup_finished_pipe[0]); } + if (opt_pidns_fd > 0) + { + if (setns (opt_pidns_fd, CLONE_NEWPID) != 0) + die_with_error ("Setting pidns failed"); + + /* fork to get the passed in pid ns */ + fork_intermediate_child (); + + /* We might both have specified an --pidns *and* --unshare-pid, so set up a new child pid namespace under the specified one */ + if (opt_unshare_pid) + { + if (unshare (CLONE_NEWPID)) + die_with_error ("unshare pid ns"); + + /* fork to get the new pid ns */ + fork_intermediate_child (); + } + + /* We're back, either in a child or grandchild, so message the actual pid to the monitor */ + + close (intermediate_pids_sockets[0]); + send_pid_on_socket (intermediate_pids_sockets[1]); + close (intermediate_pids_sockets[1]); + } + /* Child, in sandbox, privileged in the parent or in the user 
namespace (if --unshare-user). * * Note that for user namespaces we run as euid 0 during clone(), so @@ -141,6 +141,11 @@ <para>This is useful because sometimes bubblewrap itself creates nested user namespaces (to work around some kernel issues) and --userns2 can be used to enter these.</para></listitem> </varlistentry> <varlistentry> + <term><option>--pidns <arg choice="plain">FD</arg></option></term> + <listitem><para>Use an existing pid namespace instead of creating one. This is often used with --userns, because the pid namespace must be owned by the same user namespace that bwrap uses. </para> + <para>Note that this can be combined with --unshare-pid, and in that case it means that the sandbox will be in its own pid namespace, which is a child of the passed in one.</para></listitem> + </varlistentry> + <varlistentry> <term><option>--uid <arg choice="plain">UID</arg></option></term> <listitem><para>Use a custom user id in the sandbox (requires <option>--unshare-user</option>)</para></listitem> </varlistentry> |