Add support for --pidns

This allows a sandbox to share a pid namespace with another sandbox. For this to work, the namespace passed in must be owned by the user namespace that bwrap is using, which implies that you either pass in --userns pointing to that user namespace, or already run under it. In the former case you'd typically take the userns from a running bwrap --unshare-user instance, whereas the latter case happens when using bwrap in setuid mode without user namespaces. If both --unshare-pid and --pidns are specified, then we first switch to the given pid namespace and then unshare from there. This is useful if you want a pid-isolated sandbox that is visible to another sandbox. The implementation is a bit tricky, as it needs to fork() in order to activate the setns()'ed pid namespace, which means we have to pass the final pid through a socket so that the kernel translates it to the initial pid namespace for us to waitpid() on it.
author: Alexander Larsson <alexl@redhat.com> 2019-11-21 16:56:15 +0100
committer: Alexander Larsson <alexl@redhat.com> 2019-11-26 09:28:51 +0100
commit: 46c7f1cca5f9a8ff985976a9d0477cf5ecb0162f (patch)
tree: a8b455bc378ca2170c9a468caefdf20e938f9415
parent: 4a7ecc630fb67cfec8fb7ad8782c08577c297e55 (diff)
download: bubblewrap-46c7f1cca5f9a8ff985976a9d0477cf5ecb0162f.tar.gz
2 files changed, 67 insertions, 2 deletions
diff --git a/bubblewrap.c b/bubblewrap.c
index 8cdc10d..fe54478 100644
--- a/bubblewrap.c
+++ b/bubblewrap.c
@@ -88,6 +88,7 @@ const char *opt_sandbox_hostname = NULL;
 char *opt_args_data = NULL;  /* owned */
 int opt_userns_fd = -1;
 int opt_userns2_fd = -1;
+int opt_pidns_fd = -1;
 
 #define CAP_TO_MASK_0(x) (1L << ((x) & 31))
 #define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32)
@@ -234,6 +235,7 @@ usage (int ecode, FILE *out)
            "    --unshare-cgroup-try         Create new cgroup namespace if possible else continue by skipping it\n"
            "    --userns FD                  Use this user namespace (cannot combine with --unshare-user)\n"
            "    --userns2 FD                 After setup switch to this user namspace, only useful with --userns\n"
+           "    --pidns FD                   Use this user namespace (as parent namespace if using --unshare-pid)\n"
            "    --uid UID                    Custom uid in the sandbox (requires --unshare-user)\n"
            "    --gid GID                    Custom gid in the sandbox (requires --unshare-user)\n"
            "    --hostname NAME              Custom hostname in the sandbox (requires --unshare-uts)\n"
@@ -1105,7 +1107,7 @@ setup_newroot (bool unshare_pid,
           if (ensure_dir (dest, 0755) != 0)
             die_with_error ("Can't mkdir %s", op->dest);
 
-          if (unshare_pid)
+          if (unshare_pid || opt_pidns_fd != -1)
             {
               /* Our own procfs */
               privileged_op (privileged_op_socket,
@@ -1927,6 +1929,23 @@ parse_args_recurse (int          *argcp,
           argv += 1;
           argc -= 1;
         }
+      else if (strcmp (arg, "--pidns") == 0)
+        {
+          int the_fd;
+          char *endptr;
+
+          if (argc < 2)
+            die ("--pidns takes an argument");
+
+          the_fd = strtol (argv[1], &endptr, 10);
+          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
+            die ("Invalid fd: %s", argv[1]);
+
+          opt_pidns_fd = the_fd;
+
+          argv += 1;
+          argc -= 1;
+        }
       else if (strcmp (arg, "--setenv") == 0)
         {
           if (argc < 3)
@@ -2195,6 +2214,7 @@ main (int    argc,
   size_t seccomp_len;
   struct sock_fprog seccomp_prog;
   cleanup_free char *args_data = NULL;
+  int intermediate_pids_sockets[2] = {-1, -1};
 
   /* Handle --version early on before we try to acquire/drop
    * any capabilities so it works in a build environment;
@@ -2362,7 +2382,7 @@ main (int    argc,
   clone_flags = SIGCHLD | CLONE_NEWNS;
   if (opt_unshare_user)
     clone_flags |= CLONE_NEWUSER;
-  if (opt_unshare_pid)
+  if (opt_unshare_pid && opt_pidns_fd == -1)
     clone_flags |= CLONE_NEWPID;
   if (opt_unshare_net)
     clone_flags |= CLONE_NEWNET;
@@ -2409,6 +2429,14 @@ main (int    argc,
       die_with_error ("Joining specified user namespace failed");
     }
 
+  /* Sometimes we have uninteresting intermediate pids during the setup, so set up code to pass the real pid down */
+  if (opt_pidns_fd != -1)
+    {
+      /* Mark us as a subreaper, this way we can get exit status from grandchildren */
+      prctl (PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+      create_pid_socketpair (intermediate_pids_sockets);
+    }
+
   pid = raw_clone (clone_flags, NULL);
   if (pid == -1)
     {
@@ -2430,6 +2458,13 @@ main (int    argc,
     {
       /* Parent, outside sandbox, privileged (initially) */
 
+      if (intermediate_pids_sockets[0] != -1)
+        {
+          close (intermediate_pids_sockets[1]);
+          pid = read_pid_from_socket (intermediate_pids_sockets[0]);
+          close (intermediate_pids_sockets[0]);
+        }
+
       /* Discover namespace ids before we drop privileges */
       namespace_ids_read (pid);
 
@@ -2491,6 +2526,31 @@ main (int    argc,
       return monitor_child (event_fd, pid, setup_finished_pipe[0]);
     }
 
+  if (opt_pidns_fd > 0)
+    {
+      if (setns (opt_pidns_fd, CLONE_NEWPID) != 0)
+        die_with_error ("Setting pidns failed");
+
+      /* fork to get the passed in pid ns */
+      fork_intermediate_child ();
+
+      /* We might both have specified an --pidns *and* --unshare-pid, so set up a new child pid namespace under the specified one */
+      if (opt_unshare_pid)
+        {
+          if (unshare (CLONE_NEWPID))
+            die_with_error ("unshare pid ns");
+
+          /* fork to get the new pid ns */
+          fork_intermediate_child ();
+        }
+
+      /* We're back, either in a child or grandchild, so message the actual pid to the monitor */
+
+      close (intermediate_pids_sockets[0]);
+      send_pid_on_socket (intermediate_pids_sockets[1]);
+      close (intermediate_pids_sockets[1]);
+    }
+
   /* Child, in sandbox, privileged in the parent or in the user namespace (if --unshare-user).
    *
    * Note that for user namespaces we run as euid 0 during clone(), so
diff --git a/bwrap.xml b/bwrap.xml
index b1a2b2e..7c53207 100644
--- a/bwrap.xml
+++ b/bwrap.xml
@@ -141,6 +141,11 @@
       <para>This is useful because sometimes bubblewrap itself creates nested user namespaces (to work around some kernel issues) and --userns2 can be used to enter these.</para></listitem>
     </varlistentry>
     <varlistentry>
+      <term><option>--pidns <arg choice="plain">FD</arg></option></term>
+      <listitem><para>Use an existing pid namespace instead of creating one. This is often used with --userns, because the pid namespace must be owned by the same user namespace that bwrap uses. </para>
+      <para>Note that this can be combined with --unshare-pid, and in that case it means that the sandbox will be in its own pid namespace, which is a child of the passed in one.</para></listitem>
+    </varlistentry>
+    <varlistentry>
       <term><option>--uid <arg choice="plain">UID</arg></option></term>
       <listitem><para>Use a custom user id in the sandbox (requires <option>--unshare-user</option>)</para></listitem>
     </varlistentry>
author	Alexander Larsson <alexl@redhat.com>	2019-11-21 16:56:15 +0100
committer	Alexander Larsson <alexl@redhat.com>	2019-11-26 09:28:51 +0100
commit	46c7f1cca5f9a8ff985976a9d0477cf5ecb0162f (patch)
tree	a8b455bc378ca2170c9a468caefdf20e938f9415
parent	4a7ecc630fb67cfec8fb7ad8782c08577c297e55 (diff)
download	bubblewrap-46c7f1cca5f9a8ff985976a9d0477cf5ecb0162f.tar.gz