summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Larsson <alexl@redhat.com>2019-11-27 13:33:42 +0100
committerGitHub <noreply@github.com>2019-11-27 13:33:42 +0100
commitff533b84d056f2c22633a84b34323dd085bd977a (patch)
tree095a248edcdce3e096ab5b54164c7588e13e0906
parent300da62ab6d14aaeeed20172a03090932bb23119 (diff)
parent7a8e3de7e0a263491737a203d82e2f63b51c08fe (diff)
downloadbubblewrap-ff533b84d056f2c22633a84b34323dd085bd977a.tar.gz
Merge pull request #338 from containers/reuse-namespaces
Support for reusing namespaces
-rw-r--r--bubblewrap.c173
-rw-r--r--bwrap.xml15
-rw-r--r--tests/libtest-core.sh22
-rwxr-xr-xtests/test-run.sh38
-rw-r--r--utils.c94
-rw-r--r--utils.h5
6 files changed, 333 insertions, 14 deletions
diff --git a/bubblewrap.c b/bubblewrap.c
index 1ec9d2b..8d0c5f7 100644
--- a/bubblewrap.c
+++ b/bubblewrap.c
@@ -86,6 +86,9 @@ int opt_json_status_fd = -1;
int opt_seccomp_fd = -1;
const char *opt_sandbox_hostname = NULL;
char *opt_args_data = NULL; /* owned */
+int opt_userns_fd = -1;
+int opt_userns2_fd = -1;
+int opt_pidns_fd = -1;
#define CAP_TO_MASK_0(x) (1L << ((x) & 31))
#define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32)
@@ -230,8 +233,11 @@ usage (int ecode, FILE *out)
" --unshare-uts Create new uts namespace\n"
" --unshare-cgroup Create new cgroup namespace\n"
" --unshare-cgroup-try Create new cgroup namespace if possible else continue by skipping it\n"
- " --uid UID Custom uid in the sandbox (requires --unshare-user)\n"
- " --gid GID Custom gid in the sandbox (requires --unshare-user)\n"
+ " --userns FD Use this user namespace (cannot combine with --unshare-user)\n"
+ " --userns2 FD After setup switch to this user namspace, only useful with --userns\n"
+ " --pidns FD Use this user namespace (as parent namespace if using --unshare-pid)\n"
+ " --uid UID Custom uid in the sandbox (requires --unshare-user or --userns)\n"
+ " --gid GID Custom gid in the sandbox (requires --unshare-user or --userns)\n"
" --hostname NAME Custom hostname in the sandbox (requires --unshare-uts)\n"
" --chdir DIR Change directory to DIR\n"
" --setenv VAR VALUE Set an environment variable\n"
@@ -799,9 +805,19 @@ static void
switch_to_user_with_privs (void)
{
/* If we're in a new user namespace, we got back the bounding set, clear it again */
- if (opt_unshare_user)
+ if (opt_unshare_user || opt_userns_fd != -1)
drop_cap_bounding_set (FALSE);
+ /* If we switched to a new user namespace it may allow other uids/gids, so switch to the target one */
+ if (opt_userns_fd != -1)
+ {
+ if (opt_sandbox_uid != real_uid && setuid (opt_sandbox_uid) < 0)
+ die_with_error ("unable to switch to uid %d", opt_sandbox_uid);
+
+ if (opt_sandbox_gid != real_gid && setgid (opt_sandbox_gid) < 0)
+ die_with_error ("unable to switch to gid %d", opt_sandbox_gid);
+ }
+
if (!is_privileged)
return;
@@ -822,10 +838,14 @@ drop_privs (bool keep_requested_caps)
{
assert (!keep_requested_caps || !is_privileged);
/* Drop root uid */
- if (getuid () == 0 && setuid (opt_sandbox_uid) < 0)
+ if (geteuid () == 0 && setuid (opt_sandbox_uid) < 0)
die_with_error ("unable to drop root uid");
drop_all_caps (keep_requested_caps);
+
+ /* We don't have any privs now, so mark us dumpable which makes /proc/self be owned by the user instead of root */
+ if (prctl (PR_SET_DUMPABLE, 1, 0, 0, 0) != 0)
+ die_with_error ("can't set dumpable");
}
static char *
@@ -1097,7 +1117,7 @@ setup_newroot (bool unshare_pid,
if (ensure_dir (dest, 0755) != 0)
die_with_error ("Can't mkdir %s", op->dest);
- if (unshare_pid)
+ if (unshare_pid || opt_pidns_fd != -1)
{
/* Our own procfs */
privileged_op (privileged_op_socket,
@@ -1885,6 +1905,57 @@ parse_args_recurse (int *argcp,
argv += 1;
argc -= 1;
}
+ else if (strcmp (arg, "--userns") == 0)
+ {
+ int the_fd;
+ char *endptr;
+
+ if (argc < 2)
+ die ("--userns takes an argument");
+
+ the_fd = strtol (argv[1], &endptr, 10);
+ if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
+ die ("Invalid fd: %s", argv[1]);
+
+ opt_userns_fd = the_fd;
+
+ argv += 1;
+ argc -= 1;
+ }
+ else if (strcmp (arg, "--userns2") == 0)
+ {
+ int the_fd;
+ char *endptr;
+
+ if (argc < 2)
+ die ("--userns2 takes an argument");
+
+ the_fd = strtol (argv[1], &endptr, 10);
+ if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
+ die ("Invalid fd: %s", argv[1]);
+
+ opt_userns2_fd = the_fd;
+
+ argv += 1;
+ argc -= 1;
+ }
+ else if (strcmp (arg, "--pidns") == 0)
+ {
+ int the_fd;
+ char *endptr;
+
+ if (argc < 2)
+ die ("--pidns takes an argument");
+
+ the_fd = strtol (argv[1], &endptr, 10);
+ if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
+ die ("Invalid fd: %s", argv[1]);
+
+ opt_pidns_fd = the_fd;
+
+ argv += 1;
+ argc -= 1;
+ }
else if (strcmp (arg, "--setenv") == 0)
{
if (argc < 3)
@@ -2153,6 +2224,7 @@ main (int argc,
size_t seccomp_len;
struct sock_fprog seccomp_prog;
cleanup_free char *args_data = NULL;
+ int intermediate_pids_sockets[2] = {-1, -1};
/* Handle --version early on before we try to acquire/drop
* any capabilities so it works in a build environment;
@@ -2203,14 +2275,35 @@ main (int argc,
if (opt_userns_block_fd != -1 && opt_info_fd == -1)
die ("--userns-block-fd requires --info-fd");
+ if (opt_userns_fd != -1 && opt_unshare_user)
+ die ("--userns not compatible --unshare-user");
+
+ if (opt_userns_fd != -1 && opt_unshare_user_try)
+ die ("--userns not compatible --unshare-user-try");
+
+ /* Technically using setns() is probably safe even in the privileged
+ * case, because we got passed in a file descriptor to the
+ * namespace, and that can only be gotten if you have ptrace
+ * permissions against the target, and then you could do whatever to
+ * the namespace anyway.
+ *
+ * However, for practical reasons this isn't possible to use,
+ * because (as described in acquire_privs()) setuid bwrap causes
+ * root to own the namespaces that it creates, so you will not be
+ * able to access these namespaces anyway. So, best just not support
+ * it anyway.
+ */
+ if (opt_userns_fd != -1 && is_privileged)
+ die ("--userns doesn't work in setuid mode");
+
/* We have to do this if we weren't installed setuid (and we're not
* root), so let's just DWIM */
- if (!is_privileged && getuid () != 0)
+ if (!is_privileged && getuid () != 0 && opt_userns_fd == -1)
opt_unshare_user = TRUE;
#ifdef ENABLE_REQUIRE_USERNS
/* In this build option, we require userns. */
- if (is_privileged && getuid () != 0)
+ if (is_privileged && getuid () != 0 && opt_userns_fd == -1)
opt_unshare_user = TRUE;
#endif
@@ -2255,11 +2348,11 @@ main (int argc,
if (opt_sandbox_gid == -1)
opt_sandbox_gid = real_gid;
- if (!opt_unshare_user && opt_sandbox_uid != real_uid)
- die ("Specifying --uid requires --unshare-user");
+ if (!opt_unshare_user && opt_userns_fd == -1 && opt_sandbox_uid != real_uid)
+ die ("Specifying --uid requires --unshare-user or --userns");
- if (!opt_unshare_user && opt_sandbox_gid != real_gid)
- die ("Specifying --gid requires --unshare-user");
+ if (!opt_unshare_user && opt_userns_fd == -1 && opt_sandbox_gid != real_gid)
+ die ("Specifying --gid requires --unshare-user or --userns");
if (!opt_unshare_uts && opt_sandbox_hostname != NULL)
die ("Specifying --hostname requires --unshare-uts");
@@ -2299,7 +2392,7 @@ main (int argc,
clone_flags = SIGCHLD | CLONE_NEWNS;
if (opt_unshare_user)
clone_flags |= CLONE_NEWUSER;
- if (opt_unshare_pid)
+ if (opt_unshare_pid && opt_pidns_fd == -1)
clone_flags |= CLONE_NEWPID;
if (opt_unshare_net)
clone_flags |= CLONE_NEWNET;
@@ -2338,6 +2431,22 @@ main (int argc,
die_with_error ("pipe2()");
}
+ /* Switch to the custom user ns before the clone, gets us privs in that ns (assuming it's a child of the current and thus allowed) */
+ if (opt_userns_fd > 0 && setns (opt_userns_fd, CLONE_NEWUSER) != 0)
+ {
+ if (errno == EINVAL)
+ die ("Joining the specified user namespace failed, it might not be a descendant of the current user namespace.");
+ die_with_error ("Joining specified user namespace failed");
+ }
+
+ /* Sometimes we have uninteresting intermediate pids during the setup, set up code to pass the real pid down */
+ if (opt_pidns_fd != -1)
+ {
+ /* Mark us as a subreaper, this way we can get exit status from grandchildren */
+ prctl (PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+ create_pid_socketpair (intermediate_pids_sockets);
+ }
+
pid = raw_clone (clone_flags, NULL);
if (pid == -1)
{
@@ -2359,6 +2468,13 @@ main (int argc,
{
/* Parent, outside sandbox, privileged (initially) */
+ if (intermediate_pids_sockets[0] != -1)
+ {
+ close (intermediate_pids_sockets[1]);
+ pid = read_pid_from_socket (intermediate_pids_sockets[0]);
+ close (intermediate_pids_sockets[0]);
+ }
+
/* Discover namespace ids before we drop privileges */
namespace_ids_read (pid);
@@ -2377,7 +2493,10 @@ main (int argc,
pid, TRUE, opt_needs_devpts);
}
- /* Initial launched process, wait for exec:ed command to exit */
+ /* Initial launched process, wait for pid 1 or exec:ed command to exit */
+
+ if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
+ die_with_error ("Setting userns2 failed");
/* We don't need any privileges in the launcher, drop them immediately. */
drop_privs (FALSE);
@@ -2417,6 +2536,31 @@ main (int argc,
return monitor_child (event_fd, pid, setup_finished_pipe[0]);
}
+ if (opt_pidns_fd > 0)
+ {
+ if (setns (opt_pidns_fd, CLONE_NEWPID) != 0)
+ die_with_error ("Setting pidns failed");
+
+ /* fork to get the passed in pid ns */
+ fork_intermediate_child ();
+
+ /* We might both have specified an --pidns *and* --unshare-pid, so set up a new child pid namespace under the specified one */
+ if (opt_unshare_pid)
+ {
+ if (unshare (CLONE_NEWPID))
+ die_with_error ("unshare pid ns");
+
+ /* fork to get the new pid ns */
+ fork_intermediate_child ();
+ }
+
+ /* We're back, either in a child or grandchild, so message the actual pid to the monitor */
+
+ close (intermediate_pids_sockets[0]);
+ send_pid_on_socket (intermediate_pids_sockets[1]);
+ close (intermediate_pids_sockets[1]);
+ }
+
/* Child, in sandbox, privileged in the parent or in the user namespace (if --unshare-user).
*
* Note that for user namespaces we run as euid 0 during clone(), so
@@ -2605,6 +2749,9 @@ main (int argc,
die_with_error ("chdir /");
}
+ if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
+ die_with_error ("Setting userns2 failed");
+
if (opt_unshare_user &&
(ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) &&
opt_userns_block_fd == -1)
diff --git a/bwrap.xml b/bwrap.xml
index 73ca161..7c53207 100644
--- a/bwrap.xml
+++ b/bwrap.xml
@@ -131,6 +131,21 @@
<listitem><para>Unshare all possible namespaces. Currently equivalent with: <option>--unshare-user-try</option> <option>--unshare-ipc</option> <option>--unshare-pid</option> <option>--unshare-net</option> <option>--unshare-uts</option> <option>--unshare-cgroup-try</option></para></listitem>
</varlistentry>
<varlistentry>
+ <term><option>--userns <arg choice="plain">FD</arg></option></term>
+ <listitem><para>Use an existing user namespace instead of creating a new one. The namespace must fulfil the permission requirements for setns(), which generally means that it must be a descendant of the currently active user namespace, owned by the same user. </para>
+ <para>This is incompatible with --unshare-user, and doesn't work in the setuid version of bubblewrap.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>--userns2 <arg choice="plain">FD</arg></option></term>
+ <listitem><para>After setting up the new namespace, switch into the specified namespace. For this to work the specified namespace must be a descendant of the user namespace used for the setup, so this is only useful in combination with --userns.</para>
+ <para>This is useful because sometimes bubblewrap itself creates nested user namespaces (to work around some kernel issues) and --userns2 can be used to enter these.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>--pidns <arg choice="plain">FD</arg></option></term>
+ <listitem><para>Use an existing pid namespace instead of creating one. This is often used with --userns, because the pid namespace must be owned by the same user namespace that bwrap uses. </para>
+ <para>Note that this can be combined with --unshare-pid, and in that case it means that the sandbox will be in its own pid namespace, which is a child of the passed in one.</para></listitem>
+ </varlistentry>
+ <varlistentry>
<term><option>--uid <arg choice="plain">UID</arg></option></term>
<listitem><para>Use a custom user id in the sandbox (requires <option>--unshare-user</option>)</para></listitem>
</varlistentry>
diff --git a/tests/libtest-core.sh b/tests/libtest-core.sh
index d0b7d37..0255b57 100644
--- a/tests/libtest-core.sh
+++ b/tests/libtest-core.sh
@@ -75,6 +75,18 @@ _fatal_print_file() {
fatal "$@"
}
+# Dump two files to stderr and then call fatal with the remaining arguments.
+#   $1, $2 - the files to show; each gets an "ls -al" listing followed by
+#            its contents with every line prefixed by "# "
+#   $3...  - passed on unchanged to fatal
+_fatal_print_files() {
+    file1="$1"
+    file2="$2"
+    shift 2
+    {
+        ls -al "$file1"
+        sed -e 's/^/# /' < "$file1"
+        ls -al "$file2"
+        sed -e 's/^/# /' < "$file2"
+    } >&2
+    fatal "$@"
+}
+
assert_not_has_file () {
if test -f "$1"; then
_fatal_print_file "$1" "File '$1' exists"
@@ -135,8 +147,18 @@ assert_file_empty() {
fi
}
+assert_files_equal() {
+ if ! cmp "$1" "$2"; then
+ _fatal_print_files "$1" "$2" "File '$1' and '$2' is not equal"
+ fi
+}
+
# Use to skip all of these tests
skip() {
echo "1..0 # SKIP" "$@"
exit 0
}
+
+# Print the child pid recorded in file "$1".
+# Only lines containing "child-pid" are considered; each such line is
+# replaced by the run of digits that directly follows its last ": ".
+# A matching line without ": " is printed unchanged.
+# NOTE(review): presumably "$1" is the JSON written via --info-fd - confirm.
+extract_child_pid() {
+ grep child-pid "$1" | sed "s/^.*: \([0-9]*\).*/\1/"
+}
diff --git a/tests/test-run.sh b/tests/test-run.sh
index 30cccf0..a01f41c 100755
--- a/tests/test-run.sh
+++ b/tests/test-run.sh
@@ -80,7 +80,7 @@ if ! $RUN true; then
skip Seems like bwrap is not working at all. Maybe setuid is not working
fi
-echo "1..46"
+echo "1..49"
# Test help
${BWRAP} --help > help.txt
@@ -340,4 +340,40 @@ if $RUN --bind "$(pwd)" /tmp/here test -d /tmp/newroot; then
fi
echo "ok - we can mount another directory inside /tmp"
+# These tests need user namespaces
+if test -n "${bwrap_is_suid:-}"; then
+ echo "ok - # SKIP no setuid support for --unshare-user"
+ echo "ok - # SKIP no setuid support for --unshare-user"
+else
+ mkfifo donepipe
+
+ $RUN --info-fd 42 --unshare-user sh -c 'readlink /proc/self/ns/user > sandbox-userns; cat < donepipe' 42>info.json &
+ while ! test -f sandbox-userns; do sleep 1; done
+ SANDBOX1PID=$(extract_child_pid info.json)
+
+ $RUN --userns 11 readlink /proc/self/ns/user > sandbox2-userns 11< /proc/$SANDBOX1PID/ns/user
+ echo foo > donepipe
+
+ assert_files_equal sandbox-userns sandbox2-userns
+
+ rm donepipe info.json sandbox-userns
+
+ echo "ok - Test --userns"
+
+ mkfifo donepipe
+ $RUN --info-fd 42 --unshare-user --unshare-pid sh -c 'readlink /proc/self/ns/pid > sandbox-pidns; cat < donepipe' 42>info.json &
+ while ! test -f sandbox-pidns; do sleep 1; done
+ SANDBOX1PID=$(extract_child_pid info.json)
+
+ $RUN --userns 11 --pidns 12 readlink /proc/self/ns/pid > sandbox2-pidns 11< /proc/$SANDBOX1PID/ns/user 12< /proc/$SANDBOX1PID/ns/pid
+ echo foo > donepipe
+
+ assert_files_equal sandbox-pidns sandbox2-pidns
+
+ rm donepipe info.json sandbox-pidns
+
+ echo "ok - Test --pidns"
+fi
+
+
echo "ok - End of test"
diff --git a/utils.c b/utils.c
index ef06bd4..a99a865 100644
--- a/utils.c
+++ b/utils.c
@@ -19,6 +19,7 @@
#include "utils.h"
#include <sys/syscall.h>
+#include <sys/socket.h>
#ifdef HAVE_SELINUX
#include <selinux/selinux.h>
#endif
@@ -79,6 +80,19 @@ die_oom (void)
exit (1);
}
+/* Fork once and continue only in the newly created child: the process
+ * that called fork() exits with status 0, so the caller's code keeps
+ * running in the child alone.  A failing fork() is reported through
+ * die_with_error(). */
+void
+fork_intermediate_child (void)
+{
+  int child = fork ();
+
+  if (child == -1)
+    die_with_error ("Can't fork for --pidns");
+
+  /* In the child: hand control back to the caller */
+  if (child == 0)
+    return;
+
+  /* The forking process was only an intermediate step and is not needed */
+  exit (0);
+}
+
void *
xmalloc (size_t size)
{
@@ -670,6 +684,86 @@ mkdir_with_parents (const char *pathname,
return 0;
}
+/* Send a one-byte message on SOCKET that carries an SCM_CREDENTIALS
+ * control message filled in with our pid, effective uid and effective
+ * gid.  It can be read back with read_pid_from_socket(), and then the
+ * kernel has translated it between namespaces as needed.
+ *
+ * Reports the error through die_with_error() if sendmsg() fails. */
+void
+send_pid_on_socket (int socket)
+{
+  char buf[1] = { 0 };
+  struct msghdr msg = {};
+  struct iovec iov = { buf, sizeof (buf) };
+  /* The union gives the control buffer the alignment of struct cmsghdr,
+   * which a plain char array does not guarantee (see cmsg(3)); the
+   * initializer makes sure no padding byte is left undefined. */
+  union
+  {
+    char bytes[CMSG_SPACE (sizeof (struct ucred))];
+    struct cmsghdr align;
+  } control = { { 0 } };
+  struct cmsghdr *cmsg;
+  struct ucred *cred;
+
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = control.bytes;
+  msg.msg_controllen = sizeof (control.bytes);
+
+  cmsg = CMSG_FIRSTHDR (&msg);
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_CREDENTIALS;
+  cmsg->cmsg_len = CMSG_LEN (sizeof (struct ucred));
+  cred = (struct ucred *) CMSG_DATA (cmsg);
+
+  cred->pid = getpid ();
+  cred->uid = geteuid ();
+  cred->gid = getegid ();
+
+  if (sendmsg (socket, &msg, 0) < 0)
+    die_with_error ("Can't send pid");
+}
+
+/* Fill SOCKETS with a connected, close-on-exec AF_UNIX SOCK_SEQPACKET
+ * pair and enable SO_PASSCRED on sockets[0], so that credentials can be
+ * received on that end.  Any failure is reported through
+ * die_with_error(). */
+void
+create_pid_socketpair (int sockets[2])
+{
+  int passcred = 1;
+  int res;
+
+  res = socketpair (AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sockets);
+  if (res != 0)
+    die_with_error ("Can't create intermediate pids socket");
+
+  /* Only sockets[0] gets the option */
+  res = setsockopt (sockets[0], SOL_SOCKET, SO_PASSCRED,
+                    &passcred, sizeof (passcred));
+  if (res < 0)
+    die_with_error ("Can't set SO_PASSCRED");
+}
+
+/* Receive one message from SOCKET and return the pid stored in its
+ * SCM_CREDENTIALS control message.
+ *
+ * Dies if recvmsg() fails, if the message carries no control data at
+ * all, or if none of the control messages is an SCM_CREDENTIALS one
+ * holding exactly one struct ucred. */
+int
+read_pid_from_socket (int socket)
+{
+  char recv_buf[1] = { 0 };
+  struct msghdr msg = {};
+  struct iovec iov = { recv_buf, sizeof (recv_buf) };
+  /* The union gives the control buffer the alignment of struct cmsghdr,
+   * which a plain char array does not guarantee (see cmsg(3)); the
+   * headers are accessed in place by the CMSG_* macros below. */
+  union
+  {
+    char bytes[CMSG_SPACE (sizeof (struct ucred))];
+    struct cmsghdr align;
+  } control = { { 0 } };
+  struct cmsghdr *cmsg;
+
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = control.bytes;
+  msg.msg_controllen = sizeof (control.bytes);
+
+  if (recvmsg (socket, &msg, 0) < 0)
+    die_with_error ("Cant read pid from socket");
+
+  /* recvmsg() updates msg_controllen to the amount of control data received */
+  if (msg.msg_controllen == 0)
+    die ("Unexpected short read from pid socket");
+
+  for (cmsg = CMSG_FIRSTHDR (&msg); cmsg; cmsg = CMSG_NXTHDR (&msg, cmsg))
+    {
+      const unsigned payload_len = cmsg->cmsg_len - CMSG_LEN (0);
+
+      if (cmsg->cmsg_level == SOL_SOCKET &&
+          cmsg->cmsg_type == SCM_CREDENTIALS &&
+          payload_len == sizeof (struct ucred))
+        {
+          struct ucred *cred = (struct ucred *) CMSG_DATA (cmsg);
+          return cred->pid;
+        }
+    }
+
+  die ("No pid returned on socket");
+}
+
int
raw_clone (unsigned long flags,
void *child_stack)
diff --git a/utils.h b/utils.h
index 52cf772..8c4db61 100644
--- a/utils.h
+++ b/utils.h
@@ -54,6 +54,8 @@ void die (const char *format,
void die_oom (void) __attribute__((__noreturn__));
void die_unless_label_valid (const char *label);
+void fork_intermediate_child (void);
+
void *xmalloc (size_t size);
void *xcalloc (size_t size);
void *xrealloc (void *ptr,
@@ -107,6 +109,9 @@ int get_file_mode (const char *pathname);
int mkdir_with_parents (const char *pathname,
int mode,
bool create_last);
+void create_pid_socketpair (int sockets[2]);
+void send_pid_on_socket (int socket);
+int read_pid_from_socket (int socket);
/* syscall wrappers */
int raw_clone (unsigned long flags,