diff options
author | Alexander Larsson <alexl@redhat.com> | 2019-11-27 13:33:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-27 13:33:42 +0100 |
commit | ff533b84d056f2c22633a84b34323dd085bd977a (patch) | |
tree | 095a248edcdce3e096ab5b54164c7588e13e0906 | |
parent | 300da62ab6d14aaeeed20172a03090932bb23119 (diff) | |
parent | 7a8e3de7e0a263491737a203d82e2f63b51c08fe (diff) | |
download | bubblewrap-ff533b84d056f2c22633a84b34323dd085bd977a.tar.gz |
Merge pull request #338 from containers/reuse-namespaces
Support for reusing namespaces
-rw-r--r-- | bubblewrap.c | 173 | ||||
-rw-r--r-- | bwrap.xml | 15 | ||||
-rw-r--r-- | tests/libtest-core.sh | 22 | ||||
-rwxr-xr-x | tests/test-run.sh | 38 | ||||
-rw-r--r-- | utils.c | 94 | ||||
-rw-r--r-- | utils.h | 5 |
6 files changed, 333 insertions, 14 deletions
diff --git a/bubblewrap.c b/bubblewrap.c index 1ec9d2b..8d0c5f7 100644 --- a/bubblewrap.c +++ b/bubblewrap.c @@ -86,6 +86,9 @@ int opt_json_status_fd = -1; int opt_seccomp_fd = -1; const char *opt_sandbox_hostname = NULL; char *opt_args_data = NULL; /* owned */ +int opt_userns_fd = -1; +int opt_userns2_fd = -1; +int opt_pidns_fd = -1; #define CAP_TO_MASK_0(x) (1L << ((x) & 31)) #define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32) @@ -230,8 +233,11 @@ usage (int ecode, FILE *out) " --unshare-uts Create new uts namespace\n" " --unshare-cgroup Create new cgroup namespace\n" " --unshare-cgroup-try Create new cgroup namespace if possible else continue by skipping it\n" - " --uid UID Custom uid in the sandbox (requires --unshare-user)\n" - " --gid GID Custom gid in the sandbox (requires --unshare-user)\n" + " --userns FD Use this user namespace (cannot combine with --unshare-user)\n" + " --userns2 FD After setup switch to this user namspace, only useful with --userns\n" + " --pidns FD Use this user namespace (as parent namespace if using --unshare-pid)\n" + " --uid UID Custom uid in the sandbox (requires --unshare-user or --userns)\n" + " --gid GID Custom gid in the sandbox (requires --unshare-user or --userns)\n" " --hostname NAME Custom hostname in the sandbox (requires --unshare-uts)\n" " --chdir DIR Change directory to DIR\n" " --setenv VAR VALUE Set an environment variable\n" @@ -799,9 +805,19 @@ static void switch_to_user_with_privs (void) { /* If we're in a new user namespace, we got back the bounding set, clear it again */ - if (opt_unshare_user) + if (opt_unshare_user || opt_userns_fd != -1) drop_cap_bounding_set (FALSE); + /* If we switched to a new user namespace it may allow other uids/gids, so switch to the target one */ + if (opt_userns_fd != -1) + { + if (opt_sandbox_uid != real_uid && setuid (opt_sandbox_uid) < 0) + die_with_error ("unable to switch to uid %d", opt_sandbox_uid); + + if (opt_sandbox_gid != real_gid && setgid (opt_sandbox_gid) < 0) + 
die_with_error ("unable to switch to gid %d", opt_sandbox_gid); + } + if (!is_privileged) return; @@ -822,10 +838,14 @@ drop_privs (bool keep_requested_caps) { assert (!keep_requested_caps || !is_privileged); /* Drop root uid */ - if (getuid () == 0 && setuid (opt_sandbox_uid) < 0) + if (geteuid () == 0 && setuid (opt_sandbox_uid) < 0) die_with_error ("unable to drop root uid"); drop_all_caps (keep_requested_caps); + + /* We don't have any privs now, so mark us dumpable which makes /proc/self be owned by the user instead of root */ + if (prctl (PR_SET_DUMPABLE, 1, 0, 0, 0) != 0) + die_with_error ("can't set dumpable"); } static char * @@ -1097,7 +1117,7 @@ setup_newroot (bool unshare_pid, if (ensure_dir (dest, 0755) != 0) die_with_error ("Can't mkdir %s", op->dest); - if (unshare_pid) + if (unshare_pid || opt_pidns_fd != -1) { /* Our own procfs */ privileged_op (privileged_op_socket, @@ -1885,6 +1905,57 @@ parse_args_recurse (int *argcp, argv += 1; argc -= 1; } + else if (strcmp (arg, "--userns") == 0) + { + int the_fd; + char *endptr; + + if (argc < 2) + die ("--userns takes an argument"); + + the_fd = strtol (argv[1], &endptr, 10); + if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0) + die ("Invalid fd: %s", argv[1]); + + opt_userns_fd = the_fd; + + argv += 1; + argc -= 1; + } + else if (strcmp (arg, "--userns2") == 0) + { + int the_fd; + char *endptr; + + if (argc < 2) + die ("--userns2 takes an argument"); + + the_fd = strtol (argv[1], &endptr, 10); + if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0) + die ("Invalid fd: %s", argv[1]); + + opt_userns2_fd = the_fd; + + argv += 1; + argc -= 1; + } + else if (strcmp (arg, "--pidns") == 0) + { + int the_fd; + char *endptr; + + if (argc < 2) + die ("--pidns takes an argument"); + + the_fd = strtol (argv[1], &endptr, 10); + if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0) + die ("Invalid fd: %s", argv[1]); + + opt_pidns_fd = the_fd; + + argv += 1; + argc -= 1; + } else if (strcmp (arg, "--setenv") == 0) { if 
(argc < 3) @@ -2153,6 +2224,7 @@ main (int argc, size_t seccomp_len; struct sock_fprog seccomp_prog; cleanup_free char *args_data = NULL; + int intermediate_pids_sockets[2] = {-1, -1}; /* Handle --version early on before we try to acquire/drop * any capabilities so it works in a build environment; @@ -2203,14 +2275,35 @@ main (int argc, if (opt_userns_block_fd != -1 && opt_info_fd == -1) die ("--userns-block-fd requires --info-fd"); + if (opt_userns_fd != -1 && opt_unshare_user) + die ("--userns not compatible --unshare-user"); + + if (opt_userns_fd != -1 && opt_unshare_user_try) + die ("--userns not compatible --unshare-user-try"); + + /* Technically using setns() is probably safe even in the privileged + * case, because we got passed in a file descriptor to the + * namespace, and that can only be gotten if you have ptrace + * permissions against the target, and then you could do whatever to + * the namespace anyway. + * + * However, for practical reasons this isn't possible to use, + * because (as described in acquire_privs()) setuid bwrap causes + * root to own the namespaces that it creates, so you will not be + * able to access these namespaces anyway. So, best just not support + * it anyway. + */ + if (opt_userns_fd != -1 && is_privileged) + die ("--userns doesn't work in setuid mode"); + /* We have to do this if we weren't installed setuid (and we're not * root), so let's just DWIM */ - if (!is_privileged && getuid () != 0) + if (!is_privileged && getuid () != 0 && opt_userns_fd == -1) opt_unshare_user = TRUE; #ifdef ENABLE_REQUIRE_USERNS /* In this build option, we require userns. 
*/ - if (is_privileged && getuid () != 0) + if (is_privileged && getuid () != 0 && opt_userns_fd == -1) opt_unshare_user = TRUE; #endif @@ -2255,11 +2348,11 @@ main (int argc, if (opt_sandbox_gid == -1) opt_sandbox_gid = real_gid; - if (!opt_unshare_user && opt_sandbox_uid != real_uid) - die ("Specifying --uid requires --unshare-user"); + if (!opt_unshare_user && opt_userns_fd == -1 && opt_sandbox_uid != real_uid) + die ("Specifying --uid requires --unshare-user or --userns"); - if (!opt_unshare_user && opt_sandbox_gid != real_gid) - die ("Specifying --gid requires --unshare-user"); + if (!opt_unshare_user && opt_userns_fd == -1 && opt_sandbox_gid != real_gid) + die ("Specifying --gid requires --unshare-user or --userns"); if (!opt_unshare_uts && opt_sandbox_hostname != NULL) die ("Specifying --hostname requires --unshare-uts"); @@ -2299,7 +2392,7 @@ main (int argc, clone_flags = SIGCHLD | CLONE_NEWNS; if (opt_unshare_user) clone_flags |= CLONE_NEWUSER; - if (opt_unshare_pid) + if (opt_unshare_pid && opt_pidns_fd == -1) clone_flags |= CLONE_NEWPID; if (opt_unshare_net) clone_flags |= CLONE_NEWNET; @@ -2338,6 +2431,22 @@ main (int argc, die_with_error ("pipe2()"); } + /* Switch to the custom user ns before the clone, gets us privs in that ns (assuming its a child of the current and thus allowed) */ + if (opt_userns_fd > 0 && setns (opt_userns_fd, CLONE_NEWUSER) != 0) + { + if (errno == EINVAL) + die ("Joining the specified user namespace failed, it might not be a descendant of the current user namespace."); + die_with_error ("Joining specified user namespace failed"); + } + + /* Sometimes we have uninteresting intermediate pids during the setup, set up code to pass the real pid down */ + if (opt_pidns_fd != -1) + { + /* Mark us as a subreaper, this way we can get exit status from grandchildren */ + prctl (PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); + create_pid_socketpair (intermediate_pids_sockets); + } + pid = raw_clone (clone_flags, NULL); if (pid == -1) { @@ -2359,6 
+2468,13 @@ main (int argc, { /* Parent, outside sandbox, privileged (initially) */ + if (intermediate_pids_sockets[0] != -1) + { + close (intermediate_pids_sockets[1]); + pid = read_pid_from_socket (intermediate_pids_sockets[0]); + close (intermediate_pids_sockets[0]); + } + /* Discover namespace ids before we drop privileges */ namespace_ids_read (pid); @@ -2377,7 +2493,10 @@ main (int argc, pid, TRUE, opt_needs_devpts); } - /* Initial launched process, wait for exec:ed command to exit */ + /* Initial launched process, wait for pid 1 or exec:ed command to exit */ + + if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0) + die_with_error ("Setting userns2 failed"); /* We don't need any privileges in the launcher, drop them immediately. */ drop_privs (FALSE); @@ -2417,6 +2536,31 @@ main (int argc, return monitor_child (event_fd, pid, setup_finished_pipe[0]); } + if (opt_pidns_fd > 0) + { + if (setns (opt_pidns_fd, CLONE_NEWPID) != 0) + die_with_error ("Setting pidns failed"); + + /* fork to get the passed in pid ns */ + fork_intermediate_child (); + + /* We might both have specified an --pidns *and* --unshare-pid, so set up a new child pid namespace under the specified one */ + if (opt_unshare_pid) + { + if (unshare (CLONE_NEWPID)) + die_with_error ("unshare pid ns"); + + /* fork to get the new pid ns */ + fork_intermediate_child (); + } + + /* We're back, either in a child or grandchild, so message the actual pid to the monitor */ + + close (intermediate_pids_sockets[0]); + send_pid_on_socket (intermediate_pids_sockets[1]); + close (intermediate_pids_sockets[1]); + } + /* Child, in sandbox, privileged in the parent or in the user namespace (if --unshare-user). 
* * Note that for user namespaces we run as euid 0 during clone(), so @@ -2605,6 +2749,9 @@ main (int argc, die_with_error ("chdir /"); } + if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0) + die_with_error ("Setting userns2 failed"); + if (opt_unshare_user && (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) && opt_userns_block_fd == -1) @@ -131,6 +131,21 @@ <listitem><para>Unshare all possible namespaces. Currently equivalent with: <option>--unshare-user-try</option> <option>--unshare-ipc</option> <option>--unshare-pid</option> <option>--unshare-net</option> <option>--unshare-uts</option> <option>--unshare-cgroup-try</option></para></listitem> </varlistentry> <varlistentry> + <term><option>--userns <arg choice="plain">FD</arg></option></term> + <listitem><para>Use an existing user namespace instead of creating a new one. The namespace must fulfil the permission requirements for setns(), which generally means that it must be a descendant of the currently active user namespace, owned by the same user. </para> + <para>This is incompatible with --unshare-user, and doesn't work in the setuid version of bubblewrap.</para></listitem> + </varlistentry> + <varlistentry> + <term><option>--userns2 <arg choice="plain">FD</arg></option></term> + <listitem><para>After setting up the new namespace, switch into the specified namespace. For this to work the specified namespace must be a descendant of the user namespace used for the setup, so this is only useful in combination with --userns.</para> + <para>This is useful because sometimes bubblewrap itself creates nested user namespaces (to work around some kernel issues) and --userns2 can be used to enter these.</para></listitem> + </varlistentry> + <varlistentry> + <term><option>--pidns <arg choice="plain">FD</arg></option></term> + <listitem><para>Use an existing pid namespace instead of creating one. 
This is often used with --userns, because the pid namespace must be owned by the same user namespace that bwrap uses. </para> + <para>Note that this can be combined with --unshare-pid, and in that case it means that the sandbox will be in its own pid namespace, which is a child of the passed in one.</para></listitem> + </varlistentry> + <varlistentry> <term><option>--uid <arg choice="plain">UID</arg></option></term> <listitem><para>Use a custom user id in the sandbox (requires <option>--unshare-user</option>)</para></listitem> </varlistentry> diff --git a/tests/libtest-core.sh b/tests/libtest-core.sh index d0b7d37..0255b57 100644 --- a/tests/libtest-core.sh +++ b/tests/libtest-core.sh @@ -75,6 +75,18 @@ _fatal_print_file() { fatal "$@" } +_fatal_print_files() { + file1="$1" + shift + file2="$1" + shift + ls -al "$file1" >&2 + sed -e 's/^/# /' < "$file1" >&2 + ls -al "$file2" >&2 + sed -e 's/^/# /' < "$file2" >&2 + fatal "$@" +} + assert_not_has_file () { if test -f "$1"; then _fatal_print_file "$1" "File '$1' exists" @@ -135,8 +147,18 @@ assert_file_empty() { fi } +assert_files_equal() { + if ! cmp "$1" "$2"; then + _fatal_print_files "$1" "$2" "File '$1' and '$2' is not equal" + fi +} + # Use to skip all of these tests skip() { echo "1..0 # SKIP" "$@" exit 0 } + +extract_child_pid() { + grep child-pid "$1" | sed "s/^.*: \([0-9]*\).*/\1/" +} diff --git a/tests/test-run.sh b/tests/test-run.sh index 30cccf0..a01f41c 100755 --- a/tests/test-run.sh +++ b/tests/test-run.sh @@ -80,7 +80,7 @@ if ! $RUN true; then skip Seems like bwrap is not working at all. 
Maybe setuid is not working fi -echo "1..46" +echo "1..49" # Test help ${BWRAP} --help > help.txt @@ -340,4 +340,40 @@ if $RUN --bind "$(pwd)" /tmp/here test -d /tmp/newroot; then fi echo "ok - we can mount another directory inside /tmp" +# These tests need user namespaces +if test -n "${bwrap_is_suid:-}"; then + echo "ok - # SKIP no setuid support for --unshare-user" + echo "ok - # SKIP no setuid support for --unshare-user" +else + mkfifo donepipe + + $RUN --info-fd 42 --unshare-user sh -c 'readlink /proc/self/ns/user > sandbox-userns; cat < donepipe' 42>info.json & + while ! test -f sandbox-userns; do sleep 1; done + SANDBOX1PID=$(extract_child_pid info.json) + + $RUN --userns 11 readlink /proc/self/ns/user > sandbox2-userns 11< /proc/$SANDBOX1PID/ns/user + echo foo > donepipe + + assert_files_equal sandbox-userns sandbox2-userns + + rm donepipe info.json sandbox-userns + + echo "ok - Test --userns" + + mkfifo donepipe + $RUN --info-fd 42 --unshare-user --unshare-pid sh -c 'readlink /proc/self/ns/pid > sandbox-pidns; cat < donepipe' 42>info.json & + while ! 
test -f sandbox-pidns; do sleep 1; done + SANDBOX1PID=$(extract_child_pid info.json) + + $RUN --userns 11 --pidns 12 readlink /proc/self/ns/pid > sandbox2-pidns 11< /proc/$SANDBOX1PID/ns/user 12< /proc/$SANDBOX1PID/ns/pid + echo foo > donepipe + + assert_files_equal sandbox-pidns sandbox2-pidns + + rm donepipe info.json sandbox-pidns + + echo "ok - Test --pidns" +fi + + echo "ok - End of test" @@ -19,6 +19,7 @@ #include "utils.h" #include <sys/syscall.h> +#include <sys/socket.h> #ifdef HAVE_SELINUX #include <selinux/selinux.h> #endif @@ -79,6 +80,19 @@ die_oom (void) exit (1); } +/* Fork, return in child, exiting the previous parent */ +void +fork_intermediate_child (void) +{ + int pid = fork (); + if (pid == -1) + die_with_error ("Can't fork for --pidns"); + + /* Parent is an process not needed */ + if (pid != 0) + exit (0); +} + void * xmalloc (size_t size) { @@ -670,6 +684,86 @@ mkdir_with_parents (const char *pathname, return 0; } +/* Send an ucred with current pid/uid/gid over a socket, it can be + read back with read_pid_from_socket(), and then the kernel has + translated it between namespaces as needed. 
*/ +void +send_pid_on_socket (int socket) +{ + char buf[1] = { 0 }; + struct msghdr msg = {}; + struct iovec iov = { buf, sizeof (buf) }; + const ssize_t control_len_snd = CMSG_SPACE(sizeof(struct ucred)); + char control_buf_snd[control_len_snd]; + struct cmsghdr *cmsg; + struct ucred *cred; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control_buf_snd; + msg.msg_controllen = control_len_snd; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + cred = (struct ucred *)CMSG_DATA(cmsg); + + cred->pid = getpid (); + cred->uid = geteuid (); + cred->gid = getegid (); + + if (sendmsg (socket, &msg, 0) < 0) + die_with_error ("Can't send pid"); +} + +void +create_pid_socketpair (int sockets[2]) +{ + int enable = 1; + + if (socketpair (AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sockets) != 0) + die_with_error ("Can't create intermediate pids socket"); + + if (setsockopt (sockets[0], SOL_SOCKET, SO_PASSCRED, &enable, sizeof (enable)) < 0) + die_with_error ("Can't set SO_PASSCRED"); +} + +int +read_pid_from_socket (int socket) +{ + char recv_buf[1] = { 0 }; + struct msghdr msg = {}; + struct iovec iov = { recv_buf, sizeof (recv_buf) }; + const ssize_t control_len_rcv = CMSG_SPACE(sizeof(struct ucred)); + char control_buf_rcv[control_len_rcv]; + struct cmsghdr* cmsg; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control_buf_rcv; + msg.msg_controllen = control_len_rcv; + + if (recvmsg (socket, &msg, 0) < 0) + die_with_error ("Cant read pid from socket"); + + if (msg.msg_controllen <= 0) + die ("Unexpected short read from pid socket"); + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + const unsigned payload_len = cmsg->cmsg_len - CMSG_LEN(0); + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS && + payload_len == sizeof(struct ucred)) + { + struct ucred *cred = (struct ucred 
*)CMSG_DATA(cmsg); + return cred->pid; + } + } + die ("No pid returned on socket"); +} + int raw_clone (unsigned long flags, void *child_stack) @@ -54,6 +54,8 @@ void die (const char *format, void die_oom (void) __attribute__((__noreturn__)); void die_unless_label_valid (const char *label); +void fork_intermediate_child (void); + void *xmalloc (size_t size); void *xcalloc (size_t size); void *xrealloc (void *ptr, @@ -107,6 +109,9 @@ int get_file_mode (const char *pathname); int mkdir_with_parents (const char *pathname, int mode, bool create_last); +void create_pid_socketpair (int sockets[2]); +void send_pid_on_socket (int socket); +int read_pid_from_socket (int socket); /* syscall wrappers */ int raw_clone (unsigned long flags, |