From 494b269a69bd1a6e63fbeb1316506b8f8f3feab1 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sat, 14 May 2022 13:41:35 +0200 Subject: Add --size option to control size of a --tmpfs Signed-off-by: Tom Smeding --- bubblewrap.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/bubblewrap.c b/bubblewrap.c index 24a402c..9e67eb5 100644 --- a/bubblewrap.c +++ b/bubblewrap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,12 @@ __result; })) #endif +/* We limit the size of a tmpfs to half the architecture's address space, + * to avoid hitting arbitrary limits in the kernel. + * For example, on at least one x86_64 machine, the actual limit seems to be + * 2^64 - 2^12. */ +#define MAX_TMPFS_BYTES ((size_t) (SIZE_MAX >> 1)) + /* Globals to avoid having to use getuid(), since the uid/gid changes during runtime */ static uid_t real_uid; static gid_t real_gid; @@ -91,6 +98,7 @@ int opt_userns_fd = -1; int opt_userns2_fd = -1; int opt_pidns_fd = -1; int next_perms = -1; +size_t next_size_arg = 0; #define CAP_TO_MASK_0(x) (1L << ((x) & 31)) #define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32) @@ -149,6 +157,7 @@ struct _SetupOp int fd; SetupOpFlag flags; int perms; + size_t size; /* number of bytes, zero means unset/default */ SetupOp *next; }; @@ -177,6 +186,7 @@ typedef struct uint32_t op; uint32_t flags; uint32_t perms; + size_t size_arg; uint32_t arg1_offset; uint32_t arg2_offset; } PrivSepOp; @@ -341,6 +351,7 @@ usage (int ecode, FILE *out) " --cap-add CAP Add cap CAP when running as privileged user\n" " --cap-drop CAP Drop cap CAP when running as privileged user\n" " --perms OCTAL Set permissions of next argument (--bind-data, --file, etc.)\n" + " --size BYTES Set size of next argument (only for --tmpfs)\n" " --chmod OCTAL PATH Change permissions of PATH (must already exist)\n" ); exit (ecode); @@ -1001,6 +1012,7 @@ privileged_op (int privileged_op_socket, uint32_t op, uint32_t flags, uint32_t perms, + size_t size_arg, const char *arg1, const char *arg2) { @@ -1032,6 +1044,7 @@ privileged_op (int privileged_op_socket, op_buffer->op = op; op_buffer->flags = flags; op_buffer->perms = perms; + op_buffer->size_arg = size_arg; op_buffer->arg1_offset = arg1_offset; op_buffer->arg2_offset = arg2_offset; if (arg1 != NULL) @@ -1096,7 +1109,18 @@ privileged_op (int privileged_op_socket, case PRIV_SEP_OP_TMPFS_MOUNT: { - cleanup_free char *mode = xasprintf ("mode=%#o", perms); + cleanup_free char *mode = NULL; + + /* This check should be unnecessary since we checked this when parsing + * the --size option as well. However, better be safe than sorry. */ + if (size_arg > MAX_TMPFS_BYTES) + die_with_error ("Specified tmpfs size too large (%zu > %zu)", size_arg, MAX_TMPFS_BYTES); + + if (size_arg != 0) + mode = xasprintf ("mode=%#o,size=%zu", perms, size_arg); + else + mode = xasprintf ("mode=%#o", perms); + cleanup_free char *opt = label_mount (mode, opt_file_label); if (mount ("tmpfs", arg1, "tmpfs", MS_NOSUID | MS_NODEV, opt) != 0) die_with_error ("Can't mount tmpfs on %s", arg1); @@ -1197,12 +1221,12 @@ setup_newroot (bool unshare_pid, PRIV_SEP_OP_BIND_MOUNT, (op->type == SETUP_RO_BIND_MOUNT ? BIND_READONLY : 0) | (op->type == SETUP_DEV_BIND_MOUNT ? BIND_DEVICES : 0), - 0, source, dest); + 0, 0, source, dest); break; case SETUP_REMOUNT_RO_NO_RECURSIVE: privileged_op (privileged_op_socket, - PRIV_SEP_OP_REMOUNT_RO_NO_RECURSIVE, 0, 0, NULL, dest); + PRIV_SEP_OP_REMOUNT_RO_NO_RECURSIVE, 0, 0, 0, NULL, dest); break; case SETUP_MOUNT_PROC: @@ -1213,14 +1237,14 @@ setup_newroot (bool unshare_pid, { /* Our own procfs */ privileged_op (privileged_op_socket, - PRIV_SEP_OP_PROC_MOUNT, 0, 0, + PRIV_SEP_OP_PROC_MOUNT, 0, 0, 0, dest, NULL); } else { /* Use system procfs, as we share pid namespace anyway */ privileged_op (privileged_op_socket, - PRIV_SEP_OP_BIND_MOUNT, 0, 0, + PRIV_SEP_OP_BIND_MOUNT, 0, 0, 0, "oldroot/proc", dest); } @@ -1242,7 +1266,7 @@ setup_newroot (bool unshare_pid, } privileged_op (privileged_op_socket, - PRIV_SEP_OP_BIND_MOUNT, BIND_READONLY, 0, + PRIV_SEP_OP_BIND_MOUNT, BIND_READONLY, 0, 0, subdir, subdir); } @@ -1253,7 +1277,7 @@ setup_newroot (bool unshare_pid, die_with_error ("Can't mkdir %s", op->dest); privileged_op (privileged_op_socket, - PRIV_SEP_OP_TMPFS_MOUNT, 0, 0755, + PRIV_SEP_OP_TMPFS_MOUNT, 0, 0755, 0, dest, NULL); static const char *const devnodes[] = { "null", "zero", "full", "random", "urandom", "tty" }; @@ -1264,7 +1288,7 @@ setup_newroot (bool unshare_pid, if (create_file (node_dest, 0444, NULL) != 0) die_with_error ("Can't create file %s/%s", op->dest, devnodes[i]); privileged_op (privileged_op_socket, - PRIV_SEP_OP_BIND_MOUNT, BIND_DEVICES, 0, + PRIV_SEP_OP_BIND_MOUNT, BIND_DEVICES, 0, 0, node_src, node_dest); } @@ -1298,7 +1322,7 @@ setup_newroot (bool unshare_pid, if (mkdir (pts, 0755) == -1) die_with_error ("Can't create %s/devpts", op->dest); privileged_op (privileged_op_socket, - PRIV_SEP_OP_DEVPTS_MOUNT, 0, 0, pts, NULL); + PRIV_SEP_OP_DEVPTS_MOUNT, 0, 0, 0, pts, NULL); if (symlink ("pts/ptmx", ptmx) != 0) die_with_error ("Can't make symlink at %s/ptmx", op->dest); @@ -1318,7 +1342,7 @@ setup_newroot (bool unshare_pid, die_with_error ("creating %s/console", op->dest); privileged_op (privileged_op_socket, - PRIV_SEP_OP_BIND_MOUNT, BIND_DEVICES, 0, + PRIV_SEP_OP_BIND_MOUNT, BIND_DEVICES, 0, 0, src_tty_dev, dest_console); } @@ -1333,7 +1357,7 @@ setup_newroot (bool unshare_pid, die_with_error ("Can't mkdir %s", op->dest); privileged_op (privileged_op_socket, - PRIV_SEP_OP_TMPFS_MOUNT, 0, op->perms, + PRIV_SEP_OP_TMPFS_MOUNT, 0, op->perms, op->size, dest, NULL); break; @@ -1342,7 +1366,7 @@ setup_newroot (bool unshare_pid, die_with_error ("Can't mkdir %s", op->dest); privileged_op (privileged_op_socket, - PRIV_SEP_OP_MQUEUE_MOUNT, 0, 0, + PRIV_SEP_OP_MQUEUE_MOUNT, 0, 0, 0, dest, NULL); break; @@ -1423,7 +1447,7 @@ setup_newroot (bool unshare_pid, privileged_op (privileged_op_socket, PRIV_SEP_OP_BIND_MOUNT, (op->type == SETUP_MAKE_RO_BIND_FILE ? BIND_READONLY : 0), - 0, tempfile, dest); + 0, 0, tempfile, dest); /* Remove the file so we're sure the app can't get to it in any other way. Its outside the container chroot, so it shouldn't be possible, but lets @@ -1441,7 +1465,7 @@ setup_newroot (bool unshare_pid, case SETUP_SET_HOSTNAME: assert (op->dest != NULL); /* guaranteed by the constructor */ privileged_op (privileged_op_socket, - PRIV_SEP_OP_SET_HOSTNAME, 0, 0, + PRIV_SEP_OP_SET_HOSTNAME, 0, 0, 0, op->dest, NULL); break; @@ -1450,7 +1474,7 @@ setup_newroot (bool unshare_pid, } } privileged_op (privileged_op_socket, - PRIV_SEP_OP_DONE, 0, 0, NULL, NULL); + PRIV_SEP_OP_DONE, 0, 0, 0, NULL, NULL); } /* Do not leak file descriptors already used by setup_newroot () */ @@ -1537,6 +1561,7 @@ read_priv_sec_op (int read_socket, size_t buffer_size, uint32_t *flags, uint32_t *perms, + size_t *size_arg, const char **arg1, const char **arg2) { @@ -1561,6 +1586,7 @@ read_priv_sec_op (int read_socket, *flags = op->flags; *perms = op->perms; + *size_arg = op->size_arg; *arg1 = resolve_string_offset (buffer, rec_len, op->arg1_offset); *arg2 = resolve_string_offset (buffer, rec_len, op->arg2_offset); @@ -1577,7 +1603,8 @@ print_version_and_exit (void) static int is_modifier_option (const char *option) { - return strcmp (option, "--perms") == 0; + return strcmp (option, "--perms") == 0 + || strcmp(option, "--size") == 0; } static void @@ -1871,6 +1898,13 @@ parse_args_recurse (int *argcp, op->perms = 0755; next_perms = -1; + + /* If the option is unset, next_size_arg is zero, which results in + * the default tmpfs size. This is exactly what we want. */ + op->size = next_size_arg; + + next_size_arg = 0; + argv += 1; argc -= 1; } @@ -2377,6 +2411,42 @@ parse_args_recurse (int *argcp, next_perms = (int) perms; + argv += 1; + argc -= 1; + } + else if (strcmp (arg, "--size") == 0) + { + unsigned long long size; + char *endptr = NULL; + + if (is_privileged) + die ("The --size option is not permitted in setuid mode"); + + if (argc < 2) + die ("--size takes an argument"); + + if (next_size_arg != 0) + die ("--size given twice for the same action"); + + errno = 0; /* reset errno so we can detect ERANGE from strtoull */ + + size = strtoull (argv[1], &endptr, 0); + + /* isdigit: Not only check that the first digit is not '\0', but + * simultaneously guard against negative numbers or preceding + * spaces. */ + if (errno != 0 /* from strtoull */ + || !isdigit(argv[1][0]) + || endptr == NULL + || *endptr != '\0' + || size == 0) + die ("--size takes a non-zero number of bytes"); + + if (size > MAX_TMPFS_BYTES) + die ("--size (for tmpfs) is limited to %zu", MAX_TMPFS_BYTES); + + next_size_arg = (size_t) size; + argv += 1; argc -= 1; } @@ -2425,6 +2495,10 @@ parse_args_recurse (int *argcp, if (!is_modifier_option(arg) && next_perms >= 0) die ("--perms must be followed by an option that creates a file"); + /* Similarly for --size. */ + if (!is_modifier_option(arg) && next_size_arg != 0) + die ("--size must be followed by --tmpfs"); + argv++; argc--; } @@ -3006,6 +3080,7 @@ main (int argc, int status; uint32_t buffer[2048]; /* 8k, but is int32 to guarantee nice alignment */ uint32_t op, flags, perms; + size_t size_arg; const char *arg1, *arg2; cleanup_fd int unpriv_socket = -1; @@ -3015,8 +3090,8 @@ main (int argc, do { op = read_priv_sec_op (unpriv_socket, buffer, sizeof (buffer), - &flags, &perms, &arg1, &arg2); - privileged_op (-1, op, flags, perms, arg1, arg2); + &flags, &perms, &size_arg, &arg1, &arg2); + privileged_op (-1, op, flags, perms, size_arg, arg1, arg2); if (write (unpriv_socket, buffer, 1) != 1) die ("Can't write to op_socket"); } -- cgit v1.2.1