diff options
author | Daan De Meyer <daan.j.demeyer@gmail.com> | 2022-11-15 08:19:28 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-15 08:19:28 +0100 |
commit | 32a3f802f60d7672d242b315faececb7378959ff (patch) | |
tree | 0d9c1157af887a2c9780574c82286c08e79f5502 /src/partition | |
parent | 09cd639a59fec13d80ac8fce2d671d088d8cca9f (diff) | |
parent | 14697c4156a337371b9f9691b30fd36814bddf73 (diff) | |
download | systemd-32a3f802f60d7672d242b315faececb7378959ff.tar.gz |
Merge pull request #24908 from DaanDeMeyer/repart-minimize
repart: Add Minimize setting
Diffstat (limited to 'src/partition')
-rw-r--r-- | src/partition/repart.c | 293 |
1 file changed, 239 insertions, 54 deletions
diff --git a/src/partition/repart.c b/src/partition/repart.c index 8a1a8411cf..2709538b0c 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -76,19 +76,19 @@ #include "utf8.h" /* If not configured otherwise use a minimal partition size of 10M */ -#define DEFAULT_MIN_SIZE (10*1024*1024) +#define DEFAULT_MIN_SIZE (10ULL*1024ULL*1024ULL) /* Hard lower limit for new partition sizes */ -#define HARD_MIN_SIZE 4096 +#define HARD_MIN_SIZE 4096ULL /* We know up front we're never going to put more than this in a verity sig partition. */ -#define VERITY_SIG_SIZE (HARD_MIN_SIZE * 4) +#define VERITY_SIG_SIZE (HARD_MIN_SIZE*4ULL) /* libfdisk takes off slightly more than 1M of the disk size when creating a GPT disk label */ -#define GPT_METADATA_SIZE (1044*1024) +#define GPT_METADATA_SIZE (1044ULL*1024ULL) /* LUKS2 takes off 16M of the partition size with its metadata by default */ -#define LUKS2_METADATA_SIZE (16*1024*1024) +#define LUKS2_METADATA_SIZE (16ULL*1024ULL*1024ULL) /* Note: When growing and placing new partitions we always align to 4K sector size. It's how newer hard disks * are designed, and if everything is aligned to that performance is best. And for older hard disks with 512B @@ -168,6 +168,7 @@ struct Partition { sd_id128_t current_uuid, new_uuid; bool new_uuid_is_set; char *current_label, *new_label; + sd_id128_t fs_uuid; bool dropped; bool factory_reset; @@ -191,6 +192,7 @@ struct Partition { char *copy_blocks_path; bool copy_blocks_auto; + const char *copy_blocks_root; int copy_blocks_fd; uint64_t copy_blocks_size; @@ -200,6 +202,7 @@ struct Partition { EncryptMode encrypt; VerityMode verity; char *verity_match_key; + bool minimize; uint64_t gpt_flags; int no_auto; @@ -344,20 +347,18 @@ static void partition_foreignize(Partition *p) { /* Reset several parameters set through definition file to make the partition foreign. 
*/ - p->new_label = mfree(p->new_label); p->definition_path = mfree(p->definition_path); p->drop_in_files = strv_free(p->drop_in_files); p->copy_blocks_path = mfree(p->copy_blocks_path); p->copy_blocks_fd = safe_close(p->copy_blocks_fd); + p->copy_blocks_root = NULL; p->format = mfree(p->format); p->copy_files = strv_free(p->copy_files); p->make_directories = strv_free(p->make_directories); p->verity_match_key = mfree(p->verity_match_key); - p->new_uuid = SD_ID128_NULL; - p->new_uuid_is_set = false; p->priority = 0; p->weight = 1000; p->padding_weight = 0; @@ -1338,6 +1339,7 @@ static int config_parse_copy_blocks( if (streq(rvalue, "auto")) { partition->copy_blocks_path = mfree(partition->copy_blocks_path); partition->copy_blocks_auto = true; + partition->copy_blocks_root = arg_root; return 0; } @@ -1354,6 +1356,7 @@ static int config_parse_copy_blocks( free_and_replace(partition->copy_blocks_path, d); partition->copy_blocks_auto = false; + partition->copy_blocks_root = arg_root; return 0; } @@ -1498,6 +1501,7 @@ static int partition_read_definition(Partition *p, const char *path, const char { "Partition", "NoAuto", config_parse_tristate, 0, &p->no_auto }, { "Partition", "GrowFileSystem", config_parse_tristate, 0, &p->growfs }, { "Partition", "SplitName", config_parse_string, 0, &p->split_name_format }, + { "Partition", "Minimize", config_parse_bool, 0, &p->minimize }, {} }; int r; @@ -1551,6 +1555,10 @@ static int partition_read_definition(Partition *p, const char *path, const char return log_oom(); } + if (p->minimize && !p->format) + return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL), + "Minimize= can only be enabled if Format= is set"); + if (p->verity != VERITY_OFF || p->encrypt != ENCRYPT_OFF) { r = dlopen_cryptsetup(); if (r < 0) @@ -3200,7 +3208,7 @@ static int context_copy_blocks(Context *context) { log_info("Copying in '%s' (%s) on block level into future partition %" PRIu64 ".", p->copy_blocks_path, FORMAT_BYTES(p->copy_blocks_size), 
p->partno); - r = copy_bytes_full(p->copy_blocks_fd, target_fd, p->copy_blocks_size, 0, NULL, NULL, NULL, NULL); + r = copy_bytes(p->copy_blocks_fd, target_fd, p->copy_blocks_size, COPY_REFLINK); if (r < 0) return log_error_errno(r, "Failed to copy in data from '%s': %m", p->copy_blocks_path); @@ -3274,14 +3282,14 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { sfd, ".", pfd, fn, UID_INVALID, GID_INVALID, - COPY_REFLINK|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, + COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, denylist); } else r = copy_tree_at( sfd, ".", tfd, ".", UID_INVALID, GID_INVALID, - COPY_REFLINK|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, + COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, denylist); if (r < 0) return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_root), *target); @@ -3313,7 +3321,7 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { if (tfd < 0) return log_error_errno(errno, "Failed to create target file '%s': %m", *target); - r = copy_bytes(sfd, tfd, UINT64_MAX, COPY_REFLINK|COPY_SIGINT); + r = copy_bytes(sfd, tfd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_SIGINT); if (r < 0) return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_root), *target); @@ -3349,17 +3357,6 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char assert(ret_root); assert(ret_tmp_root); - /* When generating read-only filesystems, we need the source tree to be available when we generate - * the read-only filesystem. Because we might have multiple source trees, we build a temporary source - * tree beforehand where we merge all our inputs. We then use this merged source tree to create the - * read-only filesystem. 
*/ - - if (!fstype_is_ro(p->format)) { - *ret_root = NULL; - *ret_tmp_root = NULL; - return 0; - } - /* If we only have a single directory that's meant to become the root directory of the filesystem, * we can shortcut this function and just use that directory as the root directory instead. If we * allocate a temporary directory, it's stored in "ret_tmp_root" to indicate it should be removed. @@ -3396,18 +3393,28 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char } static int partition_populate_filesystem(Partition *p, const char *node, const Set *denylist) { + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + struct stat st; int r; assert(p); assert(node); - if (fstype_is_ro(p->format)) - return 0; - if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories)) return 0; - log_info("Populating partition %" PRIu64 " with files.", p->partno); + if (stat(node, &st) < 0) + return log_error_errno(errno, "Failed to stat %s: %m", node); + + if (!S_ISBLK(st.st_mode)) { + r = loop_device_make_by_path(node, O_RDWR, 0, LOCK_EX, &d); + if (r < 0) + return log_error_errno(r, "Failed to make loopback device of %s: %m", node); + + node = d->node; + } + + log_info("Populating %s filesystem with files.", p->format); /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to * appear in the host namespace. 
Hence we fork a child that has its own file system namespace and @@ -3444,7 +3451,7 @@ static int partition_populate_filesystem(Partition *p, const char *node, const S _exit(EXIT_SUCCESS); } - log_info("Successfully populated partition %" PRIu64 " with files.", p->partno); + log_info("Successfully populated %s filesystem with files.", p->format); return 0; } @@ -3507,7 +3514,6 @@ static int context_mkfs(Context *context) { _cleanup_free_ char *encrypted = NULL, *root = NULL; _cleanup_close_ int encrypted_dev_fd = -1; const char *fsdev; - sd_id128_t fs_uuid; if (p->dropped) continue; @@ -3518,6 +3524,10 @@ static int context_mkfs(Context *context) { if (!p->format) continue; + /* Minimized partitions will use the copy blocks logic so let's make sure to skip those here. */ + if (p->copy_blocks_fd >= 0) + continue; + assert(p->offset != UINT64_MAX); assert(p->new_size != UINT64_MAX); @@ -3545,22 +3555,19 @@ static int context_mkfs(Context *context) { log_info("Formatting future partition %" PRIu64 ".", p->partno); - /* Calculate the UUID for the file system as HMAC-SHA256 of the string "file-system-uuid", - * keyed off the partition UUID. */ - r = derive_uuid(p->new_uuid, "file-system-uuid", &fs_uuid); - if (r < 0) - return r; - /* Ideally, we populate filesystems using our own code after creating the filesystem to * ensure consistent handling of chattrs, xattrs and other similar things. However, when * using read-only filesystems such as squashfs, we can't populate after creating the * filesystem because it's read-only, so instead we create a temporary root to use as the * source tree when generating the read-only filesystem. 
*/ - r = partition_populate_directory(p, denylist, &root, &tmp_root); - if (r < 0) - return r; - r = make_filesystem(fsdev, p->format, strempty(p->new_label), root ?: tmp_root, fs_uuid, arg_discard); + if (fstype_is_ro(p->format)) { + r = partition_populate_directory(p, denylist, &root, &tmp_root); + if (r < 0) + return r; + } + + r = make_filesystem(fsdev, p->format, strempty(p->new_label), root ?: tmp_root, p->fs_uuid, arg_discard); if (r < 0) { encrypted_dev_fd = safe_close(encrypted_dev_fd); (void) deactivate_luks(cd, encrypted); @@ -3575,11 +3582,13 @@ static int context_mkfs(Context *context) { return log_error_errno(errno, "Failed to unlock LUKS device: %m"); /* Now, we can populate all the other filesystems that aren't read-only. */ - r = partition_populate_filesystem(p, fsdev, denylist); - if (r < 0) { - encrypted_dev_fd = safe_close(encrypted_dev_fd); - (void) deactivate_luks(cd, encrypted); - return r; + if (!fstype_is_ro(p->format)) { + r = partition_populate_filesystem(p, fsdev, denylist); + if (r < 0) { + encrypted_dev_fd = safe_close(encrypted_dev_fd); + (void) deactivate_luks(cd, encrypted); + return r; + } } /* Note that we always sync explicitly here, since mkfs.fat doesn't do that on its own, and @@ -4060,6 +4069,12 @@ static int context_acquire_partition_uuids_and_labels(Context *context) { p->new_uuid_is_set = true; } + /* Calculate the UUID for the file system as HMAC-SHA256 of the string "file-system-uuid", + * keyed off the partition UUID. 
*/ + r = derive_uuid(p->new_uuid, "file-system-uuid", &p->fs_uuid); + if (r < 0) + return r; + if (!isempty(p->current_label)) { /* never change initialized labels */ r = free_and_strdup_warn(&p->new_label, p->current_label); @@ -4401,7 +4416,7 @@ static int context_split(Context *context) { if (lseek(fd, p->offset, SEEK_SET) < 0) return log_error_errno(errno, "Failed to seek to partition offset: %m"); - r = copy_bytes_full(fd, fdt, p->new_size, COPY_REFLINK|COPY_HOLES, NULL, NULL, NULL, NULL); + r = copy_bytes(fd, fdt, p->new_size, COPY_REFLINK|COPY_HOLES); if (r < 0) return log_error_errno(r, "Failed to copy to split partition %s: %m", fname); } @@ -4884,7 +4899,6 @@ static int resolve_copy_blocks_auto( static int context_open_copy_block_paths( Context *context, - const char *root, dev_t restrict_devno) { int r; @@ -4906,7 +4920,7 @@ static int context_open_copy_block_paths( if (p->copy_blocks_path) { - source_fd = chase_symlinks_and_open(p->copy_blocks_path, root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened); + source_fd = chase_symlinks_and_open(p->copy_blocks_path, p->copy_blocks_root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NONBLOCK, &opened); if (source_fd < 0) return log_error_errno(source_fd, "Failed to open '%s': %m", p->copy_blocks_path); @@ -4920,7 +4934,7 @@ static int context_open_copy_block_paths( } else if (p->copy_blocks_auto) { dev_t devno; - r = resolve_copy_blocks_auto(p->type_uuid, root, restrict_devno, &devno, &uuid); + r = resolve_copy_blocks_auto(p->type_uuid, p->copy_blocks_root, restrict_devno, &devno, &uuid); if (r < 0) return r; @@ -4989,6 +5003,174 @@ static int context_open_copy_block_paths( return 0; } +static int fd_apparent_size(int fd, uint64_t *ret) { + off_t initial = 0; + uint64_t size = 0; + + assert(fd >= 0); + assert(ret); + + initial = lseek(fd, 0, SEEK_CUR); + if (initial < 0) + return log_error_errno(errno, "Failed to get file offset: %m"); + + for (off_t off = 0;;) { + off_t r; + + r = lseek(fd, off, 
SEEK_DATA); + if (r < 0 && errno == ENXIO) + /* If errno == ENXIO, that means we've reached the final hole of the file and + * that hole isn't followed by more data. */ + break; + if (r < 0) + return log_error_errno(errno, "Failed to seek data in file from offset %"PRIi64": %m", off); + + off = r; /* Set the offset to the start of the data segment. */ + + /* After copying a potential hole, find the end of the data segment by looking for + * the next hole. If we get ENXIO, we're at EOF. */ + r = lseek(fd, off, SEEK_HOLE); + if (r < 0) { + if (errno == ENXIO) + break; + return log_error_errno(errno, "Failed to seek hole in file from offset %"PRIi64": %m", off); + } + + size += r - off; + off = r; + } + + if (lseek(fd, initial, SEEK_SET) < 0) + return log_error_errno(errno, "Failed to reset file offset: %m"); + + *ret = size; + + return 0; +} + +static int context_minimize(Context *context) { + _cleanup_set_free_ Set *denylist = NULL; + const char *vt; + int r; + + assert(context); + + r = make_copy_files_denylist(context, &denylist); + if (r < 0) + return r; + + r = var_tmp_dir(&vt); + if (r < 0) + return log_error_errno(r, "Could not determine temporary directory: %m"); + + LIST_FOREACH(partitions, p, context->partitions) { + _cleanup_(rm_rf_physical_and_freep) char *tmp_root = NULL; + _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_free_ char *root = NULL; + _cleanup_close_ int fd = -1; + sd_id128_t fs_uuid; + uint64_t fsz; + + if (p->dropped) + continue; + + if (PARTITION_EXISTS(p)) /* Never format existing partitions */ + continue; + + if (!p->format) + continue; + + if (!p->minimize) + continue; + + assert(!p->copy_blocks_path); + + r = tempfn_random_child(vt, "repart", &temp); + if (r < 0) + return log_error_errno(r, "Failed to generate temporary file path: %m"); + + if (!fstype_is_ro(p->format)) { + fd = open(temp, O_CREAT|O_EXCL|O_CLOEXEC|O_RDWR|O_NOCTTY, 0600); + if (fd < 0) + return log_error_errno(errno, "Failed to open temporary file %s: %m", 
temp); + + /* This may seem huge but it will be created sparse so it doesn't take up any space + * on disk until written to. */ + if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0) + return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", + FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL)); + + /* We're going to populate this filesystem twice so use a random UUID the first time + * to avoid UUID conflicts. */ + r = sd_id128_randomize(&fs_uuid); + if (r < 0) + return r; + } else { + r = partition_populate_directory(p, denylist, &root, &tmp_root); + if (r < 0) + return r; + + fs_uuid = p->fs_uuid; + } + + r = make_filesystem(temp, p->format, strempty(p->new_label), root ?: tmp_root, fs_uuid, + arg_discard); + if (r < 0) + return r; + + /* Read-only filesystems are minimal from the first try because they create and size the + * loopback file for us. */ + if (fstype_is_ro(p->format)) { + p->copy_blocks_path = TAKE_PTR(temp); + continue; + } + + r = partition_populate_filesystem(p, temp, denylist); + if (r < 0) + return r; + + /* Other filesystems need to be provided with a pre-sized loopback file and will adapt to + * fully occupy it. Because we gave the filesystem a 1T sparse file, we need to shrink the + * filesystem down to a reasonable size again to fit it in the disk image. While there are + * some filesystems that support shrinking, it doesn't always work properly (e.g. shrinking + * btrfs gives us a 2.0G filesystem regardless of what we put in it). Instead, let's populate + * the filesystem again, but this time, instead of providing the filesystem with a 1T sparse + * loopback file, let's size the loopback file based on the actual data used by the + * filesystem in the sparse file after the first attempt. This should be a good guess of the + * minimal amount of space needed in the filesystem to fit all the required data. 
+ */ + r = fd_apparent_size(fd, &fsz); + if (r < 0) + return r; + + /* Massage the size a bit because just going by actual data used in the sparse file isn't + * fool-proof. */ + fsz = round_up_size(fsz + (fsz / 2), context->grain_size); + if (minimal_size_by_fs_name(p->format) != UINT64_MAX) + fsz = MAX(minimal_size_by_fs_name(p->format), fsz); + + /* Erase the previous filesystem first. */ + if (ftruncate(fd, 0)) + return log_error_errno(errno, "Failed to erase temporary file: %m"); + + if (ftruncate(fd, fsz)) + return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz)); + + r = make_filesystem(temp, p->format, strempty(p->new_label), root ?: tmp_root, p->fs_uuid, + arg_discard); + if (r < 0) + return r; + + r = partition_populate_filesystem(p, temp, denylist); + if (r < 0) + return r; + + p->copy_blocks_path = TAKE_PTR(temp); + } + + return 0; +} + static int help(void) { _cleanup_free_ char *link = NULL; int r; @@ -5949,10 +6131,18 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; + /* Make sure each partition has a unique UUID and unique label */ + r = context_acquire_partition_uuids_and_labels(context); + if (r < 0) + return r; + + r = context_minimize(context); + if (r < 0) + return r; + /* Open all files to copy blocks from now, since we want to take their size into consideration */ r = context_open_copy_block_paths( context, - arg_root, loop_device ? loop_device->devno : /* if --image= is specified, only allow partitions on the loopback device */ arg_root && !arg_image ? 
0 : /* if --root= is specified, don't accept any block device */ (dev_t) -1); /* if neither is specified, make no restrictions */ @@ -6005,11 +6195,6 @@ static int run(int argc, char *argv[]) { /* Now calculate where each new partition gets placed */ context_place_partitions(context); - /* Make sure each partition has a unique UUID and unique label */ - r = context_acquire_partition_uuids_and_labels(context); - if (r < 0) - return r; - (void) context_dump(context, node, /*late=*/ false); r = context_write_partition_table(context, node, from_scratch); |