From a6b488c528b70be136cd5e60928a834081e2ec4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Tue, 22 Nov 2022 02:52:43 +0000 Subject: Support producing multi-fragment zstd archives. When the `zstd:frame-per-file` option is specified, the zstd filter will start a new frame when flushed, i.e. for each file in the archive. The `zstd:min-frame-size=N` option modifies the `zstd:frame-per-file` option in that it will not start a new frame unless the current one exceeds `N` bytes. When the `zstd:max-frame-size=N` option is specified, the zstd filter will start a new frame any time the compressed size of the previous one exceeds `N` bytes. These options decrease compression efficiency by a varying amount (depending on the exact composition of its contents) but render the tarball seekable, to a certain extent. --- libarchive/archive_write_add_filter_zstd.c | 151 +++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 38 deletions(-) diff --git a/libarchive/archive_write_add_filter_zstd.c b/libarchive/archive_write_add_filter_zstd.c index 1d194b19..37c5e741 100644 --- a/libarchive/archive_write_add_filter_zstd.c +++ b/libarchive/archive_write_add_filter_zstd.c @@ -55,8 +55,19 @@ struct private_data { int compression_level; int threads; #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR + enum { + running, + finishing, + resetting, + } state; + int frame_per_file; + size_t min_frame_size; + size_t max_frame_size; + size_t cur_frame; + size_t cur_frame_in; + size_t cur_frame_out; + size_t total_in; ZSTD_CStream *cstream; - int64_t total_in; ZSTD_outBuffer out; #else struct archive_write_program_data *pdata; @@ -78,6 +89,7 @@ static int archive_compressor_zstd_options(struct archive_write_filter *, static int archive_compressor_zstd_open(struct archive_write_filter *); static int archive_compressor_zstd_write(struct archive_write_filter *, const void *, size_t); +static int archive_compressor_zstd_flush(struct archive_write_filter *); static int archive_compressor_zstd_close(struct archive_write_filter *); static int archive_compressor_zstd_free(struct archive_write_filter *); #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR @@ -106,6 +118,7 @@ archive_write_add_filter_zstd(struct archive *_a) f->data = data; f->open = &archive_compressor_zstd_open; f->options = &archive_compressor_zstd_options; + f->flush = &archive_compressor_zstd_flush; f->close = &archive_compressor_zstd_close; f->free = &archive_compressor_zstd_free; f->code = ARCHIVE_FILTER_ZSTD; @@ -113,6 +126,11 @@ archive_write_add_filter_zstd(struct archive *_a) data->compression_level = CLEVEL_DEFAULT; data->threads = 0; #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR + data->frame_per_file = 0; + data->min_frame_size = 0; + data->max_frame_size = SIZE_MAX; + data->cur_frame_in = 0; + data->cur_frame_out = 0; data->cstream = ZSTD_createCStream(); if (data->cstream == NULL) { free(data); @@ -154,6 +172,8 @@ static int string_to_number(const char *string, intmax_t *numberp) { char *end; + if (string == NULL || *string == '\0') + return (ARCHIVE_WARN); *numberp = strtoimax(string, &end, 10); if (end == string || *end != '\0' || errno == EOVERFLOW) { *numberp = 0; @@ -206,6 +226,31 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, } data->threads = threads; return (ARCHIVE_OK); +#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR + } else if (strcmp(key, "frame-per-file") == 0) { + data->frame_per_file = 1; + return (ARCHIVE_OK); + } else if (strcmp(key, "min-frame-size") == 0) { + intmax_t min_frame_size; + if (string_to_number(value, &min_frame_size) != ARCHIVE_OK) { + return (ARCHIVE_WARN); + } + if (min_frame_size < 0) { + return (ARCHIVE_WARN); + } + data->min_frame_size = min_frame_size; + return (ARCHIVE_OK); + } else if (strcmp(key, "max-frame-size") == 0) { + intmax_t max_frame_size; + if (string_to_number(value, &max_frame_size) != ARCHIVE_OK) { + return (ARCHIVE_WARN); + } + if (max_frame_size < 1024) { + return (ARCHIVE_WARN); + } + data->max_frame_size = max_frame_size; + return (ARCHIVE_OK); +#endif } /* Note: The "warn" return is just to inform the options @@ -267,15 +312,22 @@ archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff, size_t length) { struct private_data *data = (struct private_data *)f->data; - int ret; - /* Update statistics */ - data->total_in += length; + return (drive_compressor(f, data, 0, buff, length)); +} - if ((ret = drive_compressor(f, data, 0, buff, length)) != ARCHIVE_OK) - return (ret); +/* + * Flush the compressed stream. + */ +static int +archive_compressor_zstd_flush(struct archive_write_filter *f) +{ + struct private_data *data = (struct private_data *)f->data; - return (ARCHIVE_OK); + if (data->frame_per_file && data->state == running && + data->cur_frame_out > data->min_frame_size) + data->state = finishing; + return (drive_compressor(f, data, 1, NULL, 0)); } /* @@ -286,56 +338,72 @@ archive_compressor_zstd_close(struct archive_write_filter *f) { struct private_data *data = (struct private_data *)f->data; - /* Finish zstd frame */ - return drive_compressor(f, data, 1, NULL, 0); + if (data->state == running) + data->state = finishing; + return (drive_compressor(f, data, 1, NULL, 0)); } /* * Utility function to push input data through compressor, * writing full output blocks as necessary. - * - * Note that this handles both the regular write case (finishing == - * false) and the end-of-archive case (finishing == true). */ static int drive_compressor(struct archive_write_filter *f, - struct private_data *data, int finishing, const void *src, size_t length) + struct private_data *data, int flush, const void *src, size_t length) { ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 }; - size_t zstdret; + size_t ipos, opos, zstdret = 0; int ret; for (;;) { - if (data->out.pos == data->out.size) { - ret = __archive_write_filter(f->next_filter, - data->out.dst, data->out.pos); - if (ret != ARCHIVE_OK) - return (ARCHIVE_FATAL); - data->out.pos = 0; + ipos = in.pos; + opos = data->out.pos; + switch (data->state) { + case running: + if (in.pos == in.size) + return (ARCHIVE_OK); + zstdret = ZSTD_compressStream(data->cstream, + &data->out, &in); + if (ZSTD_isError(zstdret)) + goto zstd_fatal; + break; + case finishing: + zstdret = ZSTD_endStream(data->cstream, &data->out); + if (ZSTD_isError(zstdret)) + goto zstd_fatal; + if (zstdret == 0) + data->state = resetting; + break; + case resetting: + ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only); + data->cur_frame++; + data->cur_frame_in = 0; + data->cur_frame_out = 0; + data->state = running; + break; } - - /* If there's nothing to do, we're done. */ - if (!finishing && in.pos == in.size) - return (ARCHIVE_OK); - - zstdret = !finishing ? - ZSTD_compressStream(data->cstream, &data->out, &in) : - ZSTD_endStream(data->cstream, &data->out); - - if (ZSTD_isError(zstdret)) { - archive_set_error(f->archive, ARCHIVE_ERRNO_MISC, - "Zstd compression failed: %s", - ZSTD_getErrorName(zstdret)); - return (ARCHIVE_FATAL); + data->total_in += in.pos - ipos; + data->cur_frame_in += in.pos - ipos; + data->cur_frame_out += data->out.pos - opos; + if (data->state == running && + data->cur_frame_in >= data->max_frame_size) { + data->state = finishing; } - - /* If we're finishing, 0 means nothing left to flush */ - if (finishing && zstdret == 0) { + if (data->out.pos == data->out.size || + (flush && data->out.pos > 0)) { ret = __archive_write_filter(f->next_filter, data->out.dst, data->out.pos); - return (ret); + if (ret != ARCHIVE_OK) + goto fatal; + data->out.pos = 0; } } +zstd_fatal: + archive_set_error(f->archive, ARCHIVE_ERRNO_MISC, + "Zstd compression failed: %s", + ZSTD_getErrorName(zstdret)); +fatal: + return (ARCHIVE_FATAL); } #else /* HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR */ @@ -380,6 +448,13 @@ archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff, return __archive_write_program_write(f, data->pdata, buff, length); } +static int +archive_compressor_zstd_flush(struct archive_write_filter *f) +{ + + return (ARCHIVE_OK); +} + static int archive_compressor_zstd_close(struct archive_write_filter *f) { -- cgit v1.2.1