diff options
author | Michihiro NAKAJIMA <ggcueroad@gmail.com> | 2014-06-30 06:44:47 +0900 |
---|---|---|
committer | Michihiro NAKAJIMA <ggcueroad@gmail.com> | 2014-06-30 06:44:47 +0900 |
commit | 724f3f918e509ddca60ce62fad696d7621d3623d (patch) | |
tree | f45486aff03f327a7f2a1c023c15562c32e0b306 /libarchive/archive_read_support_filter_lz4.c | |
parent | 44b869c872439b5eb2d1aa34079ae3231d3d04d3 (diff) | |
download | libarchive-724f3f918e509ddca60ce62fad696d7621d3623d.tar.gz |
Add support for lz4 read filter.
TODO: detecting liblz4 library by CMake.
Diffstat (limited to 'libarchive/archive_read_support_filter_lz4.c')
-rw-r--r-- | libarchive/archive_read_support_filter_lz4.c | 728 |
1 files changed, 728 insertions, 0 deletions
diff --git a/libarchive/archive_read_support_filter_lz4.c b/libarchive/archive_read_support_filter_lz4.c new file mode 100644 index 00000000..db62cb35 --- /dev/null +++ b/libarchive/archive_read_support_filter_lz4.c @@ -0,0 +1,728 @@ +/*- + * Copyright (c) 2014 Michihiro NAKAJIMA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" + +__FBSDID("$FreeBSD$"); + +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#include <stdio.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_LZ4_H +#include <lz4.h> +#endif + +#include "archive.h" +#include "archive_endian.h" +#include "archive_private.h" +#include "archive_read_private.h" +#include "archive_xxhash.h" + +#define LZ4_MAGICNUMBER 0x184d2204 +#define LZ4_SKIPPABLED 0x184d2a50 +#define LZ4_LEGACY 0x184c2102 + +#if defined(HAVE_LIBLZ4) +struct private_data { + enum { SELECT_STREAM, + READ_DEFAULT_STREAM, + READ_DEFAULT_BLOCK, + READ_LEGACY_STREAM, + READ_LEGACY_BLOCK, + } stage; + struct { + unsigned block_independence:1; + unsigned block_checksum:3; + unsigned stream_size:1; + unsigned stream_checksum:1; + unsigned preset_dictionary:1; + int block_maximum_size; + } flags; + int64_t stream_size; + uint32_t dict_id; + char *out_block; + size_t out_block_size; + + /* Bytes read but not yet consumed via __archive_read_consume() */ + size_t unconsumed; + size_t decoded_size; + void *xxh32_state; + + char valid; /* True = decompressor is initialized */ + char eof; /* True = found end of compressed data. */ +}; + +#define LEGACY_BLOCK_SIZE (8 * 1024 * 1024) + +/* Lz4 filter */ +static ssize_t lz4_filter_read(struct archive_read_filter *, const void **); +static int lz4_filter_close(struct archive_read_filter *); +#endif + +/* + * Note that we can detect lz4 archives even if we can't decompress + * them. (In fact, we like detecting them because we can give better + * error messages.) So the bid framework here gets compiled even + * if liblz4 is unavailable. + */ +static int lz4_reader_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); +static int lz4_reader_init(struct archive_read_filter *); +static int lz4_reader_free(struct archive_read_filter_bidder *); +#if defined(HAVE_LIBLZ4) +static ssize_t lz4_filter_read_default_stream(struct archive_read_filter *, + const void **); +static ssize_t lz4_filter_read_legacy_stream(struct archive_read_filter *, + const void **); +#endif + +int +archive_read_support_filter_lz4(struct archive *_a) +{ + struct archive_read *a = (struct archive_read *)_a; + struct archive_read_filter_bidder *reader; + + archive_check_magic(_a, ARCHIVE_READ_MAGIC, + ARCHIVE_STATE_NEW, "archive_read_support_filter_lz4"); + + if (__archive_read_get_bidder(a, &reader) != ARCHIVE_OK) + return (ARCHIVE_FATAL); + + reader->data = NULL; + reader->name = "lz4"; + reader->bid = lz4_reader_bid; + reader->init = lz4_reader_init; + reader->options = NULL; + reader->free = lz4_reader_free; +#if defined(HAVE_LIBLZ4) + return (ARCHIVE_OK); +#else + archive_set_error(_a, ARCHIVE_ERRNO_MISC, + "Using external lz4 program"); + return (ARCHIVE_WARN); +#endif +} + +static int +lz4_reader_free(struct archive_read_filter_bidder *self){ + (void)self; /* UNUSED */ + return (ARCHIVE_OK); +} + +/* + * Test whether we can handle this data. + * + * This logic returns zero if any part of the signature fails. It + * also tries to Do The Right Thing if a very short buffer prevents us + * from verifying as much as we would like. + */ +static int +lz4_reader_bid(struct archive_read_filter_bidder *self, + struct archive_read_filter *filter) +{ + const unsigned char *buffer; + ssize_t avail; + int bits_checked; + uint32_t number; + + (void)self; /* UNUSED */ + + /* Minimal lz4 archive is 11 bytes. */ + buffer = __archive_read_filter_ahead(filter, 11, &avail); + if (buffer == NULL) + return (0); + + /* First four bytes must be LZ4 magic numbers. */ + bits_checked = 0; + if ((number = archive_le32dec(buffer)) == LZ4_MAGICNUMBER) { + unsigned char flag, BD; + + bits_checked += 32; + /* Next follows a stream descriptor. */ + /* Descriptor Flags. */ + flag = buffer[4]; + /* A version number must be "01". */ + if (((flag & 0xc0) >> 6) != 1) + return (0); + /* A reserved bit must be "0". */ + if (flag & 2) + return (0); + bits_checked += 8; + BD = buffer[5]; + /* A block maximum size shuld be more than 3. */ + if (((BD & 0x70) >> 4) < 4) + return (0); + /* Reserved bits must be "0". */ + if (BD & ~0x70) + return (0); + bits_checked += 8; + } else if (number == LZ4_LEGACY) { + bits_checked += 32; + } + + return (bits_checked); +} + +#if !defined(HAVE_LIBLZ4) + +/* + * If we don't have the library on this system, we can't actually do the + * decompression. We can, however, still detect compressed archives + * and emit a useful message. + */ +static int +lz4_reader_init(struct archive_read_filter *self) +{ + int r; + + r = __archive_read_program(self, "lz4 -d -q"); + /* Note: We set the format here even if __archive_read_program() + * above fails. We do, after all, know what the format is + * even if we weren't able to read it. */ + self->code = ARCHIVE_FILTER_LZ4; + self->name = "lz4"; + return (r); +} + + +#else + +/* + * Setup the callbacks. + */ +static int +lz4_reader_init(struct archive_read_filter *self) +{ + struct private_data *state; + + self->code = ARCHIVE_FILTER_LZ4; + self->name = "lz4"; + + state = (struct private_data *)calloc(sizeof(*state), 1); + if (state == NULL) { + archive_set_error(&self->archive->archive, ENOMEM, + "Can't allocate data for lz4 decompression"); + return (ARCHIVE_FATAL); + } + + self->data = state; + state->stage = SELECT_STREAM; + self->read = lz4_filter_read; + self->skip = NULL; /* not supported */ + self->close = lz4_filter_close; + + return (ARCHIVE_OK); +} + +static int +lz4_allocate_out_block(struct archive_read_filter *self) +{ + struct private_data *state = (struct private_data *)self->data; + size_t out_block_size = state->flags.block_maximum_size; + void *out_block; + + if (!state->flags.block_independence) + out_block_size += 64 * 1024; + if (state->out_block_size < out_block_size) { + free(state->out_block); + out_block = (unsigned char *)malloc(out_block_size); + state->out_block_size = out_block_size; + if (out_block == NULL) { + archive_set_error(&self->archive->archive, ENOMEM, + "Can't allocate data for lz4 decompression"); + return (ARCHIVE_FATAL); + } + state->out_block = out_block; + } + if (!state->flags.block_independence) + memset(state->out_block, 0, 64 * 1024); + return (ARCHIVE_OK); +} + +static int +lz4_allocate_out_block_for_legacy(struct archive_read_filter *self) +{ + struct private_data *state = (struct private_data *)self->data; + size_t out_block_size = LEGACY_BLOCK_SIZE; + void *out_block; + + if (state->out_block_size < out_block_size) { + free(state->out_block); + out_block = (unsigned char *)malloc(out_block_size); + state->out_block_size = out_block_size; + if (out_block == NULL) { + archive_set_error(&self->archive->archive, ENOMEM, + "Can't allocate data for lz4 decompression"); + return (ARCHIVE_FATAL); + } + state->out_block = out_block; + } + return (ARCHIVE_OK); +} + +/* + * Return the next block of decompressed data. + */ +static ssize_t +lz4_filter_read(struct archive_read_filter *self, const void **p) +{ + struct private_data *state = (struct private_data *)self->data; + ssize_t ret; + + if (state->eof) { + *p = NULL; + return (0); + } + + __archive_read_filter_consume(self->upstream, state->unconsumed); + state->unconsumed = 0; + + switch (state->stage) { + case SELECT_STREAM: + break; + case READ_DEFAULT_STREAM: + case READ_LEGACY_STREAM: + /* Reading a lz4 stream already failed. */ + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, "Invalid sequence."); + return (ARCHIVE_FATAL); + case READ_DEFAULT_BLOCK: + ret = lz4_filter_read_default_stream(self, p); + if (ret != 0 || state->stage != SELECT_STREAM) + return ret; + break; + case READ_LEGACY_BLOCK: + ret = lz4_filter_read_legacy_stream(self, p); + if (ret != 0 || state->stage != SELECT_STREAM) + return ret; + break; + default: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, "Program error."); + return (ARCHIVE_FATAL); + break; + } + + while (state->stage == SELECT_STREAM) { + const char *read_buf; + + /* Read a magic number. */ + read_buf = __archive_read_filter_ahead(self->upstream, 4, + NULL); + if (read_buf == NULL) { + state->eof = 1; + *p = NULL; + return (0); + } + uint32_t number = archive_le32dec(read_buf); + __archive_read_filter_consume(self->upstream, 4); + if (number == LZ4_MAGICNUMBER) + return lz4_filter_read_default_stream(self, p); + else if (number == LZ4_LEGACY) + return lz4_filter_read_legacy_stream(self, p); + else if ((number & ~0xF) == LZ4_SKIPPABLED) { + read_buf = __archive_read_filter_ahead( + self->upstream, 4, NULL); + if (read_buf == NULL) { + archive_set_error( + &self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Malformed lz4 data"); + return (ARCHIVE_FATAL); + } + uint32_t skip_bytes = archive_le32dec(read_buf); + __archive_read_filter_consume(self->upstream, + 4 + skip_bytes); + } else { + /* Ignore following unrecognized data. */ + state->eof = 1; + *p = NULL; + return (0); + } + } + state->eof = 1; + *p = NULL; + return (0); +} + +static int +lz4_filter_read_descriptor(struct archive_read_filter *self) +{ + struct private_data *state = (struct private_data *)self->data; + const char *read_buf; + ssize_t bytes_remaining; + ssize_t descriptor_bytes; + unsigned char flag, bd; + unsigned int chsum, chsum_verifier; + + /* Make sure we have 2 bytes for flags. */ + read_buf = __archive_read_filter_ahead(self->upstream, 2, + &bytes_remaining); + if (read_buf == NULL) { + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "truncated lz4 input"); + return (ARCHIVE_FATAL); + } + + /* + Parse flags. + */ + flag = (unsigned char)read_buf[0]; + /* Verify version number. */ + if ((flag & 0xc0) != 1<<6) + goto malformed_error; + /* A reserved bit must be zero. */ + if (flag & 0x02) + goto malformed_error; + state->flags.block_independence = (flag & 0x20) != 0; + state->flags.block_checksum = (flag & 0x10)?4:0; + state->flags.stream_size = (flag & 0x08) != 0; + state->flags.stream_checksum = (flag & 0x04) != 0; + state->flags.preset_dictionary = (flag & 0x01) != 0; + + /* BD */ + bd = (unsigned char)read_buf[1]; + /* Reserved bits must be zero. */ + if (bd & 0x8f) + goto malformed_error; + /* Get a maxinum block size. */ + switch (read_buf[1] >> 4) { + case 4: /* 64 KB */ + state->flags.block_maximum_size = 64 * 1024; + break; + case 5: /* 256 KB */ + state->flags.block_maximum_size = 256 * 1024; + break; + case 6: /* 1 MB */ + state->flags.block_maximum_size = 1024 * 1024; + break; + case 7: /* 4 MB */ + state->flags.block_maximum_size = 4 * 1024 * 1024; + break; + default: + goto malformed_error; + } + + /* Read the whole descriptor in a stream block. */ + descriptor_bytes = 3; + if (state->flags.stream_size) + descriptor_bytes += 8; + if (state->flags.preset_dictionary) + descriptor_bytes += 4; + if (bytes_remaining < descriptor_bytes) { + read_buf = __archive_read_filter_ahead(self->upstream, + descriptor_bytes, &bytes_remaining); + if (read_buf == NULL) { + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "truncated lz4 input"); + return (ARCHIVE_FATAL); + } + } + /* Check if a descriptor is corrupted */ + chsum = __archive_xxhash.XXH32(read_buf, (int)descriptor_bytes -1, 0); + chsum = (chsum >> 8) & 0xff; + chsum_verifier = read_buf[descriptor_bytes-1] & 0xff; + if (chsum != chsum_verifier) + goto malformed_error; + + __archive_read_filter_consume(self->upstream, descriptor_bytes); + + /* Make sure we have an enough buffer for uncompressed data. */ + if (lz4_allocate_out_block(self) != ARCHIVE_OK) + return (ARCHIVE_FATAL); + if (state->flags.stream_checksum) + state->xxh32_state = __archive_xxhash.XXH32_init(0); + + state->decoded_size = 0; + /* Success */ + return (ARCHIVE_OK); +malformed_error: + archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, + "malformed lz4 data"); + return (ARCHIVE_FATAL); +} + +static ssize_t +lz4_filter_read_data_block(struct archive_read_filter *self, const void **p) +{ + struct private_data *state = (struct private_data *)self->data; + ssize_t compressed_size; + const char *read_buf; + ssize_t bytes_remaining; + int checksum_size; + ssize_t uncompressed_size; + size_t prefix64k; + + *p = NULL; + + /* Make sure we have 4 bytes for a block size. */ + read_buf = __archive_read_filter_ahead(self->upstream, 4, + &bytes_remaining); + if (read_buf == NULL) + goto truncated_error; + compressed_size = archive_le32dec(read_buf); + if ((compressed_size & ~(1 << 31)) > state->flags.block_maximum_size) + goto malformed_error; + /* A compressed size == 0 means the end of stream blocks. */ + if (compressed_size == 0) { + __archive_read_filter_consume(self->upstream, 4); + return 0; + } + + checksum_size = state->flags.block_checksum; + /* Check if the block is uncompressed. */ + if (compressed_size & (1 << 31)) { + compressed_size &= ~(1 << 31); + uncompressed_size = compressed_size; + } else + uncompressed_size = 0;/* Unknown yet. */ + + /* + Unfortunately, lz4 decompression API requires a whole block + for its decompression speed, so we read a whole block and allocate + a huge buffer used for decoded data. + */ + read_buf = __archive_read_filter_ahead(self->upstream, + 4 + compressed_size + checksum_size, &bytes_remaining); + if (read_buf == NULL) + goto truncated_error; + + /* Optional process, checking a block sum. */ + if (checksum_size) { + unsigned int chsum = __archive_xxhash.XXH32( + read_buf + 4, (int)compressed_size, 0); + unsigned int chsum_block = + archive_le32dec(read_buf + 4 + compressed_size); + if (chsum != chsum_block) + goto malformed_error; + } + + + /* If the block is uncompressed, there is nothing to do. */ + if (uncompressed_size) { + /* Prepare a prefix 64k block for next block. */ + if (!state->flags.block_independence) { + prefix64k = 64 * 1024; + if (uncompressed_size < (ssize_t)prefix64k) { + memcpy(state->out_block + + prefix64k - uncompressed_size, + read_buf + 4, + uncompressed_size); + memset(state->out_block, 0, + prefix64k - uncompressed_size); + } else { + memcpy(state->out_block, + read_buf + 4 + + uncompressed_size - prefix64k, + prefix64k); + } + state->decoded_size = 0; + } + state->unconsumed = 4 + uncompressed_size + checksum_size; + *p = read_buf + 4; + return uncompressed_size; + } + + /* + Decompress a block data. + */ + if (state->flags.block_independence) { + prefix64k = 0; + uncompressed_size = LZ4_decompress_safe(read_buf + 4, + state->out_block, (int)compressed_size, + state->flags.block_maximum_size); + } else { + prefix64k = 64 * 1024; + if (state->decoded_size) { + if (state->decoded_size < prefix64k) { + memmove(state->out_block + + prefix64k - state->decoded_size, + state->out_block + prefix64k, + state->decoded_size); + memset(state->out_block, 0, + prefix64k - state->decoded_size); + } else { + memmove(state->out_block, + state->out_block + state->decoded_size, + prefix64k); + } + } + uncompressed_size = LZ4_decompress_safe_withPrefix64k( + read_buf + 4, + state->out_block + prefix64k, (int)compressed_size, + state->flags.block_maximum_size); + } + + /* Check if an error happend in decompression process. */ + if (uncompressed_size < 0) { + archive_set_error(&(self->archive->archive), + ARCHIVE_ERRNO_MISC, "lz4 decompression failed"); + return (ARCHIVE_FATAL); + } + + state->unconsumed = 4 + compressed_size + checksum_size; + *p = state->out_block + prefix64k; + state->decoded_size = uncompressed_size; + return uncompressed_size; + +malformed_error: + archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, + "malformed lz4 data"); + return (ARCHIVE_FATAL); +truncated_error: + archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC, + "truncated lz4 input"); + return (ARCHIVE_FATAL); +} + +static ssize_t +lz4_filter_read_default_stream(struct archive_read_filter *self, const void **p) +{ + struct private_data *state = (struct private_data *)self->data; + const char *read_buf; + ssize_t bytes_remaining; + ssize_t ret; + + if (state->stage == SELECT_STREAM) { + state->stage = READ_DEFAULT_STREAM; + /* First, read a desciprtor. */ + if((ret = lz4_filter_read_descriptor(self)) != ARCHIVE_OK) + return (ret); + state->stage = READ_DEFAULT_BLOCK; + } + /* Decompress a block. */ + ret = lz4_filter_read_data_block(self, p); + + /* If the end of block is detected, change the filter status + to read next stream. */ + if (ret == 0 && *p == NULL) + state->stage = SELECT_STREAM; + + /* Optional process, checking a stream sum. */ + if (state->flags.stream_checksum) { + if (state->stage == SELECT_STREAM) { + unsigned int checksum; + unsigned int checksum_stream; + read_buf = __archive_read_filter_ahead(self->upstream, + 4, &bytes_remaining); + if (read_buf == NULL) { + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, "truncated lz4 input"); + return (ARCHIVE_FATAL); + } + checksum = archive_le32dec(read_buf); + __archive_read_filter_consume(self->upstream, 4); + checksum_stream = __archive_xxhash.XXH32_digest( + state->xxh32_state); + state->xxh32_state = NULL; + if (checksum != checksum_stream) { + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "lz4 stream cheksum error"); + return (ARCHIVE_FATAL); + } + } else if (ret > 0) + __archive_xxhash.XXH32_update(state->xxh32_state, + *p, (int)ret); + } + return (ret); +} + +static ssize_t +lz4_filter_read_legacy_stream(struct archive_read_filter *self, const void **p) +{ + struct private_data *state = (struct private_data *)self->data; + int compressed; + const char *read_buf; + ssize_t ret; + + *p = NULL; + ret = lz4_allocate_out_block_for_legacy(self); + if (ret != ARCHIVE_OK) + return ret; + + /* Make sure we have 4 bytes for a block size. */ + read_buf = __archive_read_filter_ahead(self->upstream, 4, NULL); + if (read_buf == NULL) { + if (state->stage == SELECT_STREAM) { + state->stage = READ_LEGACY_STREAM; + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "truncated lz4 input"); + return (ARCHIVE_FATAL); + } + state->stage = SELECT_STREAM; + return 0; + } + state->stage = READ_LEGACY_BLOCK; + compressed = archive_le32dec(read_buf); + if (compressed > LZ4_COMPRESSBOUND(LEGACY_BLOCK_SIZE)) { + state->stage = SELECT_STREAM; + return 0; + } + + /* Make sure we have a whole block. */ + read_buf = __archive_read_filter_ahead(self->upstream, + 4 + compressed, NULL); + ret = LZ4_decompress_safe(read_buf + 4, state->out_block, + compressed, (int)state->out_block_size); + if (ret < 0) { + archive_set_error(&(self->archive->archive), + ARCHIVE_ERRNO_MISC, "lz4 decompression failed"); + return (ARCHIVE_FATAL); + } + *p = state->out_block; + state->unconsumed = 4 + compressed; + return ret; +} + +/* + * Clean up the decompressor. + */ +static int +lz4_filter_close(struct archive_read_filter *self) +{ + struct private_data *state; + int ret = ARCHIVE_OK; + + state = (struct private_data *)self->data; + free(state->xxh32_state); + free(state->out_block); + free(state); + return (ret); +} + +#endif /* HAVE_LIBLZ4 */ |