diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2017-11-30 15:55:59 +0000 |
---|---|---|
committer | Edward Thomson <ethomson@edwardthomson.com> | 2017-12-20 16:08:03 +0000 |
commit | ddefea750adcde06867b49d251760844540919fe (patch) | |
tree | c30ef58e4b708058e2b65b9a5ca779869e2b9291 /src | |
parent | d1e446550a966a1dbc5d765aa79fe9bc47a1c1a3 (diff) | |
download | libgit2-ddefea750adcde06867b49d251760844540919fe.tar.gz |
odb: support large loose objects
zlib will only inflate/deflate an `int`'s worth of data at a time.
We need to loop through large files in order to ensure that we inflate
the entire file, not just an `int`'s worth of data. Thankfully, we
already have this loop in our `git_zstream` layer. Handle large objects
using the `git_zstream`.
Diffstat (limited to 'src')
-rw-r--r-- | src/odb_loose.c | 190 |
1 files changed, 92 insertions, 98 deletions
diff --git a/src/odb_loose.c b/src/odb_loose.c index 72b47f091..2294931b4 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -16,6 +16,7 @@ #include "delta.h" #include "filebuf.h" #include "object.h" +#include "zstream.h" #include "git2/odb_backend.h" #include "git2/types.h" @@ -119,53 +120,53 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj) return used; } -static size_t get_object_header(obj_hdr *hdr, unsigned char *data) +static int parse_header( + obj_hdr *out, + size_t *out_len, + const unsigned char *_data, + size_t data_len) { - char c, typename[10]; - size_t size, used = 0; + const char *data = (char *)_data; + size_t i, typename_len, size_idx, size_len; + int64_t size; - /* - * type name string followed by space. - */ - while ((c = data[used]) != ' ') { - typename[used++] = c; - if (used >= sizeof(typename)) - return 0; + *out_len = 0; + + /* find the object type name */ + for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) { + if (data[i] == ' ') + break; } - typename[used] = 0; - if (used == 0) - return 0; - hdr->type = git_object_string2type(typename); - used++; /* consume the space */ - /* - * length follows immediately in decimal (without - * leading zeros). 
- */ - size = data[used++] - '0'; - if (size > 9) - return 0; - if (size) { - while ((c = data[used]) != '\0') { - size_t d = c - '0'; - if (d > 9) - break; - used++; - size = size * 10 + d; - } + if (typename_len == data_len) + goto on_error; + + out->type = git_object_stringn2type(data, typename_len); + + size_idx = typename_len + 1; + for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) { + if (data[i] == '\0') + break; } - hdr->size = size; - /* - * the length must be followed by a zero byte - */ - if (data[used++] != '\0') - return 0; + if (i == data_len) + goto on_error; - return used; -} + if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 || + size < 0) + goto on_error; + + out->size = size; + if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1)) + goto on_error; + + return 0; +on_error: + giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header"); + return -1; +} /*********************************************************** * @@ -269,45 +270,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen) return 0; } -static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr) -{ - unsigned char *buf, *head = hb; - size_t tail, alloc_size; - - /* - * allocate a buffer to hold the inflated data and copy the - * initial sequence of inflated data from the tail of the - * head buffer, if any. 
- */ - if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) || - (buf = git__malloc(alloc_size)) == NULL) { - inflateEnd(s); - return NULL; - } - tail = s->total_out - used; - if (used > 0 && tail > 0) { - if (tail > hdr->size) - tail = hdr->size; - memcpy(buf, head + used, tail); - } - used = tail; - - /* - * inflate the remainder of the object data, if any - */ - if (hdr->size < used) - inflateEnd(s); - else { - set_stream_output(s, buf + used, hdr->size - used); - if (finish_inflate(s)) { - git__free(buf); - return NULL; - } - } - - return buf; -} - /* * At one point, there was a loose object format that was intended to * mimic the format used in pack-files. This was to allow easy copying @@ -354,43 +316,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj) static int inflate_disk_obj(git_rawobj *out, git_buf *obj) { - unsigned char head[64], *buf; - z_stream zs; + git_zstream zstream = GIT_ZSTREAM_INIT; + unsigned char head[64], *body = NULL; + size_t decompressed, head_len, body_len, alloc_size; obj_hdr hdr; - size_t used; + int error; - /* - * check for a pack-like loose object - */ + /* check for a pack-like loose object */ if (!is_zlib_compressed_data((unsigned char *)obj->ptr)) return inflate_packlike_loose_disk_obj(out, obj); + if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 || + (error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0) + goto done; + + decompressed = sizeof(head); + /* - * inflate the initial part of the io buffer in order - * to parse the object header (type and size). - */ - if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK || - (used = get_object_header(&hdr, head)) == 0 || - !git_object_typeisloose(hdr.type)) - { - abort_inflate(&zs); + * inflate the initial part of the compressed buffer in order to parse the + * header; read the largest header possible, then push back the remainder. 
+ */ + if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 || + (error = parse_header(&hdr, &head_len, head, decompressed)) < 0) + goto done; + + if (!git_object_typeisloose(hdr.type)) { giterr_set(GITERR_ODB, "failed to inflate disk object"); - return -1; + error = -1; + goto done; } /* * allocate a buffer and inflate the object data into it * (including the initial sequence in the head buffer). */ - if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL) - return -1; - buf[hdr.size] = '\0'; + if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) || + (body = git__malloc(alloc_size)) == NULL) { + error = -1; + goto done; + } - out->data = buf; + assert(decompressed >= head_len); + body_len = decompressed - head_len; + + if (body_len) + memcpy(body, head + head_len, body_len); + + decompressed = hdr.size - body_len; + if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0) + goto done; + + if (!git_zstream_done(&zstream)) { + giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely"); + error = -1; + goto done; + } + + body[hdr.size] = '\0'; + + out->data = body; out->len = hdr.size; out->type = hdr.type; - return 0; +done: + if (error < 0) + git__free(body); + + git_zstream_free(&zstream); + return error; } @@ -435,6 +428,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) git_file fd; z_stream zs; obj_hdr header_obj; + size_t header_len; unsigned char raw_buffer[16], inflated_buffer[64]; assert(out && loc); @@ -460,7 +454,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) } if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR) - || get_object_header(&header_obj, inflated_buffer) == 0 + || parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0 || git_object_typeisloose(header_obj.type) == 0) { giterr_set(GITERR_ZLIB, "failed to read loose object header"); |