summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Edward Thomson <ethomson@edwardthomson.com> 2017-11-30 15:55:59 +0000
committer Edward Thomson <ethomson@edwardthomson.com> 2017-12-20 16:08:03 +0000
commit ddefea750adcde06867b49d251760844540919fe (patch)
tree c30ef58e4b708058e2b65b9a5ca779869e2b9291
parent d1e446550a966a1dbc5d765aa79fe9bc47a1c1a3 (diff)
download libgit2-ddefea750adcde06867b49d251760844540919fe.tar.gz
odb: support large loose objects
zlib will only inflate/deflate an `int`'s worth of data at a time. We need to loop through large files in order to ensure that we inflate the entire file, not just an `int`'s worth of data. Thankfully, we already have this loop in our `git_zstream` layer. Handle large objects using the `git_zstream`.
-rw-r--r-- src/odb_loose.c 190
1 file changed, 92 insertions, 98 deletions
diff --git a/src/odb_loose.c b/src/odb_loose.c
index 72b47f091..2294931b4 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -16,6 +16,7 @@
#include "delta.h"
#include "filebuf.h"
#include "object.h"
+#include "zstream.h"
#include "git2/odb_backend.h"
#include "git2/types.h"
@@ -119,53 +120,53 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
return used;
}
-static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
+static int parse_header(
+ obj_hdr *out,
+ size_t *out_len,
+ const unsigned char *_data,
+ size_t data_len)
{
- char c, typename[10];
- size_t size, used = 0;
+ const char *data = (char *)_data;
+ size_t i, typename_len, size_idx, size_len;
+ int64_t size;
- /*
- * type name string followed by space.
- */
- while ((c = data[used]) != ' ') {
- typename[used++] = c;
- if (used >= sizeof(typename))
- return 0;
+ *out_len = 0;
+
+ /* find the object type name */
+ for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) {
+ if (data[i] == ' ')
+ break;
}
- typename[used] = 0;
- if (used == 0)
- return 0;
- hdr->type = git_object_string2type(typename);
- used++; /* consume the space */
- /*
- * length follows immediately in decimal (without
- * leading zeros).
- */
- size = data[used++] - '0';
- if (size > 9)
- return 0;
- if (size) {
- while ((c = data[used]) != '\0') {
- size_t d = c - '0';
- if (d > 9)
- break;
- used++;
- size = size * 10 + d;
- }
+ if (typename_len == data_len)
+ goto on_error;
+
+ out->type = git_object_stringn2type(data, typename_len);
+
+ size_idx = typename_len + 1;
+ for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) {
+ if (data[i] == '\0')
+ break;
}
- hdr->size = size;
- /*
- * the length must be followed by a zero byte
- */
- if (data[used++] != '\0')
- return 0;
+ if (i == data_len)
+ goto on_error;
- return used;
-}
+ if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
+ size < 0)
+ goto on_error;
+
+ out->size = size;
+ if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
+ goto on_error;
+
+ return 0;
+on_error:
+ giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
+ return -1;
+}
/***********************************************************
*
@@ -269,45 +270,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
return 0;
}
-static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
-{
- unsigned char *buf, *head = hb;
- size_t tail, alloc_size;
-
- /*
- * allocate a buffer to hold the inflated data and copy the
- * initial sequence of inflated data from the tail of the
- * head buffer, if any.
- */
- if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) ||
- (buf = git__malloc(alloc_size)) == NULL) {
- inflateEnd(s);
- return NULL;
- }
- tail = s->total_out - used;
- if (used > 0 && tail > 0) {
- if (tail > hdr->size)
- tail = hdr->size;
- memcpy(buf, head + used, tail);
- }
- used = tail;
-
- /*
- * inflate the remainder of the object data, if any
- */
- if (hdr->size < used)
- inflateEnd(s);
- else {
- set_stream_output(s, buf + used, hdr->size - used);
- if (finish_inflate(s)) {
- git__free(buf);
- return NULL;
- }
- }
-
- return buf;
-}
-
/*
* At one point, there was a loose object format that was intended to
* mimic the format used in pack-files. This was to allow easy copying
@@ -354,43 +316,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
{
- unsigned char head[64], *buf;
- z_stream zs;
+ git_zstream zstream = GIT_ZSTREAM_INIT;
+ unsigned char head[64], *body = NULL;
+ size_t decompressed, head_len, body_len, alloc_size;
obj_hdr hdr;
- size_t used;
+ int error;
- /*
- * check for a pack-like loose object
- */
+ /* check for a pack-like loose object */
if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
return inflate_packlike_loose_disk_obj(out, obj);
+ if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
+ (error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
+ goto done;
+
+ decompressed = sizeof(head);
+
/*
- * inflate the initial part of the io buffer in order
- * to parse the object header (type and size).
- */
- if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK ||
- (used = get_object_header(&hdr, head)) == 0 ||
- !git_object_typeisloose(hdr.type))
- {
- abort_inflate(&zs);
+ * inflate the initial part of the compressed buffer in order to parse the
+ * header; read the largest header possible, then push back the remainder.
+ */
+ if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
+ (error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
+ goto done;
+
+ if (!git_object_typeisloose(hdr.type)) {
giterr_set(GITERR_ODB, "failed to inflate disk object");
- return -1;
+ error = -1;
+ goto done;
}
/*
* allocate a buffer and inflate the object data into it
* (including the initial sequence in the head buffer).
*/
- if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
- return -1;
- buf[hdr.size] = '\0';
+ if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
+ (body = git__malloc(alloc_size)) == NULL) {
+ error = -1;
+ goto done;
+ }
- out->data = buf;
+ assert(decompressed >= head_len);
+ body_len = decompressed - head_len;
+
+ if (body_len)
+ memcpy(body, head + head_len, body_len);
+
+ decompressed = hdr.size - body_len;
+ if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
+ goto done;
+
+ if (!git_zstream_done(&zstream)) {
+ giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
+ error = -1;
+ goto done;
+ }
+
+ body[hdr.size] = '\0';
+
+ out->data = body;
out->len = hdr.size;
out->type = hdr.type;
- return 0;
+done:
+ if (error < 0)
+ git__free(body);
+
+ git_zstream_free(&zstream);
+ return error;
}
@@ -435,6 +428,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
git_file fd;
z_stream zs;
obj_hdr header_obj;
+ size_t header_len;
unsigned char raw_buffer[16], inflated_buffer[64];
assert(out && loc);
@@ -460,7 +454,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
}
if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
- || get_object_header(&header_obj, inflated_buffer) == 0
+ || parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
|| git_object_typeisloose(header_obj.type) == 0)
{
giterr_set(GITERR_ZLIB, "failed to read loose object header");