diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2017-12-30 08:09:22 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-30 08:09:22 -0800 |
commit | e14bf97ebbed574dd23dc937a05994074baad91c (patch) | |
tree | 7ba7575994ca7d35bb9802a0d113f604faed0f9d | |
parent | 083b1a2e2d8d190db02db3db0dad4fa742eccb02 (diff) | |
parent | 456e52189c95315028d668f9e508798d490765e2 (diff) | |
download | libgit2-e14bf97ebbed574dd23dc937a05994074baad91c.tar.gz |
Merge pull request #4443 from libgit2/ethomson/large_loose_blobs
Inflate large loose blobs
-rw-r--r-- | src/hash/hash_common_crypto.h | 17 | ||||
-rw-r--r-- | src/hash/hash_win32.c | 30 | ||||
-rw-r--r-- | src/object.c | 13 | ||||
-rw-r--r-- | src/object.h | 2 | ||||
-rw-r--r-- | src/odb_loose.c | 195 | ||||
-rw-r--r-- | src/util.c | 44 | ||||
-rw-r--r-- | src/util.h | 1 | ||||
-rw-r--r-- | src/zstream.c | 30 | ||||
-rw-r--r-- | tests/core/string.c | 42 | ||||
-rw-r--r-- | tests/odb/largefiles.c | 114 |
10 files changed, 352 insertions, 136 deletions
diff --git a/src/hash/hash_common_crypto.h b/src/hash/hash_common_crypto.h index eeeddd0cc..4cd229d3c 100644 --- a/src/hash/hash_common_crypto.h +++ b/src/hash/hash_common_crypto.h @@ -16,6 +16,8 @@ struct git_hash_ctx { CC_SHA1_CTX c; }; +#define CC_LONG_MAX ((CC_LONG)-1) + #define git_hash_global_init() 0 #define git_hash_ctx_init(ctx) git_hash_init(ctx) #define git_hash_ctx_cleanup(ctx) @@ -27,10 +29,21 @@ GIT_INLINE(int) git_hash_init(git_hash_ctx *ctx) return 0; } -GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *data, size_t len) +GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *_data, size_t len) { + const unsigned char *data = _data; + assert(ctx); - CC_SHA1_Update(&ctx->c, data, len); + + while (len > 0) { + CC_LONG chunk = (len > CC_LONG_MAX) ? CC_LONG_MAX : (CC_LONG)len; + + CC_SHA1_Update(&ctx->c, data, chunk); + + data += chunk; + len -= chunk; + } + return 0; } diff --git a/src/hash/hash_win32.c b/src/hash/hash_win32.c index 4d53a57bd..20ba9a5fe 100644 --- a/src/hash/hash_win32.c +++ b/src/hash/hash_win32.c @@ -136,12 +136,21 @@ GIT_INLINE(int) hash_cryptoapi_init(git_hash_ctx *ctx) return 0; } -GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *data, size_t len) +GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *_data, size_t len) { + const BYTE *data = (BYTE *)_data; + assert(ctx->ctx.cryptoapi.valid); - if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, (const BYTE *)data, (DWORD)len, 0)) - return -1; + while (len > 0) { + DWORD chunk = (len > MAXDWORD) ? MAXDWORD : (DWORD)len; + + if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, data, chunk, 0)) + return -1; + + data += chunk; + len -= chunk; + } return 0; } @@ -202,10 +211,19 @@ GIT_INLINE(int) hash_cng_init(git_hash_ctx *ctx) return 0; } -GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *data, size_t len) +GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *_data, size_t len) { - if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, (PBYTE)data, (ULONG)len, 0) < 0) - return -1; + PBYTE data = (PBYTE)_data; + + while (len > 0) { + ULONG chunk = (len > ULONG_MAX) ? ULONG_MAX : (ULONG)len; + + if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, data, chunk, 0) < 0) + return -1; + + data += chunk; + len -= chunk; + } return 0; } diff --git a/src/object.c b/src/object.c index 4d069a34c..48f561384 100644 --- a/src/object.c +++ b/src/object.c @@ -236,13 +236,22 @@ const char *git_object_type2string(git_otype type) git_otype git_object_string2type(const char *str) { + if (!str) + return GIT_OBJ_BAD; + + return git_object_stringn2type(str, strlen(str)); +} + +git_otype git_object_stringn2type(const char *str, size_t len) +{ size_t i; - if (!str || !*str) + if (!str || !len || !*str) return GIT_OBJ_BAD; for (i = 0; i < ARRAY_SIZE(git_objects_table); i++) - if (!strcmp(str, git_objects_table[i].str)) + if (*git_objects_table[i].str && + !git__prefixncmp(str, len, git_objects_table[i].str)) return (git_otype)i; return GIT_OBJ_BAD; diff --git a/src/object.h b/src/object.h index ff61c1d33..e46c9cafa 100644 --- a/src/object.h +++ b/src/object.h @@ -30,6 +30,8 @@ int git_object__from_odb_object( int git_object__resolve_to_type(git_object **obj, git_otype type); +git_otype git_object_stringn2type(const char *str, size_t len); + int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header); void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid); diff --git a/src/odb_loose.c b/src/odb_loose.c index 72b47f091..9900aae2a 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -16,6 +16,7 @@ #include "delta.h" #include "filebuf.h" #include "object.h" +#include "zstream.h" #include "git2/odb_backend.h" #include "git2/types.h" @@ -119,53 +120,58 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj) return used; } -static size_t get_object_header(obj_hdr *hdr, unsigned char *data) +static int parse_header( + obj_hdr *out, + size_t *out_len, + const unsigned char *_data, + size_t data_len) { - char c, typename[10]; - size_t size, used = 0; + const char *data = (char *)_data; + size_t i, typename_len, size_idx, size_len; + int64_t size; - /* - * type name string followed by space. - */ - while ((c = data[used]) != ' ') { - typename[used++] = c; - if (used >= sizeof(typename)) - return 0; + *out_len = 0; + + /* find the object type name */ + for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) { + if (data[i] == ' ') + break; } - typename[used] = 0; - if (used == 0) - return 0; - hdr->type = git_object_string2type(typename); - used++; /* consume the space */ - /* - * length follows immediately in decimal (without - * leading zeros). - */ - size = data[used++] - '0'; - if (size > 9) - return 0; - if (size) { - while ((c = data[used]) != '\0') { - size_t d = c - '0'; - if (d > 9) - break; - used++; - size = size * 10 + d; - } + if (typename_len == data_len) + goto on_error; + + out->type = git_object_stringn2type(data, typename_len); + + size_idx = typename_len + 1; + for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) { + if (data[i] == '\0') + break; } - hdr->size = size; - /* - * the length must be followed by a zero byte - */ - if (data[used++] != '\0') - return 0; + if (i == data_len) + goto on_error; - return used; -} + if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 || + size < 0) + goto on_error; + + if ((uint64_t)size > SIZE_MAX) { + giterr_set(GITERR_OBJECT, "object is larger than available memory"); + return -1; + } + out->size = size; + if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1)) + goto on_error; + + return 0; + +on_error: + giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header"); + return -1; +} /*********************************************************** * @@ -269,45 +275,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen) return 0; } -static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr) -{ - unsigned char *buf, *head = hb; - size_t tail, alloc_size; - - /* - * allocate a buffer to hold the inflated data and copy the - * initial sequence of inflated data from the tail of the - * head buffer, if any. - */ - if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) || - (buf = git__malloc(alloc_size)) == NULL) { - inflateEnd(s); - return NULL; - } - tail = s->total_out - used; - if (used > 0 && tail > 0) { - if (tail > hdr->size) - tail = hdr->size; - memcpy(buf, head + used, tail); - } - used = tail; - - /* - * inflate the remainder of the object data, if any - */ - if (hdr->size < used) - inflateEnd(s); - else { - set_stream_output(s, buf + used, hdr->size - used); - if (finish_inflate(s)) { - git__free(buf); - return NULL; - } - } - - return buf; -} - /* * At one point, there was a loose object format that was intended to * mimic the format used in pack-files. This was to allow easy copying @@ -354,43 +321,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj) static int inflate_disk_obj(git_rawobj *out, git_buf *obj) { - unsigned char head[64], *buf; - z_stream zs; + git_zstream zstream = GIT_ZSTREAM_INIT; + unsigned char head[64], *body = NULL; + size_t decompressed, head_len, body_len, alloc_size; obj_hdr hdr; - size_t used; + int error; - /* - * check for a pack-like loose object - */ + /* check for a pack-like loose object */ if (!is_zlib_compressed_data((unsigned char *)obj->ptr)) return inflate_packlike_loose_disk_obj(out, obj); + if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 || + (error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0) + goto done; + + decompressed = sizeof(head); + /* - * inflate the initial part of the io buffer in order - * to parse the object header (type and size). - */ - if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK || - (used = get_object_header(&hdr, head)) == 0 || - !git_object_typeisloose(hdr.type)) - { - abort_inflate(&zs); + * inflate the initial part of the compressed buffer in order to parse the + * header; read the largest header possible, then push back the remainder. + */ + if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 || + (error = parse_header(&hdr, &head_len, head, decompressed)) < 0) + goto done; + + if (!git_object_typeisloose(hdr.type)) { giterr_set(GITERR_ODB, "failed to inflate disk object"); - return -1; + error = -1; + goto done; } /* * allocate a buffer and inflate the object data into it * (including the initial sequence in the head buffer). */ - if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL) - return -1; - buf[hdr.size] = '\0'; + if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) || + (body = git__malloc(alloc_size)) == NULL) { + error = -1; + goto done; + } - out->data = buf; + assert(decompressed >= head_len); + body_len = decompressed - head_len; + + if (body_len) + memcpy(body, head + head_len, body_len); + + decompressed = hdr.size - body_len; + if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0) + goto done; + + if (!git_zstream_done(&zstream)) { + giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely"); + error = -1; + goto done; + } + + body[hdr.size] = '\0'; + + out->data = body; out->len = hdr.size; out->type = hdr.type; - return 0; +done: + if (error < 0) + git__free(body); + + git_zstream_free(&zstream); + return error; } @@ -435,6 +433,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) git_file fd; z_stream zs; obj_hdr header_obj; + size_t header_len; unsigned char raw_buffer[16], inflated_buffer[64]; assert(out && loc); @@ -460,7 +459,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) } if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR) - || get_object_header(&header_obj, inflated_buffer) == 0 + || parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0 || git_object_typeisloose(header_obj.type) == 0) { giterr_set(GITERR_ZLIB, "failed to read loose object header"); diff --git a/src/util.c b/src/util.c index 6ae5cdaec..1760a315e 100644 --- a/src/util.c +++ b/src/util.c @@ -252,35 +252,47 @@ void git__strtolower(char *str) git__strntolower(str, strlen(str)); } -int git__prefixcmp(const char *str, const char *prefix) +GIT_INLINE(int) prefixcmp(const char *str, size_t str_n, const char *prefix, bool icase) { - for (;;) { - unsigned char p = *(prefix++), s; + int s, p; + + while (str_n--) { + s = (unsigned char)*str++; + p = (unsigned char)*prefix++; + + if (icase) { + s = git__tolower(s); + p = git__tolower(p); + } + if (!p) return 0; - if ((s = *(str++)) != p) + + if (s != p) return s - p; } + + return (0 - *prefix); } -int git__prefixcmp_icase(const char *str, const char *prefix) +int git__prefixcmp(const char *str, const char *prefix) { - return strncasecmp(str, prefix, strlen(prefix)); + return prefixcmp(str, SIZE_MAX, prefix, false); } -int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix) +int git__prefixncmp(const char *str, size_t str_n, const char *prefix) { - int s, p; - - while(str_n--) { - s = (unsigned char)git__tolower(*str++); - p = (unsigned char)git__tolower(*prefix++); + return prefixcmp(str, str_n, prefix, false); +} - if (s != p) - return s - p; - } +int git__prefixcmp_icase(const char *str, const char *prefix) +{ + return prefixcmp(str, SIZE_MAX, prefix, true); +} - return (0 - *prefix); +int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix) +{ + return prefixcmp(str, str_n, prefix, true); } int git__suffixcmp(const char *str, const char *suffix) diff --git a/src/util.h b/src/util.h index 7c9a54ff1..80ee8e647 100644 --- a/src/util.h +++ b/src/util.h @@ -180,6 +180,7 @@ GIT_INLINE(void) git__free(void *ptr) extern int git__prefixcmp(const char *str, const char *prefix); extern int git__prefixcmp_icase(const char *str, const char *prefix); +extern int git__prefixncmp(const char *str, size_t str_n, const char *prefix); extern int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix); extern int git__suffixcmp(const char *str, const char *suffix); diff --git a/src/zstream.c b/src/zstream.c index 4895bdb16..963c9a344 100644 --- a/src/zstream.c +++ b/src/zstream.c @@ -14,17 +14,22 @@ #define ZSTREAM_BUFFER_SIZE (1024 * 1024) #define ZSTREAM_BUFFER_MIN_EXTRA 8 -static int zstream_seterr(git_zstream *zs) +GIT_INLINE(int) zstream_seterr(git_zstream *zs) { - if (zs->zerr == Z_OK || zs->zerr == Z_STREAM_END) + switch (zs->zerr) { + case Z_OK: + case Z_STREAM_END: + case Z_BUF_ERROR: /* not fatal; we retry with a larger buffer */ return 0; - - if (zs->zerr == Z_MEM_ERROR) + case Z_MEM_ERROR: giterr_set_oom(); - else if (zs->z.msg) - giterr_set_str(GITERR_ZLIB, zs->z.msg); - else - giterr_set(GITERR_ZLIB, "unknown compression error"); + break; + default: + if (zs->z.msg) + giterr_set_str(GITERR_ZLIB, zs->z.msg); + else + giterr_set(GITERR_ZLIB, "unknown compression error"); + } return -1; } @@ -98,8 +103,9 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) /* set up in data */ zstream->z.next_in = (Bytef *)zstream->in; zstream->z.avail_in = (uInt)zstream->in_len; + if ((size_t)zstream->z.avail_in != zstream->in_len) { - zstream->z.avail_in = INT_MAX; + zstream->z.avail_in = UINT_MAX; zflush = Z_NO_FLUSH; } else { zflush = Z_FINISH; @@ -110,7 +116,7 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) zstream->z.next_out = out; zstream->z.avail_out = (uInt)out_remain; if ((size_t)zstream->z.avail_out != out_remain) - zstream->z.avail_out = INT_MAX; + zstream->z.avail_out = UINT_MAX; out_queued = (size_t)zstream->z.avail_out; /* compress next chunk */ @@ -119,8 +125,8 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) else zstream->zerr = deflate(&zstream->z, zflush); - if (zstream->zerr == Z_STREAM_ERROR) - return zstream_seterr(zstream); + if (zstream_seterr(zstream)) + return -1; out_used = (out_queued - zstream->z.avail_out); out_remain -= out_used; diff --git a/tests/core/string.c b/tests/core/string.c index 90e8fa027..85db0c662 100644 --- a/tests/core/string.c +++ b/tests/core/string.c @@ -40,6 +40,48 @@ void test_core_string__2(void) cl_assert(git__strcasesort_cmp("fooBar", "foobar") < 0); } +/* compare prefixes with len */ +void test_core_string__prefixncmp(void) +{ + cl_assert(git__prefixncmp("", 0, "") == 0); + cl_assert(git__prefixncmp("a", 1, "") == 0); + cl_assert(git__prefixncmp("", 0, "a") < 0); + cl_assert(git__prefixncmp("a", 1, "b") < 0); + cl_assert(git__prefixncmp("b", 1, "a") > 0); + cl_assert(git__prefixncmp("ab", 2, "a") == 0); + cl_assert(git__prefixncmp("ab", 1, "a") == 0); + cl_assert(git__prefixncmp("ab", 2, "ac") < 0); + cl_assert(git__prefixncmp("a", 1, "ac") < 0); + cl_assert(git__prefixncmp("ab", 1, "ac") < 0); + cl_assert(git__prefixncmp("ab", 2, "aa") > 0); + cl_assert(git__prefixncmp("ab", 1, "aa") < 0); +} + +/* compare prefixes with len */ +void test_core_string__prefixncmp_icase(void) +{ + cl_assert(git__prefixncmp_icase("", 0, "") == 0); + cl_assert(git__prefixncmp_icase("a", 1, "") == 0); + cl_assert(git__prefixncmp_icase("", 0, "a") < 0); + cl_assert(git__prefixncmp_icase("a", 1, "b") < 0); + cl_assert(git__prefixncmp_icase("A", 1, "b") < 0); + cl_assert(git__prefixncmp_icase("a", 1, "B") < 0); + cl_assert(git__prefixncmp_icase("b", 1, "a") > 0); + cl_assert(git__prefixncmp_icase("B", 1, "a") > 0); + cl_assert(git__prefixncmp_icase("b", 1, "A") > 0); + cl_assert(git__prefixncmp_icase("ab", 2, "a") == 0); + cl_assert(git__prefixncmp_icase("Ab", 2, "a") == 0); + cl_assert(git__prefixncmp_icase("ab", 2, "A") == 0); + cl_assert(git__prefixncmp_icase("ab", 1, "a") == 0); + cl_assert(git__prefixncmp_icase("ab", 2, "ac") < 0); + cl_assert(git__prefixncmp_icase("Ab", 2, "ac") < 0); + cl_assert(git__prefixncmp_icase("ab", 2, "Ac") < 0); + cl_assert(git__prefixncmp_icase("a", 1, "ac") < 0); + cl_assert(git__prefixncmp_icase("ab", 1, "ac") < 0); + cl_assert(git__prefixncmp_icase("ab", 2, "aa") > 0); + cl_assert(git__prefixncmp_icase("ab", 1, "aa") < 0); +} + void test_core_string__strcmp(void) { cl_assert(git__strcmp("", "") == 0); diff --git a/tests/odb/largefiles.c b/tests/odb/largefiles.c new file mode 100644 index 000000000..22f136df5 --- /dev/null +++ b/tests/odb/largefiles.c @@ -0,0 +1,114 @@ +#include "clar_libgit2.h" +#include "git2/odb_backend.h" + +static git_repository *repo; +static git_odb *odb; + +void test_odb_largefiles__initialize(void) +{ + repo = cl_git_sandbox_init("testrepo.git"); + cl_git_pass(git_repository_odb(&odb, repo)); +} + +void test_odb_largefiles__cleanup(void) +{ + git_odb_free(odb); + cl_git_sandbox_cleanup(); +} + +static void writefile(git_oid *oid) +{ + static git_odb_stream *stream; + git_buf buf = GIT_BUF_INIT; + size_t i; + + for (i = 0; i < 3041; i++) + cl_git_pass(git_buf_puts(&buf, "Hello, world.\n")); + + cl_git_pass(git_odb_open_wstream(&stream, odb, 5368709122, GIT_OBJ_BLOB)); + for (i = 0; i < 126103; i++) + cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size)); + + cl_git_pass(git_odb_stream_finalize_write(oid, stream)); + + git_odb_stream_free(stream); + git_buf_free(&buf); +} + +void test_odb_largefiles__write_from_memory(void) +{ + git_oid expected, oid; + git_buf buf = GIT_BUF_INIT; + size_t i; + +#ifndef GIT_ARCH_64 + cl_skip(); +#endif + + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || + !cl_is_env_set("GITTEST_INVASIVE_MEMORY") || + !cl_is_env_set("GITTEST_SLOW")) + cl_skip(); + + for (i = 0; i < (3041*126103); i++) + cl_git_pass(git_buf_puts(&buf, "Hello, world.\n")); + + git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c"); + cl_git_pass(git_odb_write(&oid, odb, buf.ptr, buf.size, GIT_OBJ_BLOB)); + + cl_assert_equal_oid(&expected, &oid); +} + +void test_odb_largefiles__streamwrite(void) +{ + git_oid expected, oid; + + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || + !cl_is_env_set("GITTEST_SLOW")) + cl_skip(); + + git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c"); + writefile(&oid); + + cl_assert_equal_oid(&expected, &oid); +} + +void test_odb_largefiles__read_into_memory(void) +{ + git_oid oid; + git_odb_object *obj; + +#ifndef GIT_ARCH_64 + cl_skip(); +#endif + + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || + !cl_is_env_set("GITTEST_INVASIVE_MEMORY") || + !cl_is_env_set("GITTEST_SLOW")) + cl_skip(); + + writefile(&oid); + cl_git_pass(git_odb_read(&obj, odb, &oid)); + + git_odb_object_free(obj); +} + +void test_odb_largefiles__read_into_memory_rejected_on_32bit(void) +{ + git_oid oid; + git_odb_object *obj = NULL; + +#ifdef GIT_ARCH_64 + cl_skip(); +#endif + + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || + !cl_is_env_set("GITTEST_INVASIVE_MEMORY") || + !cl_is_env_set("GITTEST_SLOW")) + cl_skip(); + + writefile(&oid); + cl_git_fail(git_odb_read(&obj, odb, &oid)); + + git_odb_object_free(obj); +} |