diff options
-rw-r--r-- | include/git2/odb.h | 9 | ||||
-rw-r--r-- | include/git2/sys/odb_backend.h | 3 | ||||
-rw-r--r-- | src/odb.c | 9 | ||||
-rw-r--r-- | src/odb_loose.c | 507 | ||||
-rw-r--r-- | src/zstream.c | 85 | ||||
-rw-r--r-- | src/zstream.h | 6 | ||||
-rw-r--r-- | tests/odb/largefiles.c | 77 | ||||
-rw-r--r-- | tests/odb/loose.c | 84 |
8 files changed, 555 insertions, 225 deletions
diff --git a/include/git2/odb.h b/include/git2/odb.h index b7dc0c5f3..006a75b7a 100644 --- a/include/git2/odb.h +++ b/include/git2/odb.h @@ -357,11 +357,18 @@ GIT_EXTERN(void) git_odb_stream_free(git_odb_stream *stream); * @see git_odb_stream * * @param out pointer where to store the stream + * @param len pointer where to store the length of the object + * @param type pointer where to store the type of the object * @param db object database where the stream will read from * @param oid oid of the object the stream will read from * @return 0 if the stream was created; error code otherwise */ -GIT_EXTERN(int) git_odb_open_rstream(git_odb_stream **out, git_odb *db, const git_oid *oid); +GIT_EXTERN(int) git_odb_open_rstream( + git_odb_stream **out, + size_t *len, + git_otype *type, + git_odb *db, + const git_oid *oid); /** * Open a stream for writing a pack file to the ODB. diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h index 9bcc50ddd..792f103fe 100644 --- a/include/git2/sys/odb_backend.h +++ b/include/git2/sys/odb_backend.h @@ -56,7 +56,8 @@ struct git_odb_backend { git_odb_stream **, git_odb_backend *, git_off_t, git_otype); int (* readstream)( - git_odb_stream **, git_odb_backend *, const git_oid *); + git_odb_stream **, size_t *, git_otype *, + git_odb_backend *, const git_oid *); int (* exists)( git_odb_backend *, const git_oid *); @@ -1396,7 +1396,12 @@ void git_odb_stream_free(git_odb_stream *stream) stream->free(stream); } -int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid) +int git_odb_open_rstream( + git_odb_stream **stream, + size_t *len, + git_otype *type, + git_odb *db, + const git_oid *oid) { size_t i, reads = 0; int error = GIT_ERROR; @@ -1409,7 +1414,7 @@ int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oi if (b->readstream != NULL) { ++reads; - error = b->readstream(stream, b, oid); + error = b->readstream(stream, len, type, b, oid); } } diff --git a/src/odb_loose.c b/src/odb_loose.c index 9900aae2a..7d77eed38 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -21,6 +21,9 @@ #include "git2/odb_backend.h" #include "git2/types.h" +/* maximum possible header length */ +#define MAX_HEADER_LEN 64 + typedef struct { /* object header data */ git_otype type; /* object type */ size_t size; /* object size */ @@ -31,6 +34,15 @@ typedef struct { git_filebuf fbuf; } loose_writestream; +typedef struct { + git_odb_stream stream; + git_map map; + char start[MAX_HEADER_LEN]; + size_t start_len; + size_t start_read; + git_zstream zstream; +} loose_readstream; + typedef struct loose_backend { git_odb_backend parent; @@ -92,32 +104,42 @@ static int object_mkdir(const git_buf *name, const loose_backend *be) GIT_MKDIR_PATH | GIT_MKDIR_SKIP_LAST | GIT_MKDIR_VERIFY_DIR, NULL); } -static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj) +static int parse_header_packlike( + obj_hdr *out, size_t *out_len, const unsigned char *data, size_t len) { unsigned long c; - unsigned char *data = (unsigned char *)obj->ptr; size_t shift, size, used = 0; - if (git_buf_len(obj) == 0) - return 0; + if (len == 0) + goto on_error; c = data[used++]; - hdr->type = (c >> 4) & 7; + out->type = (c >> 4) & 7; size = c & 15; shift = 4; while (c & 0x80) { - if (git_buf_len(obj) <= used) - return 0; + if (len <= used) + goto on_error; + if (sizeof(size_t) * 8 <= shift) - return 0; + goto on_error; + c = data[used++]; size += (c & 0x7f) << shift; shift += 7; } - hdr->size = size; - return used; + out->size = size; + + if (out_len) + *out_len = used; + + return 0; + +on_error: + giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header"); + return -1; } static int parse_header( @@ -173,107 +195,26 @@ on_error: return -1; } -/*********************************************************** - * - * ZLIB RELATED FUNCTIONS - * - ***********************************************************/ - -static void init_stream(z_stream *s, void *out, size_t len) -{ - memset(s, 0, sizeof(*s)); - s->next_out = out; - s->avail_out = (uInt)len; -} - -static void set_stream_input(z_stream *s, void *in, size_t len) -{ - s->next_in = in; - s->avail_in = (uInt)len; -} - -static void set_stream_output(z_stream *s, void *out, size_t len) -{ - s->next_out = out; - s->avail_out = (uInt)len; -} - - -static int start_inflate(z_stream *s, git_buf *obj, void *out, size_t len) -{ - int status; - - init_stream(s, out, len); - set_stream_input(s, obj->ptr, git_buf_len(obj)); - - if ((status = inflateInit(s)) < Z_OK) - return status; - - return inflate(s, 0); -} - -static void abort_inflate(z_stream *s) -{ - inflateEnd(s); -} - -static int finish_inflate(z_stream *s) -{ - int status = Z_OK; - - while (status == Z_OK) - status = inflate(s, Z_FINISH); - - inflateEnd(s); - - if ((status != Z_STREAM_END) || (s->avail_in != 0)) { - giterr_set(GITERR_ZLIB, "failed to finish zlib inflation; stream aborted prematurely"); - return -1; - } - - return 0; -} - -static int is_zlib_compressed_data(unsigned char *data) +static int is_zlib_compressed_data(unsigned char *data, size_t data_len) { unsigned int w; + if (data_len < 2) + return 0; + w = ((unsigned int)(data[0]) << 8) + data[1]; return (data[0] & 0x8F) == 0x08 && !(w % 31); } -static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen) -{ - z_stream zs; - int status = Z_OK; - - memset(&zs, 0x0, sizeof(zs)); - - zs.next_out = out; - zs.avail_out = (uInt)outlen; - - zs.next_in = in; - zs.avail_in = (uInt)inlen; - - if (inflateInit(&zs) < Z_OK) { - giterr_set(GITERR_ZLIB, "failed to inflate buffer"); - return -1; - } - - while (status == Z_OK) - status = inflate(&zs, Z_FINISH); - - inflateEnd(&zs); - - if (status != Z_STREAM_END /* || zs.avail_in != 0 */ || - zs.total_out != outlen) - { - giterr_set(GITERR_ZLIB, "failed to inflate buffer; stream aborted prematurely"); - return -1; - } +/*********************************************************** + * + * ODB OBJECT READING & WRITING + * + * Backend for the public API; read headers and full objects + * from the ODB. Write raw data to the ODB. + * + ***********************************************************/ - return 0; -} /* * At one point, there was a loose object format that was intended to @@ -281,56 +222,62 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen) * of loose object data into packs. This format is no longer used, but * we must still read it. */ -static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj) +static int read_loose_packlike(git_rawobj *out, git_buf *obj) { - unsigned char *in, *buf; + git_buf body = GIT_BUF_INIT; + const unsigned char *obj_data; obj_hdr hdr; - size_t len, used, alloclen; + size_t obj_len, head_len, alloc_size; + int error; + + obj_data = (unsigned char *)obj->ptr; + obj_len = obj->size; /* * read the object header, which is an (uncompressed) * binary encoding of the object type and size. */ - if ((used = get_binary_object_header(&hdr, obj)) == 0 || - !git_object_typeisloose(hdr.type)) { + if ((error = parse_header_packlike(&hdr, &head_len, obj_data, obj_len)) < 0) + goto done; + + if (!git_object_typeisloose(hdr.type) || head_len > obj_len) { giterr_set(GITERR_ODB, "failed to inflate loose object"); - return -1; + error = -1; + goto done; } + obj_data += head_len; + obj_len -= head_len; + /* * allocate a buffer and inflate the data into it */ - GITERR_CHECK_ALLOC_ADD(&alloclen, hdr.size, 1); - buf = git__malloc(alloclen); - GITERR_CHECK_ALLOC(buf); - - in = ((unsigned char *)obj->ptr) + used; - len = obj->size - used; - if (inflate_buffer(in, len, buf, hdr.size) < 0) { - git__free(buf); - return -1; + if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) || + git_buf_init(&body, alloc_size) < 0) { + error = -1; + goto done; } - buf[hdr.size] = '\0'; - out->data = buf; + if ((error = git_zstream_inflatebuf(&body, obj_data, obj_len)) < 0) + goto done; + out->len = hdr.size; out->type = hdr.type; + out->data = git_buf_detach(&body); - return 0; +done: + git_buf_free(&body); + return error; } -static int inflate_disk_obj(git_rawobj *out, git_buf *obj) +static int read_loose_standard(git_rawobj *out, git_buf *obj) { git_zstream zstream = GIT_ZSTREAM_INIT; - unsigned char head[64], *body = NULL; + unsigned char head[MAX_HEADER_LEN], *body = NULL; size_t decompressed, head_len, body_len, alloc_size; obj_hdr hdr; int error; - /* check for a pack-like loose object */ - if (!is_zlib_compressed_data((unsigned char *)obj->ptr)) - return inflate_packlike_loose_disk_obj(out, obj); - if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 || (error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0) goto done; @@ -338,9 +285,10 @@ static int inflate_disk_obj(git_rawobj *out, git_buf *obj) decompressed = sizeof(head); /* - * inflate the initial part of the compressed buffer in order to parse the - * header; read the largest header possible, then push back the remainder. - */ + * inflate the initial part of the compressed buffer in order to + * parse the header; read the largest header possible, then push the + * remainder into the body buffer. + */ if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 || (error = parse_header(&hdr, &head_len, head, decompressed)) < 0) goto done; @@ -391,20 +339,6 @@ done: return error; } - - - - - -/*********************************************************** - * - * ODB OBJECT READING & WRITING - * - * Backend for the public API; read headers and full objects - * from the ODB. Write raw data to the ODB. - * - ***********************************************************/ - static int read_loose(git_rawobj *out, git_buf *loc) { int error; @@ -419,22 +353,62 @@ static int read_loose(git_rawobj *out, git_buf *loc) out->len = 0; out->type = GIT_OBJ_BAD; - if (!(error = git_futils_readbuffer(&obj, loc->ptr))) - error = inflate_disk_obj(out, &obj); + if ((error = git_futils_readbuffer(&obj, loc->ptr)) < 0) + goto done; + + if (!is_zlib_compressed_data((unsigned char *)obj.ptr, obj.size)) + error = read_loose_packlike(out, &obj); + else + error = read_loose_standard(out, &obj); +done: git_buf_free(&obj); + return error; +} + +static int read_header_loose_packlike( + git_rawobj *out, const unsigned char *data, size_t len) +{ + obj_hdr hdr; + size_t header_len; + int error; + + if ((error = parse_header_packlike(&hdr, &header_len, data, len)) < 0) + return error; + out->len = hdr.size; + out->type = hdr.type; + + return error; +} + +static int read_header_loose_standard( + git_rawobj *out, const unsigned char *data, size_t len) +{ + git_zstream zs = GIT_ZSTREAM_INIT; + obj_hdr hdr; + unsigned char inflated[MAX_HEADER_LEN]; + size_t header_len, inflated_len = sizeof(inflated); + int error; + + if ((error = git_zstream_init(&zs, GIT_ZSTREAM_INFLATE)) < 0 || + (error = git_zstream_set_input(&zs, data, len)) < 0 || + (error = git_zstream_get_output_chunk(inflated, &inflated_len, &zs)) < 0 || + (error = parse_header(&hdr, &header_len, inflated, inflated_len)) < 0) + goto done; + + out->len = hdr.size; + out->type = hdr.type; + +done: + git_zstream_free(&zs); return error; } static int read_header_loose(git_rawobj *out, git_buf *loc) { - int error = 0, z_return = Z_ERRNO, read_bytes; - git_file fd; - z_stream zs; - obj_hdr header_obj; - size_t header_len; - unsigned char raw_buffer[16], inflated_buffer[64]; + unsigned char obj[1024]; + int fd, obj_len, error; assert(out && loc); @@ -443,35 +417,23 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) out->data = NULL; - if ((fd = git_futils_open_ro(loc->ptr)) < 0) - return fd; - - init_stream(&zs, inflated_buffer, sizeof(inflated_buffer)); - - z_return = inflateInit(&zs); + if ((error = fd = git_futils_open_ro(loc->ptr)) < 0 || + (error = obj_len = p_read(fd, obj, sizeof(obj))) < 0) + goto done; - while (z_return == Z_OK) { - if ((read_bytes = p_read(fd, raw_buffer, sizeof(raw_buffer))) > 0) { - set_stream_input(&zs, raw_buffer, read_bytes); - z_return = inflate(&zs, 0); - } else - z_return = Z_STREAM_END; - } + if (!is_zlib_compressed_data(obj, (size_t)obj_len)) + error = read_header_loose_packlike(out, obj, (size_t)obj_len); + else + error = read_header_loose_standard(out, obj, (size_t)obj_len); - if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR) - || parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0 - || git_object_typeisloose(header_obj.type) == 0) - { + if (!error && !git_object_typeisloose(out->type)) { giterr_set(GITERR_ZLIB, "failed to read loose object header"); error = -1; - } else { - out->len = header_obj.size; - out->type = header_obj.type; + goto done; } - finish_inflate(&zs); +done: p_close(fd); - return error; } @@ -812,7 +774,7 @@ static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb return error; } -static int loose_backend__stream_fwrite(git_odb_stream *_stream, const git_oid *oid) +static int loose_backend__writestream_finalize(git_odb_stream *_stream, const git_oid *oid) { loose_writestream *stream = (loose_writestream *)_stream; loose_backend *backend = (loose_backend *)_stream->backend; @@ -831,13 +793,13 @@ static int loose_backend__stream_fwrite(git_odb_stream *_stream, const git_oid * return error; } -static int loose_backend__stream_write(git_odb_stream *_stream, const char *data, size_t len) +static int loose_backend__writestream_write(git_odb_stream *_stream, const char *data, size_t len) { loose_writestream *stream = (loose_writestream *)_stream; return git_filebuf_write(&stream->fbuf, data, len); } -static void loose_backend__stream_free(git_odb_stream *_stream) +static void loose_backend__writestream_free(git_odb_stream *_stream) { loose_writestream *stream = (loose_writestream *)_stream; @@ -856,11 +818,11 @@ static int filebuf_flags(loose_backend *backend) return flags; } -static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend, git_off_t length, git_otype type) +static int loose_backend__writestream(git_odb_stream **stream_out, git_odb_backend *_backend, git_off_t length, git_otype type) { loose_backend *backend; loose_writestream *stream = NULL; - char hdr[64]; + char hdr[MAX_HEADER_LEN]; git_buf tmp_path = GIT_BUF_INIT; int hdrlen; @@ -876,9 +838,9 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_ stream->stream.backend = _backend; stream->stream.read = NULL; /* read only */ - stream->stream.write = &loose_backend__stream_write; - stream->stream.finalize_write = &loose_backend__stream_fwrite; - stream->stream.free = &loose_backend__stream_free; + stream->stream.write = &loose_backend__writestream_write; + stream->stream.finalize_write = &loose_backend__writestream_finalize; + stream->stream.free = &loose_backend__writestream_free; stream->stream.mode = GIT_STREAM_WRONLY; if (git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") < 0 || @@ -896,11 +858,187 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_ return !stream ? -1 : 0; } +static int loose_backend__readstream_read( + git_odb_stream *_stream, + char *buffer, + size_t buffer_len) +{ + loose_readstream *stream = (loose_readstream *)_stream; + size_t start_remain = stream->start_len - stream->start_read; + int total = 0, error; + + /* + * if we read more than just the header in the initial read, play + * that back for the caller. + */ + if (start_remain && buffer_len) { + size_t chunk = min(start_remain, buffer_len); + memcpy(buffer, stream->start + stream->start_read, chunk); + + buffer += chunk; + stream->start_read += chunk; + + total += chunk; + buffer_len -= chunk; + } + + if (buffer_len) { + size_t chunk = min(buffer_len, INT_MAX); + + if ((error = git_zstream_get_output(buffer, &chunk, &stream->zstream)) < 0) + return error; + + total += chunk; + } + + return total; +} + +static void loose_backend__readstream_free(git_odb_stream *_stream) +{ + loose_readstream *stream = (loose_readstream *)_stream; + + git_futils_mmap_free(&stream->map); + git_zstream_free(&stream->zstream); + git__free(stream); +} + +static int loose_backend__readstream_packlike( + obj_hdr *hdr, + loose_readstream *stream) +{ + const unsigned char *data; + size_t data_len, head_len; + int error; + + data = stream->map.data; + data_len = stream->map.len; + + /* + * read the object header, which is an (uncompressed) + * binary encoding of the object type and size. + */ + if ((error = parse_header_packlike(hdr, &head_len, data, data_len)) < 0) + return error; + + if (!git_object_typeisloose(hdr->type)) { + giterr_set(GITERR_ODB, "failed to inflate loose object"); + return -1; + } + + return git_zstream_set_input(&stream->zstream, + data + head_len, data_len - head_len); +} + +static int loose_backend__readstream_standard( + obj_hdr *hdr, + loose_readstream *stream) +{ + unsigned char head[MAX_HEADER_LEN]; + size_t init, head_len; + int error; + + if ((error = git_zstream_set_input(&stream->zstream, + stream->map.data, stream->map.len)) < 0) + return error; + + init = sizeof(head); + + /* + * inflate the initial part of the compressed buffer in order to + * parse the header; read the largest header possible, then store + * it in the `start` field of the stream object. + */ + if ((error = git_zstream_get_output(head, &init, &stream->zstream)) < 0 || + (error = parse_header(hdr, &head_len, head, init)) < 0) + return error; + + if (!git_object_typeisloose(hdr->type)) { + giterr_set(GITERR_ODB, "failed to inflate disk object"); + return -1; + } + + if (init > head_len) { + stream->start_len = init - head_len; + memcpy(stream->start, head + head_len, init - head_len); + } + + return 0; +} + +static int loose_backend__readstream( + git_odb_stream **stream_out, + size_t *len_out, + git_otype *type_out, + git_odb_backend *_backend, + const git_oid *oid) +{ + loose_backend *backend; + loose_readstream *stream = NULL; + git_hash_ctx *hash_ctx = NULL; + git_buf object_path = GIT_BUF_INIT; + obj_hdr hdr; + int error = 0; + + assert(stream_out && len_out && type_out && _backend && oid); + + backend = (loose_backend *)_backend; + *stream_out = NULL; + *len_out = 0; + *type_out = GIT_OBJ_BAD; + + if (locate_object(&object_path, backend, oid) < 0) { + error = git_odb__error_notfound("no matching loose object", + oid, GIT_OID_HEXSZ); + goto done; + } + + stream = git__calloc(1, sizeof(loose_readstream)); + GITERR_CHECK_ALLOC(stream); + + hash_ctx = git__malloc(sizeof(git_hash_ctx)); + GITERR_CHECK_ALLOC(hash_ctx); + + if ((error = git_hash_ctx_init(hash_ctx)) < 0 || + (error = git_futils_mmap_ro_file(&stream->map, object_path.ptr)) < 0 || + (error = git_zstream_init(&stream->zstream, GIT_ZSTREAM_INFLATE)) < 0) + goto done; + + /* check for a packlike loose object */ + if (!is_zlib_compressed_data(stream->map.data, stream->map.len)) + error = loose_backend__readstream_packlike(&hdr, stream); + else + error = loose_backend__readstream_standard(&hdr, stream); + + if (error < 0) + goto done; + + stream->stream.backend = _backend; + stream->stream.hash_ctx = hash_ctx; + stream->stream.read = &loose_backend__readstream_read; + stream->stream.free = &loose_backend__readstream_free; + + *stream_out = (git_odb_stream *)stream; + *len_out = hdr.size; + *type_out = hdr.type; + +done: + if (error < 0) { + git_futils_mmap_free(&stream->map); + git_zstream_free(&stream->zstream); + git_hash_ctx_cleanup(hash_ctx); + git__free(stream); + } + + git_buf_free(&object_path); + return error; +} + static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_otype type) { int error = 0, header_len; git_buf final_path = GIT_BUF_INIT; - char header[64]; + char header[MAX_HEADER_LEN]; git_filebuf fbuf = GIT_FILEBUF_INIT; loose_backend *backend; @@ -1002,7 +1140,8 @@ int git_odb_backend_loose( backend->parent.write = &loose_backend__write; backend->parent.read_prefix = &loose_backend__read_prefix; backend->parent.read_header = &loose_backend__read_header; - backend->parent.writestream = &loose_backend__stream; + backend->parent.writestream = &loose_backend__writestream; + backend->parent.readstream = &loose_backend__readstream; backend->parent.exists = &loose_backend__exists; backend->parent.exists_prefix = &loose_backend__exists_prefix; backend->parent.foreach = &loose_backend__foreach; diff --git a/src/zstream.c b/src/zstream.c index 963c9a344..affa55653 100644 --- a/src/zstream.c +++ b/src/zstream.c @@ -87,9 +87,52 @@ size_t git_zstream_suggest_output_len(git_zstream *zstream) return ZSTREAM_BUFFER_MIN_EXTRA; } +int git_zstream_get_output_chunk( + void *out, size_t *out_len, git_zstream *zstream) +{ + size_t in_queued, in_used, out_queued; + + /* set up input data */ + zstream->z.next_in = (Bytef *)zstream->in; + + /* feed as much data to zlib as it can consume, at most UINT_MAX */ + if (zstream->in_len > UINT_MAX) { + zstream->z.avail_in = UINT_MAX; + zstream->flush = Z_NO_FLUSH; + } else { + zstream->z.avail_in = (uInt)zstream->in_len; + zstream->flush = Z_FINISH; + } + in_queued = (size_t)zstream->z.avail_in; + + /* set up output data */ + zstream->z.next_out = out; + zstream->z.avail_out = (uInt)*out_len; + + if ((size_t)zstream->z.avail_out != *out_len) + zstream->z.avail_out = UINT_MAX; + out_queued = (size_t)zstream->z.avail_out; + + /* compress next chunk */ + if (zstream->type == GIT_ZSTREAM_INFLATE) + zstream->zerr = inflate(&zstream->z, zstream->flush); + else + zstream->zerr = deflate(&zstream->z, zstream->flush); + + if (zstream_seterr(zstream)) + return -1; + + in_used = (in_queued - zstream->z.avail_in); + zstream->in_len -= in_used; + zstream->in += in_used; + + *out_len = (out_queued - zstream->z.avail_out); + + return 0; +} + int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) { - int zflush = Z_FINISH; size_t out_remain = *out_len; if (zstream->in_len && zstream->zerr == Z_STREAM_END) { @@ -98,47 +141,17 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) } while (out_remain > 0 && zstream->zerr != Z_STREAM_END) { - size_t out_queued, in_queued, out_used, in_used; + size_t out_written = out_remain; - /* set up in data */ - zstream->z.next_in = (Bytef *)zstream->in; - zstream->z.avail_in = (uInt)zstream->in_len; - - if ((size_t)zstream->z.avail_in != zstream->in_len) { - zstream->z.avail_in = UINT_MAX; - zflush = Z_NO_FLUSH; - } else { - zflush = Z_FINISH; - } - in_queued = (size_t)zstream->z.avail_in; - - /* set up out data */ - zstream->z.next_out = out; - zstream->z.avail_out = (uInt)out_remain; - if ((size_t)zstream->z.avail_out != out_remain) - zstream->z.avail_out = UINT_MAX; - out_queued = (size_t)zstream->z.avail_out; - - /* compress next chunk */ - if (zstream->type == GIT_ZSTREAM_INFLATE) - zstream->zerr = inflate(&zstream->z, zflush); - else - zstream->zerr = deflate(&zstream->z, zflush); - - if (zstream_seterr(zstream)) + if (git_zstream_get_output_chunk(out, &out_written, zstream) < 0) return -1; - out_used = (out_queued - zstream->z.avail_out); - out_remain -= out_used; - out = ((char *)out) + out_used; - - in_used = (in_queued - zstream->z.avail_in); - zstream->in_len -= in_used; - zstream->in += in_used; + out_remain -= out_written; + out = ((char *)out) + out_written; } /* either we finished the input or we did not flush the data */ - assert(zstream->in_len > 0 || zflush == Z_FINISH); + assert(zstream->in_len > 0 || zstream->flush == Z_FINISH); /* set out_size to number of bytes actually written to output */ *out_len = *out_len - out_remain; diff --git a/src/zstream.h b/src/zstream.h index c2404f328..47ecc1322 100644 --- a/src/zstream.h +++ b/src/zstream.h @@ -23,6 +23,7 @@ typedef struct { git_zstream_t type; const char *in; size_t in_len; + int flush; int zerr; } git_zstream; @@ -35,6 +36,11 @@ int git_zstream_set_input(git_zstream *zstream, const void *in, size_t in_len); size_t git_zstream_suggest_output_len(git_zstream *zstream); +/* get as much output as is available in the input buffer */ +int git_zstream_get_output_chunk( + void *out, size_t *out_len, git_zstream *zstream); + +/* get all the output from the entire input buffer */ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream); bool git_zstream_done(git_zstream *zstream); diff --git a/tests/odb/largefiles.c b/tests/odb/largefiles.c index 22f136df5..cd3651b4a 100644 --- a/tests/odb/largefiles.c +++ b/tests/odb/largefiles.c @@ -1,5 +1,9 @@ #include "clar_libgit2.h" #include "git2/odb_backend.h" +#include "hash.h" +#include "odb.h" + +#define LARGEFILE_SIZE 5368709122 static git_repository *repo; static git_odb *odb; @@ -25,7 +29,7 @@ static void writefile(git_oid *oid) for (i = 0; i < 3041; i++) cl_git_pass(git_buf_puts(&buf, "Hello, world.\n")); - cl_git_pass(git_odb_open_wstream(&stream, odb, 5368709122, GIT_OBJ_BLOB)); + cl_git_pass(git_odb_open_wstream(&stream, odb, LARGEFILE_SIZE, GIT_OBJ_BLOB)); for (i = 0; i < 126103; i++) cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size)); @@ -63,6 +67,10 @@ void test_odb_largefiles__streamwrite(void) { git_oid expected, oid; +#ifndef GIT_ARCH_64 + cl_skip(); +#endif + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || !cl_is_env_set("GITTEST_SLOW")) cl_skip(); @@ -73,6 +81,52 @@ void test_odb_largefiles__streamwrite(void) cl_assert_equal_oid(&expected, &oid); } +void test_odb_largefiles__streamread(void) +{ + git_oid oid, read_oid; + git_odb_stream *stream; + char buf[10240]; + char hdr[64]; + size_t len, total = 0; + git_hash_ctx hash; + git_otype type; + int hdr_len, ret; + +#ifndef GIT_ARCH_64 + cl_skip(); +#endif + + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || + !cl_is_env_set("GITTEST_SLOW")) + cl_skip(); + + writefile(&oid); + + cl_git_pass(git_odb_open_rstream(&stream, &len, &type, odb, &oid)); + + cl_assert_equal_sz(LARGEFILE_SIZE, len); + cl_assert_equal_i(GIT_OBJ_BLOB, type); + + cl_git_pass(git_hash_ctx_init(&hash)); + hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), len, type); + + cl_git_pass(git_hash_update(&hash, hdr, hdr_len)); + + while ((ret = git_odb_stream_read(stream, buf, 10240)) > 0) { + cl_git_pass(git_hash_update(&hash, buf, ret)); + total += ret; + } + + cl_assert_equal_sz(LARGEFILE_SIZE, total); + + git_hash_final(&read_oid, &hash); + + cl_assert_equal_oid(&oid, &read_oid); + + git_hash_ctx_cleanup(&hash); + git_odb_stream_free(stream); +} + void test_odb_largefiles__read_into_memory(void) { git_oid oid; @@ -112,3 +166,24 @@ void test_odb_largefiles__read_into_memory_rejected_on_32bit(void) git_odb_object_free(obj); } + +void test_odb_largefiles__read_header(void) +{ + git_oid oid; + size_t len; + git_otype type; + +#ifndef GIT_ARCH_64 + cl_skip(); +#endif + + if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || + !cl_is_env_set("GITTEST_SLOW")) + cl_skip(); + + writefile(&oid); + cl_git_pass(git_odb_read_header(&len, &type, odb, &oid)); + + cl_assert_equal_sz(LARGEFILE_SIZE, len); + cl_assert_equal_i(GIT_OBJ_BLOB, type); +} diff --git a/tests/odb/loose.c b/tests/odb/loose.c index 2e24d6723..83d080729 100644 --- a/tests/odb/loose.c +++ b/tests/odb/loose.c @@ -55,6 +55,63 @@ static void test_read_object(object_data *data) git_odb_free(odb); } +static void test_read_header(object_data *data) +{ + git_oid id; + git_odb *odb; + size_t len; + git_otype type; + + write_object_files(data); + + cl_git_pass(git_odb_open(&odb, "test-objects")); + cl_git_pass(git_oid_fromstr(&id, data->id)); + cl_git_pass(git_odb_read_header(&len, &type, odb, &id)); + + cl_assert_equal_sz(data->dlen, len); + cl_assert_equal_i(git_object_string2type(data->type), type); + + git_odb_free(odb); +} + +static void test_readstream_object(object_data *data, size_t blocksize) +{ + git_oid id; + git_odb *odb; + git_odb_stream *stream; + git_rawobj tmp; + char buf[2048], *ptr = buf; + size_t remain; + int ret; + + write_object_files(data); + + cl_git_pass(git_odb_open(&odb, "test-objects")); + cl_git_pass(git_oid_fromstr(&id, data->id)); + cl_git_pass(git_odb_open_rstream(&stream, &tmp.len, &tmp.type, odb, &id)); + + remain = tmp.len; + + while (remain) { + cl_assert((ret = git_odb_stream_read(stream, ptr, blocksize)) >= 0); + if (ret == 0) + break; + + cl_assert(remain >= (size_t)ret); + remain -= ret; + ptr += ret; + } + + cl_assert(remain == 0); + + tmp.data = buf; + + cmp_objects(&tmp, data); + + git_odb_stream_free(stream); + git_odb_free(odb); +} + void test_odb_loose__initialize(void) { p_fsync__cnt = 0; @@ -103,6 +160,33 @@ void test_odb_loose__simple_reads(void) test_read_object(&some); } +void test_odb_loose__streaming_reads(void) +{ + size_t blocksizes[] = { 1, 2, 4, 16, 99, 1024, 123456789 }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(blocksizes); i++) { + test_readstream_object(&commit, blocksizes[i]); + test_readstream_object(&tree, blocksizes[i]); + test_readstream_object(&tag, blocksizes[i]); + test_readstream_object(&zero, blocksizes[i]); + test_readstream_object(&one, blocksizes[i]); + test_readstream_object(&two, blocksizes[i]); + test_readstream_object(&some, blocksizes[i]); + } +} + +void test_odb_loose__read_header(void) +{ + test_read_header(&commit); + test_read_header(&tree); + test_read_header(&tag); + test_read_header(&zero); + test_read_header(&one); + test_read_header(&two); + test_read_header(&some); +} + void test_write_object_permission( mode_t dir_mode, mode_t file_mode, mode_t expected_dir_mode, mode_t expected_file_mode) |