From c6f26b48e47f6c0cd67769418c33efe66e5b5fe8 Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Fri, 13 Dec 2013 18:26:46 -0500 Subject: Refactor zlib for easier deflate streaming --- src/compress.c | 53 -------------------------------- src/compress.h | 16 ---------- src/indexer.c | 6 ++-- src/pack-objects.c | 6 ++-- src/zstream.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/zstream.h | 25 +++++++++++++++ 6 files changed, 120 insertions(+), 76 deletions(-) delete mode 100644 src/compress.c delete mode 100644 src/compress.h create mode 100644 src/zstream.c create mode 100644 src/zstream.h diff --git a/src/compress.c b/src/compress.c deleted file mode 100644 index 14b79404c..000000000 --- a/src/compress.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ - -#include "compress.h" - -#include - -#define BUFFER_SIZE (1024 * 1024) - -int git__compress(git_buf *buf, const void *buff, size_t len) -{ - z_stream zs; - char *zb; - size_t have; - - memset(&zs, 0, sizeof(zs)); - if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) - return -1; - - zb = git__malloc(BUFFER_SIZE); - GITERR_CHECK_ALLOC(zb); - - zs.next_in = (void *)buff; - zs.avail_in = (uInt)len; - - do { - zs.next_out = (unsigned char *)zb; - zs.avail_out = BUFFER_SIZE; - - if (deflate(&zs, Z_FINISH) == Z_STREAM_ERROR) { - git__free(zb); - return -1; - } - - have = BUFFER_SIZE - (size_t)zs.avail_out; - - if (git_buf_put(buf, zb, have) < 0) { - git__free(zb); - return -1; - } - - } while (zs.avail_out == 0); - - assert(zs.avail_in == 0); - - deflateEnd(&zs); - git__free(zb); - return 0; -} diff --git a/src/compress.h b/src/compress.h deleted file mode 100644 index 49e6f4749..000000000 --- a/src/compress.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. - * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#ifndef INCLUDE_compress_h__ -#define INCLUDE_compress_h__ - -#include "common.h" - -#include "buffer.h" - -int git__compress(git_buf *buf, const void *buff, size_t len); - -#endif /* INCLUDE_compress_h__ */ diff --git a/src/indexer.c b/src/indexer.c index 6132571cc..ccab8fcf0 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -5,8 +5,6 @@ * a Linking Exception. For full terms see the included COPYING file. */ -#include - #include "git2/indexer.h" #include "git2/object.h" @@ -18,7 +16,7 @@ #include "filebuf.h" #include "oid.h" #include "oidmap.h" -#include "compress.h" +#include "zstream.h" #define UINT31_MAX (0x7FFFFFFF) @@ -662,7 +660,7 @@ static int inject_object(git_indexer *idx, git_oid *id) idx->pack->mwf.size += hdr_len; entry->crc = crc32(entry->crc, hdr, hdr_len); - if ((error = git__compress(&buf, data, len)) < 0) + if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0) goto cleanup; /* And then the compressed object */ diff --git a/src/pack-objects.c b/src/pack-objects.c index 335944c0c..0d31d50ff 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -7,7 +7,7 @@ #include "pack-objects.h" -#include "compress.h" +#include "zstream.h" #include "delta.h" #include "iterator.h" #include "netops.h" @@ -319,7 +319,7 @@ static int write_object(git_buf *buf, git_packbuilder *pb, git_pobject *po) /* Write data */ if (po->z_delta_size) size = po->z_delta_size; - else if (git__compress(&zbuf, data, size) < 0) + else if (git_zstream_deflatebuf(&zbuf, data, size) < 0) goto on_error; else { if (po->delta) @@ -931,7 +931,7 @@ static int find_deltas(git_packbuilder *pb, git_pobject **list, * between writes at that moment. */ if (po->delta_data) { - if (git__compress(&zbuf, po->delta_data, po->delta_size) < 0) + if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0) goto on_error; git__free(po->delta_data); diff --git a/src/zstream.c b/src/zstream.c new file mode 100644 index 000000000..e043dd3a9 --- /dev/null +++ b/src/zstream.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include + +#include "zstream.h" +#include "buffer.h" + +#define BUFFER_SIZE (1024 * 1024) + +static int zstream_seterr(int zerr, git_zstream *zstream) +{ + if (zerr == Z_MEM_ERROR) + giterr_set_oom(); + else if (zstream->msg) + giterr_set(GITERR_ZLIB, zstream->msg); + else + giterr_set(GITERR_ZLIB, "Unknown compression error"); + + return -1; +} + +int git_zstream_init(git_zstream *zstream) +{ + int zerr; + + if ((zerr = deflateInit(zstream, Z_DEFAULT_COMPRESSION)) != Z_OK) + return zstream_seterr(zerr, zstream); + + return 0; +} + +ssize_t git_zstream_deflate(void *out, size_t out_len, git_zstream *zstream, const void *in, size_t in_len) +{ + int zerr; + + if ((ssize_t)out_len < 0) + out_len = INT_MAX; + + zstream->next_in = (Bytef *)in; + zstream->avail_in = in_len; + zstream->next_out = out; + zstream->avail_out = out_len; + + if ((zerr = deflate(zstream, Z_FINISH)) == Z_STREAM_ERROR) + return zstream_seterr(zerr, zstream); + + return (out_len - zstream->avail_out); +} + +void git_zstream_free(git_zstream *zstream) +{ + deflateEnd(zstream); +} + +int git_zstream_deflatebuf(git_buf *out, const void *in, size_t in_len) +{ + git_zstream zstream = GIT_ZSTREAM_INIT; + size_t out_len; + ssize_t written; + int error = 0; + + if ((error = git_zstream_init(&zstream)) < 0) + goto done; + + do { + if (out->asize - out->size < BUFFER_SIZE) + git_buf_grow(out, out->asize + BUFFER_SIZE); + + out_len = out->asize - out->size; + + if ((written = git_zstream_deflate(out->ptr + out->size, out_len, &zstream, in, in_len)) <= 0) + break; + + in = (char *)in + written; + in_len -= written; + out->size += written; + } while (written > 0); + + if (written < 0) + error = written; + +done: + git_zstream_free(&zstream); + return error; +} diff --git a/src/zstream.h b/src/zstream.h new file mode 100644 index 000000000..e6c841120 --- /dev/null +++ b/src/zstream.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_zstream_h__ +#define INCLUDE_zstream_h__ + +#include + +#include "common.h" +#include "buffer.h" + +#define git_zstream z_stream + +#define GIT_ZSTREAM_INIT {0} + +int git_zstream_init(git_zstream *zstream); +ssize_t git_zstream_deflate(void *out, size_t out_len, git_zstream *zstream, const void *in, size_t in_len); +void git_zstream_free(git_zstream *zstream); + +int git_zstream_deflatebuf(git_buf *out, const void *in, size_t in_len); + +#endif /* INCLUDE_zstream_h__ */ -- cgit v1.2.1 From 0ade2f7a5919d1953f679661a752c31328ccb90a Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Sat, 14 Dec 2013 10:37:57 -0500 Subject: Packbuilder stream deflate instead of one-shot --- src/pack-objects.c | 145 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 81 insertions(+), 64 deletions(-) diff --git a/src/pack-objects.c b/src/pack-objects.c index 0d31d50ff..9df9a015e 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -61,6 +61,9 @@ struct pack_write_context { /* The minimal interval between progress updates (in seconds). */ #define MIN_PROGRESS_UPDATE_INTERVAL 0.5 +/* Size of the buffer to feed to zlib */ +#define COMPRESS_BUFLEN (1024 * 1024) + static unsigned name_hash(const char *name) { unsigned c, hash = 0; @@ -275,78 +278,95 @@ on_error: return -1; } -static int write_object(git_buf *buf, git_packbuilder *pb, git_pobject *po) +static int write_object( + git_packbuilder *pb, + git_pobject *po, + int (*write_cb)(void *buf, size_t size, void *cb_data), + void *cb_data) { + git_zstream zstream = GIT_ZSTREAM_INIT; git_odb_object *obj = NULL; - git_buf zbuf = GIT_BUF_INIT; git_otype type; - unsigned char hdr[10]; - size_t hdr_len; - unsigned long size; + unsigned char hdr[10], *zbuf = NULL; void *data; + size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len; + ssize_t written; + int error; if (po->delta) { if (po->delta_data) data = po->delta_data; - else if (get_delta(&data, pb->odb, po) < 0) - goto on_error; - size = po->delta_size; + else if ((error = get_delta(&data, pb->odb, po)) < 0) + goto done; + + data_len = po->delta_size; type = GIT_OBJ_REF_DELTA; } else { - if (git_odb_read(&obj, pb->odb, &po->id)) - goto on_error; + if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0) + goto done; data = (void *)git_odb_object_data(obj); - size = (unsigned long)git_odb_object_size(obj); + data_len = git_odb_object_size(obj); type = git_odb_object_type(obj); } /* Write header */ - hdr_len = git_packfile__object_header(hdr, size, type); - - if (git_buf_put(buf, (char *)hdr, hdr_len) < 0) - goto on_error; + hdr_len = git_packfile__object_header(hdr, data_len, type); - if (git_hash_update(&pb->ctx, hdr, hdr_len) < 0) - goto on_error; + if ((error = write_cb(hdr, hdr_len, cb_data)) < 0 || + (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0) + goto done; if (type == GIT_OBJ_REF_DELTA) { - if (git_buf_put(buf, (char *)po->delta->id.id, GIT_OID_RAWSZ) < 0 || - git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ) < 0) - goto on_error; + if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 || + (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0) + goto done; } /* Write data */ - if (po->z_delta_size) - size = po->z_delta_size; - else if (git_zstream_deflatebuf(&zbuf, data, size) < 0) - goto on_error; - else { + if (po->z_delta_size) { + data_len = po->z_delta_size; + + if ((error = write_cb(data, data_len, cb_data)) < 0 || + (error = git_hash_update(&pb->ctx, data, data_len)) < 0) + goto done; + } else { + zbuf = git__malloc(zbuf_len); + GITERR_CHECK_ALLOC(zbuf); + + if ((error = git_zstream_init(&zstream)) < 0) + goto done; + + while ((written = git_zstream_deflate(zbuf, zbuf_len, &zstream, data, data_len)) > 0) { + if ((error = write_cb(zbuf, written, cb_data)) < 0 || + (error = git_hash_update(&pb->ctx, zbuf, written)) < 0) + goto done; + + data = (char *)data + written; + data_len -= written; + } + + if (written < 0) { + error = written; + goto done; + } + if (po->delta) git__free(data); - data = zbuf.ptr; - size = (unsigned long)zbuf.size; } - if (git_buf_put(buf, data, size) < 0 || - git_hash_update(&pb->ctx, data, size) < 0) - goto on_error; - if (po->delta_data) { git__free(po->delta_data); po->delta_data = NULL; } - git_odb_object_free(obj); - git_buf_free(&zbuf); - pb->nr_written++; - return 0; -on_error: +done: + git__free(zbuf); + git_zstream_free(&zstream); git_odb_object_free(obj); - git_buf_free(&zbuf); - return -1; + return error; } enum write_one_status { @@ -356,9 +376,15 @@ enum write_one_status { WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */ }; -static int write_one(git_buf *buf, git_packbuilder *pb, git_pobject *po, - enum write_one_status *status) +static int write_one( + enum write_one_status *status, + git_packbuilder *pb, + git_pobject *po, + int (*write_cb)(void *buf, size_t size, void *cb_data), + void *cb_data) { + int error; + if (po->recursing) { *status = WRITE_ONE_RECURSIVE; return 0; @@ -369,21 +395,19 @@ static int write_one(git_buf *buf, git_packbuilder *pb, git_pobject *po, if (po->delta) { po->recursing = 1; - if (write_one(buf, pb, po->delta, status) < 0) - return -1; - switch (*status) { - case WRITE_ONE_RECURSIVE: - /* we cannot depend on this one */ + + if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0) + return error; + + /* we cannot depend on this one */ + if (*status == WRITE_ONE_RECURSIVE) po->delta = NULL; - break; - default: - break; - } } po->written = 1; po->recursing = 0; - return write_object(buf, pb, po); + + return write_object(pb, po, write_cb, cb_data); } GIT_INLINE(void) add_to_write_order(git_pobject **wo, unsigned int *endp, @@ -563,12 +587,11 @@ static git_pobject **compute_write_order(git_packbuilder *pb) } static int write_pack(git_packbuilder *pb, - int (*cb)(void *buf, size_t size, void *data), - void *data) + int (*write_cb)(void *buf, size_t size, void *cb_data), + void *cb_data) { git_pobject **write_order; git_pobject *po; - git_buf buf = GIT_BUF_INIT; enum write_one_status status; struct git_pack_header ph; git_oid entry_oid; @@ -586,10 +609,8 @@ static int write_pack(git_packbuilder *pb, ph.hdr_version = htonl(PACK_VERSION); ph.hdr_entries = htonl(pb->nr_objects); - if ((error = cb(&ph, sizeof(ph), data)) < 0) - goto done; - - if ((error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0) + if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 || + (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0) goto done; pb->nr_remaining = pb->nr_objects; @@ -597,21 +618,18 @@ static int write_pack(git_packbuilder *pb, pb->nr_written = 0; for ( ; i < pb->nr_objects; ++i) { po = write_order[i]; - if ((error = write_one(&buf, pb, po, &status)) < 0) - goto done; - if ((error = cb(buf.ptr, buf.size, data)) < 0) + + if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0) goto done; - git_buf_clear(&buf); } pb->nr_remaining -= pb->nr_written; } while (pb->nr_remaining && i < pb->nr_objects); - if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0) goto done; - error = cb(entry_oid.id, GIT_OID_RAWSZ, data); + error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data); done: /* if callback cancelled writing, we must still free delta_data */ @@ -624,7 +642,6 @@ done: } git__free(write_order); - git_buf_free(&buf); return error; } -- cgit v1.2.1 From 52a8a130728e29d8ac9d6e4f9e3a7017afe461fb Mon Sep 17 00:00:00 2001 From: Edward Thomson Date: Mon, 6 Jan 2014 16:41:12 -0800 Subject: Packbuilder contains its own zstream --- src/pack-objects.c | 9 ++++----- src/pack-objects.h | 2 ++ src/zstream.c | 5 +++++ src/zstream.h | 1 + 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/pack-objects.c b/src/pack-objects.c index 9df9a015e..c4ed4dce3 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -130,6 +130,7 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo) pb->nr_threads = 1; /* do not spawn any thread by default */ if (git_hash_ctx_init(&pb->ctx) < 0 || + git_zstream_init(&pb->zstream) < 0 || git_repository_odb(&pb->odb, repo) < 0 || packbuilder_config(pb) < 0) goto on_error; @@ -284,7 +285,6 @@ static int write_object( int (*write_cb)(void *buf, size_t size, void *cb_data), void *cb_data) { - git_zstream zstream = GIT_ZSTREAM_INIT; git_odb_object *obj = NULL; git_otype type; unsigned char hdr[10], *zbuf = NULL; @@ -334,10 +334,9 @@ static int write_object( zbuf = git__malloc(zbuf_len); GITERR_CHECK_ALLOC(zbuf); - if ((error = git_zstream_init(&zstream)) < 0) - goto done; + git_zstream_reset(&pb->zstream); - while ((written = git_zstream_deflate(zbuf, zbuf_len, &zstream, data, data_len)) > 0) { + while ((written = git_zstream_deflate(zbuf, zbuf_len, &pb->zstream, data, data_len)) > 0) { if ((error = write_cb(zbuf, written, cb_data)) < 0 || (error = git_hash_update(&pb->ctx, zbuf, written)) < 0) goto done; @@ -364,7 +363,6 @@ static int write_object( done: git__free(zbuf); - git_zstream_free(&zstream); git_odb_object_free(obj); return error; } @@ -1413,6 +1411,7 @@ void git_packbuilder_free(git_packbuilder *pb) git__free(pb->object_list); git_hash_ctx_cleanup(&pb->ctx); + git_zstream_free(&pb->zstream); git__free(pb); } diff --git a/src/pack-objects.h b/src/pack-objects.h index 0c94a5a7a..4647df75a 100644 --- a/src/pack-objects.h +++ b/src/pack-objects.h @@ -14,6 +14,7 @@ #include "hash.h" #include "oidmap.h" #include "netops.h" +#include "zstream.h" #include "git2/oid.h" #include "git2/pack.h" @@ -54,6 +55,7 @@ struct git_packbuilder { git_odb *odb; /* associated object database */ git_hash_ctx ctx; + git_zstream zstream; uint32_t nr_objects, nr_alloc, diff --git a/src/zstream.c b/src/zstream.c index e043dd3a9..7def0440b 100644 --- a/src/zstream.c +++ b/src/zstream.c @@ -52,6 +52,11 @@ ssize_t git_zstream_deflate(void *out, size_t out_len, git_zstream *zstream, con return (out_len - zstream->avail_out); } +void git_zstream_reset(git_zstream *zstream) +{ + deflateReset(zstream); +} + void git_zstream_free(git_zstream *zstream) { deflateEnd(zstream); diff --git a/src/zstream.h b/src/zstream.h index e6c841120..9672903c0 100644 --- a/src/zstream.h +++ b/src/zstream.h @@ -18,6 +18,7 @@ int git_zstream_init(git_zstream *zstream); ssize_t git_zstream_deflate(void *out, size_t out_len, git_zstream *zstream, const void *in, size_t in_len); +void git_zstream_reset(git_zstream *zstream); void git_zstream_free(git_zstream *zstream); int git_zstream_deflatebuf(git_buf *out, const void *in, size_t in_len); -- cgit v1.2.1