summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Edward Thomson <ethomson@edwardthomson.com> 2018-02-08 22:51:46 +0000
committer: GitHub <noreply@github.com> 2018-02-08 22:51:46 +0000
commit: 0fd0bfe435989b4947babfcd61b0bf573ff41e28 (patch)
tree: 51c7d216d514f56c55dc46b7bd2e808a7dc84d2e
parent: d749822c58400be8212215fa0013cc8b76c7a1d3 (diff)
parent: 09df354e03b6856ba713ee36a89186ea7b52a123 (diff)
download: libgit2-0fd0bfe435989b4947babfcd61b0bf573ff41e28.tar.gz
Merge pull request #4450 from libgit2/ethomson/odb_loose_readstream
Streaming read support for the loose ODB backend
-rw-r--r-- include/git2/odb.h 9
-rw-r--r-- include/git2/sys/odb_backend.h 3
-rw-r--r-- src/odb.c 9
-rw-r--r-- src/odb_loose.c 507
-rw-r--r-- src/zstream.c 85
-rw-r--r-- src/zstream.h 6
-rw-r--r-- tests/odb/largefiles.c 77
-rw-r--r-- tests/odb/loose.c 84
8 files changed, 555 insertions, 225 deletions
diff --git a/include/git2/odb.h b/include/git2/odb.h
index b7dc0c5f3..006a75b7a 100644
--- a/include/git2/odb.h
+++ b/include/git2/odb.h
@@ -357,11 +357,18 @@ GIT_EXTERN(void) git_odb_stream_free(git_odb_stream *stream);
* @see git_odb_stream
*
* @param out pointer where to store the stream
+ * @param len pointer where to store the length of the object
+ * @param type pointer where to store the type of the object
* @param db object database where the stream will read from
* @param oid oid of the object the stream will read from
* @return 0 if the stream was created; error code otherwise
*/
-GIT_EXTERN(int) git_odb_open_rstream(git_odb_stream **out, git_odb *db, const git_oid *oid);
+GIT_EXTERN(int) git_odb_open_rstream(
+ git_odb_stream **out,
+ size_t *len,
+ git_otype *type,
+ git_odb *db,
+ const git_oid *oid);
/**
* Open a stream for writing a pack file to the ODB.
diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h
index 9bcc50ddd..792f103fe 100644
--- a/include/git2/sys/odb_backend.h
+++ b/include/git2/sys/odb_backend.h
@@ -56,7 +56,8 @@ struct git_odb_backend {
git_odb_stream **, git_odb_backend *, git_off_t, git_otype);
int (* readstream)(
- git_odb_stream **, git_odb_backend *, const git_oid *);
+ git_odb_stream **, size_t *, git_otype *,
+ git_odb_backend *, const git_oid *);
int (* exists)(
git_odb_backend *, const git_oid *);
diff --git a/src/odb.c b/src/odb.c
index c2b17fade..775cf90de 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -1396,7 +1396,12 @@ void git_odb_stream_free(git_odb_stream *stream)
stream->free(stream);
}
-int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid)
+int git_odb_open_rstream(
+ git_odb_stream **stream,
+ size_t *len,
+ git_otype *type,
+ git_odb *db,
+ const git_oid *oid)
{
size_t i, reads = 0;
int error = GIT_ERROR;
@@ -1409,7 +1414,7 @@ int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oi
if (b->readstream != NULL) {
++reads;
- error = b->readstream(stream, b, oid);
+ error = b->readstream(stream, len, type, b, oid);
}
}
diff --git a/src/odb_loose.c b/src/odb_loose.c
index 9900aae2a..7d77eed38 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -21,6 +21,9 @@
#include "git2/odb_backend.h"
#include "git2/types.h"
+/* maximum possible header length */
+#define MAX_HEADER_LEN 64
+
typedef struct { /* object header data */
git_otype type; /* object type */
size_t size; /* object size */
@@ -31,6 +34,15 @@ typedef struct {
git_filebuf fbuf;
} loose_writestream;
+typedef struct {
+ git_odb_stream stream;
+ git_map map;
+ char start[MAX_HEADER_LEN];
+ size_t start_len;
+ size_t start_read;
+ git_zstream zstream;
+} loose_readstream;
+
typedef struct loose_backend {
git_odb_backend parent;
@@ -92,32 +104,42 @@ static int object_mkdir(const git_buf *name, const loose_backend *be)
GIT_MKDIR_PATH | GIT_MKDIR_SKIP_LAST | GIT_MKDIR_VERIFY_DIR, NULL);
}
-static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
+static int parse_header_packlike(
+ obj_hdr *out, size_t *out_len, const unsigned char *data, size_t len)
{
unsigned long c;
- unsigned char *data = (unsigned char *)obj->ptr;
size_t shift, size, used = 0;
- if (git_buf_len(obj) == 0)
- return 0;
+ if (len == 0)
+ goto on_error;
c = data[used++];
- hdr->type = (c >> 4) & 7;
+ out->type = (c >> 4) & 7;
size = c & 15;
shift = 4;
while (c & 0x80) {
- if (git_buf_len(obj) <= used)
- return 0;
+ if (len <= used)
+ goto on_error;
+
if (sizeof(size_t) * 8 <= shift)
- return 0;
+ goto on_error;
+
c = data[used++];
size += (c & 0x7f) << shift;
shift += 7;
}
- hdr->size = size;
- return used;
+ out->size = size;
+
+ if (out_len)
+ *out_len = used;
+
+ return 0;
+
+on_error:
+ giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
+ return -1;
}
static int parse_header(
@@ -173,107 +195,26 @@ on_error:
return -1;
}
-/***********************************************************
- *
- * ZLIB RELATED FUNCTIONS
- *
- ***********************************************************/
-
-static void init_stream(z_stream *s, void *out, size_t len)
-{
- memset(s, 0, sizeof(*s));
- s->next_out = out;
- s->avail_out = (uInt)len;
-}
-
-static void set_stream_input(z_stream *s, void *in, size_t len)
-{
- s->next_in = in;
- s->avail_in = (uInt)len;
-}
-
-static void set_stream_output(z_stream *s, void *out, size_t len)
-{
- s->next_out = out;
- s->avail_out = (uInt)len;
-}
-
-
-static int start_inflate(z_stream *s, git_buf *obj, void *out, size_t len)
-{
- int status;
-
- init_stream(s, out, len);
- set_stream_input(s, obj->ptr, git_buf_len(obj));
-
- if ((status = inflateInit(s)) < Z_OK)
- return status;
-
- return inflate(s, 0);
-}
-
-static void abort_inflate(z_stream *s)
-{
- inflateEnd(s);
-}
-
-static int finish_inflate(z_stream *s)
-{
- int status = Z_OK;
-
- while (status == Z_OK)
- status = inflate(s, Z_FINISH);
-
- inflateEnd(s);
-
- if ((status != Z_STREAM_END) || (s->avail_in != 0)) {
- giterr_set(GITERR_ZLIB, "failed to finish zlib inflation; stream aborted prematurely");
- return -1;
- }
-
- return 0;
-}
-
-static int is_zlib_compressed_data(unsigned char *data)
+static int is_zlib_compressed_data(unsigned char *data, size_t data_len)
{
unsigned int w;
+ if (data_len < 2)
+ return 0;
+
w = ((unsigned int)(data[0]) << 8) + data[1];
return (data[0] & 0x8F) == 0x08 && !(w % 31);
}
-static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
-{
- z_stream zs;
- int status = Z_OK;
-
- memset(&zs, 0x0, sizeof(zs));
-
- zs.next_out = out;
- zs.avail_out = (uInt)outlen;
-
- zs.next_in = in;
- zs.avail_in = (uInt)inlen;
-
- if (inflateInit(&zs) < Z_OK) {
- giterr_set(GITERR_ZLIB, "failed to inflate buffer");
- return -1;
- }
-
- while (status == Z_OK)
- status = inflate(&zs, Z_FINISH);
-
- inflateEnd(&zs);
-
- if (status != Z_STREAM_END /* || zs.avail_in != 0 */ ||
- zs.total_out != outlen)
- {
- giterr_set(GITERR_ZLIB, "failed to inflate buffer; stream aborted prematurely");
- return -1;
- }
+/***********************************************************
+ *
+ * ODB OBJECT READING & WRITING
+ *
+ * Backend for the public API; read headers and full objects
+ * from the ODB. Write raw data to the ODB.
+ *
+ ***********************************************************/
- return 0;
-}
/*
* At one point, there was a loose object format that was intended to
@@ -281,56 +222,62 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
* of loose object data into packs. This format is no longer used, but
* we must still read it.
*/
-static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
+static int read_loose_packlike(git_rawobj *out, git_buf *obj)
{
- unsigned char *in, *buf;
+ git_buf body = GIT_BUF_INIT;
+ const unsigned char *obj_data;
obj_hdr hdr;
- size_t len, used, alloclen;
+ size_t obj_len, head_len, alloc_size;
+ int error;
+
+ obj_data = (unsigned char *)obj->ptr;
+ obj_len = obj->size;
/*
* read the object header, which is an (uncompressed)
* binary encoding of the object type and size.
*/
- if ((used = get_binary_object_header(&hdr, obj)) == 0 ||
- !git_object_typeisloose(hdr.type)) {
+ if ((error = parse_header_packlike(&hdr, &head_len, obj_data, obj_len)) < 0)
+ goto done;
+
+ if (!git_object_typeisloose(hdr.type) || head_len > obj_len) {
giterr_set(GITERR_ODB, "failed to inflate loose object");
- return -1;
+ error = -1;
+ goto done;
}
+ obj_data += head_len;
+ obj_len -= head_len;
+
/*
* allocate a buffer and inflate the data into it
*/
- GITERR_CHECK_ALLOC_ADD(&alloclen, hdr.size, 1);
- buf = git__malloc(alloclen);
- GITERR_CHECK_ALLOC(buf);
-
- in = ((unsigned char *)obj->ptr) + used;
- len = obj->size - used;
- if (inflate_buffer(in, len, buf, hdr.size) < 0) {
- git__free(buf);
- return -1;
+ if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
+ git_buf_init(&body, alloc_size) < 0) {
+ error = -1;
+ goto done;
}
- buf[hdr.size] = '\0';
- out->data = buf;
+ if ((error = git_zstream_inflatebuf(&body, obj_data, obj_len)) < 0)
+ goto done;
+
out->len = hdr.size;
out->type = hdr.type;
+ out->data = git_buf_detach(&body);
- return 0;
+done:
+ git_buf_free(&body);
+ return error;
}
-static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
+static int read_loose_standard(git_rawobj *out, git_buf *obj)
{
git_zstream zstream = GIT_ZSTREAM_INIT;
- unsigned char head[64], *body = NULL;
+ unsigned char head[MAX_HEADER_LEN], *body = NULL;
size_t decompressed, head_len, body_len, alloc_size;
obj_hdr hdr;
int error;
- /* check for a pack-like loose object */
- if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
- return inflate_packlike_loose_disk_obj(out, obj);
-
if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
goto done;
@@ -338,9 +285,10 @@ static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
decompressed = sizeof(head);
/*
- * inflate the initial part of the compressed buffer in order to parse the
- * header; read the largest header possible, then push back the remainder.
- */
+ * inflate the initial part of the compressed buffer in order to
+ * parse the header; read the largest header possible, then push the
+ * remainder into the body buffer.
+ */
if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
goto done;
@@ -391,20 +339,6 @@ done:
return error;
}
-
-
-
-
-
-/***********************************************************
- *
- * ODB OBJECT READING & WRITING
- *
- * Backend for the public API; read headers and full objects
- * from the ODB. Write raw data to the ODB.
- *
- ***********************************************************/
-
static int read_loose(git_rawobj *out, git_buf *loc)
{
int error;
@@ -419,22 +353,62 @@ static int read_loose(git_rawobj *out, git_buf *loc)
out->len = 0;
out->type = GIT_OBJ_BAD;
- if (!(error = git_futils_readbuffer(&obj, loc->ptr)))
- error = inflate_disk_obj(out, &obj);
+ if ((error = git_futils_readbuffer(&obj, loc->ptr)) < 0)
+ goto done;
+
+ if (!is_zlib_compressed_data((unsigned char *)obj.ptr, obj.size))
+ error = read_loose_packlike(out, &obj);
+ else
+ error = read_loose_standard(out, &obj);
+done:
git_buf_free(&obj);
+ return error;
+}
+
+static int read_header_loose_packlike(
+ git_rawobj *out, const unsigned char *data, size_t len)
+{
+ obj_hdr hdr;
+ size_t header_len;
+ int error;
+
+ if ((error = parse_header_packlike(&hdr, &header_len, data, len)) < 0)
+ return error;
+ out->len = hdr.size;
+ out->type = hdr.type;
+
+ return error;
+}
+
+static int read_header_loose_standard(
+ git_rawobj *out, const unsigned char *data, size_t len)
+{
+ git_zstream zs = GIT_ZSTREAM_INIT;
+ obj_hdr hdr;
+ unsigned char inflated[MAX_HEADER_LEN];
+ size_t header_len, inflated_len = sizeof(inflated);
+ int error;
+
+ if ((error = git_zstream_init(&zs, GIT_ZSTREAM_INFLATE)) < 0 ||
+ (error = git_zstream_set_input(&zs, data, len)) < 0 ||
+ (error = git_zstream_get_output_chunk(inflated, &inflated_len, &zs)) < 0 ||
+ (error = parse_header(&hdr, &header_len, inflated, inflated_len)) < 0)
+ goto done;
+
+ out->len = hdr.size;
+ out->type = hdr.type;
+
+done:
+ git_zstream_free(&zs);
return error;
}
static int read_header_loose(git_rawobj *out, git_buf *loc)
{
- int error = 0, z_return = Z_ERRNO, read_bytes;
- git_file fd;
- z_stream zs;
- obj_hdr header_obj;
- size_t header_len;
- unsigned char raw_buffer[16], inflated_buffer[64];
+ unsigned char obj[1024];
+ int fd, obj_len, error;
assert(out && loc);
@@ -443,35 +417,23 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
out->data = NULL;
- if ((fd = git_futils_open_ro(loc->ptr)) < 0)
- return fd;
-
- init_stream(&zs, inflated_buffer, sizeof(inflated_buffer));
-
- z_return = inflateInit(&zs);
+ if ((error = fd = git_futils_open_ro(loc->ptr)) < 0 ||
+ (error = obj_len = p_read(fd, obj, sizeof(obj))) < 0)
+ goto done;
- while (z_return == Z_OK) {
- if ((read_bytes = p_read(fd, raw_buffer, sizeof(raw_buffer))) > 0) {
- set_stream_input(&zs, raw_buffer, read_bytes);
- z_return = inflate(&zs, 0);
- } else
- z_return = Z_STREAM_END;
- }
+ if (!is_zlib_compressed_data(obj, (size_t)obj_len))
+ error = read_header_loose_packlike(out, obj, (size_t)obj_len);
+ else
+ error = read_header_loose_standard(out, obj, (size_t)obj_len);
- if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
- || parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
- || git_object_typeisloose(header_obj.type) == 0)
- {
+ if (!error && !git_object_typeisloose(out->type)) {
giterr_set(GITERR_ZLIB, "failed to read loose object header");
error = -1;
- } else {
- out->len = header_obj.size;
- out->type = header_obj.type;
+ goto done;
}
- finish_inflate(&zs);
+done:
p_close(fd);
-
return error;
}
@@ -812,7 +774,7 @@ static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb
return error;
}
-static int loose_backend__stream_fwrite(git_odb_stream *_stream, const git_oid *oid)
+static int loose_backend__writestream_finalize(git_odb_stream *_stream, const git_oid *oid)
{
loose_writestream *stream = (loose_writestream *)_stream;
loose_backend *backend = (loose_backend *)_stream->backend;
@@ -831,13 +793,13 @@ static int loose_backend__stream_fwrite(git_odb_stream *_stream, const git_oid *
return error;
}
-static int loose_backend__stream_write(git_odb_stream *_stream, const char *data, size_t len)
+static int loose_backend__writestream_write(git_odb_stream *_stream, const char *data, size_t len)
{
loose_writestream *stream = (loose_writestream *)_stream;
return git_filebuf_write(&stream->fbuf, data, len);
}
-static void loose_backend__stream_free(git_odb_stream *_stream)
+static void loose_backend__writestream_free(git_odb_stream *_stream)
{
loose_writestream *stream = (loose_writestream *)_stream;
@@ -856,11 +818,11 @@ static int filebuf_flags(loose_backend *backend)
return flags;
}
-static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend, git_off_t length, git_otype type)
+static int loose_backend__writestream(git_odb_stream **stream_out, git_odb_backend *_backend, git_off_t length, git_otype type)
{
loose_backend *backend;
loose_writestream *stream = NULL;
- char hdr[64];
+ char hdr[MAX_HEADER_LEN];
git_buf tmp_path = GIT_BUF_INIT;
int hdrlen;
@@ -876,9 +838,9 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_
stream->stream.backend = _backend;
stream->stream.read = NULL; /* read only */
- stream->stream.write = &loose_backend__stream_write;
- stream->stream.finalize_write = &loose_backend__stream_fwrite;
- stream->stream.free = &loose_backend__stream_free;
+ stream->stream.write = &loose_backend__writestream_write;
+ stream->stream.finalize_write = &loose_backend__writestream_finalize;
+ stream->stream.free = &loose_backend__writestream_free;
stream->stream.mode = GIT_STREAM_WRONLY;
if (git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") < 0 ||
@@ -896,11 +858,187 @@ static int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_
return !stream ? -1 : 0;
}
+static int loose_backend__readstream_read(
+ git_odb_stream *_stream,
+ char *buffer,
+ size_t buffer_len)
+{
+ loose_readstream *stream = (loose_readstream *)_stream;
+ size_t start_remain = stream->start_len - stream->start_read;
+ int total = 0, error;
+
+ /*
+ * if we read more than just the header in the initial read, play
+ * that back for the caller.
+ */
+ if (start_remain && buffer_len) {
+ size_t chunk = min(start_remain, buffer_len);
+ memcpy(buffer, stream->start + stream->start_read, chunk);
+
+ buffer += chunk;
+ stream->start_read += chunk;
+
+ total += chunk;
+ buffer_len -= chunk;
+ }
+
+ if (buffer_len) {
+ size_t chunk = min(buffer_len, INT_MAX);
+
+ if ((error = git_zstream_get_output(buffer, &chunk, &stream->zstream)) < 0)
+ return error;
+
+ total += chunk;
+ }
+
+ return total;
+}
+
+static void loose_backend__readstream_free(git_odb_stream *_stream)
+{
+ loose_readstream *stream = (loose_readstream *)_stream;
+
+ git_futils_mmap_free(&stream->map);
+ git_zstream_free(&stream->zstream);
+ git__free(stream);
+}
+
+static int loose_backend__readstream_packlike(
+ obj_hdr *hdr,
+ loose_readstream *stream)
+{
+ const unsigned char *data;
+ size_t data_len, head_len;
+ int error;
+
+ data = stream->map.data;
+ data_len = stream->map.len;
+
+ /*
+ * read the object header, which is an (uncompressed)
+ * binary encoding of the object type and size.
+ */
+ if ((error = parse_header_packlike(hdr, &head_len, data, data_len)) < 0)
+ return error;
+
+ if (!git_object_typeisloose(hdr->type)) {
+ giterr_set(GITERR_ODB, "failed to inflate loose object");
+ return -1;
+ }
+
+ return git_zstream_set_input(&stream->zstream,
+ data + head_len, data_len - head_len);
+}
+
+static int loose_backend__readstream_standard(
+ obj_hdr *hdr,
+ loose_readstream *stream)
+{
+ unsigned char head[MAX_HEADER_LEN];
+ size_t init, head_len;
+ int error;
+
+ if ((error = git_zstream_set_input(&stream->zstream,
+ stream->map.data, stream->map.len)) < 0)
+ return error;
+
+ init = sizeof(head);
+
+ /*
+ * inflate the initial part of the compressed buffer in order to
+ * parse the header; read the largest header possible, then store
+ * it in the `start` field of the stream object.
+ */
+ if ((error = git_zstream_get_output(head, &init, &stream->zstream)) < 0 ||
+ (error = parse_header(hdr, &head_len, head, init)) < 0)
+ return error;
+
+ if (!git_object_typeisloose(hdr->type)) {
+ giterr_set(GITERR_ODB, "failed to inflate disk object");
+ return -1;
+ }
+
+ if (init > head_len) {
+ stream->start_len = init - head_len;
+ memcpy(stream->start, head + head_len, init - head_len);
+ }
+
+ return 0;
+}
+
+static int loose_backend__readstream(
+ git_odb_stream **stream_out,
+ size_t *len_out,
+ git_otype *type_out,
+ git_odb_backend *_backend,
+ const git_oid *oid)
+{
+ loose_backend *backend;
+ loose_readstream *stream = NULL;
+ git_hash_ctx *hash_ctx = NULL;
+ git_buf object_path = GIT_BUF_INIT;
+ obj_hdr hdr;
+ int error = 0;
+
+ assert(stream_out && len_out && type_out && _backend && oid);
+
+ backend = (loose_backend *)_backend;
+ *stream_out = NULL;
+ *len_out = 0;
+ *type_out = GIT_OBJ_BAD;
+
+ if (locate_object(&object_path, backend, oid) < 0) {
+ error = git_odb__error_notfound("no matching loose object",
+ oid, GIT_OID_HEXSZ);
+ goto done;
+ }
+
+ stream = git__calloc(1, sizeof(loose_readstream));
+ GITERR_CHECK_ALLOC(stream);
+
+ hash_ctx = git__malloc(sizeof(git_hash_ctx));
+ GITERR_CHECK_ALLOC(hash_ctx);
+
+ if ((error = git_hash_ctx_init(hash_ctx)) < 0 ||
+ (error = git_futils_mmap_ro_file(&stream->map, object_path.ptr)) < 0 ||
+ (error = git_zstream_init(&stream->zstream, GIT_ZSTREAM_INFLATE)) < 0)
+ goto done;
+
+ /* check for a packlike loose object */
+ if (!is_zlib_compressed_data(stream->map.data, stream->map.len))
+ error = loose_backend__readstream_packlike(&hdr, stream);
+ else
+ error = loose_backend__readstream_standard(&hdr, stream);
+
+ if (error < 0)
+ goto done;
+
+ stream->stream.backend = _backend;
+ stream->stream.hash_ctx = hash_ctx;
+ stream->stream.read = &loose_backend__readstream_read;
+ stream->stream.free = &loose_backend__readstream_free;
+
+ *stream_out = (git_odb_stream *)stream;
+ *len_out = hdr.size;
+ *type_out = hdr.type;
+
+done:
+ if (error < 0) {
+ git_futils_mmap_free(&stream->map);
+ git_zstream_free(&stream->zstream);
+ git_hash_ctx_cleanup(hash_ctx);
+ git__free(stream);
+ }
+
+ git_buf_free(&object_path);
+ return error;
+}
+
static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_otype type)
{
int error = 0, header_len;
git_buf final_path = GIT_BUF_INIT;
- char header[64];
+ char header[MAX_HEADER_LEN];
git_filebuf fbuf = GIT_FILEBUF_INIT;
loose_backend *backend;
@@ -1002,7 +1140,8 @@ int git_odb_backend_loose(
backend->parent.write = &loose_backend__write;
backend->parent.read_prefix = &loose_backend__read_prefix;
backend->parent.read_header = &loose_backend__read_header;
- backend->parent.writestream = &loose_backend__stream;
+ backend->parent.writestream = &loose_backend__writestream;
+ backend->parent.readstream = &loose_backend__readstream;
backend->parent.exists = &loose_backend__exists;
backend->parent.exists_prefix = &loose_backend__exists_prefix;
backend->parent.foreach = &loose_backend__foreach;
diff --git a/src/zstream.c b/src/zstream.c
index 963c9a344..affa55653 100644
--- a/src/zstream.c
+++ b/src/zstream.c
@@ -87,9 +87,52 @@ size_t git_zstream_suggest_output_len(git_zstream *zstream)
return ZSTREAM_BUFFER_MIN_EXTRA;
}
+int git_zstream_get_output_chunk(
+ void *out, size_t *out_len, git_zstream *zstream)
+{
+ size_t in_queued, in_used, out_queued;
+
+ /* set up input data */
+ zstream->z.next_in = (Bytef *)zstream->in;
+
+ /* feed as much data to zlib as it can consume, at most UINT_MAX */
+ if (zstream->in_len > UINT_MAX) {
+ zstream->z.avail_in = UINT_MAX;
+ zstream->flush = Z_NO_FLUSH;
+ } else {
+ zstream->z.avail_in = (uInt)zstream->in_len;
+ zstream->flush = Z_FINISH;
+ }
+ in_queued = (size_t)zstream->z.avail_in;
+
+ /* set up output data */
+ zstream->z.next_out = out;
+ zstream->z.avail_out = (uInt)*out_len;
+
+ if ((size_t)zstream->z.avail_out != *out_len)
+ zstream->z.avail_out = UINT_MAX;
+ out_queued = (size_t)zstream->z.avail_out;
+
+ /* compress next chunk */
+ if (zstream->type == GIT_ZSTREAM_INFLATE)
+ zstream->zerr = inflate(&zstream->z, zstream->flush);
+ else
+ zstream->zerr = deflate(&zstream->z, zstream->flush);
+
+ if (zstream_seterr(zstream))
+ return -1;
+
+ in_used = (in_queued - zstream->z.avail_in);
+ zstream->in_len -= in_used;
+ zstream->in += in_used;
+
+ *out_len = (out_queued - zstream->z.avail_out);
+
+ return 0;
+}
+
int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
{
- int zflush = Z_FINISH;
size_t out_remain = *out_len;
if (zstream->in_len && zstream->zerr == Z_STREAM_END) {
@@ -98,47 +141,17 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
}
while (out_remain > 0 && zstream->zerr != Z_STREAM_END) {
- size_t out_queued, in_queued, out_used, in_used;
+ size_t out_written = out_remain;
- /* set up in data */
- zstream->z.next_in = (Bytef *)zstream->in;
- zstream->z.avail_in = (uInt)zstream->in_len;
-
- if ((size_t)zstream->z.avail_in != zstream->in_len) {
- zstream->z.avail_in = UINT_MAX;
- zflush = Z_NO_FLUSH;
- } else {
- zflush = Z_FINISH;
- }
- in_queued = (size_t)zstream->z.avail_in;
-
- /* set up out data */
- zstream->z.next_out = out;
- zstream->z.avail_out = (uInt)out_remain;
- if ((size_t)zstream->z.avail_out != out_remain)
- zstream->z.avail_out = UINT_MAX;
- out_queued = (size_t)zstream->z.avail_out;
-
- /* compress next chunk */
- if (zstream->type == GIT_ZSTREAM_INFLATE)
- zstream->zerr = inflate(&zstream->z, zflush);
- else
- zstream->zerr = deflate(&zstream->z, zflush);
-
- if (zstream_seterr(zstream))
+ if (git_zstream_get_output_chunk(out, &out_written, zstream) < 0)
return -1;
- out_used = (out_queued - zstream->z.avail_out);
- out_remain -= out_used;
- out = ((char *)out) + out_used;
-
- in_used = (in_queued - zstream->z.avail_in);
- zstream->in_len -= in_used;
- zstream->in += in_used;
+ out_remain -= out_written;
+ out = ((char *)out) + out_written;
}
/* either we finished the input or we did not flush the data */
- assert(zstream->in_len > 0 || zflush == Z_FINISH);
+ assert(zstream->in_len > 0 || zstream->flush == Z_FINISH);
/* set out_size to number of bytes actually written to output */
*out_len = *out_len - out_remain;
diff --git a/src/zstream.h b/src/zstream.h
index c2404f328..47ecc1322 100644
--- a/src/zstream.h
+++ b/src/zstream.h
@@ -23,6 +23,7 @@ typedef struct {
git_zstream_t type;
const char *in;
size_t in_len;
+ int flush;
int zerr;
} git_zstream;
@@ -35,6 +36,11 @@ int git_zstream_set_input(git_zstream *zstream, const void *in, size_t in_len);
size_t git_zstream_suggest_output_len(git_zstream *zstream);
+/* get as much output as is available in the input buffer */
+int git_zstream_get_output_chunk(
+ void *out, size_t *out_len, git_zstream *zstream);
+
+/* get all the output from the entire input buffer */
int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream);
bool git_zstream_done(git_zstream *zstream);
diff --git a/tests/odb/largefiles.c b/tests/odb/largefiles.c
index 22f136df5..cd3651b4a 100644
--- a/tests/odb/largefiles.c
+++ b/tests/odb/largefiles.c
@@ -1,5 +1,9 @@
#include "clar_libgit2.h"
#include "git2/odb_backend.h"
+#include "hash.h"
+#include "odb.h"
+
+#define LARGEFILE_SIZE 5368709122
static git_repository *repo;
static git_odb *odb;
@@ -25,7 +29,7 @@ static void writefile(git_oid *oid)
for (i = 0; i < 3041; i++)
cl_git_pass(git_buf_puts(&buf, "Hello, world.\n"));
- cl_git_pass(git_odb_open_wstream(&stream, odb, 5368709122, GIT_OBJ_BLOB));
+ cl_git_pass(git_odb_open_wstream(&stream, odb, LARGEFILE_SIZE, GIT_OBJ_BLOB));
for (i = 0; i < 126103; i++)
cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size));
@@ -63,6 +67,10 @@ void test_odb_largefiles__streamwrite(void)
{
git_oid expected, oid;
+#ifndef GIT_ARCH_64
+ cl_skip();
+#endif
+
if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
!cl_is_env_set("GITTEST_SLOW"))
cl_skip();
@@ -73,6 +81,52 @@ void test_odb_largefiles__streamwrite(void)
cl_assert_equal_oid(&expected, &oid);
}
+void test_odb_largefiles__streamread(void)
+{
+ git_oid oid, read_oid;
+ git_odb_stream *stream;
+ char buf[10240];
+ char hdr[64];
+ size_t len, total = 0;
+ git_hash_ctx hash;
+ git_otype type;
+ int hdr_len, ret;
+
+#ifndef GIT_ARCH_64
+ cl_skip();
+#endif
+
+ if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
+ !cl_is_env_set("GITTEST_SLOW"))
+ cl_skip();
+
+ writefile(&oid);
+
+ cl_git_pass(git_odb_open_rstream(&stream, &len, &type, odb, &oid));
+
+ cl_assert_equal_sz(LARGEFILE_SIZE, len);
+ cl_assert_equal_i(GIT_OBJ_BLOB, type);
+
+ cl_git_pass(git_hash_ctx_init(&hash));
+ hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), len, type);
+
+ cl_git_pass(git_hash_update(&hash, hdr, hdr_len));
+
+ while ((ret = git_odb_stream_read(stream, buf, 10240)) > 0) {
+ cl_git_pass(git_hash_update(&hash, buf, ret));
+ total += ret;
+ }
+
+ cl_assert_equal_sz(LARGEFILE_SIZE, total);
+
+ git_hash_final(&read_oid, &hash);
+
+ cl_assert_equal_oid(&oid, &read_oid);
+
+ git_hash_ctx_cleanup(&hash);
+ git_odb_stream_free(stream);
+}
+
void test_odb_largefiles__read_into_memory(void)
{
git_oid oid;
@@ -112,3 +166,24 @@ void test_odb_largefiles__read_into_memory_rejected_on_32bit(void)
git_odb_object_free(obj);
}
+
+void test_odb_largefiles__read_header(void)
+{
+ git_oid oid;
+ size_t len;
+ git_otype type;
+
+#ifndef GIT_ARCH_64
+ cl_skip();
+#endif
+
+ if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
+ !cl_is_env_set("GITTEST_SLOW"))
+ cl_skip();
+
+ writefile(&oid);
+ cl_git_pass(git_odb_read_header(&len, &type, odb, &oid));
+
+ cl_assert_equal_sz(LARGEFILE_SIZE, len);
+ cl_assert_equal_i(GIT_OBJ_BLOB, type);
+}
diff --git a/tests/odb/loose.c b/tests/odb/loose.c
index 2e24d6723..83d080729 100644
--- a/tests/odb/loose.c
+++ b/tests/odb/loose.c
@@ -55,6 +55,63 @@ static void test_read_object(object_data *data)
git_odb_free(odb);
}
+static void test_read_header(object_data *data)
+{
+ git_oid id;
+ git_odb *odb;
+ size_t len;
+ git_otype type;
+
+ write_object_files(data);
+
+ cl_git_pass(git_odb_open(&odb, "test-objects"));
+ cl_git_pass(git_oid_fromstr(&id, data->id));
+ cl_git_pass(git_odb_read_header(&len, &type, odb, &id));
+
+ cl_assert_equal_sz(data->dlen, len);
+ cl_assert_equal_i(git_object_string2type(data->type), type);
+
+ git_odb_free(odb);
+}
+
+static void test_readstream_object(object_data *data, size_t blocksize)
+{
+ git_oid id;
+ git_odb *odb;
+ git_odb_stream *stream;
+ git_rawobj tmp;
+ char buf[2048], *ptr = buf;
+ size_t remain;
+ int ret;
+
+ write_object_files(data);
+
+ cl_git_pass(git_odb_open(&odb, "test-objects"));
+ cl_git_pass(git_oid_fromstr(&id, data->id));
+ cl_git_pass(git_odb_open_rstream(&stream, &tmp.len, &tmp.type, odb, &id));
+
+ remain = tmp.len;
+
+ while (remain) {
+ cl_assert((ret = git_odb_stream_read(stream, ptr, blocksize)) >= 0);
+ if (ret == 0)
+ break;
+
+ cl_assert(remain >= (size_t)ret);
+ remain -= ret;
+ ptr += ret;
+ }
+
+ cl_assert(remain == 0);
+
+ tmp.data = buf;
+
+ cmp_objects(&tmp, data);
+
+ git_odb_stream_free(stream);
+ git_odb_free(odb);
+}
+
void test_odb_loose__initialize(void)
{
p_fsync__cnt = 0;
@@ -103,6 +160,33 @@ void test_odb_loose__simple_reads(void)
test_read_object(&some);
}
+void test_odb_loose__streaming_reads(void)
+{
+ size_t blocksizes[] = { 1, 2, 4, 16, 99, 1024, 123456789 };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(blocksizes); i++) {
+ test_readstream_object(&commit, blocksizes[i]);
+ test_readstream_object(&tree, blocksizes[i]);
+ test_readstream_object(&tag, blocksizes[i]);
+ test_readstream_object(&zero, blocksizes[i]);
+ test_readstream_object(&one, blocksizes[i]);
+ test_readstream_object(&two, blocksizes[i]);
+ test_readstream_object(&some, blocksizes[i]);
+ }
+}
+
+void test_odb_loose__read_header(void)
+{
+ test_read_header(&commit);
+ test_read_header(&tree);
+ test_read_header(&tag);
+ test_read_header(&zero);
+ test_read_header(&one);
+ test_read_header(&two);
+ test_read_header(&some);
+}
+
void test_write_object_permission(
mode_t dir_mode, mode_t file_mode,
mode_t expected_dir_mode, mode_t expected_file_mode)