summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/indexer.c251
-rw-r--r--src/odb.c6
-rw-r--r--src/odb.h4
-rw-r--r--src/pack.c66
-rw-r--r--src/pack.h14
5 files changed, 282 insertions, 59 deletions
diff --git a/src/indexer.c b/src/indexer.c
index a51d903ed..f78ca5774 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -38,15 +38,20 @@ struct git_indexer {
struct git_indexer_stream {
unsigned int parsed_header :1,
- opened_pack;
+ opened_pack :1,
+ have_stream :1,
+ have_delta :1;
struct git_pack_file *pack;
git_filebuf pack_file;
git_filebuf index_file;
git_off_t off;
+ git_off_t entry_start;
+ git_packfile_stream stream;
size_t nr_objects;
git_vector objects;
git_vector deltas;
unsigned int fanout[256];
+ git_hash_ctx hash_ctx;
git_oid hash;
git_transfer_progress_callback progress_cb;
void *progress_payload;
@@ -176,56 +181,169 @@ cleanup:
}
/* Try to store the delta so we can try to resolve it later */
-static int store_delta(git_indexer_stream *idx, git_off_t entry_start, size_t entry_size, git_otype type)
+/*
+ * Records a delta_info for the entry starting at idx->entry_start in
+ * idx->deltas. Returns 0 on success and a negative value on failure.
+ */
+static int store_delta(git_indexer_stream *idx)
{
- git_mwindow *w = NULL;
struct delta_info *delta;
- git_rawobj obj;
- int error;
+
+	delta = git__calloc(1, sizeof(struct delta_info));
+	GITERR_CHECK_ALLOC(delta);
+	delta->delta_off = idx->entry_start;
+
+	/* The vector did not take the entry, so release it instead of leaking it */
+	if (git_vector_insert(&idx->deltas, delta) < 0) {
+		git__free(delta);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Format the object header for an object of the given type and length
+ * and feed it to the hash context.
+ */
+static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
+{
+	char hdr[64];
+	size_t hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), len, type);
+
+	git_hash_update(ctx, hdr, hdr_len);
+}
+
+/*
+ * Inflate from `stream` and feed every chunk of output to `ctx`.
+ *
+ * Returns 0 once the stream reports that it has no more output, the
+ * negative value returned by git_packfile_stream_read() when reading
+ * fails (the caller checks this against GIT_EBUFS), or -1 when the
+ * hash update fails.
+ */
+static int hash_object_stream(git_hash_ctx *ctx, git_packfile_stream *stream)
+{
+	char buffer[8*1024];
+	ssize_t nread;
+
+	assert(ctx && stream);
+
+	while ((nread = git_packfile_stream_read(stream, buffer, sizeof(buffer))) > 0) {
+		/* A failed update would otherwise silently yield a wrong object id */
+		if (git_hash_update(ctx, buffer, (size_t)nread) < 0)
+			return -1;
+	}
+
+	/* nread is 0 at the end of the stream, negative on error */
+	return (int)nread;
+}
+
+/*
+ * In order to create the packfile stream, we need to skip over the delta
+ * base description.
+ *
+ * For GIT_OBJ_REF_DELTA, idx->off is advanced by GIT_OID_RAWSZ. For
+ * GIT_OBJ_OFS_DELTA, get_delta_base() is handed &idx->off together with
+ * idx->entry_start, and the window it used is closed again.
+ *
+ * Returns 0 on success, or the negative value returned by
+ * get_delta_base() (cast to int).
+ */
+static int advance_delta_offset(git_indexer_stream *idx, git_otype type)
+{
+ git_mwindow *w = NULL;
assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
if (type == GIT_OBJ_REF_DELTA) {
idx->off += GIT_OID_RAWSZ;
} else {
- git_off_t base_off;
-
- base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start);
+ git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
git_mwindow_close(&w);
if (base_off < 0)
return (int)base_off;
}
- error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return GIT_EBUFS;
- } else if (error < 0){
- return -1;
+ return 0;
+}
+
+/*
+ * Drain the stream: keep inflating until it reports no more output,
+ * discarding whatever is produced.
+ *
+ * Returns 0 when the stream has no more output, or the negative value
+ * returned by git_packfile_stream_read() (cast to int).
+ */
+static int read_object_stream(git_packfile_stream *stream)
+{
+	char scratch[4*1024];
+	ssize_t got;
+
+	assert(stream);
+
+	while ((got = git_packfile_stream_read(stream, scratch, sizeof(scratch))) > 0)
+		; /* output is intentionally thrown away */
+
+	return (got < 0) ? (int)got : 0;
+}
+
+/*
+ * Compute the CRC-32 of the `size` bytes of `mwf` that start at offset
+ * `start`, walking the data one opened window at a time, and store the
+ * result in `*crc_out` after passing it through htonl().
+ *
+ * Returns 0 on success, -1 if a window could not be opened.
+ */
+static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
+{
+	git_mwindow *win = NULL;
+	uint32_t sum = crc32(0L, Z_NULL, 0);
+
+	while (size != 0) {
+		unsigned int avail, take;
+		void *chunk = git_mwindow_open(mwf, &win, start, size, &avail);
+
+		if (!chunk)
+			return -1;
+
+		/* Never checksum past the end of the object */
+		take = min(avail, size);
+		sum = crc32(sum, chunk, take);
+		start += take;
+		size -= take;
+		git_mwindow_close(&win);
}
- delta = git__calloc(1, sizeof(struct delta_info));
- GITERR_CHECK_ALLOC(delta);
- delta->delta_off = entry_start;
+	*crc_out = htonl(sum);
+	return 0;
+}
- git__free(obj.data);
+/*
+ * Finish the object that starts at idx->entry_start: finalize its id
+ * from idx->hash_ctx, add a git_pack_entry for it to the pack's cache,
+ * add an index entry (with the CRC of its packed bytes) to idx->objects
+ * and bump the fanout counters.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int store_object(git_indexer_stream *idx)
+{
+	int i;
+	git_oid oid;
+	struct entry *entry;
+	git_off_t entry_size;
+	struct git_pack_entry *pentry;
+	git_hash_ctx *ctx = &idx->hash_ctx;
+	git_off_t entry_start = idx->entry_start;
- if (git_vector_insert(&idx->deltas, delta) < 0)
- return -1;
+	entry = git__calloc(1, sizeof(*entry));
+	GITERR_CHECK_ALLOC(entry);
+
+	/*
+	 * Zero the cache entry so that no field is left uninitialized, and
+	 * leave through on_error on failure so that `entry` is not leaked.
+	 */
+	pentry = git__calloc(1, sizeof(*pentry));
+	if (pentry == NULL)
+		goto on_error;
+
+	git_hash_final(&oid, ctx);
+	entry_size = idx->off - entry_start;
+	/* Offsets above UINT31_MAX are flagged and kept in offset_long */
+	if (entry_start > UINT31_MAX) {
+		entry->offset = UINT32_MAX;
+		entry->offset_long = entry_start;
+	} else {
+		entry->offset = (uint32_t)entry_start;
+	}
+
+	git_oid_cpy(&pentry->sha1, &oid);
+	pentry->offset = entry_start;
+	if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
+		git__free(pentry);
+		goto on_error;
+	}
+
+	git_oid_cpy(&entry->oid, &oid);
+
+	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
+		goto on_error;
+
+	/* Add the object to the list */
+	if (git_vector_insert(&idx->objects, entry) < 0)
+		goto on_error;
+
+	/* Every fanout slot from the id's first byte upwards counts this object */
+	for (i = oid.id[0]; i < 256; ++i) {
+		idx->fanout[i]++;
+	}
return 0;
+
+on_error:
+	git__free(entry);
+
+	return -1;
}
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
int i;
git_oid oid;
- void *packed;
size_t entry_size;
- unsigned int left;
struct entry *entry;
- git_mwindow *w = NULL;
- git_mwindow_file *mwf = &idx->pack->mwf;
struct git_pack_entry *pentry;
entry = git__calloc(1, sizeof(*entry));
@@ -258,13 +376,9 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
entry->crc = crc32(0L, Z_NULL, 0);
entry_size = (size_t)(idx->off - entry_start);
- packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
- if (packed == NULL)
+ if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
goto on_error;
- entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
- git_mwindow_close(&w);
-
/* Add the object to the list */
if (git_vector_insert(&idx->objects, entry) < 0)
goto on_error;
@@ -349,7 +463,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
/* As the file grows any windows we try to use will be out of date */
git_mwindow_free_all(mwf);
while (processed < idx->nr_objects) {
- git_rawobj obj;
+ git_packfile_stream *stream = &idx->stream;
git_off_t entry_start = idx->off;
size_t entry_size;
git_otype type;
@@ -358,46 +472,71 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
if (idx->pack->mwf.size <= idx->off + 20)
return 0;
- error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- return -1;
-
- git_mwindow_close(&w);
-
- if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
- error = store_delta(idx, entry_start, entry_size, type);
+ if (!idx->have_stream) {
+ error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
if (error == GIT_EBUFS) {
idx->off = entry_start;
return 0;
}
if (error < 0)
- return error;
+ return -1;
+
+ git_mwindow_close(&w);
+ idx->entry_start = entry_start;
+ git_hash_ctx_init(&idx->hash_ctx);
+
+ if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
+ error = advance_delta_offset(idx, type);
+ if (error == GIT_EBUFS) {
+ idx->off = entry_start;
+ return 0;
+ }
+ if (error < 0)
+ return -1;
+
+ idx->have_delta = 1;
+ } else {
+ idx->have_delta = 0;
+ hash_header(&idx->hash_ctx, entry_size, type);
+ }
+
+ idx->have_stream = 1;
+ if (git_packfile_stream_open(stream, idx->pack, idx->off) < 0)
+ goto on_error;
- stats->received_objects++;
- do_progress_callback(idx, stats);
- continue;
}
- idx->off = entry_start;
- error = git_packfile_unpack(&obj, idx->pack, &idx->off);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
+ if (idx->have_delta) {
+ error = read_object_stream(stream);
+ } else {
+ error = hash_object_stream(&idx->hash_ctx, stream);
}
- if (error < 0)
- return -1;
- if (hash_and_save(idx, &obj, entry_start) < 0)
+ idx->off = stream->curpos;
+ if (error == GIT_EBUFS)
+ return 0;
+
+ /* We want to free the stream resources no matter what here */
+ idx->have_stream = 0;
+ git_packfile_stream_free(stream);
+
+ if (error < 0)
goto on_error;
- git__free(obj.data);
+ if (idx->have_delta) {
+ error = store_delta(idx);
+ } else {
+ error = store_object(idx);
+ }
- stats->indexed_objects = (unsigned int)++processed;
+ if (error < 0)
+ goto on_error;
+
+ if (!idx->have_delta) {
+ stats->indexed_objects = (unsigned int)++processed;
+ }
stats->received_objects++;
+
do_progress_callback(idx, stats);
}
diff --git a/src/odb.c b/src/odb.c
index b6d1f798d..2385a580c 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -34,7 +34,7 @@ typedef struct
static int load_alternates(git_odb *odb, const char *objects_dir, int alternate_depth);
-static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)
+int git_odb__format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)
{
const char *type_str = git_object_type2string(obj_type);
int len = p_snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len);
@@ -55,7 +55,7 @@ int git_odb__hashobj(git_oid *id, git_rawobj *obj)
if (!obj->data && obj->len != 0)
return -1;
- hdrlen = format_object_header(header, sizeof(header), obj->len, obj->type);
+ hdrlen = git_odb__format_object_header(header, sizeof(header), obj->len, obj->type);
vec[0].data = header;
vec[0].len = hdrlen;
@@ -133,7 +133,7 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
if ((error = git_hash_ctx_init(&ctx)) < 0)
return -1;
- hdr_len = format_object_header(hdr, sizeof(hdr), size, type);
+ hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), size, type);
if ((error = git_hash_update(&ctx, hdr, hdr_len)) < 0)
goto done;
diff --git a/src/odb.h b/src/odb.h
index e9e33dde8..ed4ee7e7c 100644
--- a/src/odb.h
+++ b/src/odb.h
@@ -46,6 +46,10 @@ struct git_odb {
int git_odb__hashobj(git_oid *id, git_rawobj *obj);
/*
+ * Format the object header as it would appear in the on-disk object
+ */
+int git_odb__format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type);
+/*
* Hash an open file descriptor.
* This is a performance call when the contents of a fd need to be hashed,
* but the fd is already open and we have the size of the contents.
diff --git a/src/pack.c b/src/pack.c
index d7d39392f..520e13828 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -441,6 +441,72 @@ static void use_git_free(void *opaq, void *ptr)
git__free(ptr);
}
+/*
+ * Prepare `obj` for inflating the compressed data of pack file `p`
+ * starting at offset `curpos`. `obj` is storage provided by the caller;
+ * it is cleared and then set up with a fresh zlib inflate state.
+ *
+ * Returns 0 on success, -1 if zlib could not be initialized.
+ */
+int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos)
+{
+	int st;
+
+	memset(obj, 0, sizeof(git_packfile_stream));
+	obj->curpos = curpos;
+	obj->p = p;
+	obj->zstream.zalloc = use_git_alloc;
+	obj->zstream.zfree = use_git_free;
+	obj->zstream.next_in = Z_NULL;
+	obj->zstream.next_out = Z_NULL;
+	st = inflateInit(&obj->zstream);
+	if (st != Z_OK) {
+		/*
+		 * `obj` belongs to the caller (the indexer embeds it in its
+		 * own struct), so it must not be freed here.
+		 */
+		giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Inflate more of the stream into `buffer`, which has room for `len`
+ * bytes, and advance obj->curpos by the amount of input consumed.
+ *
+ * Returns the number of bytes written to `buffer`; 0 if the end of the
+ * zlib stream was already reached by an earlier call; GIT_EBUFS if the
+ * pack window could not be opened, or if nothing was written and the
+ * stream is not finished; -1 if inflate() reports an error.
+ */
+ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
+{
+	z_stream *zs = &obj->zstream;
+	unsigned char *src;
+	size_t produced;
+	int zrc;
+
+	if (obj->done)
+		return 0;
+
+	src = pack_window_open(obj->p, &obj->mw, obj->curpos, &zs->avail_in);
+	if (!src)
+		return GIT_EBUFS;
+
+	zs->next_in = src;
+	zs->next_out = buffer;
+	zs->avail_out = len;
+
+	zrc = inflate(zs, Z_SYNC_FLUSH);
+	git_mwindow_close(&obj->mw);
+
+	/* Account for the input zlib consumed and the output it produced */
+	obj->curpos += zs->next_in - src;
+	produced = len - zs->avail_out;
+
+	if (zrc == Z_STREAM_END) {
+		obj->done = 1;
+		return produced;
+	}
+
+	if (zrc != Z_OK) {
+		giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
+		return -1;
+	}
+
+	/* If we didn't write anything out but we're not done, we need more data */
+	if (produced == 0)
+		return GIT_EBUFS;
+
+	return produced;
+}
+
+/* Tear down the zlib inflate state of the stream; `obj` itself is not freed */
+void git_packfile_stream_free(git_packfile_stream *obj)
+{
+	z_stream *zs = &obj->zstream;
+
+	inflateEnd(zs);
+}
+
int packfile_unpack_compressed(
git_rawobj *obj,
struct git_pack_file *p,
diff --git a/src/pack.h b/src/pack.h
index c1277fdfb..188ea2bbd 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -8,6 +8,8 @@
#ifndef INCLUDE_pack_h__
#define INCLUDE_pack_h__
+#include <zlib.h>
+
#include "git2/oid.h"
#include "common.h"
@@ -76,6 +78,14 @@ struct git_pack_entry {
struct git_pack_file *p;
};
+typedef struct git_packfile_stream { /* state for incrementally inflating data out of a pack file */
+ git_off_t curpos; /* pack offset to read from next; advanced by the input each read consumes */
+ int done; /* set once inflate() has returned Z_STREAM_END */
+ z_stream zstream; /* zlib inflate state */
+ struct git_pack_file *p; /* pack file the compressed data is read from */
+ git_mwindow *mw; /* window cursor used while reading; closed again after each inflate() call */
+} git_packfile_stream;
+
int git_packfile_unpack_header(
size_t *size_p,
git_otype *type_p,
@@ -98,6 +108,10 @@ int packfile_unpack_compressed(
size_t size,
git_otype type);
+int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos);
+ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len);
+void git_packfile_stream_free(git_packfile_stream *obj);
+
git_off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs,
git_off_t *curpos, git_otype type,
git_off_t delta_obj_offset);