diff options
-rw-r--r-- | examples/network/clone.c | 8 | ||||
-rw-r--r-- | examples/network/fetch.c | 14 | ||||
-rw-r--r-- | examples/network/index-pack.c | 2 | ||||
-rw-r--r-- | include/git2/indexer.h | 4 | ||||
-rw-r--r-- | include/git2/sys/odb_backend.h | 2 | ||||
-rw-r--r-- | include/git2/types.h | 10 | ||||
-rw-r--r-- | src/indexer.c | 317 | ||||
-rw-r--r-- | src/odb.c | 2 | ||||
-rw-r--r-- | src/odb_pack.c | 3 | ||||
-rw-r--r-- | src/pack-objects.c | 38 | ||||
-rw-r--r-- | src/pack.c | 32 | ||||
-rw-r--r-- | src/pack.h | 2 | ||||
-rw-r--r-- | src/transports/smart.h | 4 | ||||
-rw-r--r-- | src/transports/smart_pkt.c | 3 | ||||
-rw-r--r-- | src/transports/smart_protocol.c | 6 | ||||
-rw-r--r-- | tests-clar/pack/indexer.c | 128 | ||||
-rw-r--r-- | tests-clar/pack/packbuilder.c | 4 |
17 files changed, 492 insertions, 87 deletions
diff --git a/examples/network/clone.c b/examples/network/clone.c index db35bd7db..4df47eb7f 100644 --- a/examples/network/clone.c +++ b/examples/network/clone.c @@ -25,13 +25,19 @@ static void print_progress(const progress_data *pd) : 0.f; int kbytes = pd->fetch_progress.received_bytes / 1024; - printf("net %3d%% (%4d kb, %5d/%5d) / idx %3d%% (%5d/%5d) / chk %3d%% (%4" PRIuZ "/%4" PRIuZ ") %s\n", + if (pd->fetch_progress.received_objects == pd->fetch_progress.total_objects) { + printf("Resolving deltas %d/%d\r", + pd->fetch_progress.indexed_deltas, + pd->fetch_progress.total_deltas); + } else { + printf("net %3d%% (%4d kb, %5d/%5d) / idx %3d%% (%5d/%5d) / chk %3d%% (%4" PRIuZ "/%4" PRIuZ ") %s\n", network_percent, kbytes, pd->fetch_progress.received_objects, pd->fetch_progress.total_objects, index_percent, pd->fetch_progress.indexed_objects, pd->fetch_progress.total_objects, checkout_percent, pd->completed_steps, pd->total_steps, pd->path); + } } static int fetch_progress(const git_transfer_progress *stats, void *payload) diff --git a/examples/network/fetch.c b/examples/network/fetch.c index 0c545ad7e..4167ef3ca 100644 --- a/examples/network/fetch.c +++ b/examples/network/fetch.c @@ -72,6 +72,7 @@ int fetch(git_repository *repo, int argc, char **argv) const git_transfer_progress *stats; struct dl_data data; git_remote_callbacks callbacks = GIT_REMOTE_CALLBACKS_INIT; + int resolve_deltas_ln = 0; #ifndef _WIN32 pthread_t worker; #endif @@ -113,10 +114,14 @@ int fetch(git_repository *repo, int argc, char **argv) do { usleep(10000); - if (stats->total_objects > 0) + if (stats->received_objects == stats->total_objects) { + printf("Resolving deltas %d/%d\r", + stats->indexed_deltas, stats->total_deltas); + } else if (stats->total_objects > 0) { printf("Received %d/%d objects (%d) in %" PRIuZ " bytes\r", stats->received_objects, stats->total_objects, stats->indexed_objects, stats->received_bytes); + } } while (!data.finished); if (data.ret < 0) @@ -125,8 +130,13 @@ int fetch(git_repository *repo, int argc, char **argv) pthread_join(worker, NULL); #endif - printf("\rReceived %d/%d objects in %zu bytes\n", + if (stats->local_objects > 0) { + printf("\rReceived %d/%d objects in %zu bytes (used %d local objects)\n", + stats->indexed_objects, stats->total_objects, stats->received_bytes, stats->local_objects); + } else{ + printf("\rReceived %d/%d objects in %zu bytes\n", stats->indexed_objects, stats->total_objects, stats->received_bytes); + } // Disconnect the underlying connection to prevent from idling. git_remote_disconnect(remote); diff --git a/examples/network/index-pack.c b/examples/network/index-pack.c index 889305da8..08b45c58c 100644 --- a/examples/network/index-pack.c +++ b/examples/network/index-pack.c @@ -46,7 +46,7 @@ int index_pack(git_repository *repo, int argc, char **argv) return EXIT_FAILURE; } - if (git_indexer_stream_new(&idx, ".", NULL, NULL) < 0) { + if (git_indexer_stream_new(&idx, ".", NULL, NULL, NULL) < 0) { puts("bad idx"); return -1; } diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 4db072c9b..0858b6ea1 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -20,12 +20,16 @@ typedef struct git_indexer_stream git_indexer_stream; * * @param out where to store the indexer instance * @param path to the directory where the packfile should be stored + * @param odb object database from which to read base objects when + * fixing thin packs. Pass NULL if no thin pack is expected (an error + * will be returned if there are bases missing) * @param progress_cb function to call with progress information * @param progress_cb_payload payload for the progress callback */ GIT_EXTERN(int) git_indexer_stream_new( git_indexer_stream **out, const char *path, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_cb_payload); diff --git a/include/git2/sys/odb_backend.h b/include/git2/sys/odb_backend.h index 4365906d4..8039a5b82 100644 --- a/include/git2/sys/odb_backend.h +++ b/include/git2/sys/odb_backend.h @@ -80,7 +80,7 @@ struct git_odb_backend { git_odb_backend *, git_odb_foreach_cb cb, void *payload); int (* writepack)( - git_odb_writepack **, git_odb_backend *, + git_odb_writepack **, git_odb_backend *, git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload); void (* free)(git_odb_backend *); diff --git a/include/git2/types.h b/include/git2/types.h index b500c986d..2d18d385a 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -212,11 +212,21 @@ typedef struct git_remote_callbacks git_remote_callbacks; /** * This is passed as the first argument to the callback to allow the * user to see the progress. + * + * - total_objects: number of objects in the packfile being downloaded + * - indexed_objects: received objects that have been hashed + * - received_objects: objects which have been downloaded + * - local_objects: locally-available objects that have been injected + * in order to fix a thin pack. + * - received-bytes: size of the packfile received up to now */ typedef struct git_transfer_progress { unsigned int total_objects; unsigned int indexed_objects; unsigned int received_objects; + unsigned int local_objects; + unsigned int total_deltas; + unsigned int indexed_deltas; size_t received_bytes; } git_transfer_progress; diff --git a/src/indexer.c b/src/indexer.c index 1270488f0..9f55c8b2c 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -18,6 +18,7 @@ #include "filebuf.h" #include "oid.h" #include "oidmap.h" +#include "compress.h" #define UINT31_MAX (0x7FFFFFFF) @@ -33,6 +34,7 @@ struct git_indexer_stream { opened_pack :1, have_stream :1, have_delta :1; + struct git_pack_header hdr; struct git_pack_file *pack; git_filebuf pack_file; git_off_t off; @@ -48,6 +50,9 @@ struct git_indexer_stream { void *progress_payload; char objbuf[8*1024]; + /* Needed to look up objects which we want to inject to fix a thin pack */ + git_odb *odb; + /* Fields for calculating the packfile trailer (hash of everything before it) */ char inbuf[GIT_OID_RAWSZ]; size_t inbuf_len; @@ -114,6 +119,7 @@ static int objects_cmp(const void *a, const void *b) int git_indexer_stream_new( git_indexer_stream **out, const char *prefix, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload) { @@ -124,6 +130,7 @@ int git_indexer_stream_new( idx = git__calloc(1, sizeof(git_indexer_stream)); GITERR_CHECK_ALLOC(idx); + idx->odb = odb; idx->progress_cb = progress_cb; idx->progress_payload = progress_payload; git_hash_ctx_init(&idx->trailer); @@ -309,17 +316,10 @@ on_error: return -1; } -static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) +static int save_entry(git_indexer_stream *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start) { int i, error; khiter_t k; - git_oid oid; - size_t entry_size; - struct entry *entry; - struct git_pack_entry *pentry; - - entry = git__calloc(1, sizeof(*entry)); - GITERR_CHECK_ALLOC(entry); if (entry_start > UINT31_MAX) { entry->offset = UINT32_MAX; @@ -328,6 +328,34 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent entry->offset = (uint32_t)entry_start; } + pentry->offset = entry_start; + k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); + if (!error) + return -1; + + kh_value(idx->pack->idx_cache, k) = pentry; + + /* Add the object to the list */ + if (git_vector_insert(&idx->objects, entry) < 0) + return -1; + + for (i = entry->oid.id[0]; i < 256; ++i) { + idx->fanout[i]++; + } + + return 0; +} + +static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) +{ + git_oid oid; + size_t entry_size; + struct entry *entry; + struct git_pack_entry *pentry; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + if (git_odb__hashobj(&oid, obj) < 0) { giterr_set(GITERR_INDEXER, "Failed to hash object"); goto on_error; @@ -337,15 +365,6 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent GITERR_CHECK_ALLOC(pentry); git_oid_cpy(&pentry->sha1, &oid); - pentry->offset = entry_start; - k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); - if (!error) { - git__free(pentry); - goto on_error; - } - - kh_value(idx->pack->idx_cache, k) = pentry; - git_oid_cpy(&entry->oid, &oid); entry->crc = crc32(0L, Z_NULL, 0); @@ -353,15 +372,7 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) goto on_error; - /* Add the object to the list */ - if (git_vector_insert(&idx->objects, entry) < 0) - goto on_error; - - for (i = oid.id[0]; i < 256; ++i) { - idx->fanout[i]++; - } - - return 0; + return save_entry(idx, entry, pentry, entry_start); on_error: git__free(entry); @@ -415,8 +426,8 @@ static void hash_partially(git_indexer_stream *idx, const uint8_t *data, size_t int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats) { int error = -1; - struct git_pack_header hdr; size_t processed; + struct git_pack_header *hdr = &idx->hdr; git_mwindow_file *mwf = &idx->pack->mwf; assert(idx && data && stats); @@ -443,14 +454,14 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz if (!idx->parsed_header) { unsigned int total_objects; - if ((unsigned)idx->pack->mwf.size < sizeof(hdr)) + if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header)) return 0; - if (parse_header(&hdr, idx->pack) < 0) + if (parse_header(&idx->hdr, idx->pack) < 0) return -1; idx->parsed_header = 1; - idx->nr_objects = ntohl(hdr.hdr_entries); + idx->nr_objects = ntohl(hdr->hdr_entries); idx->off = sizeof(struct git_pack_header); /* for now, limit to 2^32 objects */ @@ -471,6 +482,9 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz return -1; stats->received_objects = 0; + stats->local_objects = 0; + stats->total_deltas = 0; + stats->indexed_deltas = 0; processed = stats->indexed_objects = 0; stats->total_objects = total_objects; do_progress_callback(idx, stats); @@ -591,24 +605,229 @@ static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char return git_buf_oom(path) ? -1 : 0; } -static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) +/** + * Rewind the packfile by the trailer, as we might need to fix the + * packfile by injecting objects at the tail and must overwrite it. + */ +static git_off_t seek_back_trailer(git_indexer_stream *idx) +{ + git_off_t off; + + if ((off = p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_CUR)) < 0) + return -1; + + idx->pack->mwf.size -= GIT_OID_RAWSZ; + git_mwindow_free_all(&idx->pack->mwf); + + return off; +} + +static int inject_object(git_indexer_stream *idx, git_oid *id) +{ + git_odb_object *obj; + struct entry *entry; + struct git_pack_entry *pentry; + git_oid foo = {{0}}; + unsigned char hdr[64]; + git_buf buf = GIT_BUF_INIT; + git_off_t entry_start; + const void *data; + size_t len, hdr_len; + int error; + + entry = git__calloc(1, sizeof(*entry)); + GITERR_CHECK_ALLOC(entry); + + entry_start = seek_back_trailer(idx); + + if (git_odb_read(&obj, idx->odb, id) < 0) + return -1; + + data = git_odb_object_data(obj); + len = git_odb_object_size(obj); + + entry->crc = crc32(0L, Z_NULL, 0); + + /* Write out the object header */ + hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj)); + git_filebuf_write(&idx->pack_file, hdr, hdr_len); + idx->pack->mwf.size += hdr_len; + entry->crc = crc32(entry->crc, hdr, hdr_len); + + if ((error = git__compress(&buf, data, len)) < 0) + goto cleanup; + + /* And then the compressed object */ + git_filebuf_write(&idx->pack_file, buf.ptr, buf.size); + idx->pack->mwf.size += buf.size; + entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, buf.size)); + git_buf_free(&buf); + + /* Write a fake trailer so the pack functions play ball */ + if ((error = git_filebuf_write(&idx->pack_file, &foo, GIT_OID_RAWSZ)) < 0) + goto cleanup; + + idx->pack->mwf.size += GIT_OID_RAWSZ; + + pentry = git__calloc(1, sizeof(struct git_pack_entry)); + GITERR_CHECK_ALLOC(pentry); + + git_oid_cpy(&pentry->sha1, id); + git_oid_cpy(&entry->oid, id); + idx->off = entry_start + hdr_len + len; + + if ((error = save_entry(idx, entry, pentry, entry_start)) < 0) + git__free(pentry); + +cleanup: + git_odb_object_free(obj); + return error; +} + +static int fix_thin_pack(git_indexer_stream *idx, git_transfer_progress *stats) { + int error, found_ref_delta = 0; unsigned int i; struct delta_info *delta; + size_t size; + git_otype type; + git_mwindow *w = NULL; + git_off_t curpos; + unsigned char *base_info; + unsigned int left = 0; + git_oid base; + + assert(git_vector_length(&idx->deltas) > 0); + if (idx->odb == NULL) { + giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB"); + return -1; + } + + /* Loop until we find the first REF delta */ git_vector_foreach(&idx->deltas, i, delta) { - git_rawobj obj; + curpos = delta->delta_off; + error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos); + git_mwindow_close(&w); + if (error < 0) + return error; + + if (type == GIT_OBJ_REF_DELTA) { + found_ref_delta = 1; + break; + } + } + + if (!found_ref_delta) { + giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object"); + return -1; + } - idx->off = delta->delta_off; - if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + /* curpos now points to the base information, which is an OID */ + base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); + if (base_info == NULL) { + giterr_set(GITERR_INDEXER, "failed to map delta information"); + return -1; + } + + git_oid_fromraw(&base, base_info); + git_mwindow_close(&w); + + if (inject_object(idx, &base) < 0) + return -1; + + stats->local_objects++; + + return 0; +} + +static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats) +{ + unsigned int i; + struct delta_info *delta; + int progressed = 0; + + while (idx->deltas.length > 0) { + progressed = 0; + git_vector_foreach(&idx->deltas, i, delta) { + git_rawobj obj; + + idx->off = delta->delta_off; + if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) + continue; + + if (hash_and_save(idx, &obj, delta->delta_off) < 0) + continue; + + git__free(obj.data); + stats->indexed_objects++; + stats->indexed_deltas++; + progressed = 1; + do_progress_callback(idx, stats); + + /* + * Remove this delta from the list and + * decrease i so we don't skip over the next + * delta. + */ + git_vector_remove(&idx->deltas, i); + git__free(delta); + i--; + } + + if (!progressed && (fix_thin_pack(idx, stats) < 0)) { + giterr_set(GITERR_INDEXER, "missing delta bases"); return -1; + } + } + + return 0; +} + +static int update_header_and_rehash(git_indexer_stream *idx, git_transfer_progress *stats) +{ + void *ptr; + size_t chunk = 1024*1024; + git_off_t hashed = 0; + git_mwindow *w = NULL; + git_mwindow_file *mwf; + unsigned int left; + git_hash_ctx *ctx; + + mwf = &idx->pack->mwf; + ctx = &idx->trailer; + + git_hash_ctx_init(ctx); + git_mwindow_free_all(mwf); + + /* Update the header to include the numer of local objects we injected */ + idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects); + if (p_lseek(idx->pack_file.fd, 0, SEEK_SET) < 0) { + giterr_set(GITERR_OS, "failed to seek to the beginning of the pack"); + return -1; + } + + if (p_write(idx->pack_file.fd, &idx->hdr, sizeof(struct git_pack_header)) < 0) { + giterr_set(GITERR_OS, "failed to update the pack header"); + return -1; + } - if (hash_and_save(idx, &obj, delta->delta_off) < 0) + /* + * We now use the same technique as before to determine the + * hash. We keep reading up to the end and let + * hash_partially() keep the existing trailer out of the + * calculation. + */ + idx->inbuf_len = 0; + while (hashed < mwf->size) { + ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left); + if (ptr == NULL) return -1; - git__free(obj.data); - stats->indexed_objects++; - do_progress_callback(idx, stats); + hash_partially(idx, ptr, left); + hashed += left; + + git_mwindow_close(&w); } return 0; @@ -651,15 +870,31 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress * return -1; } - if (idx->deltas.length > 0) - if (resolve_deltas(idx, stats) < 0) - return -1; + /* Freeze the number of deltas */ + stats->total_deltas = stats->total_objects - stats->indexed_objects; + + if (resolve_deltas(idx, stats) < 0) + return -1; if (stats->indexed_objects != stats->total_objects) { giterr_set(GITERR_INDEXER, "early EOF"); return -1; } + if (stats->local_objects > 0) { + if (update_header_and_rehash(idx, stats) < 0) + return -1; + + git_hash_final(&trailer_hash, &idx->trailer); + if (p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_END) < 0) + return -1; + + if (p_write(idx->pack_file.fd, &trailer_hash, GIT_OID_RAWSZ) < 0) { + giterr_set(GITERR_OS, "failed to update pack trailer"); + return -1; + } + } + git_vector_sort(&idx->objects); git_buf_sets(&filename, idx->pack->pack_name); @@ -995,7 +995,7 @@ int git_odb_write_pack(struct git_odb_writepack **out, git_odb *db, git_transfer if (b->writepack != NULL) { ++writes; - error = b->writepack(out, b, progress_cb, progress_payload); + error = b->writepack(out, b, db, progress_cb, progress_payload); } } diff --git a/src/odb_pack.c b/src/odb_pack.c index 897bddf32..4c7c82253 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -541,6 +541,7 @@ static void pack_backend__writepack_free(struct git_odb_writepack *_writepack) static int pack_backend__writepack(struct git_odb_writepack **out, git_odb_backend *_backend, + git_odb *odb, git_transfer_progress_callback progress_cb, void *progress_payload) { @@ -557,7 +558,7 @@ static int pack_backend__writepack(struct git_odb_writepack **out, GITERR_CHECK_ALLOC(writepack); if (git_indexer_stream_new(&writepack->indexer_stream, - backend->pack_folder, progress_cb, progress_payload) < 0) { + backend->pack_folder, odb, progress_cb, progress_payload) < 0) { git__free(writepack); return -1; } diff --git a/src/pack-objects.c b/src/pack-objects.c index 821f292b9..93541e974 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -232,40 +232,6 @@ int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid, return 0; } -/* - * The per-object header is a pretty dense thing, which is - * - first byte: low four bits are "size", - * then three bits of "type", - * with the high bit being "size continues". - * - each byte afterwards: low seven bits are size continuation, - * with the high bit being "size continues" - */ -static int gen_pack_object_header( - unsigned char *hdr, - unsigned long size, - git_otype type) -{ - unsigned char *hdr_base; - unsigned char c; - - assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA); - - /* TODO: add support for chunked objects; see git.git 6c0d19b1 */ - - c = (unsigned char)((type << 4) | (size & 15)); - size >>= 4; - hdr_base = hdr; - - while (size) { - *hdr++ = c | 0x80; - c = size & 0x7f; - size >>= 7; - } - *hdr++ = c; - - return (int)(hdr - hdr_base); -} - static int get_delta(void **out, git_odb *odb, git_pobject *po) { git_odb_object *src = NULL, *trg = NULL; @@ -327,7 +293,7 @@ static int write_object(git_buf *buf, git_packbuilder *pb, git_pobject *po) } /* Write header */ - hdr_len = gen_pack_object_header(hdr, size, type); + hdr_len = git_packfile__object_header(hdr, size, type); if (git_buf_put(buf, (char *)hdr, hdr_len) < 0) goto on_error; @@ -1292,7 +1258,7 @@ int git_packbuilder_write( PREPARE_PACK; if (git_indexer_stream_new( - &indexer, path, progress_cb, progress_cb_payload) < 0) + &indexer, path, pb->odb, progress_cb, progress_cb_payload) < 0) return -1; ctx.indexer = indexer; diff --git a/src/pack.c b/src/pack.c index e7fb9f1ae..5df0f50b9 100644 --- a/src/pack.c +++ b/src/pack.c @@ -364,6 +364,38 @@ static unsigned char *pack_window_open( return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left); } +/* + * The per-object header is a pretty dense thing, which is + * - first byte: low four bits are "size", + * then three bits of "type", + * with the high bit being "size continues". + * - each byte afterwards: low seven bits are size continuation, + * with the high bit being "size continues" + */ +int git_packfile__object_header(unsigned char *hdr, unsigned long size, git_otype type) +{ + unsigned char *hdr_base; + unsigned char c; + + assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA); + + /* TODO: add support for chunked objects; see git.git 6c0d19b1 */ + + c = (unsigned char)((type << 4) | (size & 15)); + size >>= 4; + hdr_base = hdr; + + while (size) { + *hdr++ = c | 0x80; + c = size & 0x7f; + size >>= 7; + } + *hdr++ = c; + + return (int)(hdr - hdr_base); +} + + static int packfile_unpack_header1( unsigned long *usedp, size_t *sizep, diff --git a/src/pack.h b/src/pack.h index aeeac9ce1..ddeefea1d 100644 --- a/src/pack.h +++ b/src/pack.h @@ -112,6 +112,8 @@ typedef struct git_packfile_stream { git_mwindow *mw; } git_packfile_stream; +int git_packfile__object_header(unsigned char *hdr, unsigned long size, git_otype type); + int git_packfile_unpack_header( size_t *size_p, git_otype *type_p, diff --git a/src/transports/smart.h b/src/transports/smart.h index 3519477da..7fda27d4e 100644 --- a/src/transports/smart.h +++ b/src/transports/smart.h @@ -21,6 +21,7 @@ #define GIT_CAP_INCLUDE_TAG "include-tag" #define GIT_CAP_DELETE_REFS "delete-refs" #define GIT_CAP_REPORT_STATUS "report-status" +#define GIT_CAP_THIN_PACK "thin-pack" enum git_pkt_type { GIT_PKT_CMD, @@ -116,7 +117,8 @@ typedef struct transport_smart_caps { side_band_64k:1, include_tag:1, delete_refs:1, - report_status:1; + report_status:1, + thin_pack:1; } transport_smart_caps; typedef int (*packetsize_cb)(size_t received, void *payload); diff --git a/src/transports/smart_pkt.c b/src/transports/smart_pkt.c index 99da37567..a1f623c78 100644 --- a/src/transports/smart_pkt.c +++ b/src/transports/smart_pkt.c @@ -472,6 +472,9 @@ static int buffer_want_with_caps(const git_remote_head *head, transport_smart_ca if (caps->include_tag) git_buf_puts(&str, GIT_CAP_INCLUDE_TAG " "); + if (caps->thin_pack) + git_buf_puts(&str, GIT_CAP_THIN_PACK " "); + if (git_buf_oom(&str)) return -1; diff --git a/src/transports/smart_protocol.c b/src/transports/smart_protocol.c index 3b4d57856..af6e35fa1 100644 --- a/src/transports/smart_protocol.c +++ b/src/transports/smart_protocol.c @@ -128,6 +128,12 @@ int git_smart__detect_caps(git_pkt_ref *pkt, transport_smart_caps *caps) continue; } + if (!git__prefixcmp(ptr, GIT_CAP_THIN_PACK)) { + caps->common = caps->thin_pack = 1; + ptr += strlen(GIT_CAP_THIN_PACK); + continue; + } + /* We don't know this capability, so skip it */ ptr = strchr(ptr, ' '); } diff --git a/tests-clar/pack/indexer.c b/tests-clar/pack/indexer.c new file mode 100644 index 000000000..17ec7b3f8 --- /dev/null +++ b/tests-clar/pack/indexer.c @@ -0,0 +1,128 @@ +#include "clar_libgit2.h" +#include <git2.h> +#include "fileops.h" +#include "hash.h" +#include "iterator.h" +#include "vector.h" +#include "posix.h" + + +/* + * This is a packfile with three objects. The second is a delta which + * depends on the third, which is also a delta. + */ +unsigned char out_of_order_pack[] = { + 0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, + 0x32, 0x78, 0x9c, 0x63, 0x67, 0x00, 0x00, 0x00, 0x10, 0x00, 0x08, 0x76, + 0xe6, 0x8f, 0xe8, 0x12, 0x9b, 0x54, 0x6b, 0x10, 0x1a, 0xee, 0x95, 0x10, + 0xc5, 0x32, 0x8e, 0x7f, 0x21, 0xca, 0x1d, 0x18, 0x78, 0x9c, 0x63, 0x62, + 0x66, 0x4e, 0xcb, 0xcf, 0x07, 0x00, 0x02, 0xac, 0x01, 0x4d, 0x75, 0x01, + 0xd7, 0x71, 0x36, 0x66, 0xf4, 0xde, 0x82, 0x27, 0x76, 0xc7, 0x62, 0x2c, + 0x10, 0xf1, 0xb0, 0x7d, 0xe2, 0x80, 0xdc, 0x78, 0x9c, 0x63, 0x62, 0x62, + 0x62, 0xb7, 0x03, 0x00, 0x00, 0x69, 0x00, 0x4c, 0xde, 0x7d, 0xaa, 0xe4, + 0x19, 0x87, 0x58, 0x80, 0x61, 0x09, 0x9a, 0x33, 0xca, 0x7a, 0x31, 0x92, + 0x6f, 0xae, 0x66, 0x75 +}; +unsigned int out_of_order_pack_len = 112; + +/* + * Packfile with two objects. The second is a delta against an object + * which is not in the packfile + */ +unsigned char thin_pack[] = { + 0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, + 0x32, 0x78, 0x9c, 0x63, 0x67, 0x00, 0x00, 0x00, 0x10, 0x00, 0x08, 0x76, + 0xe6, 0x8f, 0xe8, 0x12, 0x9b, 0x54, 0x6b, 0x10, 0x1a, 0xee, 0x95, 0x10, + 0xc5, 0x32, 0x8e, 0x7f, 0x21, 0xca, 0x1d, 0x18, 0x78, 0x9c, 0x63, 0x62, + 0x66, 0x4e, 0xcb, 0xcf, 0x07, 0x00, 0x02, 0xac, 0x01, 0x4d, 0x42, 0x52, + 0x3a, 0x6f, 0x39, 0xd1, 0xfe, 0x66, 0x68, 0x6b, 0xa5, 0xe5, 0xe2, 0x97, + 0xac, 0x94, 0x6c, 0x76, 0x0b, 0x04 +}; +unsigned int thin_pack_len = 78; + +unsigned char base_obj[] = { 07, 076 }; +unsigned int base_obj_len = 2; + +void test_pack_indexer__out_of_order(void) +{ + git_indexer_stream *idx; + git_transfer_progress stats; + + cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL)); + cl_git_pass(git_indexer_stream_add(idx, out_of_order_pack, out_of_order_pack_len, &stats)); + cl_git_pass(git_indexer_stream_finalize(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.received_objects, 3); + cl_assert_equal_i(stats.indexed_objects, 3); + + git_indexer_stream_free(idx); +} + +void test_pack_indexer__fix_thin(void) +{ + git_indexer_stream *idx; + git_transfer_progress stats; + git_repository *repo; + git_odb *odb; + git_oid id, should_id; + + cl_git_pass(git_repository_init(&repo, "thin.git", true)); + cl_git_pass(git_repository_odb(&odb, repo)); + + /* Store the missing base into your ODB so the indexer can fix the pack */ + cl_git_pass(git_odb_write(&id, odb, base_obj, base_obj_len, GIT_OBJ_BLOB)); + git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18"); + cl_assert(!git_oid_cmp(&id, &should_id)); + + cl_git_pass(git_indexer_stream_new(&idx, ".", odb, NULL, NULL)); + cl_git_pass(git_indexer_stream_add(idx, thin_pack, thin_pack_len, &stats)); + cl_git_pass(git_indexer_stream_finalize(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 2); + cl_assert_equal_i(stats.received_objects, 2); + cl_assert_equal_i(stats.indexed_objects, 2); + cl_assert_equal_i(stats.local_objects, 1); + + git_oid_fromstr(&should_id, "11f0f69b334728fdd8bc86b80499f22f29d85b15"); + cl_assert(!git_oid_cmp(git_indexer_stream_hash(idx), &should_id)); + + git_indexer_stream_free(idx); + git_odb_free(odb); + git_repository_free(repo); + + /* + * The pack's name/hash only tells us what objects there are, + * so we need to go through the packfile again in order to + * figure out whether we calculated the trailer correctly. + */ + { + unsigned char buffer[128]; + int fd; + ssize_t read; + git_off_t left; + struct stat st; + const char *name = "pack-11f0f69b334728fdd8bc86b80499f22f29d85b15.pack"; + + fd = p_open(name, O_RDONLY); + cl_assert(fd != -1); + + cl_git_pass(p_stat(name, &st)); + left = st.st_size; + + cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL)); + read = p_read(fd, buffer, sizeof(buffer)); + cl_assert(read != -1); + p_close(fd); + + cl_git_pass(git_indexer_stream_add(idx, buffer, read, &stats)); + cl_git_pass(git_indexer_stream_finalize(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.received_objects, 3); + cl_assert_equal_i(stats.indexed_objects, 3); + cl_assert_equal_i(stats.local_objects, 0); + + git_indexer_stream_free(idx); + } +} diff --git a/tests-clar/pack/packbuilder.c b/tests-clar/pack/packbuilder.c index 764fba213..69292567e 100644 --- a/tests-clar/pack/packbuilder.c +++ b/tests-clar/pack/packbuilder.c @@ -92,7 +92,7 @@ void test_pack_packbuilder__create_pack(void) seed_packbuilder(); - cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL)); + cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL, NULL)); cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats)); cl_git_pass(git_indexer_stream_finalize(_indexer, &stats)); @@ -141,7 +141,7 @@ void test_pack_packbuilder__foreach(void) git_indexer_stream *idx; seed_packbuilder(); - cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL)); + cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL)); cl_git_pass(git_packbuilder_foreach(_packbuilder, foreach_cb, idx)); cl_git_pass(git_indexer_stream_finalize(idx, &stats)); git_indexer_stream_free(idx); |