diff options
author | Carlos Martín Nieto <carlos@cmartin.tk> | 2011-07-28 23:35:39 +0200 |
---|---|---|
committer | Carlos Martín Nieto <carlos@cmartin.tk> | 2011-08-03 14:02:41 +0200 |
commit | b7c44096ae12c47085978a4992d4f8cdf7946db4 (patch) | |
tree | e27a123be645a8ebd855d96c38580032a7e9113f | |
parent | bcf21c556c2bbc46a93e81a19c5f9112dfb8f2c8 (diff) | |
download | libgit2-b7c44096ae12c47085978a4992d4f8cdf7946db4.tar.gz |
Implement the indexer
Only v2 index files are supported.
Signed-off-by: Carlos Martín Nieto <carlos@cmartin.tk>
-rw-r--r-- | include/git2/indexer.h | 4 | ||||
-rw-r--r-- | src/indexer.c | 251 |
2 files changed, 212 insertions, 43 deletions
diff --git a/include/git2/indexer.h b/include/git2/indexer.h index f32b1ef6b..1f59ee314 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -2,6 +2,7 @@ #define _INCLUDE_git_indexer_h__ #include "git2/common.h" +#include "git2/oid.h" typedef struct git_indexer_stats { unsigned int total; @@ -12,7 +13,8 @@ typedef struct git_indexer_stats { typedef struct git_indexer git_indexer; GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname); -GIT_EXTERN(int) git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data); +GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_indexer_stats *stats); +GIT_EXTERN(const git_oid *) git_indexer_result(git_indexer *idx); GIT_EXTERN(void) git_indexer_free(git_indexer *idx); diff --git a/src/indexer.c b/src/indexer.c index 6de0fec00..4def1af9e 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -26,14 +26,20 @@ #include "git2/indexer.h" #include "git2/object.h" #include "git2/zlib.h" +#include "git2/oid.h" #include "common.h" #include "pack.h" #include "mwindow.h" #include "posix.h" +#include "pack.h" +#include "filebuf.h" +#include "sha1.h" + +#define UINT31_MAX (0x7FFFFFFF) struct entry { - unsigned char sha[GIT_OID_RAWSZ]; + git_oid oid; uint32_t crc; uint32_t offset; uint64_t offset_long; @@ -42,11 +48,19 @@ struct entry { typedef struct git_indexer { struct git_pack_file *pack; struct stat st; - git_indexer_stats stats; struct git_pack_header hdr; - struct entry *objects; + size_t nr_objects; + git_vector objects; + git_filebuf file; + unsigned int fanout[256]; + git_oid hash; } git_indexer; +const git_oid *git_indexer_hash(git_indexer *idx) +{ + return &idx->hash; +} + static int parse_header(git_indexer *idx) { int error; @@ -55,7 +69,7 @@ static int parse_header(git_indexer *idx) if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS) return git__rethrow(error, "Failed to read in pack header"); - if (idx->hdr.hdr_signature != htonl(PACK_SIGNATURE)) + if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE)) return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); if (!pack_version_ok(idx->hdr.hdr_version)) @@ -65,12 +79,23 @@ static int parse_header(git_indexer *idx) return GIT_SUCCESS; } +int objects_cmp(const void *a, const void *b) +{ + const struct entry *entrya = a; + const struct entry *entryb = b; + + return git_oid_cmp(&entrya->oid, &entryb->oid); +} + int git_indexer_new(git_indexer **out, const char *packname) { git_indexer *idx; unsigned int namelen; int ret, error; + if (git_path_root(packname) < 0) + return git__throw(GIT_EINVALIDPATH, "Path is not absolute"); + idx = git__malloc(sizeof(git_indexer)); if (idx == NULL) return GIT_ENOMEM; @@ -83,7 +108,7 @@ int git_indexer_new(git_indexer **out, const char *packname) goto cleanup; memset(idx->pack, 0x0, sizeof(struct git_pack_file)); - memcpy(idx->pack->pack_name, packname, namelen); + memcpy(idx->pack->pack_name, packname, namelen + 1); ret = p_stat(packname, &idx->st); if (ret < 0) { @@ -102,6 +127,7 @@ int git_indexer_new(git_indexer **out, const char *packname) } idx->pack->mwf.fd = ret; + idx->pack->mwf.size = idx->st.st_size; error = parse_header(idx); if (error < GIT_SUCCESS) { @@ -109,61 +135,187 @@ int git_indexer_new(git_indexer **out, const char *packname) goto cleanup; } - idx->objects = git__calloc(sizeof(struct entry), idx->hdr.hdr_entries); - if (idx->objects == NULL) { - error = GIT_ENOMEM; + idx->nr_objects = ntohl(idx->hdr.hdr_entries); + + error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp); + if (error < GIT_SUCCESS) { goto cleanup; } - idx->stats.total = idx->hdr.hdr_entries; - *out = idx; return GIT_SUCCESS; cleanup: - free(idx->pack); - free(idx); + git_indexer_free(idx); return error; } -/* - * Create the index. Every time something interesting happens - * (something has been parse or resolved), the callback gets called - * with some stats so it can tell the user how hard we're working - */ -int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *cb_data) +static void index_path(char *path, git_indexer *idx) { - git_mwindow_file *mwf = &idx->pack->mwf; - off_t off = 0; + char *ptr; + const char prefix[] = "pack-", suffix[] = ".idx\0"; + + ptr = strrchr(path, '/') + 1; + + memcpy(ptr, prefix, STRLEN(prefix)); + ptr += STRLEN(prefix); + git_oid_fmt(ptr, &idx->hash); + ptr += GIT_OID_HEXSZ; + memcpy(ptr, suffix, STRLEN(suffix)); +} + +static int write_index(git_indexer *idx) +{ + git_mwindow *w = NULL; + int error, namelen; + unsigned int i, long_offsets, left; + struct git_pack_idx_header hdr; + char filename[GIT_PATH_MAX]; + struct entry *entry; + void *packfile_hash; + git_oid file_hash; + SHA_CTX ctx; + + git_vector_sort(&idx->objects); + + namelen = strlen(idx->pack->pack_name); + memcpy(filename, idx->pack->pack_name, namelen); + memcpy(filename + namelen - STRLEN("pack"), "idx\0", STRLEN("idx\0")); + + error = git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS); + + /* Write out the header */ + hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); + hdr.idx_version = htonl(2); + error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr)); + + /* Write out the fanout table */ + for (i = 0; i < 256; ++i) { + uint32_t n = htonl(idx->fanout[i]); + error = git_filebuf_write(&idx->file, &n, sizeof(n)); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the object names (SHA-1 hashes) */ + SHA1_Init(&ctx); + git_vector_foreach(&idx->objects, i, entry) { + error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid)); + SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); + if (error < GIT_SUCCESS) + goto cleanup; + } + SHA1_Final(idx->hash.id, &ctx); + + /* Write out the CRC32 values */ + git_vector_foreach(&idx->objects, i, entry) { + error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t)); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the offsets */ + git_vector_foreach(&idx->objects, i, entry) { + uint32_t n; + + if (entry->offset == UINT32_MAX) + n = htonl(0x80000000 | long_offsets++); + else + n = htonl(entry->offset); + + error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t)); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the long offsets */ + git_vector_foreach(&idx->objects, i, entry) { + uint32_t split[2]; + + if (entry->offset != UINT32_MAX) + continue; + + split[0] = htonl(entry->offset_long >> 32); + split[1] = htonl(entry->offset_long & 0xffffffff); + + error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2); + if (error < GIT_SUCCESS) + goto cleanup; + } + + /* Write out the packfile trailer */ + + packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + if (packfile_hash == NULL) { + error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash"); + goto cleanup; + } + + memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); + + git_mwindow_close(&w); + + error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); + + /* Write out the index sha */ + error = git_filebuf_hash(&file_hash, &idx->file); + if (error < GIT_SUCCESS) + goto cleanup; + + error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); + if (error < GIT_SUCCESS) + goto cleanup; + + /* Figure out what the final name should be */ + index_path(filename, idx); + /* Commit file */ + error = git_filebuf_commit_at(&idx->file, filename); + +cleanup: + if (error < GIT_SUCCESS) + git_filebuf_cleanup(&idx->file); + + return error; +} + +int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) +{ + git_mwindow_file *mwf; + off_t off = sizeof(struct git_pack_header); int error; - unsigned int fanout[256] = {0}; + struct entry *entry; + unsigned int left, processed; - /* FIXME: Write the keep file */ + assert(idx && stats); + mwf = &idx->pack->mwf; error = git_mwindow_file_register(mwf); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to register mwindow file"); - /* Notify before the first one */ - if (cb) - cb(&idx->stats, cb_data); + stats->total = idx->nr_objects; + stats->processed = processed = 0; - while (idx->stats.processed < idx->stats.total) { + while (processed < idx->nr_objects) { git_rawobj obj; git_oid oid; - struct entry entry; + git_mwindow *w = NULL; char hdr[512] = {0}; /* FIXME: How long should this be? */ int i, hdr_len; + off_t entry_start = off; + void *packed; + size_t entry_size; - memset(&entry, 0x0, sizeof(entry)); /* Necessary? */ + entry = git__malloc(sizeof(struct entry)); + memset(entry, 0x0, sizeof(struct entry)); if (off > UINT31_MAX) { - entry.offset = ~0ULL; - entry.offset_long = off; + entry->offset = UINT32_MAX; + entry->offset_long = off; } else { - entry.offset = off; + entry->offset = off; } error = git_packfile_unpack(&obj, idx->pack, &off); @@ -178,30 +330,40 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void goto cleanup; } - memcpy(&entry.sha, oid.id, GIT_OID_RAWSZ); - /* entry.crc = crc32(obj.data) */ + git_oid_cpy(&entry->oid, &oid); + entry->crc = crc32(0L, Z_NULL, 0); + + entry_size = off - entry_start; + packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); + if (packed == NULL) { + error = git__rethrow(error, "Failed to open window to read packed data"); + goto cleanup; + } + entry->crc = htonl(crc32(entry->crc, packed, entry_size)); + git_mwindow_close(&w); /* Add the object to the list */ - //memcpy(&idx->objects[idx->stats.processed], &entry, sizeof(entry)); - idx->objects[idx->stats.processed] = entry; + error = git_vector_insert(&idx->objects, entry); + if (error < GIT_SUCCESS) { + error = git__rethrow(error, "Failed to add entry to list"); + goto cleanup; + } for (i = oid.id[0]; i < 256; ++i) { - fanout[i]++; + idx->fanout[i]++; } free(obj.data); - idx->stats.processed++; - - if (cb) - cb(&idx->stats, cb_data); - + stats->processed = ++processed; } /* * All's gone well, so let's write the index file. */ + error = write_index(idx); + /* Delete keep file */ cleanup: git_mwindow_free_all(mwf); @@ -211,8 +373,13 @@ cleanup: void git_indexer_free(git_indexer *idx) { + unsigned int i; + struct entry *e; + p_close(idx->pack->mwf.fd); - free(idx->objects); + git_vector_foreach(&idx->objects, i, e) + free(e); + git_vector_free(&idx->objects); free(idx->pack); free(idx); } |