diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2022-01-22 14:19:13 -0500 |
---|---|---|
committer | Edward Thomson <ethomson@edwardthomson.com> | 2022-01-23 09:48:20 -0500 |
commit | 55eb096b93d4e658bfc0d19270b56f00dff9ec0d (patch) | |
tree | fd3abf3a11cc37e66fc3b0be96f1e404b33e87e2 | |
parent | b90da6d7c74e9d0217f1c69aa21aca0e1821b1fc (diff) | |
download | libgit2-55eb096b93d4e658bfc0d19270b56f00dff9ec0d.tar.gz |
indexer: use a byte array for checksum
The index's checksum is not an object ID, so we should not use the
`git_oid` type. Use a byte array for checksum calculation and storage.
Deprecate the `git_indexer_hash` function. Callers should use the new
`git_indexer_name` function which provides a unique packfile name.
-rw-r--r-- | examples/index-pack.c | 4 | ||||
-rw-r--r-- | include/git2/indexer.h | 14 | ||||
-rw-r--r-- | src/indexer.c | 50 | ||||
-rw-r--r-- | tests/pack/indexer.c | 3 | ||||
-rw-r--r-- | tests/pack/packbuilder.c | 13 |
5 files changed, 53 insertions, 31 deletions
diff --git a/examples/index-pack.c b/examples/index-pack.c index c58ac038a..df37dd0c4 100644 --- a/examples/index-pack.c +++ b/examples/index-pack.c @@ -17,7 +17,6 @@ int lg2_index_pack(git_repository *repo, int argc, char **argv) git_indexer *idx; git_indexer_progress stats = {0, 0}; int error; - char hash[GIT_OID_HEXSZ + 1] = {0}; int fd; ssize_t read_bytes; char buf[512]; @@ -61,8 +60,7 @@ int lg2_index_pack(git_repository *repo, int argc, char **argv) printf("\rIndexing %u of %u\n", stats.indexed_objects, stats.total_objects); - git_oid_fmt(hash, git_indexer_hash(idx)); - puts(hash); + puts(git_indexer_name(idx)); cleanup: close(fd); diff --git a/include/git2/indexer.h b/include/git2/indexer.h index 4bacbd317..ffe9bf366 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -129,16 +129,30 @@ GIT_EXTERN(int) git_indexer_append(git_indexer *idx, const void *data, size_t si */ GIT_EXTERN(int) git_indexer_commit(git_indexer *idx, git_indexer_progress *stats); +#ifndef GIT_DEPRECATE_HARD /** * Get the packfile's hash * * A packfile's name is derived from the sorted hashing of all object * names. This is only correct after the index has been finalized. * + * @deprecated use git_indexer_name * @param idx the indexer instance * @return the packfile's hash */ GIT_EXTERN(const git_oid *) git_indexer_hash(const git_indexer *idx); +#endif + +/** + * Get the unique name for the resulting packfile. + * + * The packfile's name is derived from the packfile's content. + * This is only correct after the index has been finalized. + * + * @param idx the indexer instance + * @return a NUL terminated string for the packfile name + */ +GIT_EXTERN(const char *) git_indexer_name(const git_indexer *idx); /** * Free the indexer and its resources diff --git a/src/indexer.c b/src/indexer.c index 16db9ca8d..f9a32e7ac 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -55,7 +55,8 @@ struct git_indexer { git_vector deltas; unsigned int fanout[256]; git_hash_ctx hash_ctx; - git_oid hash; + unsigned char checksum[GIT_HASH_SHA1_SIZE]; + char name[(GIT_HASH_SHA1_SIZE * 2) + 1]; git_indexer_progress_cb progress_cb; void *progress_payload; char objbuf[8*1024]; @@ -76,9 +77,16 @@ struct delta_info { off64_t delta_off; }; +#ifndef GIT_DEPRECATE_HARD const git_oid *git_indexer_hash(const git_indexer *idx) { - return &idx->hash; + return (git_oid *)idx->checksum; +} +#endif + +const char *git_indexer_name(const git_indexer *idx) +{ + return idx->name; } static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack) @@ -897,8 +905,7 @@ static int index_path(git_str *path, git_indexer *idx, const char *suffix) git_str_truncate(path, slash); git_str_puts(path, prefix); - git_oid_fmt(path->ptr + git_str_len(path), &idx->hash); - path->size += GIT_OID_HEXSZ; + git_str_puts(path, idx->name); git_str_puts(path, suffix); return git_str_oom(path) ? -1 : 0; @@ -919,12 +926,13 @@ static int inject_object(git_indexer *idx, git_oid *id) git_odb_object *obj = NULL; struct entry *entry = NULL; struct git_pack_entry *pentry = NULL; - git_oid foo = {{0}}; + unsigned char empty_checksum[GIT_HASH_SHA1_SIZE] = {0}; unsigned char hdr[64]; git_str buf = GIT_STR_INIT; off64_t entry_start; const void *data; size_t len, hdr_len; + size_t checksum_size = GIT_HASH_SHA1_SIZE; int error; if ((error = seek_back_trailer(idx)) < 0) @@ -966,7 +974,7 @@ static int inject_object(git_indexer *idx, git_oid *id) /* Write a fake trailer so the pack functions play ball */ - if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0) + if ((error = append_to_pack(idx, empty_checksum, checksum_size)) < 0) goto cleanup; idx->pack->mwf.size += GIT_OID_RAWSZ; @@ -1160,9 +1168,11 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) struct git_pack_idx_header hdr; git_str filename = GIT_STR_INIT; struct entry *entry; - git_oid trailer_hash, file_hash; + unsigned char checksum[GIT_HASH_SHA1_SIZE]; git_filebuf index_file = {0}; void *packfile_trailer; + size_t checksum_size = GIT_HASH_SHA1_SIZE; + bool mismatch; if (!idx->parsed_header) { git_error_set(GIT_ERROR_INDEXER, "incomplete pack header"); @@ -1170,27 +1180,27 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) } /* Test for this before resolve_deltas(), as it plays with idx->off */ - if (idx->off + 20 < idx->pack->mwf.size) { + if (idx->off + (ssize_t)checksum_size < idx->pack->mwf.size) { git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack"); return -1; } - if (idx->off + 20 > idx->pack->mwf.size) { + if (idx->off + (ssize_t)checksum_size > idx->pack->mwf.size) { git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack"); return -1; } - packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); + packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - checksum_size, checksum_size, &left); if (packfile_trailer == NULL) { git_mwindow_close(&w); goto on_error; } /* Compare the packfile trailer as it was sent to us and what we calculated */ - git_oid_fromraw(&file_hash, packfile_trailer); + git_hash_final(checksum, &idx->trailer); + mismatch = !!memcmp(checksum, packfile_trailer, checksum_size); git_mwindow_close(&w); - git_hash_final(trailer_hash.id, &idx->trailer); - if (git_oid_cmp(&file_hash, &trailer_hash)) { + if (mismatch) { git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch"); return -1; } @@ -1210,8 +1220,8 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) if (update_header_and_rehash(idx, stats) < 0) return -1; - git_hash_final(trailer_hash.id, &idx->trailer); - write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ); + git_hash_final(checksum, &idx->trailer); + write_at(idx, checksum, idx->pack->mwf.size - checksum_size, checksum_size); } /* @@ -1230,7 +1240,9 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) /* Use the trailer hash as the pack file name to ensure * files with different contents have different names */ - git_oid_cpy(&idx->hash, &trailer_hash); + memcpy(idx->checksum, checksum, checksum_size); + if (git_hash_fmt(idx->name, checksum, checksum_size) < 0) + return -1; git_str_sets(&filename, idx->pack->pack_name); git_str_shorten(&filename, strlen("pack")); @@ -1291,14 +1303,14 @@ int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) } /* Write out the packfile trailer to the index */ - if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0) + if (git_filebuf_write(&index_file, checksum, checksum_size) < 0) goto on_error; /* Write out the hash of the idx */ - if (git_filebuf_hash(trailer_hash.id, &index_file) < 0) + if (git_filebuf_hash(checksum, &index_file) < 0) goto on_error; - git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid)); + git_filebuf_write(&index_file, checksum, checksum_size); /* Figure out what the final name should be */ if (index_path(&filename, idx, ".idx") < 0) diff --git a/tests/pack/indexer.c b/tests/pack/indexer.c index 954bee953..ec48ffd98 100644 --- a/tests/pack/indexer.c +++ b/tests/pack/indexer.c @@ -169,8 +169,7 @@ void test_pack_indexer__fix_thin(void) cl_assert_equal_i(stats.indexed_objects, 2); cl_assert_equal_i(stats.local_objects, 1); - git_oid_fromstr(&should_id, "fefdb2d740a3a6b6c03a0c7d6ce431c6d5810e13"); - cl_assert_equal_oid(&should_id, git_indexer_hash(idx)); + cl_assert_equal_s("fefdb2d740a3a6b6c03a0c7d6ce431c6d5810e13", git_indexer_name(idx)); git_indexer_free(idx); git_odb_free(odb); diff --git a/tests/pack/packbuilder.c b/tests/pack/packbuilder.c index 5b1f7b9e9..73bc6674b 100644 --- a/tests/pack/packbuilder.c +++ b/tests/pack/packbuilder.c @@ -5,6 +5,7 @@ #include "iterator.h" #include "vector.h" #include "posix.h" +#include "hash.h" static git_repository *_repo; static git_revwalk *_revwalker; @@ -98,8 +99,8 @@ void test_pack_packbuilder__create_pack(void) git_indexer_progress stats; git_str buf = GIT_STR_INIT, path = GIT_STR_INIT; git_hash_ctx ctx; - git_oid hash; - char hex[GIT_OID_HEXSZ+1]; hex[GIT_OID_HEXSZ] = '\0'; + unsigned char hash[GIT_HASH_SHA1_SIZE]; + char hex[(GIT_HASH_SHA1_SIZE * 2) + 1]; seed_packbuilder(); @@ -107,8 +108,7 @@ void test_pack_packbuilder__create_pack(void) cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats)); cl_git_pass(git_indexer_commit(_indexer, &stats)); - git_oid_fmt(hex, git_indexer_hash(_indexer)); - git_str_printf(&path, "pack-%s.pack", hex); + git_str_printf(&path, "pack-%s.pack", git_indexer_name(_indexer)); /* * By default, packfiles are created with only one thread. @@ -128,14 +128,13 @@ void test_pack_packbuilder__create_pack(void) cl_git_pass(git_hash_ctx_init(&ctx, GIT_HASH_ALGORITHM_SHA1)); cl_git_pass(git_hash_update(&ctx, buf.ptr, buf.size)); - cl_git_pass(git_hash_final(hash.id, &ctx)); + cl_git_pass(git_hash_final(hash, &ctx)); git_hash_ctx_cleanup(&ctx); git_str_dispose(&path); git_str_dispose(&buf); - git_oid_fmt(hex, &hash); - + git_hash_fmt(hex, hash, GIT_HASH_SHA1_SIZE); cl_assert_equal_s(hex, "5d410bdf97cf896f9007681b92868471d636954b"); } |