diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2021-08-29 21:30:51 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-29 21:30:51 -0400 |
commit | 78cd76249cb32e65860156f758d33264bf3db766 (patch) | |
tree | 4dc102b5a6e72fe552fb5945b55e50dc4e9605cb | |
parent | 0a79012e9df33db31046c653ab04c69eaeed200a (diff) | |
parent | e66545e30fe862d6c2ccfc06275fcadd0dce0953 (diff) | |
download | libgit2-78cd76249cb32e65860156f758d33264bf3db766.tar.gz |
Merge pull request #5404 from lhchavez/multi-pack-index-write
midx: Add a way to write multi-pack-index files
-rw-r--r-- | include/git2/sys/midx.h | 74 |
-rw-r--r-- | include/git2/types.h | 3 |
-rw-r--r-- | src/midx.c | 398 |
-rw-r--r-- | src/midx.h | 16 |
-rw-r--r-- | src/pack.c | 67 |
-rw-r--r-- | src/pack.h | 19 |
-rw-r--r-- | tests/pack/midx.c | 30 |
7 files changed, 607 insertions, 0 deletions
diff --git a/include/git2/sys/midx.h b/include/git2/sys/midx.h new file mode 100644 index 000000000..e3d749829 --- /dev/null +++ b/include/git2/sys/midx.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_sys_git_midx_h__ +#define INCLUDE_sys_git_midx_h__ + +#include "git2/common.h" +#include "git2/types.h" + +/** + * @file git2/midx.h + * @brief Git multi-pack-index routines + * @defgroup git_midx Git multi-pack-index routines + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * Create a new writer for `multi-pack-index` files. + * + * @param out location to store the writer pointer. + * @param pack_dir the directory where the `.pack` and `.idx` files are. The + * `multi-pack-index` file will be written in this directory, too. + * @return 0 or an error code + */ +GIT_EXTERN(int) git_midx_writer_new( + git_midx_writer **out, + const char *pack_dir); + +/** + * Free the multi-pack-index writer and its resources. + * + * @param w the writer to free. If NULL no action is taken. + */ +GIT_EXTERN(void) git_midx_writer_free(git_midx_writer *w); + +/** + * Add an `.idx` file to the writer. + * + * @param w the writer + * @param idx_path the path of an `.idx` file. + * @return 0 or an error code + */ +GIT_EXTERN(int) git_midx_writer_add( + git_midx_writer *w, + const char *idx_path); + +/** + * Write a `multi-pack-index` file to a file. + * + * @param w the writer + * @return 0 or an error code + */ +GIT_EXTERN(int) git_midx_writer_commit( + git_midx_writer *w); + +/** + * Dump the contents of the `multi-pack-index` to an in-memory buffer. + * + * @param midx Buffer where to store the contents of the `multi-pack-index`. 
+ * @param w the writer + * @return 0 or an error code + */ +GIT_EXTERN(int) git_midx_writer_dump( + git_buf *midx, + git_midx_writer *w); + +/** @} */ +GIT_END_DECL +#endif diff --git a/include/git2/types.h b/include/git2/types.h index 562eb8e5f..4de0672f9 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -96,6 +96,9 @@ typedef struct git_odb_stream git_odb_stream; /** A stream to write a packfile to the ODB */ typedef struct git_odb_writepack git_odb_writepack; +/** a writer for multi-pack-index files. */ +typedef struct git_midx_writer git_midx_writer; + /** An open refs database handle. */ typedef struct git_refdb git_refdb; diff --git a/src/midx.c b/src/midx.c index d6bb9c1e7..9aab8b588 100644 --- a/src/midx.c +++ b/src/midx.c @@ -7,11 +7,15 @@ #include "midx.h" +#include "array.h" #include "buffer.h" +#include "filebuf.h" #include "futils.h" #include "hash.h" #include "odb.h" #include "pack.h" +#include "path.h" +#include "repository.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -36,6 +40,8 @@ struct git_midx_chunk { size_t length; }; +typedef int (*midx_write_cb)(const char *buf, size_t size, void *cb_data); + static int midx_error(const char *message) { git_error_set(GIT_ERROR_ODB, "invalid multi-pack-index file - %s", message); @@ -475,3 +481,395 @@ void git_midx_free(git_midx_file *idx) git_midx_close(idx); git__free(idx); } + +static int packfile__cmp(const void *a_, const void *b_) +{ + const struct git_pack_file *a = a_; + const struct git_pack_file *b = b_; + + return strcmp(a->pack_name, b->pack_name); +} + +int git_midx_writer_new( + git_midx_writer **out, + const char *pack_dir) +{ + git_midx_writer *w = git__calloc(1, sizeof(git_midx_writer)); + GIT_ERROR_CHECK_ALLOC(w); + + if (git_buf_sets(&w->pack_dir, pack_dir) < 0) { + git__free(w); + return -1; + } + git_path_squash_slashes(&w->pack_dir); + + if (git_vector_init(&w->packs, 0, packfile__cmp) < 0) { + git_buf_dispose(&w->pack_dir); + git__free(w); + 
return -1; + } + + *out = w; + return 0; +} + +void git_midx_writer_free(git_midx_writer *w) +{ + struct git_pack_file *p; + size_t i; + + if (!w) + return; + + git_vector_foreach (&w->packs, i, p) + git_mwindow_put_pack(p); + git_vector_free(&w->packs); + git_buf_dispose(&w->pack_dir); + git__free(w); +} + +int git_midx_writer_add( + git_midx_writer *w, + const char *idx_path) +{ + git_buf idx_path_buf = GIT_BUF_INIT; + int error; + struct git_pack_file *p; + + error = git_path_prettify(&idx_path_buf, idx_path, git_buf_cstr(&w->pack_dir)); + if (error < 0) + return error; + + error = git_mwindow_get_pack(&p, git_buf_cstr(&idx_path_buf)); + git_buf_dispose(&idx_path_buf); + if (error < 0) + return error; + + error = git_vector_insert(&w->packs, p); + if (error < 0) { + git_mwindow_put_pack(p); + return error; + } + + return 0; +} + +typedef git_array_t(git_midx_entry) object_entry_array_t; + +struct object_entry_cb_state { + uint32_t pack_index; + object_entry_array_t *object_entries_array; +}; + +static int object_entry__cb(const git_oid *oid, off64_t offset, void *data) +{ + struct object_entry_cb_state *state = (struct object_entry_cb_state *)data; + + git_midx_entry *entry = git_array_alloc(*state->object_entries_array); + GIT_ERROR_CHECK_ALLOC(entry); + + git_oid_cpy(&entry->sha1, oid); + entry->offset = offset; + entry->pack_index = state->pack_index; + + return 0; +} + +static int object_entry__cmp(const void *a_, const void *b_) +{ + const git_midx_entry *a = (const git_midx_entry *)a_; + const git_midx_entry *b = (const git_midx_entry *)b_; + + return git_oid_cmp(&a->sha1, &b->sha1); +} + +static int write_offset(off64_t offset, midx_write_cb write_cb, void *cb_data) +{ + int error; + uint32_t word; + + word = htonl((uint32_t)((offset >> 32) & 0xffffffffu)); + error = write_cb((const char *)&word, sizeof(word), cb_data); + if (error < 0) + return error; + word = htonl((uint32_t)((offset >> 0) & 0xffffffffu)); + error = write_cb((const char *)&word, 
sizeof(word), cb_data); + if (error < 0) + return error; + + return 0; +} + +static int write_chunk_header(int chunk_id, off64_t offset, midx_write_cb write_cb, void *cb_data) +{ + uint32_t word = htonl(chunk_id); + int error = write_cb((const char *)&word, sizeof(word), cb_data); + if (error < 0) + return error; + return write_offset(offset, write_cb, cb_data); + + return 0; +} + +static int midx_write_buf(const char *buf, size_t size, void *data) +{ + git_buf *b = (git_buf *)data; + return git_buf_put(b, buf, size); +} + +struct midx_write_hash_context { + midx_write_cb write_cb; + void *cb_data; + git_hash_ctx *ctx; +}; + +static int midx_write_hash(const char *buf, size_t size, void *data) +{ + struct midx_write_hash_context *ctx = (struct midx_write_hash_context *)data; + int error; + + error = git_hash_update(ctx->ctx, buf, size); + if (error < 0) + return error; + + return ctx->write_cb(buf, size, ctx->cb_data); +} + +static int midx_write( + git_midx_writer *w, + midx_write_cb write_cb, + void *cb_data) +{ + int error = 0; + size_t i; + struct git_pack_file *p; + struct git_midx_header hdr = {0}; + uint32_t oid_fanout_count; + uint32_t object_large_offsets_count; + uint32_t oid_fanout[256]; + off64_t offset; + git_buf packfile_names = GIT_BUF_INIT, + oid_lookup = GIT_BUF_INIT, + object_offsets = GIT_BUF_INIT, + object_large_offsets = GIT_BUF_INIT; + git_oid idx_checksum = {{0}}; + git_midx_entry *entry; + object_entry_array_t object_entries_array = GIT_ARRAY_INIT; + git_vector object_entries = GIT_VECTOR_INIT; + git_hash_ctx ctx; + struct midx_write_hash_context hash_cb_data = {0}; + + hdr.signature = htonl(MIDX_SIGNATURE); + hdr.version = MIDX_VERSION; + hdr.object_id_version = MIDX_OBJECT_ID_VERSION; + hdr.base_midx_files = 0; + + hash_cb_data.write_cb = write_cb; + hash_cb_data.cb_data = cb_data; + hash_cb_data.ctx = &ctx; + + error = git_hash_ctx_init(&ctx); + if (error < 0) + return error; + cb_data = &hash_cb_data; + write_cb = midx_write_hash; + + 
git_vector_sort(&w->packs); + git_vector_foreach (&w->packs, i, p) { + git_buf relative_index = GIT_BUF_INIT; + struct object_entry_cb_state state = {0}; + size_t path_len; + + state.pack_index = (uint32_t)i; + state.object_entries_array = &object_entries_array; + + error = git_buf_sets(&relative_index, p->pack_name); + if (error < 0) + goto cleanup; + error = git_path_make_relative(&relative_index, git_buf_cstr(&w->pack_dir)); + if (error < 0) { + git_buf_dispose(&relative_index); + goto cleanup; + } + path_len = git_buf_len(&relative_index); + if (path_len <= strlen(".pack") || git__suffixcmp(git_buf_cstr(&relative_index), ".pack") != 0) { + git_buf_dispose(&relative_index); + git_error_set(GIT_ERROR_INVALID, "invalid packfile name: '%s'", p->pack_name); + error = -1; + goto cleanup; + } + path_len -= strlen(".pack"); + + git_buf_put(&packfile_names, git_buf_cstr(&relative_index), path_len); + git_buf_puts(&packfile_names, ".idx"); + git_buf_putc(&packfile_names, '\0'); + git_buf_dispose(&relative_index); + + error = git_pack_foreach_entry_offset(p, object_entry__cb, &state); + if (error < 0) + goto cleanup; + } + + /* Sort the object entries. */ + error = git_vector_init(&object_entries, git_array_size(object_entries_array), object_entry__cmp); + if (error < 0) + goto cleanup; + git_array_foreach (object_entries_array, i, entry) + error = git_vector_set(NULL, &object_entries, i, entry); + git_vector_set_sorted(&object_entries, 0); + git_vector_sort(&object_entries); + git_vector_uniq(&object_entries, NULL); + + /* Pad the packfile names so it is a multiple of four. */ + while (git_buf_len(&packfile_names) & 3) + git_buf_putc(&packfile_names, '\0'); + + /* Fill the OID Fanout table. 
*/ + oid_fanout_count = 0; + for (i = 0; i < 256; i++) { + while (oid_fanout_count < git_vector_length(&object_entries) && + ((const git_midx_entry *)git_vector_get(&object_entries, oid_fanout_count))->sha1.id[0] <= i) + ++oid_fanout_count; + oid_fanout[i] = htonl(oid_fanout_count); + } + + /* Fill the OID Lookup table. */ + git_vector_foreach (&object_entries, i, entry) { + error = git_buf_put(&oid_lookup, (const char *)&entry->sha1, sizeof(entry->sha1)); + if (error < 0) + goto cleanup; + } + + /* Fill the Object Offsets and Object Large Offsets tables. */ + object_large_offsets_count = 0; + git_vector_foreach (&object_entries, i, entry) { + uint32_t word; + + word = htonl((uint32_t)entry->pack_index); + error = git_buf_put(&object_offsets, (const char *)&word, sizeof(word)); + if (error < 0) + goto cleanup; + if (entry->offset >= 0x80000000l) { + word = htonl(0x80000000u | object_large_offsets_count++); + error = write_offset(entry->offset, midx_write_buf, &object_large_offsets); + } else { + word = htonl((uint32_t)entry->offset & 0x7fffffffu); + } + error = git_buf_put(&object_offsets, (const char *)&word, sizeof(word)); + if (error < 0) + goto cleanup; + } + + /* Write the header. */ + hdr.packfiles = htonl((uint32_t)git_vector_length(&w->packs)); + hdr.chunks = 4; + if (git_buf_len(&object_large_offsets) > 0) + hdr.chunks++; + error = write_cb((const char *)&hdr, sizeof(hdr), cb_data); + if (error < 0) + goto cleanup; + + /* Write the chunk headers. 
*/ + offset = sizeof(hdr) + (hdr.chunks + 1) * 12; + error = write_chunk_header(MIDX_PACKFILE_NAMES_ID, offset, write_cb, cb_data); + if (error < 0) + goto cleanup; + offset += git_buf_len(&packfile_names); + error = write_chunk_header(MIDX_OID_FANOUT_ID, offset, write_cb, cb_data); + if (error < 0) + goto cleanup; + offset += sizeof(oid_fanout); + error = write_chunk_header(MIDX_OID_LOOKUP_ID, offset, write_cb, cb_data); + if (error < 0) + goto cleanup; + offset += git_buf_len(&oid_lookup); + error = write_chunk_header(MIDX_OBJECT_OFFSETS_ID, offset, write_cb, cb_data); + if (error < 0) + goto cleanup; + offset += git_buf_len(&object_offsets); + if (git_buf_len(&object_large_offsets) > 0) { + error = write_chunk_header(MIDX_OBJECT_LARGE_OFFSETS_ID, offset, write_cb, cb_data); + if (error < 0) + goto cleanup; + offset += git_buf_len(&object_large_offsets); + } + error = write_chunk_header(0, offset, write_cb, cb_data); + if (error < 0) + goto cleanup; + + /* Write all the chunks. */ + error = write_cb(git_buf_cstr(&packfile_names), git_buf_len(&packfile_names), cb_data); + if (error < 0) + goto cleanup; + error = write_cb((const char *)oid_fanout, sizeof(oid_fanout), cb_data); + if (error < 0) + goto cleanup; + error = write_cb(git_buf_cstr(&oid_lookup), git_buf_len(&oid_lookup), cb_data); + if (error < 0) + goto cleanup; + error = write_cb(git_buf_cstr(&object_offsets), git_buf_len(&object_offsets), cb_data); + if (error < 0) + goto cleanup; + error = write_cb(git_buf_cstr(&object_large_offsets), git_buf_len(&object_large_offsets), cb_data); + if (error < 0) + goto cleanup; + + /* Finalize the checksum and write the trailer. 
*/ + error = git_hash_final(&idx_checksum, &ctx); + if (error < 0) + goto cleanup; + error = write_cb((const char *)&idx_checksum, sizeof(idx_checksum), cb_data); + if (error < 0) + goto cleanup; + +cleanup: + git_array_clear(object_entries_array); + git_vector_free(&object_entries); + git_buf_dispose(&packfile_names); + git_buf_dispose(&oid_lookup); + git_buf_dispose(&object_offsets); + git_buf_dispose(&object_large_offsets); + git_hash_ctx_cleanup(&ctx); + return error; +} + +static int midx_write_filebuf(const char *buf, size_t size, void *data) +{ + git_filebuf *f = (git_filebuf *)data; + return git_filebuf_write(f, buf, size); +} + +int git_midx_writer_commit( + git_midx_writer *w) +{ + int error; + int filebuf_flags = GIT_FILEBUF_DO_NOT_BUFFER; + git_buf midx_path = GIT_BUF_INIT; + git_filebuf output = GIT_FILEBUF_INIT; + + error = git_buf_joinpath(&midx_path, git_buf_cstr(&w->pack_dir), "multi-pack-index"); + if (error < 0) + return error; + + if (git_repository__fsync_gitdir) + filebuf_flags |= GIT_FILEBUF_FSYNC; + error = git_filebuf_open(&output, git_buf_cstr(&midx_path), filebuf_flags, 0644); + git_buf_dispose(&midx_path); + if (error < 0) + return error; + + error = midx_write(w, midx_write_filebuf, &output); + if (error < 0) { + git_filebuf_cleanup(&output); + return error; + } + + return git_filebuf_commit(&output); +} + +int git_midx_writer_dump( + git_buf *midx, + git_midx_writer *w) +{ + return midx_write(w, midx_write_buf, midx); +} diff --git a/src/midx.h b/src/midx.h index 543ff2178..4ce17ce73 100644 --- a/src/midx.h +++ b/src/midx.h @@ -12,6 +12,8 @@ #include <ctype.h> +#include "git2/sys/midx.h" + #include "map.h" #include "mwindow.h" #include "odb.h" @@ -67,6 +69,20 @@ typedef struct git_midx_entry { git_oid sha1; } git_midx_entry; +/* + * A writer for `multi-pack-index` files. + */ +struct git_midx_writer { + /* + * The path of the directory where the .pack/.idx files are stored. 
The + * `multi-pack-index` file will be written to the same directory. + */ + git_buf pack_dir; + + /* The list of `git_pack_file`s. */ + git_vector packs; +}; + int git_midx_open( git_midx_file **idx_out, const char *path); diff --git a/src/pack.c b/src/pack.c index 5d284ca9e..94b1ecd9d 100644 --- a/src/pack.c +++ b/src/pack.c @@ -1368,6 +1368,73 @@ int git_pack_foreach_entry( return error; } +int git_pack_foreach_entry_offset( + struct git_pack_file *p, + git_pack_foreach_entry_offset_cb cb, + void *data) +{ + const unsigned char *index; + off64_t current_offset; + const git_oid *current_oid; + uint32_t i; + int error = 0; + + if (git_mutex_lock(&p->lock) < 0) + return packfile_error("failed to get lock for git_pack_foreach_entry_offset"); + + index = p->index_map.data; + if (index == NULL) { + if ((error = pack_index_open_locked(p)) < 0) + goto cleanup; + + GIT_ASSERT(p->index_map.data); + index = p->index_map.data; + } + + if (p->index_version > 1) + index += 8; + + index += 4 * 256; + + /* all offsets should have been validated by pack_index_check_locked */ + if (p->index_version > 1) { + const unsigned char *offsets = index + 24 * p->num_objects; + const unsigned char *large_offset_ptr; + const unsigned char *large_offsets = index + 28 * p->num_objects; + const unsigned char *large_offsets_end = ((const unsigned char *)p->index_map.data) + p->index_map.len - 20; + for (i = 0; i < p->num_objects; i++) { + current_offset = ntohl(*(const uint32_t *)(offsets + 4 * i)); + if (current_offset & 0x80000000) { + large_offset_ptr = large_offsets + (current_offset & 0x7fffffff) * 8; + if (large_offset_ptr >= large_offsets_end) { + error = packfile_error("invalid large offset"); + goto cleanup; + } + current_offset = (((off64_t)ntohl(*((uint32_t *)(large_offset_ptr + 0)))) << 32) | + ntohl(*((uint32_t *)(large_offset_ptr + 4))); + } + current_oid = (const git_oid *)(index + 20 * i); + if ((error = cb(current_oid, current_offset, data)) != 0) { + error = 
git_error_set_after_callback(error); + goto cleanup; + } + } + } else { + for (i = 0; i < p->num_objects; i++) { + current_offset = ntohl(*(const uint32_t *)(index + 24 * i)); + current_oid = (const git_oid *)(index + 24 * i + 4); + if ((error = cb(current_oid, current_offset, data)) != 0) { + error = git_error_set_after_callback(error); + goto cleanup; + } + } + } + +cleanup: + git_mutex_unlock(&p->lock); + return error; +} + int git_pack__lookup_sha1(const void *oid_lookup_table, size_t stride, unsigned lo, unsigned hi, const unsigned char *oid_prefix) { diff --git a/src/pack.h b/src/pack.h index 1d077240d..bf279c6b6 100644 --- a/src/pack.h +++ b/src/pack.h @@ -20,6 +20,14 @@ #include "oidmap.h" #include "zstream.h" +/** + * Function type for callbacks from git_pack_foreach_entry_offset. + */ +typedef int git_pack_foreach_entry_offset_cb( + const git_oid *id, + off64_t offset, + void *payload); + #define GIT_PACK_FILE_MODE 0444 #define PACK_SIGNATURE 0x5041434b /* "PACK" */ @@ -176,5 +184,16 @@ int git_pack_foreach_entry( struct git_pack_file *p, git_odb_foreach_cb cb, void *data); +/** + * Similar to git_pack_foreach_entry, but: + * - It also provides the offset of the object within the + * packfile. + * - It does not sort the objects in any order. + * - It retains the lock while invoking the callback. 
+ */ +int git_pack_foreach_entry_offset( + struct git_pack_file *p, + git_pack_foreach_entry_offset_cb cb, + void *data); #endif diff --git a/tests/pack/midx.c b/tests/pack/midx.c index 0d7efbef1..6e6c1a6d5 100644 --- a/tests/pack/midx.c +++ b/tests/pack/midx.c @@ -1,6 +1,7 @@ #include "clar_libgit2.h" #include <git2.h> +#include <git2/sys/midx.h> #include "midx.h" @@ -44,3 +45,32 @@ void test_pack_midx__lookup(void) git_commit_free(commit); git_repository_free(repo); } + +void test_pack_midx__writer(void) +{ + git_repository *repo; + git_midx_writer *w = NULL; + git_buf midx = GIT_BUF_INIT, expected_midx = GIT_BUF_INIT, path = GIT_BUF_INIT; + + cl_git_pass(git_repository_open(&repo, cl_fixture("testrepo.git"))); + + cl_git_pass(git_buf_joinpath(&path, git_repository_path(repo), "objects/pack")); + cl_git_pass(git_midx_writer_new(&w, git_buf_cstr(&path))); + + cl_git_pass(git_midx_writer_add(w, "pack-d7c6adf9f61318f041845b01440d09aa7a91e1b5.idx")); + cl_git_pass(git_midx_writer_add(w, "pack-d85f5d483273108c9d8dd0e4728ccf0b2982423a.idx")); + cl_git_pass(git_midx_writer_add(w, "pack-a81e489679b7d3418f9ab594bda8ceb37dd4c695.idx")); + + cl_git_pass(git_midx_writer_dump(&midx, w)); + cl_git_pass(git_buf_joinpath(&path, git_repository_path(repo), "objects/pack/multi-pack-index")); + cl_git_pass(git_futils_readbuffer(&expected_midx, git_buf_cstr(&path))); + + cl_assert_equal_i(git_buf_len(&midx), git_buf_len(&expected_midx)); + cl_assert_equal_strn(git_buf_cstr(&midx), git_buf_cstr(&expected_midx), git_buf_len(&midx)); + + git_buf_dispose(&midx); + git_buf_dispose(&expected_midx); + git_buf_dispose(&path); + git_midx_writer_free(w); + git_repository_free(repo); +} |