From 2c4eb83ee14bca44c2442727bacd82312cbedfc5 Mon Sep 17 00:00:00 2001 From: Colin Stolley Date: Thu, 16 Jun 2022 16:50:35 -0500 Subject: commit-graph: only verify csum on git_commit_graph_open(). It is expensive to compute the sha1 of the entire commit-graph file each time we open it. Git only does this if it is re-writing the file. This patch will only verify the checksum when calling the external API git_commit_graph_open(), which explicitly says it opens and verifies the commit graph in the documentation. For internal library calls, we call git_commit_graph_get_file(), which mmaps the commit-graph file in read-only mode. Therefore it is safe to skip the validation check there. Tests were added to check that the validation works in the happy path, and prevents us from opening the file when validation fails. (Note from Derrick Stolee: This patch was applied internally at GitHub after we recognized the performance impact it had during an upgrade of libgit2. The original author left the company before we remembered to send it upstream.) Signed-off-by: Derrick Stolee --- src/libgit2/commit_graph.c | 28 +++++++++++++++++++------- src/libgit2/commit_graph.h | 3 +++ tests/libgit2/graph/commitgraph.c | 41 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/src/libgit2/commit_graph.c b/src/libgit2/commit_graph.c index 322d24b19..d8da1c5d3 100644 --- a/src/libgit2/commit_graph.c +++ b/src/libgit2/commit_graph.c @@ -201,7 +201,6 @@ int git_commit_graph_file_parse( struct git_commit_graph_chunk *last_chunk; uint32_t i; off64_t last_chunk_offset, chunk_offset, trailer_offset; - unsigned char checksum[GIT_HASH_SHA1_SIZE]; size_t checksum_size; int error; struct git_commit_graph_chunk chunk_oid_fanout = {0}, chunk_oid_lookup = {0}, @@ -234,11 +233,6 @@ int git_commit_graph_file_parse( return commit_graph_error("wrong commit-graph size"); memcpy(file->checksum, (data + trailer_offset), checksum_size); - if (git_hash_buf(checksum, data, (size_t)trailer_offset, GIT_HASH_ALGORITHM_SHA1) < 0) - return commit_graph_error("could not calculate signature"); - if (memcmp(checksum, file->checksum, checksum_size) != 0) - return commit_graph_error("index signature mismatch"); - chunk_hdr = data + sizeof(struct git_commit_graph_header); last_chunk = NULL; for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) { @@ -331,9 +325,29 @@ error: return error; } +int git_commit_graph_validate(git_commit_graph *cgraph) { + unsigned char checksum[GIT_HASH_SHA1_SIZE]; + size_t checksum_size = GIT_HASH_SHA1_SIZE; + size_t trailer_offset = cgraph->file->graph_map.len - checksum_size; + + if (cgraph->file->graph_map.len < checksum_size) + return commit_graph_error("map length too small"); + + if (git_hash_buf(checksum, cgraph->file->graph_map.data, trailer_offset, GIT_HASH_ALGORITHM_SHA1) < 0) + return commit_graph_error("could not calculate signature"); + if (memcmp(checksum, cgraph->file->checksum, checksum_size) != 0) + return commit_graph_error("index signature mismatch"); + + return 0; +} + int git_commit_graph_open(git_commit_graph **cgraph_out, const char *objects_dir) { - return git_commit_graph_new(cgraph_out, objects_dir, true); + int error = git_commit_graph_new(cgraph_out, objects_dir, true); + if (!error) { + return git_commit_graph_validate(*cgraph_out); + } + return error; } int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path) diff --git a/src/libgit2/commit_graph.h b/src/libgit2/commit_graph.h index b78ab8177..517abb239 100644 --- a/src/libgit2/commit_graph.h +++ b/src/libgit2/commit_graph.h @@ -106,6 +106,9 @@ struct git_commit_graph { /** Create a new commit-graph, optionally opening the underlying file. */ int git_commit_graph_new(git_commit_graph **cgraph_out, const char *objects_dir, bool open_file); +/** Validate the checksum of a commit graph */ +int git_commit_graph_validate(git_commit_graph *cgraph); + /** Open and validate a commit-graph file. */ int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path); diff --git a/tests/libgit2/graph/commitgraph.c b/tests/libgit2/graph/commitgraph.c index e2452000d..82f7f936f 100644 --- a/tests/libgit2/graph/commitgraph.c +++ b/tests/libgit2/graph/commitgraph.c @@ -124,3 +124,44 @@ void test_graph_commitgraph__writer(void) git_commit_graph_writer_free(w); git_repository_free(repo); } + +void test_graph_commitgraph__validate(void) +{ + git_repository *repo; + struct git_commit_graph *cgraph; + git_str objects_dir = GIT_STR_INIT; + + cl_git_pass(git_repository_open(&repo, cl_fixture("testrepo.git"))); + cl_git_pass(git_str_joinpath(&objects_dir, git_repository_path(repo), "objects")); + + /* git_commit_graph_open() calls git_commit_graph_validate() */ + cl_git_pass(git_commit_graph_open(&cgraph, git_str_cstr(&objects_dir))); + + git_commit_graph_free(cgraph); + git_str_dispose(&objects_dir); + git_repository_free(repo); +} + +void test_graph_commitgraph__validate_corrupt(void) +{ + git_repository *repo; + struct git_commit_graph *cgraph; + int fd = -1; + + cl_fixture_sandbox("testrepo.git"); + cl_git_pass(git_repository_open(&repo, cl_git_sandbox_path(1, "testrepo.git", NULL))); + + /* corrupt commit graph checksum at the end of the file */ + cl_assert((fd = p_open(cl_git_sandbox_path(0, "testrepo.git", "objects", "info", "commit-graph", NULL), O_WRONLY)) > 0); + cl_assert(p_lseek(fd, -5, SEEK_END) > 0); + cl_must_pass(p_write(fd, "\0\0", 2)); + cl_must_pass(p_close(fd)); + + /* git_commit_graph_open() calls git_commit_graph_validate() */ + cl_git_fail(git_commit_graph_open(&cgraph, cl_git_sandbox_path(1, "testrepo.git", "objects", NULL))); + + git_commit_graph_free(cgraph); + git_repository_free(repo); + + cl_fixture_cleanup("testrepo.git"); +} -- cgit v1.2.1 From 6de3221debf785c328842667eb04c07c747c73a0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 10 Nov 2022 08:50:15 -0500 Subject: commit_graph: use uint64_t for arithmetic This should resolve some issues with UBSan builds by using unsigned 64-bit integers for all arithmetic until we finally convert to an offset or size value. Signed-off-by: Derrick Stolee --- src/libgit2/commit_graph.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libgit2/commit_graph.c b/src/libgit2/commit_graph.c index d8da1c5d3..9554fe855 100644 --- a/src/libgit2/commit_graph.c +++ b/src/libgit2/commit_graph.c @@ -200,7 +200,7 @@ int git_commit_graph_file_parse( const unsigned char *chunk_hdr; struct git_commit_graph_chunk *last_chunk; uint32_t i; - off64_t last_chunk_offset, chunk_offset, trailer_offset; + uint64_t last_chunk_offset, chunk_offset, trailer_offset; size_t checksum_size; int error; struct git_commit_graph_chunk chunk_oid_fanout = {0}, chunk_oid_lookup = {0}, @@ -236,8 +236,8 @@ int git_commit_graph_file_parse( chunk_hdr = data + sizeof(struct git_commit_graph_header); last_chunk = NULL; for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) { - chunk_offset = ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) << 32 - | ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 8)))); + chunk_offset = ((uint64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) << 32 + | ((uint64_t)ntohl(*((uint32_t *)(chunk_hdr + 8)))); if (chunk_offset < last_chunk_offset) return commit_graph_error("chunks are non-monotonic"); if (chunk_offset >= trailer_offset) -- cgit v1.2.1