summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2020-07-30 13:20:30 -0700
committerJunio C Hamano <gitster@pobox.com>2020-07-30 13:20:30 -0700
commitde6dda0dc3d95cd9aaf43a7b85ceeb57316dcc27 (patch)
tree8c970a139cd81136e71e6db49807b55878b6d54e
parent47ae905ffb98cc4d4fd90083da6bc8dab55d9ecc (diff)
parent7fbfe07ab4d4e58c0971dac73001b89f180a0af3 (diff)
downloadgit-de6dda0dc3d95cd9aaf43a7b85ceeb57316dcc27.tar.gz
Merge branch 'sg/commit-graph-cleanups' into master
The changed-path Bloom filter is improved using ideas from an independent implementation. * sg/commit-graph-cleanups: commit-graph: simplify write_commit_graph_file() #2 commit-graph: simplify write_commit_graph_file() #1 commit-graph: simplify parse_commit_graph() #2 commit-graph: simplify parse_commit_graph() #1 commit-graph: clean up #includes diff.h: drop diff_tree_oid() & friends' return value commit-slab: add a function to deep free entries on the slab commit-graph-format.txt: all multi-byte numbers are in network byte order commit-graph: fix parsing the Chunk Lookup table tree-walk.c: don't match submodule entries for 'submod/anything'
-rw-r--r--Documentation/technical/commit-graph-format.txt2
-rw-r--r--commit-graph.c112
-rw-r--r--commit-graph.h6
-rw-r--r--commit-slab-decl.h1
-rw-r--r--commit-slab-impl.h13
-rw-r--r--commit-slab.h10
-rw-r--r--diff.h10
-rw-r--r--revision.c9
-rw-r--r--shallow.c14
-rwxr-xr-xt/t4010-diff-pathspec.sh4
-rwxr-xr-xt/t5318-commit-graph.sh5
-rw-r--r--tree-diff.c30
-rw-r--r--tree-walk.c9
13 files changed, 117 insertions, 108 deletions
diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt
index 1beef17182..440541045d 100644
--- a/Documentation/technical/commit-graph-format.txt
+++ b/Documentation/technical/commit-graph-format.txt
@@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
of the body. The header includes certain values, such as number of chunks
and hash type.
-All 4-byte numbers are in network order.
+All multi-byte numbers are in network byte order.
HEADER:
diff --git a/commit-graph.c b/commit-graph.c
index 1af68c297d..1f37097dff 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1,7 +1,5 @@
-#include "cache.h"
-#include "config.h"
-#include "dir.h"
#include "git-compat-util.h"
+#include "config.h"
#include "lockfile.h"
#include "pack.h"
#include "packfile.h"
@@ -285,8 +283,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
const unsigned char *data, *chunk_lookup;
uint32_t i;
struct commit_graph *graph;
- uint64_t last_chunk_offset;
- uint32_t last_chunk_id;
+ uint64_t next_chunk_offset;
uint32_t graph_signature;
unsigned char graph_version, hash_version;
@@ -326,24 +323,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->data = graph_map;
graph->data_len = graph_size;
- last_chunk_id = 0;
- last_chunk_offset = 8;
+ if (graph_size < GRAPH_HEADER_SIZE +
+ (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
+ error(_("commit-graph file is too small to hold %u chunks"),
+ graph->num_chunks);
+ free(graph);
+ return NULL;
+ }
+
chunk_lookup = data + 8;
+ next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id;
- uint64_t chunk_offset;
+ uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0;
- if (data + graph_size - chunk_lookup <
- GRAPH_CHUNKLOOKUP_WIDTH) {
- error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
- goto free_and_return;
- }
-
chunk_id = get_be32(chunk_lookup + 0);
- chunk_offset = get_be64(chunk_lookup + 4);
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
+ next_chunk_offset = get_be64(chunk_lookup + 4);
if (chunk_offset > graph_size - the_hash_algo->rawsz) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@@ -362,8 +361,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_OIDLOOKUP:
if (graph->chunk_oid_lookup)
chunk_repeated = 1;
- else
+ else {
graph->chunk_oid_lookup = data + chunk_offset;
+ graph->num_commits = (next_chunk_offset - chunk_offset)
+ / graph->hash_len;
+ }
break;
case GRAPH_CHUNKID_DATA:
@@ -417,15 +419,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return;
}
-
- if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
- {
- graph->num_commits = (chunk_offset - last_chunk_offset)
- / graph->hash_len;
- }
-
- last_chunk_id = chunk_id;
- last_chunk_offset = chunk_offset;
}
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@@ -1586,17 +1579,22 @@ static int write_graph_chunk_base(struct hashfile *f,
return 0;
}
+struct chunk_info {
+ uint32_t id;
+ uint64_t size;
+};
+
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
{
uint32_t i;
int fd;
struct hashfile *f;
struct lock_file lk = LOCK_INIT;
- uint32_t chunk_ids[MAX_NUM_CHUNKS + 1];
- uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
+ struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT;
int num_chunks = 3;
+ uint64_t chunk_offset;
struct object_id file_hash;
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -1644,51 +1642,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
}
- chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
- chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
- chunk_ids[2] = GRAPH_CHUNKID_DATA;
+ chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
+ chunks[0].size = GRAPH_FANOUT_SIZE;
+ chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
+ chunks[1].size = hashsz * ctx->commits.nr;
+ chunks[2].id = GRAPH_CHUNKID_DATA;
+ chunks[2].size = (hashsz + 16) * ctx->commits.nr;
if (ctx->num_extra_edges) {
- chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
+ chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
+ chunks[num_chunks].size = 4 * ctx->num_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
- chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES;
+ chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
+ chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
num_chunks++;
- chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA;
+ chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
+ chunks[num_chunks].size = sizeof(uint32_t) * 3
+ + ctx->total_bloom_filter_data_size;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
- chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE;
+ chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
+ chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++;
}
- chunk_ids[num_chunks] = 0;
-
- chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
- chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
- chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
- chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
-
- num_chunks = 3;
- if (ctx->num_extra_edges) {
- chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
- 4 * ctx->num_extra_edges;
- num_chunks++;
- }
- if (ctx->changed_paths) {
- chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
- sizeof(uint32_t) * ctx->commits.nr;
- num_chunks++;
-
- chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
- sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
- num_chunks++;
- }
- if (ctx->num_commit_graphs_after > 1) {
- chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
- hashsz * (ctx->num_commit_graphs_after - 1);
- num_chunks++;
- }
+ chunks[num_chunks].id = 0;
+ chunks[num_chunks].size = 0;
hashwrite_be32(f, GRAPH_SIGNATURE);
@@ -1697,13 +1678,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
hashwrite_u8(f, num_chunks);
hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
+ chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
for (i = 0; i <= num_chunks; i++) {
uint32_t chunk_write[3];
- chunk_write[0] = htonl(chunk_ids[i]);
- chunk_write[1] = htonl(chunk_offsets[i] >> 32);
- chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
+ chunk_write[0] = htonl(chunks[i].id);
+ chunk_write[1] = htonl(chunk_offset >> 32);
+ chunk_write[2] = htonl(chunk_offset & 0xffffffff);
hashwrite(f, chunk_write, 12);
+
+ chunk_offset += chunks[i].size;
}
if (ctx->report_progress) {
diff --git a/commit-graph.h b/commit-graph.h
index 28f89cdf3e..cf8d3c9647 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -2,9 +2,6 @@
#define COMMIT_GRAPH_H
#include "git-compat-util.h"
-#include "repository.h"
-#include "string-list.h"
-#include "cache.h"
#include "object-store.h"
#include "oidset.h"
@@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void);
struct commit;
struct bloom_filter_settings;
+struct repository;
+struct raw_object_store;
+struct string_list;
char *get_commit_graph_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
diff --git a/commit-slab-decl.h b/commit-slab-decl.h
index bfbed1516a..98de2c970c 100644
--- a/commit-slab-decl.h
+++ b/commit-slab-decl.h
@@ -32,6 +32,7 @@ struct slabname { \
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
void init_ ##slabname(struct slabname *s); \
void clear_ ##slabname(struct slabname *s); \
+void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
diff --git a/commit-slab-impl.h b/commit-slab-impl.h
index 5c0eb91a5d..557738df27 100644
--- a/commit-slab-impl.h
+++ b/commit-slab-impl.h
@@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
FREE_AND_NULL(s->slab); \
} \
\
+scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
+{ \
+ unsigned int i; \
+ for (i = 0; i < s->slab_count; i++) { \
+ unsigned int j; \
+ if (!s->slab[i]) \
+ continue; \
+ for (j = 0; j < s->slab_size; j++) \
+ free_fn(&s->slab[i][j * s->stride]); \
+ } \
+ clear_ ##slabname(s); \
+} \
+ \
scope elemtype *slabname## _at_peek(struct slabname *s, \
const struct commit *c, \
int add_if_missing) \
diff --git a/commit-slab.h b/commit-slab.h
index 05b3f2804e..8e72a30536 100644
--- a/commit-slab.h
+++ b/commit-slab.h
@@ -47,6 +47,16 @@
*
* Call this function before the slab falls out of scope to avoid
* leaking memory.
+ *
+ * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
+ *
+ * Empties the slab, similar to clear_indegree(), but in addition it
+ * calls the given 'free_fn' for each slab entry to release any
+ * additional memory that might be owned by the entry (but not the
+ * entry itself!).
+ * Note that 'free_fn' might be called even for entries for which no
+ * indegree_at() call has been made; in this case 'free_fn' is invoked
+ * with a pointer to a zero-initialized location.
*/
#define define_commit_slab(slabname, elemtype) \
diff --git a/diff.h b/diff.h
index 9443dc1b00..e0c0af6286 100644
--- a/diff.h
+++ b/diff.h
@@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
-int diff_tree_oid(const struct object_id *old_oid,
- const struct object_id *new_oid,
- const char *base, struct diff_options *opt);
-int diff_root_tree_oid(const struct object_id *new_oid, const char *base,
- struct diff_options *opt);
+void diff_tree_oid(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ const char *base, struct diff_options *opt);
+void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
+ struct diff_options *opt);
struct combine_diff_path {
struct combine_diff_path *next;
diff --git a/revision.c b/revision.c
index 6aa7f4f567..049008c728 100644
--- a/revision.c
+++ b/revision.c
@@ -791,9 +791,7 @@ static int rev_compare_tree(struct rev_info *revs,
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
- if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "",
- &revs->pruning) < 0)
- return REV_TREE_DIFFERENT;
+ diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
if (!nth_parent)
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@@ -804,7 +802,6 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{
- int retval;
struct tree *t1 = get_commit_tree(commit);
if (!t1)
@@ -812,9 +809,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
- retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
+ diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
- return retval >= 0 && (tree_difference == REV_TREE_SAME);
+ return tree_difference == REV_TREE_SAME;
}
struct treesame_state {
diff --git a/shallow.c b/shallow.c
index b826de9b67..91b9e1073c 100644
--- a/shallow.c
+++ b/shallow.c
@@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
* supports a "valid" flag.
*/
define_commit_slab(commit_depth, int *);
+static void free_depth_in_slab(int **ptr)
+{
+ FREE_AND_NULL(*ptr);
+}
struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag)
{
@@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
}
}
}
- for (i = 0; i < depths.slab_count; i++) {
- int j;
-
- if (!depths.slab[i])
- continue;
- for (j = 0; j < depths.slab_size; j++)
- free(depths.slab[i][j]);
- }
- clear_commit_depth(&depths);
+ deep_clear_commit_depth(&depths, free_depth_in_slab);
return result;
}
diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh
index e5ca359edf..65cc703c65 100755
--- a/t/t4010-diff-pathspec.sh
+++ b/t/t4010-diff-pathspec.sh
@@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
test_expect_success 'diff-tree ignores trailing slash on submodule path' '
git diff --name-only HEAD^ HEAD submod >expect &&
git diff --name-only HEAD^ HEAD submod/ >actual &&
- test_cmp expect actual
+ test_cmp expect actual &&
+ git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
+ test_must_be_empty actual
'
test_expect_success 'diff multiple wildcard pathspecs' '
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index 26f332d6a3..50d5f0849f 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' '
'
test_expect_success 'detect low chunk count' '
- corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
+ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk"
'
@@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' '
test_expect_success 'detect incorrect chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
- "chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
+ "commit-graph file is too small to hold [0-9]* chunks" \
+ $GRAPH_CHUNK_LOOKUP_OFFSET
'
test_expect_success 'git fsck (checks commit-graph)' '
diff --git a/tree-diff.c b/tree-diff.c
index f3d303c6e5..6ebad1a46f 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
-static int ll_diff_tree_oid(const struct object_id *old_oid,
- const struct object_id *new_oid,
- struct strbuf *base, struct diff_options *opt);
+static void ll_diff_tree_oid(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ struct strbuf *base, struct diff_options *opt);
/*
* Compare two tree entries, taking into account only path/S_ISDIR(mode),
@@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid,
q->nr = 1;
}
-static int ll_diff_tree_oid(const struct object_id *old_oid,
- const struct object_id *new_oid,
- struct strbuf *base, struct diff_options *opt)
+static void ll_diff_tree_oid(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ struct strbuf *base, struct diff_options *opt)
{
struct combine_diff_path phead, *p;
pathchange_fn_t pathchange_old = opt->pathchange;
@@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid,
}
opt->pathchange = pathchange_old;
- return 0;
}
-int diff_tree_oid(const struct object_id *old_oid,
- const struct object_id *new_oid,
- const char *base_str, struct diff_options *opt)
+void diff_tree_oid(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ const char *base_str, struct diff_options *opt)
{
struct strbuf base;
- int retval;
strbuf_init(&base, PATH_MAX);
strbuf_addstr(&base, base_str);
- retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt);
+ ll_diff_tree_oid(old_oid, new_oid, &base, opt);
if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
try_to_follow_renames(old_oid, new_oid, &base, opt);
strbuf_release(&base);
-
- return retval;
}
-int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt)
+void diff_root_tree_oid(const struct object_id *new_oid,
+ const char *base,
+ struct diff_options *opt)
{
- return diff_tree_oid(NULL, new_oid, base, opt);
+ diff_tree_oid(NULL, new_oid, base, opt);
}
diff --git a/tree-walk.c b/tree-walk.c
index bb0ad34c54..0160294712 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item,
if (matchlen > pathlen) {
if (match[pathlen] != '/')
return 0;
- if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
+ /*
+ * Reject non-directories as partial pathnames, except
+ * when match is a submodule with a trailing slash and
+ * nothing else (to handle 'submod/' and 'submod'
+ * uniformly).
+ */
+ if (!S_ISDIR(entry->mode) &&
+ (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
return 0;
}