diff options
-rw-r--r-- | builtin/prune.c | 2 | ||||
-rw-r--r-- | builtin/reflog.c | 2 | ||||
-rw-r--r-- | reachable.c | 112 | ||||
-rw-r--r-- | reachable.h | 3 | ||||
-rwxr-xr-x | t/t6501-freshen-objects.sh | 88 |
5 files changed, 204 insertions, 3 deletions
diff --git a/builtin/prune.c b/builtin/prune.c index 763f53e06a..04d3b12ae4 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -135,7 +135,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix) if (show_progress) progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2); - mark_reachable_objects(&revs, 1, progress); + mark_reachable_objects(&revs, 1, expire, progress); stop_progress(&progress); for_each_loose_file_in_objdir(get_object_directory(), prune_object, prune_cruft, prune_subdir, NULL); diff --git a/builtin/reflog.c b/builtin/reflog.c index e8a8fb13b9..80bddc259b 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix) init_revisions(&cb.revs, prefix); if (cb.verbose) printf("Marking reachable objects..."); - mark_reachable_objects(&cb.revs, 0, NULL); + mark_reachable_objects(&cb.revs, 0, 0, NULL); if (cb.verbose) putchar('\n'); } diff --git a/reachable.c b/reachable.c index d03f8294ee..55589a02eb 100644 --- a/reachable.c +++ b/reachable.c @@ -97,7 +97,109 @@ static void mark_commit(struct commit *c, void *data) mark_object(&c->object, NULL, NULL, data); } +/* State handed to the add_recent_* callbacks: the rev_info that receives pending objects and the mtime cutoff. */ struct recent_data { + struct rev_info *revs; + unsigned long timestamp; +}; + +/* Add the object named by sha1 to data->revs as a pending object when mtime is strictly newer than data->timestamp; objects at or before the cutoff are ignored. Dies if the type lookup fails, the type is not tag/commit/tree/blob, or the object lookup yields NULL. */ static void add_recent_object(const unsigned char *sha1, + unsigned long mtime, + struct recent_data *data) +{ + struct object *obj; + enum object_type type; + + if (mtime <= data->timestamp) + return; + + /* + * We do not want to call parse_object here, because + * inflating blobs and trees could be very expensive. + * However, we do need to know the correct type for + * later processing, and the revision machinery expects + * commits and tags to have been parsed. 
+ */ + type = sha1_object_info(sha1, NULL); + if (type < 0) + die("unable to get object info for %s", sha1_to_hex(sha1)); + + switch (type) { + case OBJ_TAG: + case OBJ_COMMIT: + obj = parse_object_or_die(sha1, NULL); + break; + case OBJ_TREE: + obj = (struct object *)lookup_tree(sha1); + break; + case OBJ_BLOB: + obj = (struct object *)lookup_blob(sha1); + break; + default: + die("unknown object type for %s: %s", + sha1_to_hex(sha1), typename(type)); + } + + if (!obj) + die("unable to lookup %s", sha1_to_hex(sha1)); + + add_pending_object(data->revs, obj, ""); +} + +/* Callback given to for_each_loose_object. Skips objects already flagged SEEN; otherwise stats path and forwards st_mtime to add_recent_object. Returns 0, or the value of error() when stat fails with anything other than ENOENT. */ static int add_recent_loose(const unsigned char *sha1, + const char *path, void *data) +{ + struct stat st; + struct object *obj = lookup_object(sha1); + + if (obj && obj->flags & SEEN) + return 0; + + if (stat(path, &st) < 0) { + /* + * It's OK if an object went away during our iteration; this + * could be due to a simultaneous repack. But anything else + * we should abort, since we might then fail to mark objects + * which should not be pruned. 
+ */ + if (errno == ENOENT) + return 0; + return error("unable to stat %s: %s", + sha1_to_hex(sha1), strerror(errno)); + } + + add_recent_object(sha1, st.st_mtime, data); + return 0; +} + +/* Callback given to for_each_packed_object. Same SEEN check as the loose variant, but uses the mtime recorded on the pack (p->mtime); pos is not used. Always returns 0. */ static int add_recent_packed(const unsigned char *sha1, + struct packed_git *p, uint32_t pos, + void *data) +{ + struct object *obj = lookup_object(sha1); + + if (obj && obj->flags & SEEN) + return 0; + add_recent_object(sha1, p->mtime, data); + return 0; +} + +/* Queue not-yet-SEEN objects newer than timestamp onto revs, scanning loose objects first and then packed ones. Returns the first non-zero result of the loose scan, else the result of the packed scan. */ static int add_unseen_recent_objects_to_traversal(struct rev_info *revs, + unsigned long timestamp) +{ + struct recent_data data; + int r; + + data.revs = revs; + data.timestamp = timestamp; + + r = for_each_loose_object(add_recent_loose, &data); + if (r) + return r; + return for_each_packed_object(add_recent_packed, &data); +} + void mark_reachable_objects(struct rev_info *revs, int mark_reflog, + unsigned long mark_recent, struct progress *progress) { struct connectivity_progress cp; @@ -133,5 +235,15 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, if (prepare_revision_walk(revs)) die("revision walk setup failed"); traverse_commit_list(revs, mark_commit, mark_object, &cp); + + /* Second pass, only when a non-zero cutoff was passed: queue recent objects not yet flagged SEEN and traverse again. NOTE(review): ignore_missing_links is presumably set so that recent objects with missing referents do not abort the walk -- confirm. */ if (mark_recent) { + revs->ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(revs, mark_recent)) + die("unable to mark recent objects"); + if (prepare_revision_walk(revs)) + die("revision walk setup failed"); + traverse_commit_list(revs, mark_commit, mark_object, &cp); + } + display_progress(cp.progress, cp.count); } diff --git a/reachable.h b/reachable.h index 5d082adfec..141fe30879 100644 --- a/reachable.h +++ b/reachable.h @@ -2,6 +2,7 @@ #define REACHEABLE_H struct progress; -extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, struct progress *); +/* mark_recent is an mtime cutoff for the extra recent-object pass; 0 skips that pass. */ extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, + unsigned long mark_recent, struct progress *); #endif diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh new file mode 100755 index 
0000000000..de941c2cb2 --- /dev/null +++ b/t/t6501-freshen-objects.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# This test covers the handling of objects which might have old +# mtimes in the filesystem (because they were used previously) +# and are just now becoming referenced again. +# +# We're going to do two things that are a little bit "fake" to +# help make our simulation easier: +# +# 1. We'll turn off reflogs. You can still run into +# problems with reflogs on, but your objects +# don't get pruned until both the reflog expiration +# has passed on their references, _and_ they are out +# of prune's expiration period. Dropping reflogs +# means we only have to deal with one variable in our tests, +# but the results generalize. +# +# 2. We'll use a temporary index file to create our +# works-in-progress. Most workflows would mention +# referenced objects in the index, which prune takes +# into account. However, many operations don't. For +# example, a partial commit with "git commit foo" +# will use a temporary index. Or they may not need +# an index at all (e.g., creating a new commit +# to refer to an existing tree). + +test_description='check pruning of dependent objects' +. ./test-lib.sh + +# We care about reachability, so we do not want to use +# the normal test_commit, which creates extra tags. Instead, add() writes its argument into a file of the same name and stages it, and commit() calls test_tick, then add(), then "git commit -m" with that same name. 
+add () { + echo "$1" >"$1" && + git add "$1" +} +commit () { + test_tick && + add "$1" && + git commit -m "$1" +} + +test_expect_success 'disable reflogs' ' + git config core.logallrefupdates false && + rm -rf .git/logs +' + +test_expect_success 'setup basic history' ' + commit base +' + +test_expect_success 'create and abandon some objects' ' + git checkout -b experiment && + commit abandon && + git checkout master && + git branch -D experiment +' + +test_expect_success 'simulate time passing' ' + find .git/objects -type f | + xargs test-chmtime -v -86400 +' + +test_expect_success 'start writing new commit with old blob' ' + tree=$( + GIT_INDEX_FILE=index.tmp && + export GIT_INDEX_FILE && + git read-tree HEAD && + add unrelated && + add abandon && + git write-tree + ) +' + +test_expect_success 'simultaneous gc' ' + git gc --prune=12.hours.ago +' + +test_expect_success 'finish writing out commit' ' + commit=$(echo foo | git commit-tree -p HEAD $tree) && + git update-ref HEAD $commit +' + +# "abandon" blob should have been rescued by reference from new tree +test_expect_success 'repository passes fsck' ' + git fsck +' + +test_done |