summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- builtin/prune.c 2
-rw-r--r-- builtin/reflog.c 2
-rw-r--r-- reachable.c 112
-rw-r--r-- reachable.h 3
-rwxr-xr-x t/t6501-freshen-objects.sh 88
5 files changed, 204 insertions, 3 deletions
diff --git a/builtin/prune.c b/builtin/prune.c
index 763f53e06a..04d3b12ae4 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -135,7 +135,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
if (show_progress)
progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2);
- mark_reachable_objects(&revs, 1, progress);
+ mark_reachable_objects(&revs, 1, expire, progress);
stop_progress(&progress);
for_each_loose_file_in_objdir(get_object_directory(), prune_object,
prune_cruft, prune_subdir, NULL);
diff --git a/builtin/reflog.c b/builtin/reflog.c
index e8a8fb13b9..80bddc259b 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
init_revisions(&cb.revs, prefix);
if (cb.verbose)
printf("Marking reachable objects...");
- mark_reachable_objects(&cb.revs, 0, NULL);
+ mark_reachable_objects(&cb.revs, 0, 0, NULL);
if (cb.verbose)
putchar('\n');
}
diff --git a/reachable.c b/reachable.c
index d03f8294ee..55589a02eb 100644
--- a/reachable.c
+++ b/reachable.c
@@ -97,7 +97,109 @@ static void mark_commit(struct commit *c, void *data)
mark_object(&c->object, NULL, NULL, data);
}
+struct recent_data { /* context handed to the add_recent_* callbacks */
+ struct rev_info *revs; /* traversal that recent objects are queued on */
+ unsigned long timestamp; /* cutoff: objects with mtime <= this are skipped */
+};
+
+static void add_recent_object(const unsigned char *sha1, /* id of the candidate object */
+ unsigned long mtime, /* modification time supplied by the caller */
+ struct recent_data *data) /* cutoff plus the traversal to extend */
+{ /* Adds the object to data->revs as pending when mtime is newer than the cutoff. */
+ struct object *obj;
+ enum object_type type;
+
+ if (mtime <= data->timestamp) /* at or before the cutoff: not recent, skip */
+ return;
+
+ /*
+ * We do not want to call parse_object here, because
+ * inflating blobs and trees could be very expensive.
+ * However, we do need to know the correct type for
+ * later processing, and the revision machinery expects
+ * commits and tags to have been parsed.
+ */
+ type = sha1_object_info(sha1, NULL);
+ if (type < 0) /* a negative result is treated as a fatal lookup failure */
+ die("unable to get object info for %s", sha1_to_hex(sha1));
+
+ switch (type) {
+ case OBJ_TAG:
+ case OBJ_COMMIT: /* tags and commits are parsed, per the note above */
+ obj = parse_object_or_die(sha1, NULL);
+ break;
+ case OBJ_TREE: /* trees and blobs only get a lookup_* call, not parse_object */
+ obj = (struct object *)lookup_tree(sha1);
+ break;
+ case OBJ_BLOB:
+ obj = (struct object *)lookup_blob(sha1);
+ break;
+ default:
+ die("unknown object type for %s: %s",
+ sha1_to_hex(sha1), typename(type));
+ }
+
+ if (!obj)
+ die("unable to lookup %s", sha1_to_hex(sha1));
+
+ add_pending_object(data->revs, obj, ""); /* queued under an empty name */
+}
+
+static int add_recent_loose(const unsigned char *sha1, /* callback given to for_each_loose_object */
+ const char *path, void *data) /* path is stat()ed; data is forwarded to add_recent_object */
+{
+ struct stat st;
+ struct object *obj = lookup_object(sha1);
+
+ if (obj && obj->flags & SEEN) /* already flagged SEEN: nothing to add */
+ return 0;
+
+ if (stat(path, &st) < 0) {
+ /*
+ * It's OK if an object went away during our iteration; this
+ * could be due to a simultaneous repack. But anything else
+ * we should abort, since we might then fail to mark objects
+ * which should not be pruned.
+ */
+ if (errno == ENOENT)
+ return 0;
+ return error("unable to stat %s: %s", /* the result of error() becomes the callback's return value */
+ sha1_to_hex(sha1), strerror(errno));
+ }
+
+ add_recent_object(sha1, st.st_mtime, data); /* the file's own mtime decides recency */
+ return 0;
+}
+
+static int add_recent_packed(const unsigned char *sha1, /* callback given to for_each_packed_object */
+ struct packed_git *p, uint32_t pos, /* pos is not used in this function */
+ void *data) /* forwarded to add_recent_object */
+{
+ struct object *obj = lookup_object(sha1);
+
+ if (obj && obj->flags & SEEN) /* already flagged SEEN: nothing to add */
+ return 0;
+ add_recent_object(sha1, p->mtime, data); /* recency is judged by the pack's mtime, not a per-object time */
+ return 0; /* always 0 */
+}
+
+static int add_unseen_recent_objects_to_traversal(struct rev_info *revs, /* traversal to extend */
+ unsigned long timestamp) /* cutoff copied into recent_data */
+{ /* Runs the loose-object callback, then the packed-object callback, over the same recent_data. */
+ struct recent_data data;
+ int r;
+
+ data.revs = revs;
+ data.timestamp = timestamp;
+
+ r = for_each_loose_object(add_recent_loose, &data);
+ if (r) /* a non-zero loose-pass result is returned as-is and the packed pass is skipped */
+ return r;
+ return for_each_packed_object(add_recent_packed, &data);
+}
+
void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
+ unsigned long mark_recent,
struct progress *progress)
{
struct connectivity_progress cp;
@@ -133,5 +235,15 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
if (prepare_revision_walk(revs))
die("revision walk setup failed");
traverse_commit_list(revs, mark_commit, mark_object, &cp);
+
+ if (mark_recent) {
+ revs->ignore_missing_links = 1;
+ if (add_unseen_recent_objects_to_traversal(revs, mark_recent))
+ die("unable to mark recent objects");
+ if (prepare_revision_walk(revs))
+ die("revision walk setup failed");
+ traverse_commit_list(revs, mark_commit, mark_object, &cp);
+ }
+
display_progress(cp.progress, cp.count);
}
diff --git a/reachable.h b/reachable.h
index 5d082adfec..141fe30879 100644
--- a/reachable.h
+++ b/reachable.h
@@ -2,6 +2,7 @@
#define REACHEABLE_H
struct progress;
-extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, struct progress *);
+extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
+ unsigned long mark_recent, struct progress *);
#endif
diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh
new file mode 100755
index 0000000000..de941c2cb2
--- /dev/null
+++ b/t/t6501-freshen-objects.sh
@@ -0,0 +1,88 @@
+#!/bin/sh
+#
+# This test covers the handling of objects which might have old
+# mtimes in the filesystem (because they were used previously)
+# and are just now becoming referenced again.
+#
+# We're going to do two things that are a little bit "fake" to
+# help make our simulation easier:
+#
+# 1. We'll turn off reflogs. You can still run into
+# problems with reflogs on, but your objects
+# don't get pruned until both the reflog expiration
+# has passed on their references, _and_ they are out
+# of prune's expiration period. Dropping reflogs
+# means we only have to deal with one variable in our tests,
+# but the results generalize.
+#
+# 2. We'll use a temporary index file to create our
+# works-in-progress. Most workflows would mention
+# referenced objects in the index, which prune takes
+# into account. However, many operations don't. For
+# example, a partial commit with "git commit foo"
+# will use a temporary index. Or they may not need
+# an index at all (e.g., creating a new commit
+# to refer to an existing tree).
+
+test_description='check pruning of dependent objects'
+. ./test-lib.sh
+
+# We care about reachability, so we do not want to use
+# the normal test_commit, which creates extra tags.
+add () { # usage: add <name>
+ echo "$1" >"$1" && # create a file named <name> whose content is <name>
+ git add "$1" # stage it
+}
+commit () { # usage: commit <name>
+ test_tick && # test_tick runs before the file is created and committed
+ add "$1" &&
+ git commit -m "$1" # <name> doubles as the commit message
+}
+
+test_expect_success 'disable reflogs' '
+ git config core.logallrefupdates false &&
+ rm -rf .git/logs
+'
+
+test_expect_success 'setup basic history' '
+ commit base
+'
+
+test_expect_success 'create and abandon some objects' '
+ git checkout -b experiment &&
+ commit abandon &&
+ git checkout master &&
+ git branch -D experiment
+'
+
+test_expect_success 'simulate time passing' '
+ find .git/objects -type f |
+ xargs test-chmtime -v -86400
+'
+
+test_expect_success 'start writing new commit with old blob' '
+ tree=$(
+ GIT_INDEX_FILE=index.tmp &&
+ export GIT_INDEX_FILE &&
+ git read-tree HEAD &&
+ add unrelated &&
+ add abandon &&
+ git write-tree
+ )
+'
+
+test_expect_success 'simultaneous gc' '
+ git gc --prune=12.hours.ago
+'
+
+test_expect_success 'finish writing out commit' '
+ commit=$(echo foo | git commit-tree -p HEAD $tree) &&
+ git update-ref HEAD $commit
+'
+
+# "abandon" blob should have been rescued by reference from new tree
+test_expect_success 'repository passes fsck' '
+ git fsck
+'
+
+test_done