summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2007-09-26 00:42:12 -0700
committerJunio C Hamano <gitster@pobox.com>2007-09-26 00:42:12 -0700
commit61ab92df40b2c256299ad3d812ee0de14353098d (patch)
treecbec5ce711a9037b146480312ea95c60bf20a6f4
parent5166810b1e16b22e342f2181a3535e70c6e7a119 (diff)
parent17815501a8f95c080891acd9537514adfe17c80e (diff)
downloadgit-61ab92df40b2c256299ad3d812ee0de14353098d.tar.gz
Merge branch 'jc/autogc' into js/rebase-i
* jc/autogc: git-gc --auto: run "repack -A -d -l" as necessary. git-gc --auto: restructure the way "repack" command line is built. git-gc --auto: protect ourselves from accumulated cruft git-gc --auto: add documentation. git-gc --auto: move threshold check to need_to_gc() function. repack -A -d: use --keep-unreachable when repacking pack-objects --keep-unreachable Export matches_pack_name() and fix its return value Invoke "git gc --auto" from commit, merge, am and rebase. Implement git gc --auto Conflicts: builtin-pack-objects.c
-rw-r--r--Documentation/config.txt13
-rw-r--r--Documentation/git-gc.txt16
-rw-r--r--builtin-gc.c131
-rw-r--r--builtin-pack-objects.c95
-rw-r--r--cache.h1
-rwxr-xr-xgit-am.sh2
-rwxr-xr-xgit-commit.sh1
-rwxr-xr-xgit-merge.sh1
-rwxr-xr-xgit-rebase--interactive.sh2
-rwxr-xr-xgit-repack.sh14
-rw-r--r--sha1_file.c14
11 files changed, 276 insertions, 14 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 015910f27a..2f04226988 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -439,6 +439,19 @@ gc.aggressiveWindow::
algorithm used by 'git gc --aggressive'. This defaults
to 10.
+gc.auto::
+ When there are approximately more than this many loose
+ objects in the repository, `git gc --auto` will pack them.
+ Some Porcelain commands use this command to perform a
+ light-weight garbage collection from time to time. Setting
+ this to 0 disables it.
+
+gc.autopacklimit::
+ When there are more than this many packs that are not
+ marked with `*.keep` file in the repository, `git gc
+ --auto` consolidates them into one larger pack. Setting
+ this to 0 disables this.
+
gc.packrefs::
`git gc` does not run `git pack-refs` in a bare repository by
default so that older dumb-transport clients can still fetch
diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt
index c7742ca963..b9d5660eac 100644
--- a/Documentation/git-gc.txt
+++ b/Documentation/git-gc.txt
@@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository
SYNOPSIS
--------
-'git-gc' [--prune] [--aggressive]
+'git-gc' [--prune] [--aggressive] [--auto]
DESCRIPTION
-----------
@@ -43,6 +43,20 @@ OPTIONS
persistent, so this option only needs to be used occasionally; every
few hundred changesets or so.
+--auto::
+ With this option, `git gc` checks if there are too many
+ loose objects in the repository and runs
+ gitlink:git-repack[1] with the `-d -l` options to pack them.
+ The threshold for loose objects is set with `gc.auto` configuration
+ variable, and can be disabled by setting it to 0. Some
+ Porcelain commands use this automatically after they perform
+ operations that could create many loose objects.
+ Additionally, when too many packs are present,
+ they are consolidated into one larger pack by running
+ the `git-repack` command with `-A` option. The
+ threshold for number of packs is set with
+ `gc.autopacklimit` configuration variable.
+
Configuration
-------------
diff --git a/builtin-gc.c b/builtin-gc.c
index 9397482610..23ad2b6a21 100644
--- a/builtin-gc.c
+++ b/builtin-gc.c
@@ -20,6 +20,8 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]";
static int pack_refs = 1;
static int aggressive_window = -1;
+static int gc_auto_threshold = 6700;
+static int gc_auto_pack_limit = 20;
#define MAX_ADD 10
static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL};
@@ -41,6 +43,14 @@ static int gc_config(const char *var, const char *value)
aggressive_window = git_config_int(var, value);
return 0;
}
+ if (!strcmp(var, "gc.auto")) {
+ gc_auto_threshold = git_config_int(var, value);
+ return 0;
+ }
+ if (!strcmp(var, "gc.autopacklimit")) {
+ gc_auto_pack_limit = git_config_int(var, value);
+ return 0;
+ }
return git_default_config(var, value);
}
@@ -57,10 +67,113 @@ static void append_option(const char **cmd, const char *opt, int max_length)
cmd[i] = NULL;
}
+/*
+ * Cheap estimate of whether the repository holds more loose objects
+ * than gc_auto_threshold.
+ *
+ * Only the "17" fan-out directory under the object directory is
+ * scanned; since SHA-1 names are evenly distributed, one of the 256
+ * buckets is taken as representative, and the threshold is scaled
+ * down accordingly (rounded up).
+ *
+ * Returns 1 when the sampled directory holds more entries than the
+ * scaled threshold, 0 otherwise -- including when the check is
+ * disabled (threshold <= 0), the path does not fit in PATH_MAX, or
+ * the directory cannot be opened.
+ */
+static int too_many_loose_objects(void)
+{
+	const char *objdir = get_object_directory();
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	int per_dir_limit;
+	int seen = 0;
+
+	if (gc_auto_threshold <= 0)
+		return 0;
+
+	if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) {
+		warning("insanely long object directory %.*s", 50, objdir);
+		return 0;
+	}
+	dir = opendir(path);
+	if (!dir)
+		return 0;
+
+	per_dir_limit = (gc_auto_threshold + 255) / 256;
+	for (;;) {
+		ent = readdir(dir);
+		if (!ent)
+			break;
+		/* count only names made of exactly 38 lowercase hex digits */
+		if (strspn(ent->d_name, "0123456789abcdef") == 38 &&
+		    ent->d_name[38] == '\0') {
+			/* stop as soon as the limit is exceeded */
+			if (++seen > per_dir_limit)
+				break;
+		}
+	}
+	closedir(dir);
+	return seen > per_dir_limit;
+}
+
+/*
+ * Decide whether the number of local packs calls for consolidation.
+ *
+ * Walks the packed_git list and counts packs that are local and have
+ * no companion ".keep" file, then reports whether that count is
+ * greater than gc_auto_pack_limit (the gc.autopacklimit setting,
+ * documented as "more than this many packs").  A limit of zero or
+ * less disables the check.
+ *
+ * Returns 1 when there are too many packs, 0 otherwise.
+ */
+static int too_many_packs(void)
+{
+	struct packed_git *p;
+	int cnt;
+
+	if (gc_auto_pack_limit <= 0)
+		return 0;
+
+	prepare_packed_git();
+	for (cnt = 0, p = packed_git; p; p = p->next) {
+		char path[PATH_MAX];
+		size_t len;
+
+		if (!p->pack_local)
+			continue;
+		len = strlen(p->pack_name);
+		/*
+		 * The last five bytes of pack_name (expected to be
+		 * ".pack") are swapped for ".keep"; a shorter name would
+		 * make "len - 5" wrap around, a longer one would not fit.
+		 */
+		if (len < 5 || PATH_MAX <= len + 1)
+			continue; /* oops, give up */
+		memcpy(path, p->pack_name, len - 5);
+		memcpy(path + len - 5, ".keep", 6);
+		/*
+		 * Probe the ".keep" file itself, not the pack: a pack
+		 * whose ".keep" file exists is left out of the count.
+		 */
+		if (!access(path, F_OK))
+			continue;
+		/*
+		 * Perhaps check the size of the pack and count only
+		 * very small ones here?
+		 */
+		cnt++;
+	}
+	/* strictly "more than" the limit, matching the documentation */
+	return gc_auto_pack_limit < cnt;
+}
+
+/*
+ * Decide whether "gc --auto" has work to do and, if so, build the
+ * repack command line in argv_repack.
+ *
+ *   - too many packs          -> "repack -A -d -l"
+ *   - too many loose objects  -> "repack -d -l"
+ *   - neither                 -> nothing to do
+ *
+ * Returns 1 when a repack should be run, 0 otherwise.
+ */
+static int need_to_gc(void)
+{
+	int consolidate;
+	int n = 0;
+
+	/*
+	 * With both gc.auto and gc.autopacklimit at 0 or below,
+	 * automatic gc is switched off entirely.
+	 */
+	if (gc_auto_threshold <= 0 && gc_auto_pack_limit <= 0)
+		return 0;
+
+	argv_repack[n++] = "repack";
+
+	/*
+	 * The pack count is looked at first; loose objects are only
+	 * examined when the pack count is acceptable.
+	 */
+	consolidate = too_many_packs();
+	if (!consolidate && !too_many_loose_objects())
+		return 0;
+
+	if (consolidate)
+		argv_repack[n++] = "-A";
+	argv_repack[n++] = "-d";
+	argv_repack[n++] = "-l";
+	argv_repack[n++] = NULL;
+	return 1;
+}
+
int cmd_gc(int argc, const char **argv, const char *prefix)
{
int i;
int prune = 0;
+ int auto_gc = 0;
char buf[80];
git_config(gc_config);
@@ -82,12 +195,24 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
}
continue;
}
- /* perhaps other parameters later... */
+ if (!strcmp(arg, "--auto")) {
+ auto_gc = 1;
+ continue;
+ }
break;
}
if (i != argc)
usage(builtin_gc_usage);
+ if (auto_gc) {
+ /*
+ * Auto-gc should be as unintrusive as possible.
+ */
+ prune = 0;
+ if (!need_to_gc())
+ return 0;
+ }
+
if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD))
return error(FAILED_RUN, argv_pack_refs[0]);
@@ -103,5 +228,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
if (run_command_v_opt(argv_rerere, RUN_GIT_CMD))
return error(FAILED_RUN, argv_rerere[0]);
+ if (auto_gc && too_many_loose_objects())
+ warning("There are too many unreachable loose objects; "
+ "run 'git prune' to remove them.");
+
return 0;
}
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index a15906bdb2..0be539ed7f 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -25,7 +25,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
[--window=N] [--window-memory=N] [--depth=N] \n\
[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
[--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
- [--stdout | base-name] [<ref-list | <object-list]";
+ [--stdout | base-name] [--keep-unreachable] [<ref-list | <object-list]";
struct object_entry {
struct pack_idx_entry idx;
@@ -61,7 +61,7 @@ static struct object_entry **written_list;
static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
static int non_empty;
-static int no_reuse_delta, no_reuse_object;
+static int no_reuse_delta, no_reuse_object, keep_unreachable;
static int local;
static int incremental;
static int allow_ofs_delta;
@@ -1807,15 +1807,19 @@ static void read_object_list_from_stdin(void)
}
}
+#define OBJECT_ADDED (1u<<20)
+
+/*
+ * Commit callback given to traverse_commit_list(): hands the commit to
+ * add_object_entry() as OBJ_COMMIT and flags it OBJECT_ADDED, the bit
+ * add_objects_in_unpacked_packs() tests before recording an object.
+ */
static void show_commit(struct commit *commit)
{
add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
+ commit->object.flags |= OBJECT_ADDED;
}
+/*
+ * Object callback given to traverse_commit_list(): passes the entry's
+ * name to add_preferred_base_object(), hands the object to
+ * add_object_entry() with its own type and name, and flags it
+ * OBJECT_ADDED.
+ */
static void show_object(struct object_array_entry *p)
{
add_preferred_base_object(p->name);
add_object_entry(p->item->sha1, p->item->type, p->name, 0);
+ p->item->flags |= OBJECT_ADDED;
}
static void show_edge(struct commit *commit)
@@ -1823,6 +1827,86 @@ static void show_edge(struct commit *commit)
add_preferred_base(commit->object.sha1);
}
+/*
+ * One object recorded by mark_in_pack_object(): the object itself and
+ * the value find_pack_entry_one() returned for it in the pack being
+ * scanned.  ofscmp() orders these by offset.
+ */
+struct in_pack_object {
+ off_t offset;
+ struct object *object;
+};
+
+/* Growable list of in_pack_object entries, sized with ALLOC_GROW(). */
+struct in_pack {
+ int alloc; /* allocation counter handed to ALLOC_GROW() */
+ int nr; /* number of entries filled in so far */
+ struct in_pack_object *array;
+};
+
+/*
+ * Record OBJECT in the next free slot of IN_PACK together with the
+ * offset find_pack_entry_one() reports for it in pack P.  The array is
+ * not grown here; the caller must already have reserved the slot.
+ */
+static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
+{
+	struct in_pack_object *slot = &in_pack->array[in_pack->nr];
+
+	slot->object = object;
+	slot->offset = find_pack_entry_one(object->sha1, p);
+	in_pack->nr++;
+}
+
+/*
+ * qsort() comparator for struct in_pack_object.
+ *
+ * Orders entries by ascending pack offset, in order to emulate the
+ * "git-rev-list --objects" output that produced the pack originally.
+ * Entries with equal offsets are ordered by hashcmp() on their object
+ * names.
+ *
+ * Returns a negative, zero or positive value as qsort() expects.
+ */
+static int ofscmp(const void *a_, const void *b_)
+{
+	/* keep the const qualifier; no cast is needed from "const void *" */
+	const struct in_pack_object *a = a_;
+	const struct in_pack_object *b = b_;
+
+	if (a->offset < b->offset)
+		return -1;
+	if (a->offset > b->offset)
+		return 1;
+	return hashcmp(a->object->sha1, b->object->sha1);
+}
+
+/*
+ * Add to the pack being built every object that lives in one of the
+ * packs named in revs->ignore_packed and has not been flagged
+ * OBJECT_ADDED yet.
+ *
+ * Objects are collected first, sorted with ofscmp(), and only then
+ * passed to add_object_entry() with an empty name.  Dies if a pack
+ * index cannot be opened.
+ *
+ * NOTE(review): ignore_packed presumably holds the packs given via
+ * "--unpacked=<pack>" -- confirm against the revision option parser.
+ */
+static void add_objects_in_unpacked_packs(struct rev_info *revs)
+{
+ struct packed_git *p;
+ struct in_pack in_pack;
+ uint32_t i;
+
+ memset(&in_pack, 0, sizeof(in_pack));
+
+ for (p = packed_git; p; p = p->next) {
+ const unsigned char *sha1;
+ struct object *o;
+
+ /* look for this pack among the names in revs->ignore_packed */
+ for (i = 0; i < revs->num_ignore_packed; i++) {
+ if (matches_pack_name(p, revs->ignore_packed[i]))
+ break;
+ }
+ /* the loop ran to the end: no name matched, so skip this pack */
+ if (revs->num_ignore_packed <= i)
+ continue;
+ if (open_pack_index(p))
+ die("cannot open pack index");
+
+ /* reserve room for every object of this pack up front */
+ ALLOC_GROW(in_pack.array,
+ in_pack.nr + p->num_objects,
+ in_pack.alloc);
+
+ /* i is reused here; the scan of ignore_packed above is finished */
+ for (i = 0; i < p->num_objects; i++) {
+ sha1 = nth_packed_object_sha1(p, i);
+ o = lookup_unknown_object(sha1);
+ if (!(o->flags & OBJECT_ADDED))
+ mark_in_pack_object(o, p, &in_pack);
+ /* flag it so the same object is not recorded again */
+ o->flags |= OBJECT_ADDED;
+ }
+ }
+
+ if (in_pack.nr) {
+ qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
+ ofscmp);
+ for (i = 0; i < in_pack.nr; i++) {
+ struct object *o = in_pack.array[i].object;
+ add_object_entry(o->sha1, o->type, "", 0);
+ }
+ }
+ free(in_pack.array);
+}
+
static void get_object_list(int ac, const char **av)
{
struct rev_info revs;
@@ -1854,6 +1938,9 @@ static void get_object_list(int ac, const char **av)
prepare_revision_walk(&revs);
mark_edges_uninteresting(revs.commits, &revs, show_edge);
traverse_commit_list(&revs, show_commit, show_object);
+
+ if (keep_unreachable)
+ add_objects_in_unpacked_packs(&revs);
}
static int adjust_perm(const char *path, mode_t mode)
@@ -1983,6 +2070,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
use_internal_rev_list = 1;
continue;
}
+ if (!strcmp("--keep-unreachable", arg)) {
+ keep_unreachable = 1;
+ continue;
+ }
if (!strcmp("--unpacked", arg) ||
!prefixcmp(arg, "--unpacked=") ||
!strcmp("--reflog", arg) ||
diff --git a/cache.h b/cache.h
index 8246500166..bb86fcce04 100644
--- a/cache.h
+++ b/cache.h
@@ -530,6 +530,7 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
+extern int matches_pack_name(struct packed_git *p, const char *name);
/* Dumb servers support */
extern int update_server_info(int);
diff --git a/git-am.sh b/git-am.sh
index 32c46d7ed4..b02ae6a76f 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -464,6 +464,8 @@ do
"$GIT_DIR"/hooks/post-applypatch
fi
+ git gc --auto
+
go_next
done
diff --git a/git-commit.sh b/git-commit.sh
index cb14f06216..44ccc4418e 100755
--- a/git-commit.sh
+++ b/git-commit.sh
@@ -611,6 +611,7 @@ git rerere
if test "$ret" = 0
then
+ git gc --auto
if test -x "$GIT_DIR"/hooks/post-commit
then
"$GIT_DIR"/hooks/post-commit
diff --git a/git-merge.sh b/git-merge.sh
index 6c513dcbdf..bf18f582da 100755
--- a/git-merge.sh
+++ b/git-merge.sh
@@ -82,6 +82,7 @@ finish () {
;;
*)
git update-ref -m "$rlogm" HEAD "$1" "$head" || exit 1
+ git gc --auto
;;
esac
;;
diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh
index 268a629c43..8e6e9431e8 100755
--- a/git-rebase--interactive.sh
+++ b/git-rebase--interactive.sh
@@ -326,6 +326,8 @@ do_next () {
rm -rf "$DOTEST" &&
warn "Successfully rebased and updated $HEADNAME."
+ git gc --auto
+
exit
}
diff --git a/git-repack.sh b/git-repack.sh
index 0aae1a3ed5..e72adc4d91 100755
--- a/git-repack.sh
+++ b/git-repack.sh
@@ -3,17 +3,19 @@
# Copyright (c) 2005 Linus Torvalds
#
-USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]'
+USAGE='[-a|-A] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]'
SUBDIRECTORY_OK='Yes'
. git-sh-setup
-no_update_info= all_into_one= remove_redundant=
+no_update_info= all_into_one= remove_redundant= keep_unreachable=
local= quiet= no_reuse= extra=
while test $# != 0
do
case "$1" in
-n) no_update_info=t ;;
-a) all_into_one=t ;;
+ -A) all_into_one=t
+ keep_unreachable=--keep-unreachable ;;
-d) remove_redundant=t ;;
-q) quiet=-q ;;
-f) no_reuse=--no-reuse-object ;;
@@ -59,7 +61,13 @@ case ",$all_into_one," in
fi
done
fi
- [ -z "$args" ] && args='--unpacked --incremental'
+ if test -z "$args"
+ then
+ args='--unpacked --incremental'
+ elif test -n "$keep_unreachable"
+ then
+ args="$args $keep_unreachable"
+ fi
;;
esac
diff --git a/sha1_file.c b/sha1_file.c
index 9978a58da6..5801c3e71b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1684,22 +1684,22 @@ off_t find_pack_entry_one(const unsigned char *sha1,
return 0;
}
-static int matches_pack_name(struct packed_git *p, const char *ig)
+int matches_pack_name(struct packed_git *p, const char *name)
{
const char *last_c, *c;
- if (!strcmp(p->pack_name, ig))
- return 0;
+ if (!strcmp(p->pack_name, name))
+ return 1;
for (c = p->pack_name, last_c = c; *c;)
if (*c == '/')
last_c = ++c;
else
++c;
- if (!strcmp(last_c, ig))
- return 0;
+ if (!strcmp(last_c, name))
+ return 1;
- return 1;
+ return 0;
}
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed)
@@ -1717,7 +1717,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons
if (ignore_packed) {
const char **ig;
for (ig = ignore_packed; *ig; ig++)
- if (!matches_pack_name(p, *ig))
+ if (matches_pack_name(p, *ig))
break;
if (*ig)
goto next;