diff options
author | Junio C Hamano <gitster@pobox.com> | 2007-09-26 00:42:12 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2007-09-26 00:42:12 -0700 |
commit | 61ab92df40b2c256299ad3d812ee0de14353098d (patch) | |
tree | cbec5ce711a9037b146480312ea95c60bf20a6f4 | |
parent | 5166810b1e16b22e342f2181a3535e70c6e7a119 (diff) | |
parent | 17815501a8f95c080891acd9537514adfe17c80e (diff) | |
download | git-61ab92df40b2c256299ad3d812ee0de14353098d.tar.gz |
Merge branch 'jc/autogc' into js/rebase-i
* jc/autogc:
git-gc --auto: run "repack -A -d -l" as necessary.
git-gc --auto: restructure the way "repack" command line is built.
git-gc --auto: protect ourselves from accumulated cruft
git-gc --auto: add documentation.
git-gc --auto: move threshold check to need_to_gc() function.
repack -A -d: use --keep-unreachable when repacking
pack-objects --keep-unreachable
Export matches_pack_name() and fix its return value
Invoke "git gc --auto" from commit, merge, am and rebase.
Implement git gc --auto
Conflicts:
builtin-pack-objects.c
-rw-r--r-- | Documentation/config.txt | 13 | ||||
-rw-r--r-- | Documentation/git-gc.txt | 16 | ||||
-rw-r--r-- | builtin-gc.c | 131 | ||||
-rw-r--r-- | builtin-pack-objects.c | 95 | ||||
-rw-r--r-- | cache.h | 1 | ||||
-rwxr-xr-x | git-am.sh | 2 | ||||
-rwxr-xr-x | git-commit.sh | 1 | ||||
-rwxr-xr-x | git-merge.sh | 1 | ||||
-rwxr-xr-x | git-rebase--interactive.sh | 2 | ||||
-rwxr-xr-x | git-repack.sh | 14 | ||||
-rw-r--r-- | sha1_file.c | 14 |
11 files changed, 276 insertions, 14 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index 015910f27a..2f04226988 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -439,6 +439,19 @@ gc.aggressiveWindow:: algorithm used by 'git gc --aggressive'. This defaults to 10. +gc.auto:: + When there are approximately more than this many loose + objects in the repository, `git gc --auto` will pack them. + Some Porcelain commands use this command to perform a + light-weight garbage collection from time to time. Setting + this to 0 disables it. + +gc.autopacklimit:: + When there are more than this many packs that are not + marked with `*.keep` file in the repository, `git gc + --auto` consolidates them into one larger pack. Setting + this to 0 disables this. + gc.packrefs:: `git gc` does not run `git pack-refs` in a bare repository by default so that older dumb-transport clients can still fetch diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index c7742ca963..b9d5660eac 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository SYNOPSIS -------- -'git-gc' [--prune] [--aggressive] +'git-gc' [--prune] [--aggressive] [--auto] DESCRIPTION ----------- @@ -43,6 +43,20 @@ OPTIONS persistent, so this option only needs to be used occasionally; every few hundred changesets or so. +--auto:: + With this option, `git gc` checks if there are too many + loose objects in the repository and runs + gitlink:git-repack[1] with `-d -l` option to pack them. + The threshold for loose objects is set with `gc.auto` configuration + variable, and can be disabled by setting it to 0. Some + Porcelain commands use this after they perform operation + that could create many loose objects automatically. + Additionally, when too many packs are present, + they are consolidated into one larger pack by running + the `git-repack` command with `-A` option. 
The + threshold for number of packs is set with + `gc.autopacklimit` configuration variable. + Configuration ------------- diff --git a/builtin-gc.c b/builtin-gc.c index 9397482610..23ad2b6a21 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -20,6 +20,8 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]"; static int pack_refs = 1; static int aggressive_window = -1; +static int gc_auto_threshold = 6700; +static int gc_auto_pack_limit = 20; #define MAX_ADD 10 static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL}; @@ -41,6 +43,14 @@ static int gc_config(const char *var, const char *value) aggressive_window = git_config_int(var, value); return 0; } + if (!strcmp(var, "gc.auto")) { + gc_auto_threshold = git_config_int(var, value); + return 0; + } + if (!strcmp(var, "gc.autopacklimit")) { + gc_auto_pack_limit = git_config_int(var, value); + return 0; + } return git_default_config(var, value); } @@ -57,10 +67,113 @@ static void append_option(const char **cmd, const char *opt, int max_length) cmd[i] = NULL; } +static int too_many_loose_objects(void) +{ + /* + * Quickly check if a "gc" is needed, by estimating how + * many loose objects there are. Because SHA-1 is evenly + * distributed, we can check only one and get a reasonable + * estimate. 
+ */ + char path[PATH_MAX]; + const char *objdir = get_object_directory(); + DIR *dir; + struct dirent *ent; + int auto_threshold; + int num_loose = 0; + int needed = 0; + + if (gc_auto_threshold <= 0) + return 0; + + if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) { + warning("insanely long object directory %.*s", 50, objdir); + return 0; + } + dir = opendir(path); + if (!dir) + return 0; + + auto_threshold = (gc_auto_threshold + 255) / 256; + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != 38 || + ent->d_name[38] != '\0') + continue; + if (++num_loose > auto_threshold) { + needed = 1; + break; + } + } + closedir(dir); + return needed; +} + +static int too_many_packs(void) +{ + struct packed_git *p; + int cnt; + + if (gc_auto_pack_limit <= 0) + return 0; + + prepare_packed_git(); + for (cnt = 0, p = packed_git; p; p = p->next) { + char path[PATH_MAX]; + size_t len; + int keep; + + if (!p->pack_local) + continue; + len = strlen(p->pack_name); + if (PATH_MAX <= len + 1) + continue; /* oops, give up */ + memcpy(path, p->pack_name, len-5); + memcpy(path + len - 5, ".keep", 6); + keep = access(p->pack_name, F_OK) && (errno == ENOENT); + if (keep) + continue; + /* + * Perhaps check the size of the pack and count only + * very small ones here? + */ + cnt++; + } + return gc_auto_pack_limit <= cnt; +} + +static int need_to_gc(void) +{ + int ac = 0; + + /* + * Setting gc.auto and gc.autopacklimit to 0 or negative can + * disable the automatic gc. + */ + if (gc_auto_threshold <= 0 && gc_auto_pack_limit <= 0) + return 0; + + /* + * If there are too many loose objects, but not too many + * packs, we run "repack -d -l". If there are too many packs, + * we run "repack -A -d -l". Otherwise we tell the caller + * there is no need. 
+ */ + argv_repack[ac++] = "repack"; + if (too_many_packs()) + argv_repack[ac++] = "-A"; + else if (!too_many_loose_objects()) + return 0; + argv_repack[ac++] = "-d"; + argv_repack[ac++] = "-l"; + argv_repack[ac++] = NULL; + return 1; +} + int cmd_gc(int argc, const char **argv, const char *prefix) { int i; int prune = 0; + int auto_gc = 0; char buf[80]; git_config(gc_config); @@ -82,12 +195,24 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } continue; } - /* perhaps other parameters later... */ + if (!strcmp(arg, "--auto")) { + auto_gc = 1; + continue; + } break; } if (i != argc) usage(builtin_gc_usage); + if (auto_gc) { + /* + * Auto-gc should be least intrusive as possible. + */ + prune = 0; + if (!need_to_gc()) + return 0; + } + if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD)) return error(FAILED_RUN, argv_pack_refs[0]); @@ -103,5 +228,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (run_command_v_opt(argv_rerere, RUN_GIT_CMD)) return error(FAILED_RUN, argv_rerere[0]); + if (auto_gc && too_many_loose_objects()) + warning("There are too many unreachable loose objects; " + "run 'git prune' to remove them."); + return 0; } diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index a15906bdb2..0be539ed7f 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -25,7 +25,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\ [--window=N] [--window-memory=N] [--depth=N] \n\ [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\ [--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\ - [--stdout | base-name] [<ref-list | <object-list]"; + [--stdout | base-name] [--keep-unreachable] [<ref-list | <object-list]"; struct object_entry { struct pack_idx_entry idx; @@ -61,7 +61,7 @@ static struct object_entry **written_list; static uint32_t nr_objects, nr_alloc, nr_result, nr_written; static int non_empty; -static int no_reuse_delta, no_reuse_object; +static int 
no_reuse_delta, no_reuse_object, keep_unreachable; static int local; static int incremental; static int allow_ofs_delta; @@ -1807,15 +1807,19 @@ static void read_object_list_from_stdin(void) } } +#define OBJECT_ADDED (1u<<20) + static void show_commit(struct commit *commit) { add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0); + commit->object.flags |= OBJECT_ADDED; } static void show_object(struct object_array_entry *p) { add_preferred_base_object(p->name); add_object_entry(p->item->sha1, p->item->type, p->name, 0); + p->item->flags |= OBJECT_ADDED; } static void show_edge(struct commit *commit) @@ -1823,6 +1827,86 @@ static void show_edge(struct commit *commit) add_preferred_base(commit->object.sha1); } +struct in_pack_object { + off_t offset; + struct object *object; +}; + +struct in_pack { + int alloc; + int nr; + struct in_pack_object *array; +}; + +static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack) +{ + in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->sha1, p); + in_pack->array[in_pack->nr].object = object; + in_pack->nr++; +} + +/* + * Compare the objects in the offset order, in order to emulate the + * "git-rev-list --objects" output that produced the pack originally. 
+ */ +static int ofscmp(const void *a_, const void *b_) +{ + struct in_pack_object *a = (struct in_pack_object *)a_; + struct in_pack_object *b = (struct in_pack_object *)b_; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + else + return hashcmp(a->object->sha1, b->object->sha1); +} + +static void add_objects_in_unpacked_packs(struct rev_info *revs) +{ + struct packed_git *p; + struct in_pack in_pack; + uint32_t i; + + memset(&in_pack, 0, sizeof(in_pack)); + + for (p = packed_git; p; p = p->next) { + const unsigned char *sha1; + struct object *o; + + for (i = 0; i < revs->num_ignore_packed; i++) { + if (matches_pack_name(p, revs->ignore_packed[i])) + break; + } + if (revs->num_ignore_packed <= i) + continue; + if (open_pack_index(p)) + die("cannot open pack index"); + + ALLOC_GROW(in_pack.array, + in_pack.nr + p->num_objects, + in_pack.alloc); + + for (i = 0; i < p->num_objects; i++) { + sha1 = nth_packed_object_sha1(p, i); + o = lookup_unknown_object(sha1); + if (!(o->flags & OBJECT_ADDED)) + mark_in_pack_object(o, p, &in_pack); + o->flags |= OBJECT_ADDED; + } + } + + if (in_pack.nr) { + qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]), + ofscmp); + for (i = 0; i < in_pack.nr; i++) { + struct object *o = in_pack.array[i].object; + add_object_entry(o->sha1, o->type, "", 0); + } + } + free(in_pack.array); +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -1854,6 +1938,9 @@ static void get_object_list(int ac, const char **av) prepare_revision_walk(&revs); mark_edges_uninteresting(revs.commits, &revs, show_edge); traverse_commit_list(&revs, show_commit, show_object); + + if (keep_unreachable) + add_objects_in_unpacked_packs(&revs); } static int adjust_perm(const char *path, mode_t mode) @@ -1983,6 +2070,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) use_internal_rev_list = 1; continue; } + if (!strcmp("--keep-unreachable", arg)) { + keep_unreachable = 1; 
+ continue; + } if (!strcmp("--unpacked", arg) || !prefixcmp(arg, "--unpacked=") || !strcmp("--reflog", arg) || @@ -530,6 +530,7 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +extern int matches_pack_name(struct packed_git *p, const char *name); /* Dumb servers support */ extern int update_server_info(int); @@ -464,6 +464,8 @@ do "$GIT_DIR"/hooks/post-applypatch fi + git gc --auto + go_next done diff --git a/git-commit.sh b/git-commit.sh index cb14f06216..44ccc4418e 100755 --- a/git-commit.sh +++ b/git-commit.sh @@ -611,6 +611,7 @@ git rerere if test "$ret" = 0 then + git gc --auto if test -x "$GIT_DIR"/hooks/post-commit then "$GIT_DIR"/hooks/post-commit diff --git a/git-merge.sh b/git-merge.sh index 6c513dcbdf..bf18f582da 100755 --- a/git-merge.sh +++ b/git-merge.sh @@ -82,6 +82,7 @@ finish () { ;; *) git update-ref -m "$rlogm" HEAD "$1" "$head" || exit 1 + git gc --auto ;; esac ;; diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh index 268a629c43..8e6e9431e8 100755 --- a/git-rebase--interactive.sh +++ b/git-rebase--interactive.sh @@ -326,6 +326,8 @@ do_next () { rm -rf "$DOTEST" && warn "Successfully rebased and updated $HEADNAME." 
+ git gc --auto + exit } diff --git a/git-repack.sh b/git-repack.sh index 0aae1a3ed5..e72adc4d91 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -3,17 +3,19 @@ # Copyright (c) 2005 Linus Torvalds # -USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]' +USAGE='[-a|-A] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]' SUBDIRECTORY_OK='Yes' . git-sh-setup -no_update_info= all_into_one= remove_redundant= +no_update_info= all_into_one= remove_redundant= keep_unreachable= local= quiet= no_reuse= extra= while test $# != 0 do case "$1" in -n) no_update_info=t ;; -a) all_into_one=t ;; + -A) all_into_one=t + keep_unreachable=--keep-unreachable ;; -d) remove_redundant=t ;; -q) quiet=-q ;; -f) no_reuse=--no-reuse-object ;; @@ -59,7 +61,13 @@ case ",$all_into_one," in fi done fi - [ -z "$args" ] && args='--unpacked --incremental' + if test -z "$args" + then + args='--unpacked --incremental' + elif test -n "$keep_unreachable" + then + args="$args $keep_unreachable" + fi ;; esac diff --git a/sha1_file.c b/sha1_file.c index 9978a58da6..5801c3e71b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1684,22 +1684,22 @@ off_t find_pack_entry_one(const unsigned char *sha1, return 0; } -static int matches_pack_name(struct packed_git *p, const char *ig) +int matches_pack_name(struct packed_git *p, const char *name) { const char *last_c, *c; - if (!strcmp(p->pack_name, ig)) - return 0; + if (!strcmp(p->pack_name, name)) + return 1; for (c = p->pack_name, last_c = c; *c;) if (*c == '/') last_c = ++c; else ++c; - if (!strcmp(last_c, ig)) - return 0; + if (!strcmp(last_c, name)) + return 1; - return 1; + return 0; } static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) @@ -1717,7 +1717,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons if (ignore_packed) { const char **ig; for (ig = ignore_packed; *ig; ig++) - if 
(!matches_pack_name(p, *ig)) + if (matches_pack_name(p, *ig)) break; if (*ig) goto next; |