From 9e5972413b4873dc143c4046c6e74eb608ace32b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:42 +0700 Subject: update-index: manually enable or disable untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Overall time saving on "git status" is about 40% in the best case scenario, removing ..collect_untracked() as the most time consuming function. read and refresh index operations are now at the top (which should drop when index-helper and/or watchman support is added). More numbers and analysis below. webkit.git ========== 169k files. 6k dirs. Lots of test data (i.e. not touched most of the time) Base status ----------- Index version 4 in split index mode and cache-tree populated. No untracked cache. It shows how time is consumed by "git status". The same settings are used for other repos below. 18:28:10.199679 builtin/commit.c:1394 performance: 0.000000451 s: cmd_status:setup 18:28:10.474847 read-cache.c:1407 performance: 0.274873831 s: read_index 18:28:10.475295 read-cache.c:1407 performance: 0.000000656 s: read_index 18:28:10.728443 preload-index.c:131 performance: 0.253147487 s: read_index_preload 18:28:10.741422 read-cache.c:1254 performance: 0.012868340 s: refresh_index 18:28:10.752300 wt-status.c:623 performance: 0.010421357 s: wt_status_collect_changes_worktree 18:28:10.762069 wt-status.c:629 performance: 0.009644748 s: wt_status_collect_changes_index 18:28:11.601019 wt-status.c:632 performance: 0.838859547 s: wt_status_collect_untracked 18:28:11.605939 builtin/commit.c:1421 performance: 0.004835004 s: cmd_status:update_index 18:28:11.606580 trace.c:415 performance: 1.407878388 s: git command: 'git' 'status' Populating status ----------------- This is after enabling untracked cache and the cache is still empty. We see a slight increase in .._collect_untracked() and update_index (because new cache has to be written to $GIT_DIR/index). 18:28:18.915213 builtin/commit.c:1394 performance: 0.000000326 s: cmd_status:setup 18:28:19.197364 read-cache.c:1407 performance: 0.281901416 s: read_index 18:28:19.197754 read-cache.c:1407 performance: 0.000000546 s: read_index 18:28:19.451355 preload-index.c:131 performance: 0.253599607 s: read_index_preload 18:28:19.464400 read-cache.c:1254 performance: 0.012935336 s: refresh_index 18:28:19.475115 wt-status.c:623 performance: 0.010236920 s: wt_status_collect_changes_worktree 18:28:19.486022 wt-status.c:629 performance: 0.010801685 s: wt_status_collect_changes_index 18:28:20.362660 wt-status.c:632 performance: 0.876551366 s: wt_status_collect_untracked 18:28:20.396199 builtin/commit.c:1421 performance: 0.033447969 s: cmd_status:update_index 18:28:20.396939 trace.c:415 performance: 1.482695902 s: git command: 'git' 'status' Populated status ---------------- After the cache is populated, wt_status_collect_untracked() drops 82% from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now read_index() and read_index_preload(). 18:28:20.408605 builtin/commit.c:1394 performance: 0.000000457 s: cmd_status:setup 18:28:20.692864 read-cache.c:1407 performance: 0.283980458 s: read_index 18:28:20.693273 read-cache.c:1407 performance: 0.000000661 s: read_index 18:28:20.958814 preload-index.c:131 performance: 0.265540254 s: read_index_preload 18:28:20.972375 read-cache.c:1254 performance: 0.013437429 s: refresh_index 18:28:20.983959 wt-status.c:623 performance: 0.011146646 s: wt_status_collect_changes_worktree 18:28:20.993948 wt-status.c:629 performance: 0.009879094 s: wt_status_collect_changes_index 18:28:21.138125 wt-status.c:632 performance: 0.144084737 s: wt_status_collect_untracked 18:28:21.173678 builtin/commit.c:1421 performance: 0.035463949 s: cmd_status:update_index 18:28:21.174251 trace.c:415 performance: 0.766707355 s: git command: 'git' 'status' gentoo-x86.git ============== This repository is a strange one with a balanced, wide and shallow worktree (about 100k files and 23k dirs) and no .gitignore in worktree. .._collect_untracked() time drops 88%, total time drops 56%. Base status ----------- 18:20:40.828642 builtin/commit.c:1394 performance: 0.000000496 s: cmd_status:setup 18:20:41.027233 read-cache.c:1407 performance: 0.198130532 s: read_index 18:20:41.027670 read-cache.c:1407 performance: 0.000000581 s: read_index 18:20:41.171716 preload-index.c:131 performance: 0.144045594 s: read_index_preload 18:20:41.179171 read-cache.c:1254 performance: 0.007320424 s: refresh_index 18:20:41.185785 wt-status.c:623 performance: 0.006144638 s: wt_status_collect_changes_worktree 18:20:41.192701 wt-status.c:629 performance: 0.006780184 s: wt_status_collect_changes_index 18:20:41.991723 wt-status.c:632 performance: 0.798927029 s: wt_status_collect_untracked 18:20:41.994664 builtin/commit.c:1421 performance: 0.002852772 s: cmd_status:update_index 18:20:41.995458 trace.c:415 performance: 1.168427502 s: git command: 'git' 'status' Populating status ----------------- 18:20:48.968848 builtin/commit.c:1394 performance: 0.000000380 s: cmd_status:setup 18:20:49.172918 read-cache.c:1407 performance: 0.203734214 s: read_index 18:20:49.173341 read-cache.c:1407 performance: 0.000000562 s: read_index 18:20:49.320013 preload-index.c:131 performance: 0.146671391 s: read_index_preload 18:20:49.328039 read-cache.c:1254 performance: 0.007921957 s: refresh_index 18:20:49.334680 wt-status.c:623 performance: 0.006172020 s: wt_status_collect_changes_worktree 18:20:49.342526 wt-status.c:629 performance: 0.007731746 s: wt_status_collect_changes_index 18:20:50.257510 wt-status.c:632 performance: 0.914864222 s: wt_status_collect_untracked 18:20:50.338371 builtin/commit.c:1421 performance: 0.080776477 s: cmd_status:update_index 18:20:50.338900 trace.c:415 performance: 1.371462446 s: git command: 'git' 'status' Populated status ---------------- 18:20:50.351160 builtin/commit.c:1394 performance: 0.000000571 s: cmd_status:setup 18:20:50.577358 read-cache.c:1407 performance: 0.225917338 s: read_index 18:20:50.577794 read-cache.c:1407 performance: 0.000000617 s: read_index 18:20:50.734140 preload-index.c:131 performance: 0.156345564 s: read_index_preload 18:20:50.745717 read-cache.c:1254 performance: 0.011463075 s: refresh_index 18:20:50.755176 wt-status.c:623 performance: 0.008877929 s: wt_status_collect_changes_worktree 18:20:50.763768 wt-status.c:629 performance: 0.008471633 s: wt_status_collect_changes_index 18:20:50.854885 wt-status.c:632 performance: 0.090988721 s: wt_status_collect_untracked 18:20:50.857765 builtin/commit.c:1421 performance: 0.002789097 s: cmd_status:update_index 18:20:50.858411 trace.c:415 performance: 0.508647673 s: git command: 'git' 'status' linux-2.6 ========= Reference repo. Not too big. .._collect_status() drops 84%. Total time drops 42%. Base status ----------- 18:34:09.870122 builtin/commit.c:1394 performance: 0.000000385 s: cmd_status:setup 18:34:09.943218 read-cache.c:1407 performance: 0.072871177 s: read_index 18:34:09.943614 read-cache.c:1407 performance: 0.000000491 s: read_index 18:34:10.004364 preload-index.c:131 performance: 0.060748102 s: read_index_preload 18:34:10.008190 read-cache.c:1254 performance: 0.003714285 s: refresh_index 18:34:10.012087 wt-status.c:623 performance: 0.002775446 s: wt_status_collect_changes_worktree 18:34:10.016054 wt-status.c:629 performance: 0.003862140 s: wt_status_collect_changes_index 18:34:10.214747 wt-status.c:632 performance: 0.198604837 s: wt_status_collect_untracked 18:34:10.216102 builtin/commit.c:1421 performance: 0.001244166 s: cmd_status:update_index 18:34:10.216817 trace.c:415 performance: 0.347670735 s: git command: 'git' 'status' Populating status ----------------- 18:34:16.595102 builtin/commit.c:1394 performance: 0.000000456 s: cmd_status:setup 18:34:16.666600 read-cache.c:1407 performance: 0.070992413 s: read_index 18:34:16.667012 read-cache.c:1407 performance: 0.000000606 s: read_index 18:34:16.729375 preload-index.c:131 performance: 0.062362492 s: read_index_preload 18:34:16.732565 read-cache.c:1254 performance: 0.003075517 s: refresh_index 18:34:16.736148 wt-status.c:623 performance: 0.002422201 s: wt_status_collect_changes_worktree 18:34:16.739990 wt-status.c:629 performance: 0.003746618 s: wt_status_collect_changes_index 18:34:16.948505 wt-status.c:632 performance: 0.208426710 s: wt_status_collect_untracked 18:34:16.961744 builtin/commit.c:1421 performance: 0.013151887 s: cmd_status:update_index 18:34:16.962233 trace.c:415 performance: 0.368537535 s: git command: 'git' 'status' Populated status ---------------- 18:34:16.970026 builtin/commit.c:1394 performance: 0.000000631 s: cmd_status:setup 18:34:17.046235 read-cache.c:1407 performance: 0.075904673 s: read_index 18:34:17.046644 read-cache.c:1407 performance: 0.000000681 s: read_index 18:34:17.113564 preload-index.c:131 performance: 0.066920253 s: read_index_preload 18:34:17.117281 read-cache.c:1254 performance: 0.003604055 s: refresh_index 18:34:17.121115 wt-status.c:623 performance: 0.002508345 s: wt_status_collect_changes_worktree 18:34:17.125089 wt-status.c:629 performance: 0.003871636 s: wt_status_collect_changes_index 18:34:17.156089 wt-status.c:632 performance: 0.030895703 s: wt_status_collect_untracked 18:34:17.169861 builtin/commit.c:1421 performance: 0.013686404 s: cmd_status:update_index 18:34:17.170391 trace.c:415 performance: 0.201474531 s: git command: 'git' 'status' Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/update-index.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'builtin/update-index.c') diff --git a/builtin/update-index.c b/builtin/update-index.c index 587898624c..6ea5c8dc20 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -741,6 +741,7 @@ static int reupdate_callback(struct parse_opt_ctx_t *ctx, int cmd_update_index(int argc, const char **argv, const char *prefix) { int newfd, entries, has_errors = 0, line_termination = '\n'; + int untracked_cache = -1; int read_from_stdin = 0; int prefix_length = prefix ? strlen(prefix) : 0; int preferred_index_format = 0; @@ -832,6 +833,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("write index in this format")), OPT_BOOL(0, "split-index", &split_index, N_("enable or disable split index")), + OPT_BOOL(0, "untracked-cache", &untracked_cache, + N_("enable/disable untracked cache")), OPT_END() }; @@ -938,6 +941,19 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) the_index.split_index = NULL; the_index.cache_changed |= SOMETHING_CHANGED; } + if (untracked_cache > 0 && !the_index.untracked) { + struct untracked_cache *uc; + + uc = xcalloc(1, sizeof(*uc)); + uc->exclude_per_dir = ".gitignore"; + /* should be the same flags used by git-status */ + uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; + the_index.untracked = uc; + the_index.cache_changed |= UNTRACKED_CHANGED; + } else if (!untracked_cache && the_index.untracked) { + the_index.untracked = NULL; + the_index.cache_changed |= UNTRACKED_CHANGED; + } if (active_cache_changed) { if (newfd < 0) { -- cgit v1.2.1 From f64cb88d3521b64b2db9353d14148328063745dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:43 +0700 Subject: update-index: test the system before enabling untracked cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helped-by: Eric Sunshine Helped-by: Junio C Hamano Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/update-index.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) (limited to 'builtin/update-index.c') diff --git a/builtin/update-index.c b/builtin/update-index.c index 6ea5c8dc20..c6951ad3ce 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -33,6 +33,7 @@ static int mark_valid_only; static int mark_skip_worktree_only; #define MARK_FLAG 1 #define UNMARK_FLAG 2 +static struct strbuf mtime_dir = STRBUF_INIT; __attribute__((format (printf, 1, 2))) static void report(const char *fmt, ...) @@ -48,6 +49,166 @@ static void report(const char *fmt, ...) va_end(vp); } +static void remove_test_directory(void) +{ + if (mtime_dir.len) + remove_dir_recursively(&mtime_dir, 0); +} + +static const char *get_mtime_path(const char *path) +{ + static struct strbuf sb = STRBUF_INIT; + strbuf_reset(&sb); + strbuf_addf(&sb, "%s/%s", mtime_dir.buf, path); + return sb.buf; +} + +static void xmkdir(const char *path) +{ + path = get_mtime_path(path); + if (mkdir(path, 0700)) + die_errno(_("failed to create directory %s"), path); +} + +static int xstat_mtime_dir(struct stat *st) +{ + if (stat(mtime_dir.buf, st)) + die_errno(_("failed to stat %s"), mtime_dir.buf); + return 0; +} + +static int create_file(const char *path) +{ + int fd; + path = get_mtime_path(path); + fd = open(path, O_CREAT | O_RDWR, 0644); + if (fd < 0) + die_errno(_("failed to create file %s"), path); + return fd; +} + +static void xunlink(const char *path) +{ + path = get_mtime_path(path); + if (unlink(path)) + die_errno(_("failed to delete file %s"), path); +} + +static void xrmdir(const char *path) +{ + path = get_mtime_path(path); + if (rmdir(path)) + die_errno(_("failed to delete directory %s"), path); +} + +static void avoid_racy(void) +{ + /* + * not use if we could usleep(10) if USE_NSEC is defined. The + * field nsec could be there, but the OS could choose to + * ignore it? + */ + sleep(1); +} + +static int test_if_untracked_cache_is_supported(void) +{ + struct stat st; + struct stat_data base; + int fd, ret = 0; + + strbuf_addstr(&mtime_dir, "mtime-test-XXXXXX"); + if (!mkdtemp(mtime_dir.buf)) + die_errno("Could not make temporary directory"); + + fprintf(stderr, _("Testing ")); + atexit(remove_test_directory); + xstat_mtime_dir(&st); + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + fd = create_file("newfile"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + close(fd); + fputc('\n', stderr); + fprintf_ln(stderr,_("directory stat info does not " + "change after adding a new file")); + goto done; + } + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + xmkdir("new-dir"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + close(fd); + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info does not change " + "after adding a new directory")); + goto done; + } + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + write_or_die(fd, "data", 4); + close(fd); + xstat_mtime_dir(&st); + if (match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info changes " + "after updating a file")); + goto done; + } + fputc('.', stderr); + + avoid_racy(); + close(create_file("new-dir/new")); + xstat_mtime_dir(&st); + if (match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info changes after " + "adding a file inside subdirectory")); + goto done; + } + fputc('.', stderr); + + avoid_racy(); + xunlink("newfile"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info does not " + "change after deleting a file")); + goto done; + } + fill_stat_data(&base, &st); + fputc('.', stderr); + + avoid_racy(); + xunlink("new-dir/new"); + xrmdir("new-dir"); + xstat_mtime_dir(&st); + if (!match_stat_data(&base, &st)) { + fputc('\n', stderr); + fprintf_ln(stderr, _("directory stat info does not " + "change after deleting a directory")); + goto done; + } + + if (rmdir(mtime_dir.buf)) + die_errno(_("failed to delete directory %s"), mtime_dir.buf); + fprintf_ln(stderr, _(" OK")); + ret = 1; + +done: + strbuf_release(&mtime_dir); + return ret; +} + static int mark_ce_flags(const char *path, int flag, int mark) { int namelen = strlen(path); @@ -835,6 +996,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) N_("enable or disable split index")), OPT_BOOL(0, "untracked-cache", &untracked_cache, N_("enable/disable untracked cache")), + OPT_SET_INT(0, "force-untracked-cache", &untracked_cache, + N_("enable untracked cache without testing the filesystem"), 2), OPT_END() }; @@ -944,6 +1107,11 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) if (untracked_cache > 0 && !the_index.untracked) { struct untracked_cache *uc; + if (untracked_cache < 2) { + setup_work_tree(); + if (!test_if_untracked_cache_is_supported()) + return 1; + } uc = xcalloc(1, sizeof(*uc)); uc->exclude_per_dir = ".gitignore"; /* should be the same flags used by git-status */ -- cgit v1.2.1 From 1e8fef609e78110e276df633c5ba1fb1f1589fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sun, 8 Mar 2015 17:12:46 +0700 Subject: untracked cache: guard and disable on system changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user enables untracked cache, then - move worktree to an unsupported filesystem - or simply upgrade OS - or move the whole (portable) disk from one machine to another - or access a shared fs from another machine there's no guarantee that untracked cache can still function properly. Record the worktree location and OS footprint in the cache. If it changes, err on the safe side and disable the cache. The user can 'update-index --untracked-cache' again to make sure all conditions are met. This adds a new requirement that setup_git_directory* must be called before read_cache() because we need worktree location by then, or the cache is dropped. This change does not cover all bases, you can fool it if you try hard. The point is to stop accidents. Helped-by: Eric Sunshine Helped-by: brian m. carlson Helped-by: Torsten Bögershausen Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/update-index.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'builtin/update-index.c') diff --git a/builtin/update-index.c b/builtin/update-index.c index c6951ad3ce..790a6aa9db 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -1104,7 +1104,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) the_index.split_index = NULL; the_index.cache_changed |= SOMETHING_CHANGED; } - if (untracked_cache > 0 && !the_index.untracked) { + if (untracked_cache > 0) { struct untracked_cache *uc; if (untracked_cache < 2) { @@ -1112,11 +1112,15 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) if (!test_if_untracked_cache_is_supported()) return 1; } - uc = xcalloc(1, sizeof(*uc)); - uc->exclude_per_dir = ".gitignore"; - /* should be the same flags used by git-status */ - uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; - the_index.untracked = uc; + if (!the_index.untracked) { + uc = xcalloc(1, sizeof(*uc)); + strbuf_init(&uc->ident, 100); + uc->exclude_per_dir = ".gitignore"; + /* should be the same flags used by git-status */ + uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; + the_index.untracked = uc; + } + add_untracked_ident(the_index.untracked); the_index.cache_changed |= UNTRACKED_CHANGED; } else if (!untracked_cache && the_index.untracked) { the_index.untracked = NULL; -- cgit v1.2.1