diff options
author | Patrick Steinhardt <ps@pks.im> | 2019-06-21 11:55:21 +0200 |
---|---|---|
committer | Patrick Steinhardt <ps@pks.im> | 2019-07-11 08:28:55 +0200 |
commit | d7f58eabad2a9f355634f2c8b88964b2fb4fe3fd (patch) | |
tree | 8dd45aa94c5b386e02b00235853c6357eb491bb8 | |
parent | d0868646397ca624500f4f0fcd0715f51909e898 (diff) | |
download | libgit2-d7f58eabad2a9f355634f2c8b88964b2fb4fe3fd.tar.gz |
config_file: implement stat cache to avoid repeated rehashing
To decide whether a config file has changed, we always hash its
complete contents. This is unnecessarily expensive, as
well-behaved filesystems will always update stat information for
files which have changed. So before computing the hash, we should
first check whether the stat info has actually changed for either
the configuration file or any of its includes. This avoids having
to re-read the configuration file and its includes every time
when we check whether it's been modified.
Tracing the for-each-ref example previous to this commit, one can
see that we repeatedly re-open both the repo configuration as
well as the global configuration:
$ strace lg2 for-each-ref |& grep config
access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory)
access("/home/pks/.config/git/config", F_OK) = 0
access("/etc/gitconfig", F_OK) = -1 ENOENT (No such file or directory)
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
access("/tmp/repo/.git/config", F_OK) = 0
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/home/pks/.gitconfig", 0x7ffd15c05290) = -1 ENOENT (No such file or directory)
access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
access("/home/pks/.config/git/config", F_OK) = 0
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/home/pks/.gitconfig", 0x7ffd15c051f0) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3
With the change, we only do stats for those files and open them a
single time, only:
$ strace lg2 for-each-ref |& grep config
access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory)
access("/home/pks/.config/git/config", F_OK) = 0
access("/etc/gitconfig", F_OK) = -1 ENOENT (No such file or directory)
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
access("/tmp/repo/.git/config", F_OK) = 0
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/home/pks/.gitconfig", 0x7ffe70540d20) = -1 ENOENT (No such file or directory)
access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
access("/home/pks/.config/git/config", F_OK) = 0
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
stat("/home/pks/.gitconfig", 0x7ffe70540ca0) = -1 ENOENT (No such file or directory)
stat("/home/pks/.gitconfig", 0x7ffe70540c80) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory)
stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory)
stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory)
stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory)
stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0
The following benchmark has been performed with and without the
stat cache in a best-of-ten run:
```
int lg2_repro(git_repository *repo, int argc, char **argv)
{
git_config *cfg;
int32_t dummy;
int i;
UNUSED(argc);
UNUSED(argv);
check_lg2(git_repository_config(&cfg, repo),
"Could not obtain config", NULL);
for (i = 1; i < 100000; ++i)
git_config_get_int32(&dummy, cfg, "foo.bar");
git_config_free(cfg);
return 0;
}
```
Without stat cache:
$ time lg2 repro
real 0m1.528s
user 0m0.568s
sys 0m0.944s
With stat cache:
$ time lg2 repro
real 0m0.526s
user 0m0.268s
sys 0m0.258s
This benchmark shows a nearly three-fold performance improvement.
This change requires that we check our configuration stress tests
as we're now in fact becoming more racy. If somebody is writing a
configuration file at nearly the same time (there is a window of
100ns on Windows-based systems), then it might be that we realize
that this file has actually changed and thus may not re-read it.
This will only happen if either an external process is rewriting
the configuration file or if the same process has multiple
`git_config` structures pointing to the same time, where one of
both is being used to write and the other one is used to read
values.
-rw-r--r-- | src/config_file.c | 11 | ||||
-rw-r--r-- | src/config_parse.h | 3 | ||||
-rw-r--r-- | tests/clar_libgit2.h | 6 | ||||
-rw-r--r-- | tests/config/stress.c | 1 |
4 files changed, 21 insertions, 0 deletions
diff --git a/src/config_file.c b/src/config_file.c index 013791ca8..d238cb7cf 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -143,6 +143,9 @@ static int config_is_modified(int *modified, git_config_file *file) *modified = 0; + if (!git_futils_filestamp_check(&file->stamp, file->path)) + goto check_includes; + if ((error = git_futils_readbuffer(&buf, file->path)) < 0) goto out; @@ -154,6 +157,7 @@ static int config_is_modified(int *modified, git_config_file *file) goto out; } +check_includes: git_array_foreach(file->includes, i, include) { if ((error = config_is_modified(modified, include)) < 0 || *modified) goto out; @@ -861,6 +865,7 @@ static int config_read( diskfile_parse_state parse_data; git_config_parser reader; git_buf contents = GIT_BUF_INIT; + struct stat st; int error; if (depth >= MAX_INCLUDE_DEPTH) { @@ -868,11 +873,17 @@ static int config_read( return -1; } + if (p_stat(file->path, &st) < 0) { + error = git_path_set_error(errno, file->path, "stat"); + goto out; + } + if ((error = git_futils_readbuffer(&contents, file->path)) < 0) goto out; git_parse_ctx_init(&reader.ctx, contents.ptr, contents.size); + git_futils_filestamp_set_from_stat(&file->stamp, &st); if ((error = git_hash_buf(&file->checksum, contents.ptr, contents.size)) < 0) goto out; diff --git a/src/config_parse.h b/src/config_parse.h index 322ab6534..4db31bd1f 100644 --- a/src/config_parse.h +++ b/src/config_parse.h @@ -8,7 +8,9 @@ #define INCLUDE_config_parse_h__ #include "common.h" + #include "array.h" +#include "fileops.h" #include "oid.h" #include "parse.h" @@ -16,6 +18,7 @@ extern const char *git_config_escapes; extern const char *git_config_escaped; typedef struct config_file { + git_futils_filestamp stamp; git_oid checksum; char *path; git_array_t(struct config_file) includes; diff --git a/tests/clar_libgit2.h b/tests/clar_libgit2.h index 5ea62e75e..63cd266d3 100644 --- a/tests/clar_libgit2.h +++ b/tests/clar_libgit2.h @@ -220,6 +220,12 @@ void cl_fake_home_cleanup(void *); void cl_sandbox_set_search_path_defaults(void); #ifdef GIT_WIN32 +# define cl_msleep(x) Sleep(x) +#else +# define cl_msleep(x) usleep(1000 * (x)) +#endif + +#ifdef GIT_WIN32 bool cl_sandbox_supports_8dot3(void); #endif diff --git a/tests/config/stress.c b/tests/config/stress.c index d86a11d4a..ad09c870f 100644 --- a/tests/config/stress.c +++ b/tests/config/stress.c @@ -123,6 +123,7 @@ void test_config_stress__quick_write(void) for (i = 0; i < 10; i++) { int32_t val; cl_git_pass(git_config_set_int32(config_w, key, i)); + cl_msleep(1); cl_git_pass(git_config_get_int32(&val, config_r, key)); cl_assert_equal_i(i, val); } |