diff options
author | Junio C Hamano <gitster@pobox.com> | 2021-05-16 21:05:23 +0900 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-05-16 21:05:23 +0900 |
commit | a737e1f1d25747481bd4925555006f569e461117 (patch) | |
tree | f7f7cb6a78b9e67f9eb6e9c35cdc85758c0ebd25 | |
parent | 644f4a20468da89c1325a539c0521336f7835a64 (diff) | |
parent | 87094fc2daa9613c2fad454dbb068a8f23ce8de8 (diff) | |
download | git-a737e1f1d25747481bd4925555006f569e461117.tar.gz |
Merge branch 'mt/parallel-checkout-part-3'
The final part of "parallel checkout".
* mt/parallel-checkout-part-3:
ci: run test round with parallel-checkout enabled
parallel-checkout: add tests related to .gitattributes
t0028: extract encoding helpers to lib-encoding.sh
parallel-checkout: add tests related to path collisions
parallel-checkout: add tests for basic operations
checkout-index: add parallel checkout support
builtin/checkout.c: complete parallel checkout support
make_transient_cache_entry(): optionally alloc from mem_pool
-rw-r--r-- | builtin/checkout--worker.c | 2 | ||||
-rw-r--r-- | builtin/checkout-index.c | 24 | ||||
-rw-r--r-- | builtin/checkout.c | 22 | ||||
-rw-r--r-- | builtin/difftool.c | 2 | ||||
-rw-r--r-- | cache.h | 14 | ||||
-rwxr-xr-x | ci/run-build-and-tests.sh | 1 | ||||
-rw-r--r-- | parallel-checkout.c | 18 | ||||
-rw-r--r-- | read-cache.c | 14 | ||||
-rw-r--r-- | t/README | 4 | ||||
-rw-r--r-- | t/lib-encoding.sh | 25 | ||||
-rw-r--r-- | t/lib-parallel-checkout.sh | 45 | ||||
-rwxr-xr-x | t/t0028-working-tree-encoding.sh | 25 | ||||
-rwxr-xr-x | t/t2080-parallel-checkout-basics.sh | 229 | ||||
-rwxr-xr-x | t/t2081-parallel-checkout-collisions.sh | 162 | ||||
-rwxr-xr-x | t/t2082-parallel-checkout-attributes.sh | 194 | ||||
-rw-r--r-- | unpack-trees.c | 2 |
16 files changed, 734 insertions, 49 deletions
diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c index 31e0de2f7e..289a9b8f89 100644 --- a/builtin/checkout--worker.c +++ b/builtin/checkout--worker.c @@ -39,7 +39,7 @@ static void packet_to_pc_item(const char *buffer, int len, } memset(pc_item, 0, sizeof(*pc_item)); - pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len); + pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len, NULL); pc_item->ce->ce_namelen = fixed_portion->name_len; pc_item->ce->ce_mode = fixed_portion->ce_mode; memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen); diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index c9a3c71914..e21620d964 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -12,6 +12,7 @@ #include "cache-tree.h" #include "parse-options.h" #include "entry.h" +#include "parallel-checkout.h" #define CHECKOUT_ALL 4 static int nul_term_line; @@ -115,7 +116,7 @@ static int checkout_file(const char *name, const char *prefix) return -1; } -static void checkout_all(const char *prefix, int prefix_length) +static int checkout_all(const char *prefix, int prefix_length) { int i, errs = 0; struct cache_entry *last_ce = NULL; @@ -144,11 +145,7 @@ static void checkout_all(const char *prefix, int prefix_length) } if (last_ce && to_tempfile) write_tempfile_record(last_ce->name, prefix); - if (errs) - /* we have already done our error reporting. - * exit with the same code as die(). - */ - exit(128); + return !!errs; } static const char * const builtin_checkout_index_usage[] = { @@ -184,6 +181,7 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix) int force = 0, quiet = 0, not_new = 0; int index_opt = 0; int err = 0; + int pc_workers, pc_threshold; struct option builtin_checkout_index_options[] = { OPT_BOOL('a', "all", &all, N_("check out all files in the index")), @@ -238,6 +236,10 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix) hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR); } + get_parallel_checkout_configs(&pc_workers, &pc_threshold); + if (pc_workers > 1) + init_parallel_checkout(); + /* Check out named files first */ for (i = 0; i < argc; i++) { const char *arg = argv[i]; @@ -277,12 +279,16 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix) strbuf_release(&buf); } + if (all) + err |= checkout_all(prefix, prefix_length); + + if (pc_workers > 1) + err |= run_parallel_checkout(&state, pc_workers, pc_threshold, + NULL, NULL); + if (err) return 1; - if (all) - checkout_all(prefix, prefix_length); - if (is_lock_file_locked(&lock_file) && write_locked_index(&the_index, &lock_file, COMMIT_LOCK)) die("Unable to write new index file"); diff --git a/builtin/checkout.c b/builtin/checkout.c index 0bf61e6eef..f4cd7747d3 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -27,6 +27,7 @@ #include "wt-status.h" #include "xdiff-interface.h" #include "entry.h" +#include "parallel-checkout.h" static const char * const checkout_usage[] = { N_("git checkout [<options>] <branch>"), @@ -230,7 +231,8 @@ static int checkout_stage(int stage, const struct cache_entry *ce, int pos, return error(_("path '%s' does not have their version"), ce->name); } -static int checkout_merged(int pos, const struct checkout *state, int *nr_checkouts) +static int checkout_merged(int pos, const struct checkout *state, + int *nr_checkouts, struct mem_pool *ce_mem_pool) { struct cache_entry *ce = active_cache[pos]; const char *path = ce->name; @@ -291,11 +293,10 @@ static int checkout_merged(int pos, const struct checkout *state, int *nr_checko if (write_object_file(result_buf.ptr, result_buf.size, blob_type, &oid)) die(_("Unable to add merge result for '%s'"), path); free(result_buf.ptr); - ce = make_transient_cache_entry(mode, &oid, path, 2); + ce = make_transient_cache_entry(mode, &oid, path, 2, ce_mem_pool); if (!ce) die(_("make_cache_entry failed for path '%s'"), path); status = checkout_entry(ce, state, NULL, nr_checkouts); - discard_cache_entry(ce); return status; } @@ -359,19 +360,27 @@ static int checkout_worktree(const struct checkout_opts *opts, int nr_checkouts = 0, nr_unmerged = 0; int errs = 0; int pos; + int pc_workers, pc_threshold; + struct mem_pool ce_mem_pool; state.force = 1; state.refresh_cache = 1; state.istate = &the_index; + mem_pool_init(&ce_mem_pool, 0); + get_parallel_checkout_configs(&pc_workers, &pc_threshold); init_checkout_metadata(&state.meta, info->refname, info->commit ? &info->commit->object.oid : &info->oid, NULL); enable_delayed_checkout(&state); + if (pc_workers > 1) + init_parallel_checkout(); + /* TODO: audit for interaction with sparse-index. */ ensure_full_index(&the_index); + for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -387,10 +396,15 @@ static int checkout_worktree(const struct checkout_opts *opts, &nr_checkouts, opts->overlay_mode); else if (opts->merge) errs |= checkout_merged(pos, &state, - &nr_unmerged); + &nr_unmerged, + &ce_mem_pool); pos = skip_same_name(ce, pos) - 1; } } + if (pc_workers > 1) + errs |= run_parallel_checkout(&state, pc_workers, pc_threshold, + NULL, NULL); + mem_pool_discard(&ce_mem_pool, should_validate_cache_entries()); remove_marked_cache_entries(&the_index, 1); remove_scheduled_dirs(); errs |= finish_delayed_checkout(&state, &nr_checkouts); diff --git a/builtin/difftool.c b/builtin/difftool.c index 0202a43052..89334b77fb 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -323,7 +323,7 @@ static int checkout_path(unsigned mode, struct object_id *oid, struct cache_entry *ce; int ret; - ce = make_transient_cache_entry(mode, oid, path, 0); + ce = make_transient_cache_entry(mode, oid, path, 0, NULL); ret = checkout_entry(ce, state, NULL, NULL); discard_cache_entry(ce); @@ -370,16 +370,20 @@ struct cache_entry *make_empty_cache_entry(struct index_state *istate, size_t name_len); /* - * Create a cache_entry that is not intended to be added to an index. - * Caller is responsible for discarding the cache_entry - * with `discard_cache_entry`. + * Create a cache_entry that is not intended to be added to an index. If + * `ce_mem_pool` is not NULL, the entry is allocated within the given memory + * pool. Caller is responsible for discarding "loose" entries with + * `discard_cache_entry()` and the memory pool with + * `mem_pool_discard(ce_mem_pool, should_validate_cache_entries())`. */ struct cache_entry *make_transient_cache_entry(unsigned int mode, const struct object_id *oid, const char *path, - int stage); + int stage, + struct mem_pool *ce_mem_pool); -struct cache_entry *make_empty_transient_cache_entry(size_t name_len); +struct cache_entry *make_empty_transient_cache_entry(size_t len, + struct mem_pool *ce_mem_pool); /* * Discard cache entry. diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index d19be40544..3ce81ffee9 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -26,6 +26,7 @@ linux-gcc) export GIT_TEST_ADD_I_USE_BUILTIN=1 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_WRITE_REV_INDEX=1 + export GIT_TEST_CHECKOUT_WORKERS=2 make test ;; linux-clang) diff --git a/parallel-checkout.c b/parallel-checkout.c index 09e8b10a35..6b1af32bb3 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -8,6 +8,7 @@ #include "sigchain.h" #include "streaming.h" #include "thread-utils.h" +#include "trace2.h" struct pc_worker { struct child_process cp; @@ -34,6 +35,20 @@ static const int DEFAULT_NUM_WORKERS = 1; void get_parallel_checkout_configs(int *num_workers, int *threshold) { + char *env_workers = getenv("GIT_TEST_CHECKOUT_WORKERS"); + + if (env_workers && *env_workers) { + if (strtol_i(env_workers, 10, num_workers)) { + die("invalid value for GIT_TEST_CHECKOUT_WORKERS: '%s'", + env_workers); + } + if (*num_workers < 1) + *num_workers = online_cpus(); + + *threshold = 0; + return; + } + if (git_config_get_int("checkout.workers", num_workers)) *num_workers = DEFAULT_NUM_WORKERS; else if (*num_workers < 1) @@ -326,6 +341,7 @@ void write_pc_item(struct parallel_checkout_item *pc_item, if (dir_sep && !has_dirs_only_path(path.buf, dir_sep - path.buf, state->base_dir_len)) { pc_item->status = PC_ITEM_COLLIDED; + trace2_data_string("pcheckout", NULL, "collision/dirname", path.buf); goto out; } @@ -341,6 +357,8 @@ void write_pc_item(struct parallel_checkout_item *pc_item, * call should have already caught these cases. */ pc_item->status = PC_ITEM_COLLIDED; + trace2_data_string("pcheckout", NULL, + "collision/basename", path.buf); } else { error_errno("failed to open file '%s'", path.buf); pc_item->status = PC_ITEM_FAILED; diff --git a/read-cache.c b/read-cache.c index fbf3a4ce7d..1b3c2eb408 100644 --- a/read-cache.c +++ b/read-cache.c @@ -839,8 +839,11 @@ struct cache_entry *make_empty_cache_entry(struct index_state *istate, size_t le return mem_pool__ce_calloc(find_mem_pool(istate), len); } -struct cache_entry *make_empty_transient_cache_entry(size_t len) +struct cache_entry *make_empty_transient_cache_entry(size_t len, + struct mem_pool *ce_mem_pool) { + if (ce_mem_pool) + return mem_pool__ce_calloc(ce_mem_pool, len); return xcalloc(1, cache_entry_size(len)); } @@ -874,8 +877,11 @@ struct cache_entry *make_cache_entry(struct index_state *istate, return ret; } -struct cache_entry *make_transient_cache_entry(unsigned int mode, const struct object_id *oid, - const char *path, int stage) +struct cache_entry *make_transient_cache_entry(unsigned int mode, + const struct object_id *oid, + const char *path, + int stage, + struct mem_pool *ce_mem_pool) { struct cache_entry *ce; int len; @@ -886,7 +892,7 @@ struct cache_entry *make_transient_cache_entry(unsigned int mode, const struct o } len = strlen(path); - ce = make_empty_transient_cache_entry(len); + ce = make_empty_transient_cache_entry(len, ce_mem_pool); oidcpy(&ce->oid, oid); memcpy(ce->name, path, len); @@ -439,6 +439,10 @@ GIT_TEST_WRITE_REV_INDEX=<boolean>, when true enables the GIT_TEST_SPARSE_INDEX=<boolean>, when true enables index writes to use the sparse-index format by default. +GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting +to <n> and 'checkout.thresholdForParallelism' to 0, forcing the +execution of the parallel-checkout code. + Naming Tests ------------ diff --git a/t/lib-encoding.sh b/t/lib-encoding.sh new file mode 100644 index 0000000000..2dabc8c73e --- /dev/null +++ b/t/lib-encoding.sh @@ -0,0 +1,25 @@ +# Encoding helpers + +test_lazy_prereq NO_UTF16_BOM ' + test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6 +' + +test_lazy_prereq NO_UTF32_BOM ' + test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12 +' + +write_utf16 () { + if test_have_prereq NO_UTF16_BOM + then + printf '\376\377' + fi && + iconv -f UTF-8 -t UTF-16 +} + +write_utf32 () { + if test_have_prereq NO_UTF32_BOM + then + printf '\0\0\376\377' + fi && + iconv -f UTF-8 -t UTF-32 +} diff --git a/t/lib-parallel-checkout.sh b/t/lib-parallel-checkout.sh new file mode 100644 index 0000000000..21f5759732 --- /dev/null +++ b/t/lib-parallel-checkout.sh @@ -0,0 +1,45 @@ +# Helpers for tests invoking parallel-checkout + +# Parallel checkout tests need full control of the number of workers +unset GIT_TEST_CHECKOUT_WORKERS + +set_checkout_config () { + if test $# -ne 2 + then + BUG "usage: set_checkout_config <workers> <threshold>" + fi && + + test_config_global checkout.workers $1 && + test_config_global checkout.thresholdForParallelism $2 +} + +# Run "${@:2}" and check that $1 checkout workers were used +test_checkout_workers () { + if test $# -lt 2 + then + BUG "too few arguments to test_checkout_workers" + fi && + + local expected_workers=$1 && + shift && + + local trace_file=trace-test-checkout-workers && + rm -f "$trace_file" && + GIT_TRACE2="$(pwd)/$trace_file" "$@" 2>&8 && + + local workers=$(grep "child_start\[..*\] git checkout--worker" "$trace_file" | wc -l) && + test $workers -eq $expected_workers && + rm "$trace_file" +} 8>&2 2>&4 + +# Verify that both the working tree and the index were created correctly +verify_checkout () { + if test $# -ne 1 + then + BUG "usage: verify_checkout <repository path>" + fi && + + git -C "$1" diff-index --ignore-submodules=none --exit-code HEAD -- && + git -C "$1" status --porcelain >"$1".status && + test_must_be_empty "$1".status +} diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh index f970a9806b..82905a2156 100755 --- a/t/t0028-working-tree-encoding.sh +++ b/t/t0028-working-tree-encoding.sh @@ -6,33 +6,10 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY/lib-encoding.sh" GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING -test_lazy_prereq NO_UTF16_BOM ' - test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6 -' - -test_lazy_prereq NO_UTF32_BOM ' - test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12 -' - -write_utf16 () { - if test_have_prereq NO_UTF16_BOM - then - printf '\376\377' - fi && - iconv -f UTF-8 -t UTF-16 -} - -write_utf32 () { - if test_have_prereq NO_UTF32_BOM - then - printf '\0\0\376\377' - fi && - iconv -f UTF-8 -t UTF-32 -} - test_expect_success 'setup test files' ' git config core.eol lf && diff --git a/t/t2080-parallel-checkout-basics.sh b/t/t2080-parallel-checkout-basics.sh new file mode 100755 index 0000000000..7087818550 --- /dev/null +++ b/t/t2080-parallel-checkout-basics.sh @@ -0,0 +1,229 @@ +#!/bin/sh + +test_description='parallel-checkout basics + +Ensure that parallel-checkout basically works on clone and checkout, spawning +the required number of workers and correctly populating both the index and the +working tree. +' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-parallel-checkout.sh" + +# Test parallel-checkout with a branch switch containing a variety of file +# creations, deletions, and modifications, involving different entry types. +# The branches B1 and B2 have the following paths: +# +# B1 B2 +# a/a (file) a (file) +# b (file) b/b (file) +# +# c/c (file) c (symlink) +# d (symlink) d/d (file) +# +# e/e (file) e (submodule) +# f (submodule) f/f (file) +# +# g (submodule) g (symlink) +# h (symlink) h (submodule) +# +# Additionally, the following paths are present on both branches, but with +# different contents: +# +# i (file) i (file) +# j (symlink) j (symlink) +# k (submodule) k (submodule) +# +# And the following paths are only present in one of the branches: +# +# l/l (file) - +# - m/m (file) +# +test_expect_success 'setup repo for checkout with various types of changes' ' + git init sub && + ( + cd sub && + git checkout -b B2 && + echo B2 >file && + git add file && + git commit -m file && + + git checkout -b B1 && + echo B1 >file && + git add file && + git commit -m file + ) && + + git init various && + ( + cd various && + + git checkout -b B1 && + mkdir a c e && + echo a/a >a/a && + echo b >b && + echo c/c >c/c && + test_ln_s_add c d && + echo e/e >e/e && + git submodule add ../sub f && + git submodule add ../sub g && + test_ln_s_add c h && + + echo "B1 i" >i && + test_ln_s_add c j && + git submodule add -b B1 ../sub k && + mkdir l && + echo l/l >l/l && + + git add . && + git commit -m B1 && + + git checkout -b B2 && + git rm -rf :^.gitmodules :^k && + mkdir b d f && + echo a >a && + echo b/b >b/b && + test_ln_s_add b c && + echo d/d >d/d && + git submodule add ../sub e && + echo f/f >f/f && + test_ln_s_add b g && + git submodule add ../sub h && + + echo "B2 i" >i && + test_ln_s_add b j && + git -C k checkout B2 && + mkdir m && + echo m/m >m/m && + + git add . && + git commit -m B2 && + + git checkout --recurse-submodules B1 + ) +' + +for mode in sequential parallel sequential-fallback +do + case $mode in + sequential) workers=1 threshold=0 expected_workers=0 ;; + parallel) workers=2 threshold=0 expected_workers=2 ;; + sequential-fallback) workers=2 threshold=100 expected_workers=0 ;; + esac + + test_expect_success "$mode checkout" ' + repo=various_$mode && + cp -R various $repo && + + # The just copied files have more recent timestamps than their + # associated index entries. So refresh the cached timestamps + # to avoid an "entry not up-to-date" error from `git checkout`. + # We only have to do this for the submodules as `git checkout` + # will already refresh the superproject index before performing + # the up-to-date check. + # + git -C $repo submodule foreach "git update-index --refresh" && + + set_checkout_config $workers $threshold && + test_checkout_workers $expected_workers \ + git -C $repo checkout --recurse-submodules B2 && + verify_checkout $repo + ' +done + +for mode in parallel sequential-fallback +do + case $mode in + parallel) workers=2 threshold=0 expected_workers=2 ;; + sequential-fallback) workers=2 threshold=100 expected_workers=0 ;; + esac + + test_expect_success "$mode checkout on clone" ' + repo=various_${mode}_clone && + set_checkout_config $workers $threshold && + test_checkout_workers $expected_workers \ + git clone --recurse-submodules --branch B2 various $repo && + verify_checkout $repo + ' +done + +# Just to be paranoid, actually compare the working trees' contents directly. +test_expect_success 'compare the working trees' ' + rm -rf various_*/.git && + rm -rf various_*/*/.git && + + # We use `git diff` instead of `diff -r` because the latter would + # follow symlinks, and not all `diff` implementations support the + # `--no-dereference` option. + # + git diff --no-index various_sequential various_parallel && + git diff --no-index various_sequential various_parallel_clone && + git diff --no-index various_sequential various_sequential-fallback && + git diff --no-index various_sequential various_sequential-fallback_clone +' + +# Currently, each submodule is checked out in a separated child process, but +# these subprocesses must also be able to use parallel checkout workers to +# write the submodules' entries. +test_expect_success 'submodules can use parallel checkout' ' + set_checkout_config 2 0 && + git init super && + ( + cd super && + git init sub && + test_commit -C sub A && + test_commit -C sub B && + git submodule add ./sub && + git commit -m sub && + rm sub/* && + test_checkout_workers 2 git checkout --recurse-submodules . + ) +' + +test_expect_success 'parallel checkout respects --[no]-force' ' + set_checkout_config 2 0 && + git init dirty && + ( + cd dirty && + mkdir D && + test_commit D/F && + test_commit F && + + rm -rf D && + echo changed >D && + echo changed >F.t && + + # We expect 0 workers because there is nothing to be done + test_checkout_workers 0 git checkout HEAD && + test_path_is_file D && + grep changed D && + grep changed F.t && + + test_checkout_workers 2 git checkout --force HEAD && + test_path_is_dir D && + grep D/F D/F.t && + grep F F.t + ) +' + +test_expect_success SYMLINKS 'parallel checkout checks for symlinks in leading dirs' ' + set_checkout_config 2 0 && + git init symlinks && + ( + cd symlinks && + mkdir D untracked && + # Commit 2 files to have enough work for 2 parallel workers + test_commit D/A && + test_commit D/B && + rm -rf D && + ln -s untracked D && + + test_checkout_workers 2 git checkout --force HEAD && + ! test -h D && + grep D/A D/A.t && + grep D/B D/B.t + ) +' + +test_done diff --git a/t/t2081-parallel-checkout-collisions.sh b/t/t2081-parallel-checkout-collisions.sh new file mode 100755 index 0000000000..f6fcfc0c1e --- /dev/null +++ b/t/t2081-parallel-checkout-collisions.sh @@ -0,0 +1,162 @@ +#!/bin/sh + +test_description="path collisions during parallel checkout + +Parallel checkout must detect path collisions to: + +1) Avoid racily writing to different paths that represent the same file on disk. +2) Report the colliding entries on clone. + +The tests in this file exercise parallel checkout's collision detection code in +both these mechanics. +" + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-parallel-checkout.sh" + +TEST_ROOT="$PWD" + +test_expect_success CASE_INSENSITIVE_FS 'setup' ' + empty_oid=$(git hash-object -w --stdin </dev/null) && + cat >objs <<-EOF && + 100644 $empty_oid FILE_X + 100644 $empty_oid FILE_x + 100644 $empty_oid file_X + 100644 $empty_oid file_x + EOF + git update-index --index-info <objs && + git commit -m "colliding files" && + git tag basename_collision && + + write_script "$TEST_ROOT"/logger_script <<-\EOF + echo "$@" >>filter.log + EOF +' + +test_workers_in_event_trace () +{ + test $1 -eq $(grep ".event.:.child_start..*checkout--worker" $2 | wc -l) +} + +test_expect_success CASE_INSENSITIVE_FS 'worker detects basename collision' ' + GIT_TRACE2_EVENT="$(pwd)/trace" git \ + -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \ + checkout . && + + test_workers_in_event_trace 2 trace && + collisions=$(grep -i "category.:.pcheckout.,.key.:.collision/basename.,.value.:.file_x.}" trace | wc -l) && + test $collisions -eq 3 +' + +test_expect_success CASE_INSENSITIVE_FS 'worker detects dirname collision' ' + test_config filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" && + empty_oid=$(git hash-object -w --stdin </dev/null) && + + # By setting a filter command to "a", we make it ineligible for parallel + # checkout, and thus it is checked out *first*. This way we can ensure + # that "A/B" and "A/C" will both collide with the regular file "a". + # + attr_oid=$(echo "a filter=logger" | git hash-object -w --stdin) && + + cat >objs <<-EOF && + 100644 $empty_oid A/B + 100644 $empty_oid A/C + 100644 $empty_oid a + 100644 $attr_oid .gitattributes + EOF + git rm -rf . && + git update-index --index-info <objs && + + rm -f trace filter.log && + GIT_TRACE2_EVENT="$(pwd)/trace" git \ + -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \ + checkout . && + + # Check that "a" (and only "a") was filtered + echo a >expected.log && + test_cmp filter.log expected.log && + + # Check that it used the right number of workers and detected the collisions + test_workers_in_event_trace 2 trace && + grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/B.}" trace && + grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/C.}" trace +' + +test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'do not follow symlinks colliding with leading dir' ' + empty_oid=$(git hash-object -w --stdin </dev/null) && + symlink_oid=$(echo "./e" | git hash-object -w --stdin) && + mkdir e && + + cat >objs <<-EOF && + 120000 $symlink_oid D + 100644 $empty_oid d/x + 100644 $empty_oid e/y + EOF + git rm -rf . && + git update-index --index-info <objs && + + set_checkout_config 2 0 && + test_checkout_workers 2 git checkout . && + test_path_is_dir e && + test_path_is_missing e/x +' + +# The two following tests check that parallel checkout correctly reports +# colliding entries on clone. The sequential code detects a collision by +# calling lstat() before trying to open(O_CREAT) a file. (Note that this only +# works for clone.) Then, to find the pair of a colliding item k, it searches +# cache_entry[0, k-1]. This is not sufficient in parallel checkout because: +# +# - A colliding file may be created between the lstat() and open() calls; +# - A colliding entry might appear in the second half of the cache_entry array. +# +test_expect_success CASE_INSENSITIVE_FS 'collision report on clone (w/ racy file creation)' ' + git reset --hard basename_collision && + set_checkout_config 2 0 && + test_checkout_workers 2 git clone . clone-repo 2>stderr && + + grep FILE_X stderr && + grep FILE_x stderr && + grep file_X stderr && + grep file_x stderr && + grep "the following paths have collided" stderr +' + +# This test ensures that the collision report code is correctly looking for +# colliding peers in the second half of the cache_entry array. This is done by +# defining a smudge command for the *last* array entry, which makes it +# non-eligible for parallel-checkout. Thus, it is checked out *first*, before +# spawning the workers. +# +# Note: this test doesn't work on Windows because, on this system, the +# collision report code uses strcmp() to find the colliding pairs when +# core.ignoreCase is false. And we need this setting for this test so that only +# 'file_x' matches the pattern of the filter attribute. But the test works on +# OSX, where the colliding pairs are found using inode. +# +test_expect_success CASE_INSENSITIVE_FS,!MINGW,!CYGWIN \ + 'collision report on clone (w/ colliding peer after the detected entry)' ' + + test_config_global filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" && + git reset --hard basename_collision && + echo "file_x filter=logger" >.gitattributes && + git add .gitattributes && + git commit -m "filter for file_x" && + + rm -rf clone-repo && + set_checkout_config 2 0 && + test_checkout_workers 2 \ + git -c core.ignoreCase=false clone . clone-repo 2>stderr && + + grep FILE_X stderr && + grep FILE_x stderr && + grep file_X stderr && + grep file_x stderr && + grep "the following paths have collided" stderr && + + # Check that only "file_x" was filtered + echo file_x >expected.log && + test_cmp clone-repo/filter.log expected.log +' + +test_done diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh new file mode 100755 index 0000000000..2525457961 --- /dev/null +++ b/t/t2082-parallel-checkout-attributes.sh @@ -0,0 +1,194 @@ +#!/bin/sh + +test_description='parallel-checkout: attributes + +Verify that parallel-checkout correctly creates files that require +conversions, as specified in .gitattributes. The main point here is +to check that the conv_attr data is correctly sent to the workers +and that it contains sufficient information to smudge files +properly (without access to the index or attribute stack). +' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-parallel-checkout.sh" +. "$TEST_DIRECTORY/lib-encoding.sh" + +test_expect_success 'parallel-checkout with ident' ' + set_checkout_config 2 0 && + git init ident && + ( + cd ident && + echo "A ident" >.gitattributes && + echo "\$Id\$" >A && + echo "\$Id\$" >B && + git add -A && + git commit -m id && + + rm A B && + test_checkout_workers 2 git reset --hard && + hexsz=$(test_oid hexsz) && + grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A && + grep "\\\$Id\\\$" B + ) +' + +test_expect_success 'parallel-checkout with re-encoding' ' + set_checkout_config 2 0 && + git init encoding && + ( + cd encoding && + echo text >utf8-text && + write_utf16 <utf8-text >utf16-text && + + echo "A working-tree-encoding=UTF-16" >.gitattributes && + cp utf16-text A && + cp utf8-text B && + git add A B .gitattributes && + git commit -m encoding && + + # Check that A is stored in UTF-8 + git cat-file -p :A >A.internal && + test_cmp_bin utf8-text A.internal && + + rm A B && + test_checkout_workers 2 git checkout A B && + + # Check that A (and only A) is re-encoded during checkout + test_cmp_bin utf16-text A && + test_cmp_bin utf8-text B + ) +' + +test_expect_success 'parallel-checkout with eol conversions' ' + set_checkout_config 2 0 && + git init eol && + ( + cd eol && + printf "multi\r\nline\r\ntext" >crlf-text && + printf "multi\nline\ntext" >lf-text && + + git config core.autocrlf false && + echo "A eol=crlf" >.gitattributes && + cp crlf-text A && + cp lf-text B && + git add A B .gitattributes && + git commit -m eol && + + # Check that A is stored with LF format + git cat-file -p :A >A.internal && + test_cmp_bin lf-text A.internal && + + rm A B && + test_checkout_workers 2 git checkout A B && + + # Check that A (and only A) is converted to CRLF during checkout + test_cmp_bin crlf-text A && + test_cmp_bin lf-text B + ) +' + +# Entries that require an external filter are not eligible for parallel +# checkout. Check that both the parallel-eligible and non-eligible entries are +# properly writen in a single checkout operation. +# +test_expect_success 'parallel-checkout and external filter' ' + set_checkout_config 2 0 && + git init filter && + ( + cd filter && + write_script <<-\EOF rot13.sh && + tr \ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM" + EOF + + git config filter.rot13.clean "\"$(pwd)/rot13.sh\"" && + git config filter.rot13.smudge "\"$(pwd)/rot13.sh\"" && + git config filter.rot13.required true && + + echo abcd >original && + echo nopq >rot13 && + + echo "A filter=rot13" >.gitattributes && + cp original A && + cp original B && + cp original C && + git add A B C .gitattributes && + git commit -m filter && + + # Check that A (and only A) was cleaned + git cat-file -p :A >A.internal && + test_cmp rot13 A.internal && + git cat-file -p :B >B.internal && + test_cmp original B.internal && + git cat-file -p :C >C.internal && + test_cmp original C.internal && + + rm A B C *.internal && + test_checkout_workers 2 git checkout A B C && + + # Check that A (and only A) was smudged during checkout + test_cmp original A && + test_cmp original B && + test_cmp original C + ) +' + +# The delayed queue is independent from the parallel queue, and they should be +# able to work together in the same checkout process. +# +test_expect_success PERL 'parallel-checkout and delayed checkout' ' + write_script rot13-filter.pl "$PERL_PATH" \ + <"$TEST_DIRECTORY"/t0021/rot13-filter.pl && + + test_config_global filter.delay.process \ + "\"$(pwd)/rot13-filter.pl\" --always-delay \"$(pwd)/delayed.log\" clean smudge delay" && + test_config_global filter.delay.required true && + + echo "abcd" >original && + echo "nopq" >rot13 && + + git init delayed && + ( + cd delayed && + echo "*.d filter=delay" >.gitattributes && + cp ../original W.d && + cp ../original X.d && + cp ../original Y && + cp ../original Z && + git add -A && + git commit -m delayed && + + # Check that *.d files were cleaned + git cat-file -p :W.d >W.d.internal && + test_cmp W.d.internal ../rot13 && + git cat-file -p :X.d >X.d.internal && + test_cmp X.d.internal ../rot13 && + git cat-file -p :Y >Y.internal && + test_cmp Y.internal ../original && + git cat-file -p :Z >Z.internal && + test_cmp Z.internal ../original && + + rm * + ) && + + set_checkout_config 2 0 && + test_checkout_workers 2 git -C delayed checkout -f && + verify_checkout delayed && + + # Check that the *.d files got to the delay queue and were filtered + grep "smudge W.d .* \[DELAYED\]" delayed.log && + grep "smudge X.d .* \[DELAYED\]" delayed.log && + test_cmp delayed/W.d original && + test_cmp delayed/X.d original && + + # Check that the parallel-eligible entries went to the right queue and + # were not filtered + ! grep "smudge Y .* \[DELAYED\]" delayed.log && + ! grep "smudge Z .* \[DELAYED\]" delayed.log && + test_cmp delayed/Y original && + test_cmp delayed/Z original +' + +test_done diff --git a/unpack-trees.c b/unpack-trees.c index 7a1804c314..f88a69f8e7 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1038,7 +1038,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, size_t len = traverse_path_len(info, tree_entry_len(n)); struct cache_entry *ce = is_transient ? - make_empty_transient_cache_entry(len) : + make_empty_transient_cache_entry(len, NULL) : make_empty_cache_entry(istate, len); ce->ce_mode = create_ce_mode(n->mode); |