summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2021-05-16 21:05:23 +0900
committerJunio C Hamano <gitster@pobox.com>2021-05-16 21:05:23 +0900
commita737e1f1d25747481bd4925555006f569e461117 (patch)
treef7f7cb6a78b9e67f9eb6e9c35cdc85758c0ebd25
parent644f4a20468da89c1325a539c0521336f7835a64 (diff)
parent87094fc2daa9613c2fad454dbb068a8f23ce8de8 (diff)
downloadgit-a737e1f1d25747481bd4925555006f569e461117.tar.gz
Merge branch 'mt/parallel-checkout-part-3'
The final part of "parallel checkout".

* mt/parallel-checkout-part-3:
  ci: run test round with parallel-checkout enabled
  parallel-checkout: add tests related to .gitattributes
  t0028: extract encoding helpers to lib-encoding.sh
  parallel-checkout: add tests related to path collisions
  parallel-checkout: add tests for basic operations
  checkout-index: add parallel checkout support
  builtin/checkout.c: complete parallel checkout support
  make_transient_cache_entry(): optionally alloc from mem_pool
-rw-r--r--builtin/checkout--worker.c2
-rw-r--r--builtin/checkout-index.c24
-rw-r--r--builtin/checkout.c22
-rw-r--r--builtin/difftool.c2
-rw-r--r--cache.h14
-rwxr-xr-xci/run-build-and-tests.sh1
-rw-r--r--parallel-checkout.c18
-rw-r--r--read-cache.c14
-rw-r--r--t/README4
-rw-r--r--t/lib-encoding.sh25
-rw-r--r--t/lib-parallel-checkout.sh45
-rwxr-xr-xt/t0028-working-tree-encoding.sh25
-rwxr-xr-xt/t2080-parallel-checkout-basics.sh229
-rwxr-xr-xt/t2081-parallel-checkout-collisions.sh162
-rwxr-xr-xt/t2082-parallel-checkout-attributes.sh194
-rw-r--r--unpack-trees.c2
16 files changed, 734 insertions, 49 deletions
diff --git a/builtin/checkout--worker.c b/builtin/checkout--worker.c
index 31e0de2f7e..289a9b8f89 100644
--- a/builtin/checkout--worker.c
+++ b/builtin/checkout--worker.c
@@ -39,7 +39,7 @@ static void packet_to_pc_item(const char *buffer, int len,
}
memset(pc_item, 0, sizeof(*pc_item));
- pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len);
+ pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len, NULL);
pc_item->ce->ce_namelen = fixed_portion->name_len;
pc_item->ce->ce_mode = fixed_portion->ce_mode;
memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c
index c9a3c71914..e21620d964 100644
--- a/builtin/checkout-index.c
+++ b/builtin/checkout-index.c
@@ -12,6 +12,7 @@
#include "cache-tree.h"
#include "parse-options.h"
#include "entry.h"
+#include "parallel-checkout.h"
#define CHECKOUT_ALL 4
static int nul_term_line;
@@ -115,7 +116,7 @@ static int checkout_file(const char *name, const char *prefix)
return -1;
}
-static void checkout_all(const char *prefix, int prefix_length)
+static int checkout_all(const char *prefix, int prefix_length)
{
int i, errs = 0;
struct cache_entry *last_ce = NULL;
@@ -144,11 +145,7 @@ static void checkout_all(const char *prefix, int prefix_length)
}
if (last_ce && to_tempfile)
write_tempfile_record(last_ce->name, prefix);
- if (errs)
- /* we have already done our error reporting.
- * exit with the same code as die().
- */
- exit(128);
+ return !!errs;
}
static const char * const builtin_checkout_index_usage[] = {
@@ -184,6 +181,7 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
int force = 0, quiet = 0, not_new = 0;
int index_opt = 0;
int err = 0;
+ int pc_workers, pc_threshold;
struct option builtin_checkout_index_options[] = {
OPT_BOOL('a', "all", &all,
N_("check out all files in the index")),
@@ -238,6 +236,10 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR);
}
+ get_parallel_checkout_configs(&pc_workers, &pc_threshold);
+ if (pc_workers > 1)
+ init_parallel_checkout();
+
/* Check out named files first */
for (i = 0; i < argc; i++) {
const char *arg = argv[i];
@@ -277,12 +279,16 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
strbuf_release(&buf);
}
+ if (all)
+ err |= checkout_all(prefix, prefix_length);
+
+ if (pc_workers > 1)
+ err |= run_parallel_checkout(&state, pc_workers, pc_threshold,
+ NULL, NULL);
+
if (err)
return 1;
- if (all)
- checkout_all(prefix, prefix_length);
-
if (is_lock_file_locked(&lock_file) &&
write_locked_index(&the_index, &lock_file, COMMIT_LOCK))
die("Unable to write new index file");
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 0bf61e6eef..f4cd7747d3 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -27,6 +27,7 @@
#include "wt-status.h"
#include "xdiff-interface.h"
#include "entry.h"
+#include "parallel-checkout.h"
static const char * const checkout_usage[] = {
N_("git checkout [<options>] <branch>"),
@@ -230,7 +231,8 @@ static int checkout_stage(int stage, const struct cache_entry *ce, int pos,
return error(_("path '%s' does not have their version"), ce->name);
}
-static int checkout_merged(int pos, const struct checkout *state, int *nr_checkouts)
+static int checkout_merged(int pos, const struct checkout *state,
+ int *nr_checkouts, struct mem_pool *ce_mem_pool)
{
struct cache_entry *ce = active_cache[pos];
const char *path = ce->name;
@@ -291,11 +293,10 @@ static int checkout_merged(int pos, const struct checkout *state, int *nr_checko
if (write_object_file(result_buf.ptr, result_buf.size, blob_type, &oid))
die(_("Unable to add merge result for '%s'"), path);
free(result_buf.ptr);
- ce = make_transient_cache_entry(mode, &oid, path, 2);
+ ce = make_transient_cache_entry(mode, &oid, path, 2, ce_mem_pool);
if (!ce)
die(_("make_cache_entry failed for path '%s'"), path);
status = checkout_entry(ce, state, NULL, nr_checkouts);
- discard_cache_entry(ce);
return status;
}
@@ -359,19 +360,27 @@ static int checkout_worktree(const struct checkout_opts *opts,
int nr_checkouts = 0, nr_unmerged = 0;
int errs = 0;
int pos;
+ int pc_workers, pc_threshold;
+ struct mem_pool ce_mem_pool;
state.force = 1;
state.refresh_cache = 1;
state.istate = &the_index;
+ mem_pool_init(&ce_mem_pool, 0);
+ get_parallel_checkout_configs(&pc_workers, &pc_threshold);
init_checkout_metadata(&state.meta, info->refname,
info->commit ? &info->commit->object.oid : &info->oid,
NULL);
enable_delayed_checkout(&state);
+ if (pc_workers > 1)
+ init_parallel_checkout();
+
/* TODO: audit for interaction with sparse-index. */
ensure_full_index(&the_index);
+
for (pos = 0; pos < active_nr; pos++) {
struct cache_entry *ce = active_cache[pos];
if (ce->ce_flags & CE_MATCHED) {
@@ -387,10 +396,15 @@ static int checkout_worktree(const struct checkout_opts *opts,
&nr_checkouts, opts->overlay_mode);
else if (opts->merge)
errs |= checkout_merged(pos, &state,
- &nr_unmerged);
+ &nr_unmerged,
+ &ce_mem_pool);
pos = skip_same_name(ce, pos) - 1;
}
}
+ if (pc_workers > 1)
+ errs |= run_parallel_checkout(&state, pc_workers, pc_threshold,
+ NULL, NULL);
+ mem_pool_discard(&ce_mem_pool, should_validate_cache_entries());
remove_marked_cache_entries(&the_index, 1);
remove_scheduled_dirs();
errs |= finish_delayed_checkout(&state, &nr_checkouts);
diff --git a/builtin/difftool.c b/builtin/difftool.c
index 0202a43052..89334b77fb 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -323,7 +323,7 @@ static int checkout_path(unsigned mode, struct object_id *oid,
struct cache_entry *ce;
int ret;
- ce = make_transient_cache_entry(mode, oid, path, 0);
+ ce = make_transient_cache_entry(mode, oid, path, 0, NULL);
ret = checkout_entry(ce, state, NULL, NULL);
discard_cache_entry(ce);
diff --git a/cache.h b/cache.h
index abeaec2b2b..ba04ff8bd3 100644
--- a/cache.h
+++ b/cache.h
@@ -370,16 +370,20 @@ struct cache_entry *make_empty_cache_entry(struct index_state *istate,
size_t name_len);
/*
- * Create a cache_entry that is not intended to be added to an index.
- * Caller is responsible for discarding the cache_entry
- * with `discard_cache_entry`.
+ * Create a cache_entry that is not intended to be added to an index. If
+ * `ce_mem_pool` is not NULL, the entry is allocated within the given memory
+ * pool. Caller is responsible for discarding "loose" entries with
+ * `discard_cache_entry()` and the memory pool with
+ * `mem_pool_discard(ce_mem_pool, should_validate_cache_entries())`.
*/
struct cache_entry *make_transient_cache_entry(unsigned int mode,
const struct object_id *oid,
const char *path,
- int stage);
+ int stage,
+ struct mem_pool *ce_mem_pool);
-struct cache_entry *make_empty_transient_cache_entry(size_t name_len);
+struct cache_entry *make_empty_transient_cache_entry(size_t len,
+ struct mem_pool *ce_mem_pool);
/*
* Discard cache entry.
diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh
index d19be40544..3ce81ffee9 100755
--- a/ci/run-build-and-tests.sh
+++ b/ci/run-build-and-tests.sh
@@ -26,6 +26,7 @@ linux-gcc)
export GIT_TEST_ADD_I_USE_BUILTIN=1
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
export GIT_TEST_WRITE_REV_INDEX=1
+ export GIT_TEST_CHECKOUT_WORKERS=2
make test
;;
linux-clang)
diff --git a/parallel-checkout.c b/parallel-checkout.c
index 09e8b10a35..6b1af32bb3 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -8,6 +8,7 @@
#include "sigchain.h"
#include "streaming.h"
#include "thread-utils.h"
+#include "trace2.h"
struct pc_worker {
struct child_process cp;
@@ -34,6 +35,20 @@ static const int DEFAULT_NUM_WORKERS = 1;
void get_parallel_checkout_configs(int *num_workers, int *threshold)
{
+ char *env_workers = getenv("GIT_TEST_CHECKOUT_WORKERS");
+
+ if (env_workers && *env_workers) {
+ if (strtol_i(env_workers, 10, num_workers)) {
+ die("invalid value for GIT_TEST_CHECKOUT_WORKERS: '%s'",
+ env_workers);
+ }
+ if (*num_workers < 1)
+ *num_workers = online_cpus();
+
+ *threshold = 0;
+ return;
+ }
+
if (git_config_get_int("checkout.workers", num_workers))
*num_workers = DEFAULT_NUM_WORKERS;
else if (*num_workers < 1)
@@ -326,6 +341,7 @@ void write_pc_item(struct parallel_checkout_item *pc_item,
if (dir_sep && !has_dirs_only_path(path.buf, dir_sep - path.buf,
state->base_dir_len)) {
pc_item->status = PC_ITEM_COLLIDED;
+ trace2_data_string("pcheckout", NULL, "collision/dirname", path.buf);
goto out;
}
@@ -341,6 +357,8 @@ void write_pc_item(struct parallel_checkout_item *pc_item,
* call should have already caught these cases.
*/
pc_item->status = PC_ITEM_COLLIDED;
+ trace2_data_string("pcheckout", NULL,
+ "collision/basename", path.buf);
} else {
error_errno("failed to open file '%s'", path.buf);
pc_item->status = PC_ITEM_FAILED;
diff --git a/read-cache.c b/read-cache.c
index fbf3a4ce7d..1b3c2eb408 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -839,8 +839,11 @@ struct cache_entry *make_empty_cache_entry(struct index_state *istate, size_t le
return mem_pool__ce_calloc(find_mem_pool(istate), len);
}
-struct cache_entry *make_empty_transient_cache_entry(size_t len)
+struct cache_entry *make_empty_transient_cache_entry(size_t len,
+ struct mem_pool *ce_mem_pool)
{
+ if (ce_mem_pool)
+ return mem_pool__ce_calloc(ce_mem_pool, len);
return xcalloc(1, cache_entry_size(len));
}
@@ -874,8 +877,11 @@ struct cache_entry *make_cache_entry(struct index_state *istate,
return ret;
}
-struct cache_entry *make_transient_cache_entry(unsigned int mode, const struct object_id *oid,
- const char *path, int stage)
+struct cache_entry *make_transient_cache_entry(unsigned int mode,
+ const struct object_id *oid,
+ const char *path,
+ int stage,
+ struct mem_pool *ce_mem_pool)
{
struct cache_entry *ce;
int len;
@@ -886,7 +892,7 @@ struct cache_entry *make_transient_cache_entry(unsigned int mode, const struct o
}
len = strlen(path);
- ce = make_empty_transient_cache_entry(len);
+ ce = make_empty_transient_cache_entry(len, ce_mem_pool);
oidcpy(&ce->oid, oid);
memcpy(ce->name, path, len);
diff --git a/t/README b/t/README
index 8eb9e46b1d..a8cfd37387 100644
--- a/t/README
+++ b/t/README
@@ -439,6 +439,10 @@ GIT_TEST_WRITE_REV_INDEX=<boolean>, when true enables the
GIT_TEST_SPARSE_INDEX=<boolean>, when true enables index writes to use the
sparse-index format by default.
+GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting
+to <n> and 'checkout.thresholdForParallelism' to 0, forcing the
+execution of the parallel-checkout code.
+
Naming Tests
------------
diff --git a/t/lib-encoding.sh b/t/lib-encoding.sh
new file mode 100644
index 0000000000..2dabc8c73e
--- /dev/null
+++ b/t/lib-encoding.sh
@@ -0,0 +1,25 @@
+# Encoding helpers
+
+test_lazy_prereq NO_UTF16_BOM '
+ test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6
+'
+
+test_lazy_prereq NO_UTF32_BOM '
+ test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12
+'
+
+write_utf16 () {
+ if test_have_prereq NO_UTF16_BOM
+ then
+ printf '\376\377'
+ fi &&
+ iconv -f UTF-8 -t UTF-16
+}
+
+write_utf32 () {
+ if test_have_prereq NO_UTF32_BOM
+ then
+ printf '\0\0\376\377'
+ fi &&
+ iconv -f UTF-8 -t UTF-32
+}
diff --git a/t/lib-parallel-checkout.sh b/t/lib-parallel-checkout.sh
new file mode 100644
index 0000000000..21f5759732
--- /dev/null
+++ b/t/lib-parallel-checkout.sh
@@ -0,0 +1,45 @@
+# Helpers for tests invoking parallel-checkout
+
+# Parallel checkout tests need full control of the number of workers
+unset GIT_TEST_CHECKOUT_WORKERS
+
+set_checkout_config () {
+ if test $# -ne 2
+ then
+ BUG "usage: set_checkout_config <workers> <threshold>"
+ fi &&
+
+ test_config_global checkout.workers $1 &&
+ test_config_global checkout.thresholdForParallelism $2
+}
+
+# Run "${@:2}" and check that $1 checkout workers were used
+test_checkout_workers () {
+ if test $# -lt 2
+ then
+ BUG "too few arguments to test_checkout_workers"
+ fi &&
+
+ local expected_workers=$1 &&
+ shift &&
+
+ local trace_file=trace-test-checkout-workers &&
+ rm -f "$trace_file" &&
+ GIT_TRACE2="$(pwd)/$trace_file" "$@" 2>&8 &&
+
+ local workers=$(grep "child_start\[..*\] git checkout--worker" "$trace_file" | wc -l) &&
+ test $workers -eq $expected_workers &&
+ rm "$trace_file"
+} 8>&2 2>&4
+
+# Verify that both the working tree and the index were created correctly
+verify_checkout () {
+ if test $# -ne 1
+ then
+ BUG "usage: verify_checkout <repository path>"
+ fi &&
+
+ git -C "$1" diff-index --ignore-submodules=none --exit-code HEAD -- &&
+ git -C "$1" status --porcelain >"$1".status &&
+ test_must_be_empty "$1".status
+}
diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh
index f970a9806b..82905a2156 100755
--- a/t/t0028-working-tree-encoding.sh
+++ b/t/t0028-working-tree-encoding.sh
@@ -6,33 +6,10 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-encoding.sh"
GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING
-test_lazy_prereq NO_UTF16_BOM '
- test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6
-'
-
-test_lazy_prereq NO_UTF32_BOM '
- test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12
-'
-
-write_utf16 () {
- if test_have_prereq NO_UTF16_BOM
- then
- printf '\376\377'
- fi &&
- iconv -f UTF-8 -t UTF-16
-}
-
-write_utf32 () {
- if test_have_prereq NO_UTF32_BOM
- then
- printf '\0\0\376\377'
- fi &&
- iconv -f UTF-8 -t UTF-32
-}
-
test_expect_success 'setup test files' '
git config core.eol lf &&
diff --git a/t/t2080-parallel-checkout-basics.sh b/t/t2080-parallel-checkout-basics.sh
new file mode 100755
index 0000000000..7087818550
--- /dev/null
+++ b/t/t2080-parallel-checkout-basics.sh
@@ -0,0 +1,229 @@
+#!/bin/sh
+
+test_description='parallel-checkout basics
+
+Ensure that parallel-checkout basically works on clone and checkout, spawning
+the required number of workers and correctly populating both the index and the
+working tree.
+'
+
+TEST_NO_CREATE_REPO=1
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-parallel-checkout.sh"
+
+# Test parallel-checkout with a branch switch containing a variety of file
+# creations, deletions, and modifications, involving different entry types.
+# The branches B1 and B2 have the following paths:
+#
+# B1 B2
+# a/a (file) a (file)
+# b (file) b/b (file)
+#
+# c/c (file) c (symlink)
+# d (symlink) d/d (file)
+#
+# e/e (file) e (submodule)
+# f (submodule) f/f (file)
+#
+# g (submodule) g (symlink)
+# h (symlink) h (submodule)
+#
+# Additionally, the following paths are present on both branches, but with
+# different contents:
+#
+# i (file) i (file)
+# j (symlink) j (symlink)
+# k (submodule) k (submodule)
+#
+# And the following paths are only present in one of the branches:
+#
+# l/l (file) -
+# - m/m (file)
+#
+test_expect_success 'setup repo for checkout with various types of changes' '
+ git init sub &&
+ (
+ cd sub &&
+ git checkout -b B2 &&
+ echo B2 >file &&
+ git add file &&
+ git commit -m file &&
+
+ git checkout -b B1 &&
+ echo B1 >file &&
+ git add file &&
+ git commit -m file
+ ) &&
+
+ git init various &&
+ (
+ cd various &&
+
+ git checkout -b B1 &&
+ mkdir a c e &&
+ echo a/a >a/a &&
+ echo b >b &&
+ echo c/c >c/c &&
+ test_ln_s_add c d &&
+ echo e/e >e/e &&
+ git submodule add ../sub f &&
+ git submodule add ../sub g &&
+ test_ln_s_add c h &&
+
+ echo "B1 i" >i &&
+ test_ln_s_add c j &&
+ git submodule add -b B1 ../sub k &&
+ mkdir l &&
+ echo l/l >l/l &&
+
+ git add . &&
+ git commit -m B1 &&
+
+ git checkout -b B2 &&
+ git rm -rf :^.gitmodules :^k &&
+ mkdir b d f &&
+ echo a >a &&
+ echo b/b >b/b &&
+ test_ln_s_add b c &&
+ echo d/d >d/d &&
+ git submodule add ../sub e &&
+ echo f/f >f/f &&
+ test_ln_s_add b g &&
+ git submodule add ../sub h &&
+
+ echo "B2 i" >i &&
+ test_ln_s_add b j &&
+ git -C k checkout B2 &&
+ mkdir m &&
+ echo m/m >m/m &&
+
+ git add . &&
+ git commit -m B2 &&
+
+ git checkout --recurse-submodules B1
+ )
+'
+
+for mode in sequential parallel sequential-fallback
+do
+ case $mode in
+ sequential) workers=1 threshold=0 expected_workers=0 ;;
+ parallel) workers=2 threshold=0 expected_workers=2 ;;
+ sequential-fallback) workers=2 threshold=100 expected_workers=0 ;;
+ esac
+
+ test_expect_success "$mode checkout" '
+ repo=various_$mode &&
+ cp -R various $repo &&
+
+ # The just copied files have more recent timestamps than their
+ # associated index entries. So refresh the cached timestamps
+ # to avoid an "entry not up-to-date" error from `git checkout`.
+ # We only have to do this for the submodules as `git checkout`
+ # will already refresh the superproject index before performing
+ # the up-to-date check.
+ #
+ git -C $repo submodule foreach "git update-index --refresh" &&
+
+ set_checkout_config $workers $threshold &&
+ test_checkout_workers $expected_workers \
+ git -C $repo checkout --recurse-submodules B2 &&
+ verify_checkout $repo
+ '
+done
+
+for mode in parallel sequential-fallback
+do
+ case $mode in
+ parallel) workers=2 threshold=0 expected_workers=2 ;;
+ sequential-fallback) workers=2 threshold=100 expected_workers=0 ;;
+ esac
+
+ test_expect_success "$mode checkout on clone" '
+ repo=various_${mode}_clone &&
+ set_checkout_config $workers $threshold &&
+ test_checkout_workers $expected_workers \
+ git clone --recurse-submodules --branch B2 various $repo &&
+ verify_checkout $repo
+ '
+done
+
+# Just to be paranoid, actually compare the working trees' contents directly.
+test_expect_success 'compare the working trees' '
+ rm -rf various_*/.git &&
+ rm -rf various_*/*/.git &&
+
+ # We use `git diff` instead of `diff -r` because the latter would
+ # follow symlinks, and not all `diff` implementations support the
+ # `--no-dereference` option.
+ #
+ git diff --no-index various_sequential various_parallel &&
+ git diff --no-index various_sequential various_parallel_clone &&
+ git diff --no-index various_sequential various_sequential-fallback &&
+ git diff --no-index various_sequential various_sequential-fallback_clone
+'
+
+# Currently, each submodule is checked out in a separate child process, but
+# these subprocesses must also be able to use parallel checkout workers to
+# write the submodules' entries.
+test_expect_success 'submodules can use parallel checkout' '
+ set_checkout_config 2 0 &&
+ git init super &&
+ (
+ cd super &&
+ git init sub &&
+ test_commit -C sub A &&
+ test_commit -C sub B &&
+ git submodule add ./sub &&
+ git commit -m sub &&
+ rm sub/* &&
+ test_checkout_workers 2 git checkout --recurse-submodules .
+ )
+'
+
+test_expect_success 'parallel checkout respects --[no]-force' '
+ set_checkout_config 2 0 &&
+ git init dirty &&
+ (
+ cd dirty &&
+ mkdir D &&
+ test_commit D/F &&
+ test_commit F &&
+
+ rm -rf D &&
+ echo changed >D &&
+ echo changed >F.t &&
+
+ # We expect 0 workers because there is nothing to be done
+ test_checkout_workers 0 git checkout HEAD &&
+ test_path_is_file D &&
+ grep changed D &&
+ grep changed F.t &&
+
+ test_checkout_workers 2 git checkout --force HEAD &&
+ test_path_is_dir D &&
+ grep D/F D/F.t &&
+ grep F F.t
+ )
+'
+
+test_expect_success SYMLINKS 'parallel checkout checks for symlinks in leading dirs' '
+ set_checkout_config 2 0 &&
+ git init symlinks &&
+ (
+ cd symlinks &&
+ mkdir D untracked &&
+ # Commit 2 files to have enough work for 2 parallel workers
+ test_commit D/A &&
+ test_commit D/B &&
+ rm -rf D &&
+ ln -s untracked D &&
+
+ test_checkout_workers 2 git checkout --force HEAD &&
+ ! test -h D &&
+ grep D/A D/A.t &&
+ grep D/B D/B.t
+ )
+'
+
+test_done
diff --git a/t/t2081-parallel-checkout-collisions.sh b/t/t2081-parallel-checkout-collisions.sh
new file mode 100755
index 0000000000..f6fcfc0c1e
--- /dev/null
+++ b/t/t2081-parallel-checkout-collisions.sh
@@ -0,0 +1,162 @@
+#!/bin/sh
+
+test_description="path collisions during parallel checkout
+
+Parallel checkout must detect path collisions to:
+
+1) Avoid racily writing to different paths that represent the same file on disk.
+2) Report the colliding entries on clone.
+
+The tests in this file exercise parallel checkout's collision detection code in
+both these mechanics.
+"
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-parallel-checkout.sh"
+
+TEST_ROOT="$PWD"
+
+test_expect_success CASE_INSENSITIVE_FS 'setup' '
+ empty_oid=$(git hash-object -w --stdin </dev/null) &&
+ cat >objs <<-EOF &&
+ 100644 $empty_oid FILE_X
+ 100644 $empty_oid FILE_x
+ 100644 $empty_oid file_X
+ 100644 $empty_oid file_x
+ EOF
+ git update-index --index-info <objs &&
+ git commit -m "colliding files" &&
+ git tag basename_collision &&
+
+ write_script "$TEST_ROOT"/logger_script <<-\EOF
+ echo "$@" >>filter.log
+ EOF
+'
+
+test_workers_in_event_trace ()
+{
+ test $1 -eq $(grep ".event.:.child_start..*checkout--worker" $2 | wc -l)
+}
+
+test_expect_success CASE_INSENSITIVE_FS 'worker detects basename collision' '
+ GIT_TRACE2_EVENT="$(pwd)/trace" git \
+ -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \
+ checkout . &&
+
+ test_workers_in_event_trace 2 trace &&
+ collisions=$(grep -i "category.:.pcheckout.,.key.:.collision/basename.,.value.:.file_x.}" trace | wc -l) &&
+ test $collisions -eq 3
+'
+
+test_expect_success CASE_INSENSITIVE_FS 'worker detects dirname collision' '
+ test_config filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" &&
+ empty_oid=$(git hash-object -w --stdin </dev/null) &&
+
+ # By setting a filter command to "a", we make it ineligible for parallel
+ # checkout, and thus it is checked out *first*. This way we can ensure
+ # that "A/B" and "A/C" will both collide with the regular file "a".
+ #
+ attr_oid=$(echo "a filter=logger" | git hash-object -w --stdin) &&
+
+ cat >objs <<-EOF &&
+ 100644 $empty_oid A/B
+ 100644 $empty_oid A/C
+ 100644 $empty_oid a
+ 100644 $attr_oid .gitattributes
+ EOF
+ git rm -rf . &&
+ git update-index --index-info <objs &&
+
+ rm -f trace filter.log &&
+ GIT_TRACE2_EVENT="$(pwd)/trace" git \
+ -c checkout.workers=2 -c checkout.thresholdForParallelism=0 \
+ checkout . &&
+
+ # Check that "a" (and only "a") was filtered
+ echo a >expected.log &&
+ test_cmp filter.log expected.log &&
+
+ # Check that it used the right number of workers and detected the collisions
+ test_workers_in_event_trace 2 trace &&
+ grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/B.}" trace &&
+ grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/C.}" trace
+'
+
+test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'do not follow symlinks colliding with leading dir' '
+ empty_oid=$(git hash-object -w --stdin </dev/null) &&
+ symlink_oid=$(echo "./e" | git hash-object -w --stdin) &&
+ mkdir e &&
+
+ cat >objs <<-EOF &&
+ 120000 $symlink_oid D
+ 100644 $empty_oid d/x
+ 100644 $empty_oid e/y
+ EOF
+ git rm -rf . &&
+ git update-index --index-info <objs &&
+
+ set_checkout_config 2 0 &&
+ test_checkout_workers 2 git checkout . &&
+ test_path_is_dir e &&
+ test_path_is_missing e/x
+'
+
+# The two following tests check that parallel checkout correctly reports
+# colliding entries on clone. The sequential code detects a collision by
+# calling lstat() before trying to open(O_CREAT) a file. (Note that this only
+# works for clone.) Then, to find the pair of a colliding item k, it searches
+# cache_entry[0, k-1]. This is not sufficient in parallel checkout because:
+#
+# - A colliding file may be created between the lstat() and open() calls;
+# - A colliding entry might appear in the second half of the cache_entry array.
+#
+test_expect_success CASE_INSENSITIVE_FS 'collision report on clone (w/ racy file creation)' '
+ git reset --hard basename_collision &&
+ set_checkout_config 2 0 &&
+ test_checkout_workers 2 git clone . clone-repo 2>stderr &&
+
+ grep FILE_X stderr &&
+ grep FILE_x stderr &&
+ grep file_X stderr &&
+ grep file_x stderr &&
+ grep "the following paths have collided" stderr
+'
+
+# This test ensures that the collision report code is correctly looking for
+# colliding peers in the second half of the cache_entry array. This is done by
+# defining a smudge command for the *last* array entry, which makes it
+# non-eligible for parallel-checkout. Thus, it is checked out *first*, before
+# spawning the workers.
+#
+# Note: this test doesn't work on Windows because, on this system, the
+# collision report code uses strcmp() to find the colliding pairs when
+# core.ignoreCase is false. And we need this setting for this test so that only
+# 'file_x' matches the pattern of the filter attribute. But the test works on
+# OSX, where the colliding pairs are found using inode.
+#
+test_expect_success CASE_INSENSITIVE_FS,!MINGW,!CYGWIN \
+ 'collision report on clone (w/ colliding peer after the detected entry)' '
+
+ test_config_global filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" &&
+ git reset --hard basename_collision &&
+ echo "file_x filter=logger" >.gitattributes &&
+ git add .gitattributes &&
+ git commit -m "filter for file_x" &&
+
+ rm -rf clone-repo &&
+ set_checkout_config 2 0 &&
+ test_checkout_workers 2 \
+ git -c core.ignoreCase=false clone . clone-repo 2>stderr &&
+
+ grep FILE_X stderr &&
+ grep FILE_x stderr &&
+ grep file_X stderr &&
+ grep file_x stderr &&
+ grep "the following paths have collided" stderr &&
+
+ # Check that only "file_x" was filtered
+ echo file_x >expected.log &&
+ test_cmp clone-repo/filter.log expected.log
+'
+
+test_done
diff --git a/t/t2082-parallel-checkout-attributes.sh b/t/t2082-parallel-checkout-attributes.sh
new file mode 100755
index 0000000000..2525457961
--- /dev/null
+++ b/t/t2082-parallel-checkout-attributes.sh
@@ -0,0 +1,194 @@
+#!/bin/sh
+
+test_description='parallel-checkout: attributes
+
+Verify that parallel-checkout correctly creates files that require
+conversions, as specified in .gitattributes. The main point here is
+to check that the conv_attr data is correctly sent to the workers
+and that it contains sufficient information to smudge files
+properly (without access to the index or attribute stack).
+'
+
+TEST_NO_CREATE_REPO=1
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-parallel-checkout.sh"
+. "$TEST_DIRECTORY/lib-encoding.sh"
+
+test_expect_success 'parallel-checkout with ident' '
+ set_checkout_config 2 0 &&
+ git init ident &&
+ (
+ cd ident &&
+ echo "A ident" >.gitattributes &&
+ echo "\$Id\$" >A &&
+ echo "\$Id\$" >B &&
+ git add -A &&
+ git commit -m id &&
+
+ rm A B &&
+ test_checkout_workers 2 git reset --hard &&
+ hexsz=$(test_oid hexsz) &&
+ grep -E "\\\$Id: [0-9a-f]{$hexsz} \\\$" A &&
+ grep "\\\$Id\\\$" B
+ )
+'
+
+test_expect_success 'parallel-checkout with re-encoding' '
+ set_checkout_config 2 0 &&
+ git init encoding &&
+ (
+ cd encoding &&
+ echo text >utf8-text &&
+ write_utf16 <utf8-text >utf16-text &&
+
+ echo "A working-tree-encoding=UTF-16" >.gitattributes &&
+ cp utf16-text A &&
+ cp utf8-text B &&
+ git add A B .gitattributes &&
+ git commit -m encoding &&
+
+ # Check that A is stored in UTF-8
+ git cat-file -p :A >A.internal &&
+ test_cmp_bin utf8-text A.internal &&
+
+ rm A B &&
+ test_checkout_workers 2 git checkout A B &&
+
+ # Check that A (and only A) is re-encoded during checkout
+ test_cmp_bin utf16-text A &&
+ test_cmp_bin utf8-text B
+ )
+'
+
+test_expect_success 'parallel-checkout with eol conversions' '
+ set_checkout_config 2 0 &&
+ git init eol &&
+ (
+ cd eol &&
+ printf "multi\r\nline\r\ntext" >crlf-text &&
+ printf "multi\nline\ntext" >lf-text &&
+
+ git config core.autocrlf false &&
+ echo "A eol=crlf" >.gitattributes &&
+ cp crlf-text A &&
+ cp lf-text B &&
+ git add A B .gitattributes &&
+ git commit -m eol &&
+
+ # Check that A is stored with LF format
+ git cat-file -p :A >A.internal &&
+ test_cmp_bin lf-text A.internal &&
+
+ rm A B &&
+ test_checkout_workers 2 git checkout A B &&
+
+ # Check that A (and only A) is converted to CRLF during checkout
+ test_cmp_bin crlf-text A &&
+ test_cmp_bin lf-text B
+ )
+'
+
+# Entries that require an external filter are not eligible for parallel
+# checkout. Check that both the parallel-eligible and non-eligible entries are
+# properly written in a single checkout operation.
+#
+test_expect_success 'parallel-checkout and external filter' '
+ set_checkout_config 2 0 &&
+ git init filter &&
+ (
+ cd filter &&
+ write_script <<-\EOF rot13.sh &&
+ tr \
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+ "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM"
+ EOF
+
+ git config filter.rot13.clean "\"$(pwd)/rot13.sh\"" &&
+ git config filter.rot13.smudge "\"$(pwd)/rot13.sh\"" &&
+ git config filter.rot13.required true &&
+
+ echo abcd >original &&
+ echo nopq >rot13 &&
+
+ echo "A filter=rot13" >.gitattributes &&
+ cp original A &&
+ cp original B &&
+ cp original C &&
+ git add A B C .gitattributes &&
+ git commit -m filter &&
+
+ # Check that A (and only A) was cleaned
+ git cat-file -p :A >A.internal &&
+ test_cmp rot13 A.internal &&
+ git cat-file -p :B >B.internal &&
+ test_cmp original B.internal &&
+ git cat-file -p :C >C.internal &&
+ test_cmp original C.internal &&
+
+ rm A B C *.internal &&
+ test_checkout_workers 2 git checkout A B C &&
+
+ # Check that A (and only A) was smudged during checkout
+ test_cmp original A &&
+ test_cmp original B &&
+ test_cmp original C
+ )
+'
+
+# The delayed queue is independent from the parallel queue, and they should be
+# able to work together in the same checkout process.
+#
+test_expect_success PERL 'parallel-checkout and delayed checkout' '
+ write_script rot13-filter.pl "$PERL_PATH" \
+ <"$TEST_DIRECTORY"/t0021/rot13-filter.pl &&
+
+ test_config_global filter.delay.process \
+ "\"$(pwd)/rot13-filter.pl\" --always-delay \"$(pwd)/delayed.log\" clean smudge delay" &&
+ test_config_global filter.delay.required true &&
+
+ echo "abcd" >original &&
+ echo "nopq" >rot13 &&
+
+ git init delayed &&
+ (
+ cd delayed &&
+ echo "*.d filter=delay" >.gitattributes &&
+ cp ../original W.d &&
+ cp ../original X.d &&
+ cp ../original Y &&
+ cp ../original Z &&
+ git add -A &&
+ git commit -m delayed &&
+
+ # Check that *.d files were cleaned
+ git cat-file -p :W.d >W.d.internal &&
+ test_cmp W.d.internal ../rot13 &&
+ git cat-file -p :X.d >X.d.internal &&
+ test_cmp X.d.internal ../rot13 &&
+ git cat-file -p :Y >Y.internal &&
+ test_cmp Y.internal ../original &&
+ git cat-file -p :Z >Z.internal &&
+ test_cmp Z.internal ../original &&
+
+ rm *
+ ) &&
+
+ set_checkout_config 2 0 &&
+ test_checkout_workers 2 git -C delayed checkout -f &&
+ verify_checkout delayed &&
+
+ # Check that the *.d files got to the delay queue and were filtered
+ grep "smudge W.d .* \[DELAYED\]" delayed.log &&
+ grep "smudge X.d .* \[DELAYED\]" delayed.log &&
+ test_cmp delayed/W.d original &&
+ test_cmp delayed/X.d original &&
+
+ # Check that the parallel-eligible entries went to the right queue and
+ # were not filtered
+ ! grep "smudge Y .* \[DELAYED\]" delayed.log &&
+ ! grep "smudge Z .* \[DELAYED\]" delayed.log &&
+ test_cmp delayed/Y original &&
+ test_cmp delayed/Z original
+'
+
+test_done
diff --git a/unpack-trees.c b/unpack-trees.c
index 7a1804c314..f88a69f8e7 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1038,7 +1038,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
size_t len = traverse_path_len(info, tree_entry_len(n));
struct cache_entry *ce =
is_transient ?
- make_empty_transient_cache_entry(len) :
+ make_empty_transient_cache_entry(len, NULL) :
make_empty_cache_entry(istate, len);
ce->ce_mode = create_ce_mode(n->mode);