diff options
author | Jeff Hostetler <jeffhost@microsoft.com> | 2017-04-19 17:06:15 +0000 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2017-04-19 20:33:01 -0700 |
commit | 350d8701437f2d5031513bb45f1d14545d5a8911 (patch) | |
tree | 109db27d65a337fffb62d8cf6f775c22af02e2f4 /t | |
parent | a6db3fbb6e4ec11695e2a3af8bc2cb9119cb1941 (diff) | |
download | git-350d8701437f2d5031513bb45f1d14545d5a8911.tar.gz |
p0006-read-tree-checkout: perf test to time read-tree
Created t/perf/repos/many-files.sh to generate large, but
artificial repositories.
Created t/perf/inflate-repo.sh to alter an EXISTING repo
to have a set of large commits. This can be used to create
a branch with 1M+ files in repositories like git.git or
linux.git, but with more realistic content. It does this
by making multiple copies of the entire worktree in a series
of sub-directories.
The branch name and ballast structure created by both scripts
match, so either script can be used to generate very large
test repositories for the following perf test.
Created t/perf/p0006-read-tree-checkout.sh to measure
performance on various read-tree, checkout, and update-index
operations. This test can run using either normal repos or
ones from the above scripts.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't')
-rwxr-xr-x | t/perf/p0006-read-tree-checkout.sh | 67 | ||||
-rw-r--r-- | t/perf/repos/.gitignore | 1 | ||||
-rwxr-xr-x | t/perf/repos/inflate-repo.sh | 85 | ||||
-rwxr-xr-x | t/perf/repos/many-files.sh | 110 |
4 files changed, 263 insertions, 0 deletions
diff --git a/t/perf/p0006-read-tree-checkout.sh b/t/perf/p0006-read-tree-checkout.sh new file mode 100755 index 0000000000..78cc23fe2f --- /dev/null +++ b/t/perf/p0006-read-tree-checkout.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# +# This test measures the performance of various read-tree +# and checkout operations. It is primarily interested in +# the algorithmic costs of index operations and recursive +# tree traversal -- and NOT disk I/O on thousands of files. + +test_description="Tests performance of read-tree" + +. ./perf-lib.sh + +test_perf_default_repo + +# If the test repo was generated by ./repos/many-files.sh +# then we know something about the data shape and branches, +# so we can isolate testing to the ballast-related commits +# and setup sparse-checkout so we don't have to populate +# the ballast files and directories. +# +# Otherwise, we make some general assumptions about the +# repo and consider the entire history of the current +# branch to be the ballast. + +test_expect_success "setup repo" ' + if git rev-parse --verify refs/heads/p0006-ballast^{commit} + then + echo Assuming synthetic repo from many-files.sh + git branch br_base master + git branch br_ballast p0006-ballast^ + git branch br_ballast_alias p0006-ballast^ + git branch br_ballast_plus_1 p0006-ballast + git config --local core.sparsecheckout 1 + cat >.git/info/sparse-checkout <<-EOF + /* + !ballast/* + EOF + else + echo Assuming non-synthetic repo... + git branch br_base $(git rev-list HEAD | tail -n 1) + git branch br_ballast HEAD^ || error "no ancestor commit from current head" + git branch br_ballast_alias HEAD^ + git branch br_ballast_plus_1 HEAD + fi && + git checkout -q br_ballast && + nr_files=$(git ls-files | wc -l) +' + +test_perf "read-tree br_base br_ballast ($nr_files)" ' + git read-tree -m br_base br_ballast -n +' + +test_perf "switch between br_base br_ballast ($nr_files)" ' + git checkout -q br_base && + git checkout -q br_ballast +' + +test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" ' + git checkout -q br_ballast_plus_1 && + git checkout -q br_ballast +' + +test_perf "switch between aliases ($nr_files)" ' + git checkout -q br_ballast_alias && + git checkout -q br_ballast +' + +test_done diff --git a/t/perf/repos/.gitignore b/t/perf/repos/.gitignore new file mode 100644 index 0000000000..72e3dc3e19 --- /dev/null +++ b/t/perf/repos/.gitignore @@ -0,0 +1 @@ +gen-*/ diff --git a/t/perf/repos/inflate-repo.sh b/t/perf/repos/inflate-repo.sh new file mode 100755 index 0000000000..fcfc992b5b --- /dev/null +++ b/t/perf/repos/inflate-repo.sh @@ -0,0 +1,85 @@ +#!/bin/sh +# Inflate the size of an EXISTING repo. +# +# This script should be run inside the worktree of a TEST repo. +# It will use the contents of the current HEAD to generate a +# commit containing copies of the current worktree such that the +# total size of the commit has at least <target_size> files. +# +# Usage: [-t target_size] [-b branch_name] + +set -e + +target_size=10000 +branch_name=p0006-ballast +ballast=ballast + +while test "$#" -ne 0 +do + case "$1" in + -b) + shift; + test "$#" -ne 0 || { echo 'error: -b requires an argument' >&2; exit 1; } + branch_name=$1; + shift ;; + -t) + shift; + test "$#" -ne 0 || { echo 'error: -t requires an argument' >&2; exit 1; } + target_size=$1; + shift ;; + *) + echo "error: unknown option '$1'" >&2; exit 1 ;; + esac +done + +git ls-tree -r HEAD >GEN_src_list +nr_src_files=$(cat GEN_src_list | wc -l) + +src_branch=$(git symbolic-ref --short HEAD) + +echo "Branch $src_branch initially has $nr_src_files files." + +if test $target_size -le $nr_src_files +then + echo "Repository already exceeds target size $target_size." + rm GEN_src_list + exit 1 +fi + +# Create well-known branch and add 1 file change to start +# if off before the ballast. +git checkout -b $branch_name HEAD +echo "$target_size" > inflate-repo.params +git add inflate-repo.params +git commit -q -m params + +# Create ballast for in our branch. +copy=1 +nr_files=$nr_src_files +while test $nr_files -lt $target_size +do + sed -e "s| | $ballast/$copy/|" <GEN_src_list | + git update-index --index-info + + nr_files=$(expr $nr_files + $nr_src_files) + copy=$(expr $copy + 1) +done +rm GEN_src_list +git commit -q -m "ballast" + +# Modify 1 file and commit. +echo "$target_size" >> inflate-repo.params +git add inflate-repo.params +git commit -q -m "ballast plus 1" + +nr_files=$(git ls-files | wc -l) + +# Checkout master to put repo in canonical state (because +# the perf test may need to clone and enable sparse-checkout +# before attempting to checkout a commit with the ballast +# (because it may contain 100K directories and 1M files)). +git checkout $src_branch + +echo "Repository inflated. Branch $branch_name has $nr_files files." + +exit 0 diff --git a/t/perf/repos/many-files.sh b/t/perf/repos/many-files.sh new file mode 100755 index 0000000000..28720e4e10 --- /dev/null +++ b/t/perf/repos/many-files.sh @@ -0,0 +1,110 @@ +#!/bin/sh +# Generate test data repository using the given parameters. +# When omitted, we create "gen-many-files-d-w-f.git". +# +# Usage: [-r repo] [-d depth] [-w width] [-f files] +# +# -r repo: path to the new repo to be generated +# -d depth: the depth of sub-directories +# -w width: the number of sub-directories at each level +# -f files: the number of files created in each directory +# +# Note that all files will have the same SHA-1 and each +# directory at a level will have the same SHA-1, so we +# will potentially have a large index, but not a large +# ODB. +# +# Ballast will be created under "ballast/". + +EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 + +set -e + +# (5, 10, 9) will create 999,999 ballast files. +# (4, 10, 9) will create 99,999 ballast files. +depth=5 +width=10 +files=9 + +while test "$#" -ne 0 +do + case "$1" in + -r) + shift; + test "$#" -ne 0 || { echo 'error: -r requires an argument' >&2; exit 1; } + repo=$1; + shift ;; + -d) + shift; + test "$#" -ne 0 || { echo 'error: -d requires an argument' >&2; exit 1; } + depth=$1; + shift ;; + -w) + shift; + test "$#" -ne 0 || { echo 'error: -w requires an argument' >&2; exit 1; } + width=$1; + shift ;; + -f) + shift; + test "$#" -ne 0 || { echo 'error: -f requires an argument' >&2; exit 1; } + files=$1; + shift ;; + *) + echo "error: unknown option '$1'" >&2; exit 1 ;; + esac +done + +# Inflate the index with thousands of empty files. +# usage: dir depth width files +fill_index() { + awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 ' + function make_paths(dir, depth, width, files, f, w) { + for (f = 1; f <= files; f++) { + print dir "/file" f + } + if (depth > 0) { + for (w = 1; w <= width; w++) { + make_paths(dir "/dir" w, depth - 1, width, files) + } + } + } + END { make_paths(arg_dir, arg_depth, arg_width, arg_files) } + ' </dev/null | + sed "s/^/100644 $EMPTY_BLOB /" | + git update-index --index-info + return 0 +} + +[ -z "$repo" ] && repo=gen-many-files-$depth.$width.$files.git + +mkdir $repo +cd $repo +git init . + +# Create an initial commit just to define master. +touch many-files.empty +echo "$depth $width $files" >many-files.params +git add many-files.* +git commit -q -m params + +# Create ballast for p0006 based upon the given params and +# inflate the index with thousands of empty files and commit. +git checkout -b p0006-ballast +fill_index "ballast" $depth $width $files +git commit -q -m "ballast" + +nr_files=$(git ls-files | wc -l) + +# Modify 1 file and commit. +echo "$depth $width $files" >>many-files.params +git add many-files.params +git commit -q -m "ballast plus 1" + +# Checkout master to put repo in canonical state (because +# the perf test may need to clone and enable sparse-checkout +# before attempting to checkout a commit with the ballast +# (because it may contain 100K directories and 1M files)). +git checkout master + +echo "Repository "$repo" ($depth, $width, $files) created. Ballast $nr_files." +exit 0 |