diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-21 12:51:46 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-21 12:51:46 +0200 |
commit | ac62760c52abf28d1fd863f0c0dd48bc4a23d223 (patch) | |
tree | cab7b4165898b86f8aebc1081fdb46a99f9c9f35 /test/git/performance | |
parent | f164627a85ed7b816759871a76db258515b85678 (diff) | |
download | gitpython-ac62760c52abf28d1fd863f0c0dd48bc4a23d223.tar.gz |
index.add: now uses gitdb.store functionality instead of git-hash-object. The pure-Python version is about as fast, and could additionally support multithreading using async
Diffstat (limited to 'test/git/performance')
-rw-r--r-- | test/git/performance/lib.py | 23 | ||||
-rw-r--r-- | test/git/performance/test_odb.py | 96 |
2 files changed, 70 insertions, 49 deletions
diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py index 7d2a9f4a..4ac1f1da 100644 --- a/test/git/performance/lib.py +++ b/test/git/performance/lib.py @@ -4,6 +4,11 @@ from test.testlib import * import shutil import tempfile +from git.db import ( + GitCmdObjectDB, + GitDB + ) + from git import ( Repo ) @@ -31,9 +36,14 @@ class TestBigRepoR(TestBase): """TestCase providing access to readonly 'big' repositories using the following member variables: - * gitrepo + * gitrorepo - * Read-Only git repository - actually the repo of git itself""" + * Read-Only git repository - actually the repo of git itself + + * puregitrorepo + + * As gitrepo, but uses pure python implementation + """ #{ Invariants head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca' @@ -43,20 +53,23 @@ class TestBigRepoR(TestBase): @classmethod def setUpAll(cls): super(TestBigRepoR, cls).setUpAll() - cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo)) + repo_path = resolve_or_fail(k_env_git_repo) + cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB) + cls.puregitrorepo = Repo(repo_path, odbt=GitDB) class TestBigRepoRW(TestBigRepoR): """As above, but provides a big repository that we can write to. 
- Provides ``self.gitrwrepo``""" + Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``""" @classmethod def setUpAll(cls): super(TestBigRepoRW, cls).setUpAll() dirname = tempfile.mktemp() os.mkdir(dirname) - cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True) + cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) + cls.puregitrwrepo = Repo(dirname, odbt=GitDB) @classmethod def tearDownAll(cls): diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py index 7b1ee838..0809469a 100644 --- a/test/git/performance/test_odb.py +++ b/test/git/performance/test_odb.py @@ -12,50 +12,58 @@ from lib import ( class TestObjDBPerformance(TestBigRepoR): def test_random_access(self): - - # GET COMMITS - # TODO: use the actual db for this - st = time() - root_commit = self.gitrorepo.commit(self.head_sha_2k) - commits = list(root_commit.traverse()) - nc = len(commits) - elapsed = time() - st - - print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed) + results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ] + for repo in (self.gitrorepo, self.puregitrorepo): + # GET COMMITS + st = time() + root_commit = repo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed) + results[0].append(elapsed) + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st - # GET TREES - # walk all trees of all commits 
- st = time() - blobs_per_commit = list() - nt = 0 - for commit in commits: - tree = commit.tree - blobs = list() - for item in tree.traverse(): - nt += 1 - if item.type == 'blob': - blobs.append(item) - # direct access for speed - # END while trees are there for walking - blobs_per_commit.append(blobs) - # END for each commit - elapsed = time() - st - - print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed) - - # GET BLOBS - st = time() - nb = 0 - too_many = 15000 - for blob_list in blobs_per_commit: - for blob in blob_list: - blob.data - # END for each blobsha - nb += len(blob_list) - if nb > too_many: - break - # END for each bloblist - elapsed = time() - st + print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed) + results[1].append(elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + for blob_list in blobs_per_commit: + for blob in blob_list: + blob.data + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed) + results[2].append(elapsed) + # END for each repo type - print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed) + # final results + for test_name, a, b in results: + print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a) + # END for each result |