summary | refs | log | tree | commit | diff
path: root/test/git/performance
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2010-06-21 12:51:46 +0200
committer Sebastian Thiel <byronimo@gmail.com> 2010-06-21 12:51:46 +0200
commit ac62760c52abf28d1fd863f0c0dd48bc4a23d223 (patch)
tree cab7b4165898b86f8aebc1081fdb46a99f9c9f35 /test/git/performance
parent f164627a85ed7b816759871a76db258515b85678 (diff)
download gitpython-ac62760c52abf28d1fd863f0c0dd48bc4a23d223.tar.gz
index.add: now uses gitdb.store functionality instead of git-hash-file. The python version is about as fast, but could support multithreading using async
Diffstat (limited to 'test/git/performance')
-rw-r--r-- test/git/performance/lib.py 23
-rw-r--r-- test/git/performance/test_odb.py 96
2 files changed, 70 insertions, 49 deletions
diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py
index 7d2a9f4a..4ac1f1da 100644
--- a/test/git/performance/lib.py
+++ b/test/git/performance/lib.py
@@ -4,6 +4,11 @@ from test.testlib import *
import shutil
import tempfile
+from git.db import (
+ GitCmdObjectDB,
+ GitDB
+ )
+
from git import (
Repo
)
@@ -31,9 +36,14 @@ class TestBigRepoR(TestBase):
"""TestCase providing access to readonly 'big' repositories using the following
member variables:
- * gitrepo
+ * gitrorepo
- * Read-Only git repository - actually the repo of git itself"""
+ * Read-Only git repository - actually the repo of git itself
+
+ * puregitrorepo
+
+ * As gitrepo, but uses pure python implementation
+ """
#{ Invariants
head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
@@ -43,20 +53,23 @@ class TestBigRepoR(TestBase):
@classmethod
def setUpAll(cls):
super(TestBigRepoR, cls).setUpAll()
- cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo))
+ repo_path = resolve_or_fail(k_env_git_repo)
+ cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB)
+ cls.puregitrorepo = Repo(repo_path, odbt=GitDB)
class TestBigRepoRW(TestBigRepoR):
"""As above, but provides a big repository that we can write to.
- Provides ``self.gitrwrepo``"""
+ Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``"""
@classmethod
def setUpAll(cls):
super(TestBigRepoRW, cls).setUpAll()
dirname = tempfile.mktemp()
os.mkdir(dirname)
- cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True)
+ cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB)
+ cls.puregitrwrepo = Repo(dirname, odbt=GitDB)
@classmethod
def tearDownAll(cls):
diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
index 7b1ee838..0809469a 100644
--- a/test/git/performance/test_odb.py
+++ b/test/git/performance/test_odb.py
@@ -12,50 +12,58 @@ from lib import (
class TestObjDBPerformance(TestBigRepoR):
def test_random_access(self):
-
- # GET COMMITS
- # TODO: use the actual db for this
- st = time()
- root_commit = self.gitrorepo.commit(self.head_sha_2k)
- commits = list(root_commit.traverse())
- nc = len(commits)
- elapsed = time() - st
-
- print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+ results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ]
+ for repo in (self.gitrorepo, self.puregitrorepo):
+ # GET COMMITS
+ st = time()
+ root_commit = repo.commit(self.head_sha_2k)
+ commits = list(root_commit.traverse())
+ nc = len(commits)
+ elapsed = time() - st
+ print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
+ results[0].append(elapsed)
+
+ # GET TREES
+ # walk all trees of all commits
+ st = time()
+ blobs_per_commit = list()
+ nt = 0
+ for commit in commits:
+ tree = commit.tree
+ blobs = list()
+ for item in tree.traverse():
+ nt += 1
+ if item.type == 'blob':
+ blobs.append(item)
+ # direct access for speed
+ # END while trees are there for walking
+ blobs_per_commit.append(blobs)
+ # END for each commit
+ elapsed = time() - st
- # GET TREES
- # walk all trees of all commits
- st = time()
- blobs_per_commit = list()
- nt = 0
- for commit in commits:
- tree = commit.tree
- blobs = list()
- for item in tree.traverse():
- nt += 1
- if item.type == 'blob':
- blobs.append(item)
- # direct access for speed
- # END while trees are there for walking
- blobs_per_commit.append(blobs)
- # END for each commit
- elapsed = time() - st
-
- print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
-
- # GET BLOBS
- st = time()
- nb = 0
- too_many = 15000
- for blob_list in blobs_per_commit:
- for blob in blob_list:
- blob.data
- # END for each blobsha
- nb += len(blob_list)
- if nb > too_many:
- break
- # END for each bloblist
- elapsed = time() - st
+ print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
+ results[1].append(elapsed)
+
+ # GET BLOBS
+ st = time()
+ nb = 0
+ too_many = 15000
+ for blob_list in blobs_per_commit:
+ for blob in blob_list:
+ blob.data
+ # END for each blobsha
+ nb += len(blob_list)
+ if nb > too_many:
+ break
+ # END for each bloblist
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed)
+ results[2].append(elapsed)
+ # END for each repo type
- print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
+ # final results
+ for test_name, a, b in results:
+ print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a)
+ # END for each result