index.add: now uses gitdb.store functionality instead of git-hash-object. The Python version is about as fast, but could support multithreading using async

author: Sebastian Thiel <byronimo@gmail.com> 2010-06-21 12:51:46 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-21 12:51:46 +0200
commit: ac62760c52abf28d1fd863f0c0dd48bc4a23d223 (patch)
tree: cab7b4165898b86f8aebc1081fdb46a99f9c9f35 /test/git/performance
parent: f164627a85ed7b816759871a76db258515b85678 (diff)
download: gitpython-ac62760c52abf28d1fd863f0c0dd48bc4a23d223.tar.gz
2 files changed, 70 insertions, 49 deletions
diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py
index 7d2a9f4a..4ac1f1da 100644
--- a/test/git/performance/lib.py
+++ b/test/git/performance/lib.py
@@ -4,6 +4,11 @@ from test.testlib import *
 import shutil
 import tempfile
 
+from git.db import (
+						GitCmdObjectDB,
+						GitDB
+					)
+
 from git import (
 	Repo
 	)
@@ -31,9 +36,14 @@ class TestBigRepoR(TestBase):
 	"""TestCase providing access to readonly 'big' repositories using the following 
 	member variables:
 	
-	* gitrepo
+	* gitrorepo
 	
-	 * Read-Only git repository - actually the repo of git itself"""
+	 * Read-Only git repository - actually the repo of git itself
+	 
+    * puregitrorepo
+    
+     * As gitrorepo, but uses the pure Python implementation
+    """
 	 
 	#{ Invariants
 	head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
@@ -43,20 +53,23 @@ class TestBigRepoR(TestBase):
 	@classmethod
 	def setUpAll(cls):
 		super(TestBigRepoR, cls).setUpAll()
-		cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo))
+		repo_path = resolve_or_fail(k_env_git_repo)
+		cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB)
+		cls.puregitrorepo = Repo(repo_path, odbt=GitDB)
 
 
 class TestBigRepoRW(TestBigRepoR):
 	"""As above, but provides a big repository that we can write to.
 	
-	Provides ``self.gitrwrepo``"""
+	Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``"""
 	
 	@classmethod
 	def setUpAll(cls):
 		super(TestBigRepoRW, cls).setUpAll()
 		dirname = tempfile.mktemp()
 		os.mkdir(dirname)
-		cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True)
+		cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB)
+		cls.puregitrwrepo = Repo(dirname, odbt=GitDB)
 	
 	@classmethod
 	def tearDownAll(cls):
diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
index 7b1ee838..0809469a 100644
--- a/test/git/performance/test_odb.py
+++ b/test/git/performance/test_odb.py
@@ -12,50 +12,58 @@ from lib import (
 class TestObjDBPerformance(TestBigRepoR):
 	
 	def test_random_access(self):
-		
-		# GET COMMITS
-		# TODO: use the actual db for this
-		st = time()
-		root_commit = self.gitrorepo.commit(self.head_sha_2k)
-		commits = list(root_commit.traverse())
-		nc = len(commits)
-		elapsed = time() - st
-		
-		print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+		results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ]
+		for repo in (self.gitrorepo, self.puregitrorepo):
+			# GET COMMITS
+			st = time()
+			root_commit = repo.commit(self.head_sha_2k)
+			commits = list(root_commit.traverse())
+			nc = len(commits)
+			elapsed = time() - st
 			
+			print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
+			results[0].append(elapsed)
+				
+			# GET TREES
+			# walk all trees of all commits
+			st = time()
+			blobs_per_commit = list()
+			nt = 0
+			for commit in commits:
+				tree = commit.tree
+				blobs = list()
+				for item in tree.traverse():
+					nt += 1
+					if item.type == 'blob':
+						blobs.append(item)
+					# direct access for speed
+				# END while trees are there for walking
+				blobs_per_commit.append(blobs)
+			# END for each commit
+			elapsed = time() - st
 			
-		# GET TREES
-		# walk all trees of all commits
-		st = time()
-		blobs_per_commit = list()
-		nt = 0
-		for commit in commits:
-			tree = commit.tree
-			blobs = list()
-			for item in tree.traverse():
-				nt += 1
-				if item.type == 'blob':
-					blobs.append(item)
-				# direct access for speed
-			# END while trees are there for walking
-			blobs_per_commit.append(blobs)
-		# END for each commit
-		elapsed = time() - st
-		
-		print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
-		
-		# GET BLOBS
-		st = time()
-		nb = 0
-		too_many = 15000
-		for blob_list in blobs_per_commit:
-			for blob in blob_list:
-				blob.data
-			# END for each blobsha
-			nb += len(blob_list)
-			if nb > too_many:
-				break
-		# END for each bloblist
-		elapsed = time() - st
+			print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
+			results[1].append(elapsed)
+			
+			# GET BLOBS
+			st = time()
+			nb = 0
+			too_many = 15000
+			for blob_list in blobs_per_commit:
+				for blob in blob_list:
+					blob.data
+				# END for each blobsha
+				nb += len(blob_list)
+				if nb > too_many:
+					break
+			# END for each bloblist
+			elapsed = time() - st
+			
+			print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed)
+			results[2].append(elapsed)
+		# END for each repo type
 		
-		print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
+		# final results
+		for test_name, a, b in results:
+			print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a)
+		# END for each result
author	Sebastian Thiel <byronimo@gmail.com>	2010-06-21 12:51:46 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2010-06-21 12:51:46 +0200
commit	ac62760c52abf28d1fd863f0c0dd48bc4a23d223 (patch)
tree	cab7b4165898b86f8aebc1081fdb46a99f9c9f35 /test/git/performance
parent	f164627a85ed7b816759871a76db258515b85678 (diff)
download	gitpython-ac62760c52abf28d1fd863f0c0dd48bc4a23d223.tar.gz