summaryrefslogtreecommitdiff
path: root/git/test/performance/test_pack.py
diff options
context:
space:
mode:
Diffstat (limited to 'git/test/performance/test_pack.py')
-rw-r--r--git/test/performance/test_pack.py90
1 files changed, 90 insertions, 0 deletions
diff --git a/git/test/performance/test_pack.py b/git/test/performance/test_pack.py
new file mode 100644
index 00000000..da952b17
--- /dev/null
+++ b/git/test/performance/test_pack.py
@@ -0,0 +1,90 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Performance tests for object store"""
+from lib import (
+ TestBigRepoR
+ )
+
+from gitdb.exc import UnsupportedOperation
+from gitdb.db.pack import PackedDB
+
+import sys
+import os
+from time import time
+import random
+
+class TestPackedDBPerformance(TestBigRepoR):
+
+ def _test_pack_random_access(self):
+ pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
+
+ # sha lookup
+ st = time()
+ sha_list = list(pdb.sha_iter())
+ elapsed = time() - st
+ ns = len(sha_list)
+ print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
+
+ # sha lookup: best-case and worst case access
+ pdb_pack_info = pdb._pack_info
+ # END shuffle shas
+ st = time()
+ for sha in sha_list:
+ pdb_pack_info(sha)
+ # END for each sha to look up
+ elapsed = time() - st
+
+ # discard cache
+ del(pdb._entities)
+ pdb.entities()
+ print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
+ # END for each random mode
+
+ # query info and streams only
+ max_items = 10000 # can wait longer when testing memory
+ for pdb_fun in (pdb.info, pdb.stream):
+ st = time()
+ for sha in sha_list[:max_items]:
+ pdb_fun(sha)
+ elapsed = time() - st
+ print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
+ # END for each function
+
+ # retrieve stream and read all
+ max_items = 5000
+ pdb_stream = pdb.stream
+ total_size = 0
+ st = time()
+ for sha in sha_list[:max_items]:
+ stream = pdb_stream(sha)
+ stream.read()
+ total_size += stream.size
+ elapsed = time() - st
+ total_kib = total_size / 1000
+ print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
+
+ def test_correctness(self):
+ pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
+ # disabled for now as it used to work perfectly, checking big repositories takes a long time
+ print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
+ for crc in range(2):
+ count = 0
+ st = time()
+ for entity in pdb.entities():
+ pack_verify = entity.is_valid_stream
+ sha_by_index = entity.index().sha
+ for index in xrange(entity.index().size()):
+ try:
+ assert pack_verify(sha_by_index(index), use_crc=crc)
+ count += 1
+ except UnsupportedOperation:
+ pass
+ # END ignore old indices
+ # END for each index
+ # END for each entity
+ elapsed = time() - st
+ print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
+ # END for each verify mode
+