From 155158e1410ff036812a87975cce6cb91aa8280e Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 6 Jun 2011 17:15:12 +0200
Subject: Added PackedDB test with generalized type to allows other implementations to be tested as well at some point

---
Note: this copy of the patch was re-flowed from a whitespace-collapsed
rendering. Line breaks follow the hunk headers and diffstat counts; leading
whitespace inside hunks (4 spaces per level) and whitespace on blank lines are
a best-effort reconstruction, so apply with whitespace-insensitive matching.

 git/test/performance/db/odb_impl.py            |   1 -
 git/test/performance/db/packedodb_impl.py      | 107 +++++++++++++++++++++++++
 git/test/performance/db/test_packedodb_pure.py |   7 ++
 git/test/performance/test_pack.py              |  99 -----------------------
 4 files changed, 114 insertions(+), 100 deletions(-)
 create mode 100644 git/test/performance/db/packedodb_impl.py
 create mode 100644 git/test/performance/db/test_packedodb_pure.py
 delete mode 100644 git/test/performance/test_pack.py

(limited to 'git')

diff --git a/git/test/performance/db/odb_impl.py b/git/test/performance/db/odb_impl.py
index fd1abdee..677cf6a8 100644
--- a/git/test/performance/db/odb_impl.py
+++ b/git/test/performance/db/odb_impl.py
@@ -3,7 +3,6 @@
 from time import time
 import sys
 import stat
-import copy
 
 from git.test.performance.lib import (
     TestBigRepoR,
diff --git a/git/test/performance/db/packedodb_impl.py b/git/test/performance/db/packedodb_impl.py
new file mode 100644
index 00000000..b95a8d13
--- /dev/null
+++ b/git/test/performance/db/packedodb_impl.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Performance tests for object store"""
+from git.test.performance.lib import (
+    TestBigRepoR,
+    GlobalsItemDeletorMetaCls
+    )
+
+from git.exc import UnsupportedOperation
+
+import sys
+import os
+from time import time
+import random
+
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+    ModuleToDelete = 'TestPurePackedODBPerformanceBase'
+
+class TestPurePackedODBPerformanceBase(TestBigRepoR):
+    __metaclass__ = PerfBaseDeletorMetaClass
+
+    #{ Configuration
+    PackedODBCls = None
+    #} END configuration
+
+    @classmethod
+    def setUpAll(cls):
+        super(TestPurePackedODBPerformanceBase, cls).setUpAll()
+        if cls.PackedODBCls is None:
+            raise AssertionError("PackedODBCls must be set in subclass")
+        #END assert configuration
+        cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack"))
+
+    def test_pack_random_access(self):
+        pdb = self.ropdb
+
+        # sha lookup
+        st = time()
+        sha_list = list(pdb.sha_iter())
+        elapsed = time() - st
+        ns = len(sha_list)
+        print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
+
+        # sha lookup: best-case and worst case access
+        pdb_pack_info = pdb._pack_info
+        # END shuffle shas
+        st = time()
+        for sha in sha_list:
+            pdb_pack_info(sha)
+        # END for each sha to look up
+        elapsed = time() - st
+
+        # discard cache
+        del(pdb._entities)
+        pdb.entities()
+        print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
+        # END for each random mode
+
+        # query info and streams only
+        max_items = 10000 # can wait longer when testing memory
+        for pdb_fun in (pdb.info, pdb.stream):
+            st = time()
+            for sha in sha_list[:max_items]:
+                pdb_fun(sha)
+            elapsed = time() - st
+            print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
+        # END for each function
+
+        # retrieve stream and read all
+        max_items = 5000
+        pdb_stream = pdb.stream
+        total_size = 0
+        st = time()
+        for sha in sha_list[:max_items]:
+            stream = pdb_stream(sha)
+            stream.read()
+            total_size += stream.size
+        elapsed = time() - st
+        total_kib = total_size / 1000
+        print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
+
+    def test_correctness(self):
+        pdb = self.ropdb
+        # disabled for now as it used to work perfectly, checking big repositories takes a long time
+        print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
+        for crc in range(2):
+            count = 0
+            st = time()
+            for entity in pdb.entities():
+                pack_verify = entity.is_valid_stream
+                sha_by_index = entity.index().sha
+                for index in xrange(entity.index().size()):
+                    try:
+                        assert pack_verify(sha_by_index(index), use_crc=crc)
+                        count += 1
+                    except UnsupportedOperation:
+                        pass
+                    # END ignore old indices
+                # END for each index
+            # END for each entity
+            elapsed = time() - st
+            print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
+        # END for each verify mode
+
diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py
new file mode 100644
index 00000000..7b9f2930
--- /dev/null
+++ b/git/test/performance/db/test_packedodb_pure.py
@@ -0,0 +1,7 @@
+from packedodb_impl import TestPurePackedODBPerformanceBase
+from git.db.py.pack import PurePackedODB
+
+class TestPurePackedODB(TestPurePackedODBPerformanceBase):
+    #{ Configuration
+    PackedODBCls = PurePackedODB
+    #} END configuration
diff --git a/git/test/performance/test_pack.py b/git/test/performance/test_pack.py
deleted file mode 100644
index 8c1207bc..00000000
--- a/git/test/performance/test_pack.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
-#
-# This module is part of GitDB and is released under
-# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
-"""Performance tests for object store"""
-from lib import (
-    TestBigRepoR
-    )
-
-from git.exc import UnsupportedOperation
-from git.db.py.pack import PurePackedODB
-
-import sys
-import os
-from time import time
-import random
-
-class TestPurePackedODBPerformance(TestBigRepoR):
-
-    #{ Configuration
-    PackedODBCls = PurePackedODB
-    #} END configuration
-
-    @classmethod
-    def setUpAll(cls):
-        super(TestPurePackedODBPerformance, cls).setUpAll()
-        cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack"))
-
-    def test_pack_random_access(self):
-        pdb = self.ropdb
-
-        # sha lookup
-        st = time()
-        sha_list = list(pdb.sha_iter())
-        elapsed = time() - st
-        ns = len(sha_list)
-        print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
-
-        # sha lookup: best-case and worst case access
-        pdb_pack_info = pdb._pack_info
-        # END shuffle shas
-        st = time()
-        for sha in sha_list:
-            pdb_pack_info(sha)
-        # END for each sha to look up
-        elapsed = time() - st
-
-        # discard cache
-        del(pdb._entities)
-        pdb.entities()
-        print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
-        # END for each random mode
-
-        # query info and streams only
-        max_items = 10000 # can wait longer when testing memory
-        for pdb_fun in (pdb.info, pdb.stream):
-            st = time()
-            for sha in sha_list[:max_items]:
-                pdb_fun(sha)
-            elapsed = time() - st
-            print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
-        # END for each function
-
-        # retrieve stream and read all
-        max_items = 5000
-        pdb_stream = pdb.stream
-        total_size = 0
-        st = time()
-        for sha in sha_list[:max_items]:
-            stream = pdb_stream(sha)
-            stream.read()
-            total_size += stream.size
-        elapsed = time() - st
-        total_kib = total_size / 1000
-        print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
-
-    def test_correctness(self):
-        pdb = self.ropdb
-        # disabled for now as it used to work perfectly, checking big repositories takes a long time
-        print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
-        for crc in range(2):
-            count = 0
-            st = time()
-            for entity in pdb.entities():
-                pack_verify = entity.is_valid_stream
-                sha_by_index = entity.index().sha
-                for index in xrange(entity.index().size()):
-                    try:
-                        assert pack_verify(sha_by_index(index), use_crc=crc)
-                        count += 1
-                    except UnsupportedOperation:
-                        pass
-                    # END ignore old indices
-                # END for each index
-            # END for each entity
-            elapsed = time() - st
-            print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
-        # END for each verify mode
-
-- 
cgit v1.2.1