diff options
Diffstat (limited to 'git/test')
27 files changed, 1750 insertions, 37 deletions
diff --git a/git/test/__init__.py b/git/test/__init__.py index 757cbad1..63d25743 100644 --- a/git/test/__init__.py +++ b/git/test/__init__.py @@ -3,3 +3,12 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import gitdb.util + +def _init_pool(): + """Assure the pool is actually threaded""" + size = 2 + print "Setting ThreadPool to %i" % size + gitdb.util.pool.set_size(size) + diff --git a/git/test/db/__init__.py b/git/test/db/__init__.py new file mode 100644 index 00000000..8a681e42 --- /dev/null +++ b/git/test/db/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/git/test/db/lib.py b/git/test/db/lib.py new file mode 100644 index 00000000..5f4f9c36 --- /dev/null +++ b/git/test/db/lib.py @@ -0,0 +1,215 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Base classes for object db testing""" +from gitdb.test.lib import ( + with_rw_directory, + with_packs_rw, + ZippedStoreShaWriter, + fixture_path, + TestBase + ) + +from gitdb.stream import Sha1Writer + +# import database types we want to support +# they will be set to None if the respective library could not be loaded +from gitdb.db.py import PureGitDB + +from gitdb.base import ( + IStream, + OStream, + OInfo + ) + +from gitdb.exc import BadObject +from gitdb.typ import str_blob_type + +from async import IteratorReader +from cStringIO import StringIO +from struct import pack + + +__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path') + +class TestDBBase(TestBase): + """Base class providing testing routines on databases""" + + # data + two_lines = "1234\nhello world" + all_data = (two_lines, ) + + # all supported database types. Add your own type + ref_db_types = (PureGitDB, ) + + def _assert_object_writing_simple(self, db): + # write a bunch of objects and query their streams and info + null_objs = db.size() + ni = 250 + for i in xrange(ni): + data = pack(">L", i) + istream = IStream(str_blob_type, len(data), StringIO(data)) + new_istream = db.store(istream) + assert new_istream is istream + assert db.has_object(istream.binsha) + + info = db.info(istream.binsha) + assert isinstance(info, OInfo) + assert info.type == istream.type and info.size == istream.size + + stream = db.stream(istream.binsha) + assert isinstance(stream, OStream) + assert stream.binsha == info.binsha and stream.type == info.type + assert stream.read() == data + # END for each item + + assert db.size() == null_objs + ni + shas = list(db.sha_iter()) + assert len(shas) == db.size() + assert len(shas[0]) == 20 + + + def _assert_object_writing(self, db): + """General tests to verify object writing, compatible to ObjectDBW + :note: requires write access to the database""" + # start in 'dry-run' mode, using a simple sha1 writer + ostreams = (ZippedStoreShaWriter, None) + for ostreamcls in ostreams: + for data in self.all_data: + dry_run = ostreamcls is not None + ostream = None + if ostreamcls is not None: + ostream = ostreamcls() + assert isinstance(ostream, Sha1Writer) + # END create ostream + + prev_ostream = db.set_ostream(ostream) + assert type(prev_ostream) in ostreams or prev_ostream in ostreams + + istream = IStream(str_blob_type, len(data), StringIO(data)) + + # store returns same istream instance, with new sha set + my_istream = db.store(istream) + sha = istream.binsha + assert my_istream is istream + assert db.has_object(sha) != dry_run + assert len(sha) == 20 + + # verify data - the slow way, we want to run code + if not dry_run: + info = db.info(sha) + assert str_blob_type == info.type + assert info.size == len(data) + + ostream = db.stream(sha) + assert ostream.read() == data + assert ostream.type == str_blob_type + assert ostream.size == len(data) + else: + self.failUnlessRaises(BadObject, db.info, sha) + self.failUnlessRaises(BadObject, db.stream, sha) + + # DIRECT STREAM COPY + # our data hase been written in object format to the StringIO + # we pasesd as output stream. No physical database representation + # was created. + # Test direct stream copy of object streams, the result must be + # identical to what we fed in + ostream.seek(0) + istream.stream = ostream + assert istream.binsha is not None + prev_sha = istream.binsha + + db.set_ostream(ZippedStoreShaWriter()) + db.store(istream) + assert istream.binsha == prev_sha + new_ostream = db.ostream() + + # note: only works as long our store write uses the same compression + # level, which is zip_best + assert ostream.getvalue() == new_ostream.getvalue() + # END for each data set + # END for each dry_run mode + + def _assert_object_writing_async(self, db): + """Test generic object writing using asynchronous access""" + ni = 5000 + def istream_generator(offset=0, ni=ni): + for data_src in xrange(ni): + data = str(data_src + offset) + yield IStream(str_blob_type, len(data), StringIO(data)) + # END for each item + # END generator utility + + # for now, we are very trusty here as we expect it to work if it worked + # in the single-stream case + + # write objects + reader = IteratorReader(istream_generator()) + istream_reader = db.store_async(reader) + istreams = istream_reader.read() # read all + assert istream_reader.task().error() is None + assert len(istreams) == ni + + for stream in istreams: + assert stream.error is None + assert len(stream.binsha) == 20 + assert isinstance(stream, IStream) + # END assert each stream + + # test has-object-async - we must have all previously added ones + reader = IteratorReader( istream.binsha for istream in istreams ) + hasobject_reader = db.has_object_async(reader) + count = 0 + for sha, has_object in hasobject_reader: + assert has_object + count += 1 + # END for each sha + assert count == ni + + # read the objects we have just written + reader = IteratorReader( istream.binsha for istream in istreams ) + ostream_reader = db.stream_async(reader) + + # read items individually to prevent hitting possible sys-limits + count = 0 + for ostream in ostream_reader: + assert isinstance(ostream, OStream) + count += 1 + # END for each ostream + assert ostream_reader.task().error() is None + assert count == ni + + # get info about our items + reader = IteratorReader( istream.binsha for istream in istreams ) + info_reader = db.info_async(reader) + + count = 0 + for oinfo in info_reader: + assert isinstance(oinfo, OInfo) + count += 1 + # END for each oinfo instance + assert count == ni + + + # combined read-write using a converter + # add 2500 items, and obtain their output streams + nni = 2500 + reader = IteratorReader(istream_generator(offset=ni, ni=nni)) + istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ] + + istream_reader = db.store_async(reader) + istream_reader.set_post_cb(istream_to_sha) + + ostream_reader = db.stream_async(istream_reader) + + count = 0 + # read it individually, otherwise we might run into the ulimit + for ostream in ostream_reader: + assert isinstance(ostream, OStream) + count += 1 + # END for each ostream + assert count == nni + + diff --git a/git/test/db/test_base.py b/git/test/db/test_base.py new file mode 100644 index 00000000..0a381beb --- /dev/null +++ b/git/test/db/test_base.py @@ -0,0 +1,18 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.db import RefSpec + +class TestBase(TestDBBase): + + @with_rw_directory + def test_basics(self, path): + self.failUnlessRaises(ValueError, RefSpec, None, None) + rs = RefSpec(None, "something") + assert rs.force == False + assert rs.delete_destination() + assert rs.source is None + assert rs.destination == "something" + diff --git a/git/test/db/test_git.py b/git/test/db/test_git.py new file mode 100644 index 00000000..62f33bb1 --- /dev/null +++ b/git/test/db/test_git.py @@ -0,0 +1,47 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.exc import BadObject +from gitdb.db.py import PureGitODB +from gitdb.base import OStream, OInfo +from gitdb.util import hex_to_bin, bin_to_hex + +class TestGitDB(TestDBBase): + + def test_reading(self): + gdb = PureGitODB(fixture_path('../../../.git/objects')) + + # we have packs and loose objects, alternates doesn't necessarily exist + assert 1 < len(gdb.databases()) < 4 + + # access should be possible + gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976") + assert isinstance(gdb.info(gitdb_sha), OInfo) + assert isinstance(gdb.stream(gitdb_sha), OStream) + assert gdb.size() > 200 + sha_list = list(gdb.sha_iter()) + assert len(sha_list) == gdb.size() + + + # This is actually a test for compound functionality, but it doesn't + # have a separate test module + # test partial shas + # this one as uneven and quite short + assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc") + + # mix even/uneven hexshas + for i, binsha in enumerate(sha_list): + assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha + # END for each sha + + self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000") + + @with_rw_directory + def test_writing(self, path): + gdb = PureGitODB(path) + + # its possible to write objects + self._assert_object_writing(gdb) + self._assert_object_writing_async(gdb) diff --git a/git/test/db/test_loose.py b/git/test/db/test_loose.py new file mode 100644 index 00000000..b1d33fd6 --- /dev/null +++ b/git/test/db/test_loose.py @@ -0,0 +1,34 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.db.py import PureLooseObjectODB +from gitdb.exc import BadObject +from gitdb.util import bin_to_hex + +class TestLooseDB(TestDBBase): + + @with_rw_directory + def test_basics(self, path): + ldb = PureLooseObjectODB(path) + + # write data + self._assert_object_writing(ldb) + self._assert_object_writing_async(ldb) + + # verify sha iteration and size + shas = list(ldb.sha_iter()) + assert shas and len(shas[0]) == 20 + + assert len(shas) == ldb.size() + + # verify find short object + long_sha = bin_to_hex(shas[-1]) + for short_sha in (long_sha[:20], long_sha[:5]): + assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha + # END for each sha + + self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000') + # raises if no object could be foudn + diff --git a/git/test/db/test_mem.py b/git/test/db/test_mem.py new file mode 100644 index 00000000..79005b50 --- /dev/null +++ b/git/test/db/test_mem.py @@ -0,0 +1,30 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.db.py import ( + PureMemoryDB, + PureLooseObjectODB + ) + +class TestPureMemoryDB(TestDBBase): + + @with_rw_directory + def test_writing(self, path): + mdb = PureMemoryDB() + + # write data + self._assert_object_writing_simple(mdb) + + # test stream copy + ldb = PureLooseObjectODB(path) + assert ldb.size() == 0 + num_streams_copied = mdb.stream_copy(mdb.sha_iter(), ldb) + assert num_streams_copied == mdb.size() + + assert ldb.size() == mdb.size() + for sha in mdb.sha_iter(): + assert ldb.has_object(sha) + assert ldb.stream(sha).read() == mdb.stream(sha).read() + # END verify objects where copied and are equal diff --git a/git/test/db/test_pack.py b/git/test/db/test_pack.py new file mode 100644 index 00000000..5456df41 --- /dev/null +++ b/git/test/db/test_pack.py @@ -0,0 +1,72 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.db.py import PurePackedODB +from gitdb.test.lib import fixture_path + +from gitdb.exc import BadObject, AmbiguousObjectName + +import os +import random + +class TestPackDB(TestDBBase): + + @with_packs_rw + def test_writing(self, path): + pdb = PurePackedODB(path) + + # on demand, we init our pack cache + num_packs = len(pdb.entities()) + assert pdb._st_mtime != 0 + + # test pack directory changed: + # packs removed - rename a file, should affect the glob + pack_path = pdb.entities()[0].pack().path() + new_pack_path = pack_path + "renamed" + os.rename(pack_path, new_pack_path) + + pdb.update_cache(force=True) + assert len(pdb.entities()) == num_packs - 1 + + # packs added + os.rename(new_pack_path, pack_path) + pdb.update_cache(force=True) + assert len(pdb.entities()) == num_packs + + # bang on the cache + # access the Entities directly, as there is no iteration interface + # yet ( or required for now ) + sha_list = list(pdb.sha_iter()) + assert len(sha_list) == pdb.size() + + # hit all packs in random order + random.shuffle(sha_list) + + for sha in sha_list: + info = pdb.info(sha) + stream = pdb.stream(sha) + # END for each sha to query + + + # test short finding - be a bit more brutal here + max_bytes = 19 + min_bytes = 2 + num_ambiguous = 0 + for i, sha in enumerate(sha_list): + short_sha = sha[:max((i % max_bytes), min_bytes)] + try: + assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha + except AmbiguousObjectName: + num_ambiguous += 1 + pass # valid, we can have short objects + # END exception handling + # END for each sha to find + + # we should have at least one ambiguous, considering the small sizes + # but in our pack, there is no ambigious ... + # assert num_ambiguous + + # non-existing + self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4) diff --git a/git/test/db/test_ref.py b/git/test/db/test_ref.py new file mode 100644 index 00000000..330dab70 --- /dev/null +++ b/git/test/db/test_ref.py @@ -0,0 +1,60 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.db.py import PureReferenceDB + +from gitdb.util import ( + NULL_BIN_SHA, + hex_to_bin + ) + +import os + +class TestPureReferenceDB(TestDBBase): + + def make_alt_file(self, alt_path, alt_list): + """Create an alternates file which contains the given alternates. + The list can be empty""" + alt_file = open(alt_path, "wb") + for alt in alt_list: + alt_file.write(alt + "\n") + alt_file.close() + + @with_rw_directory + def test_writing(self, path): + NULL_BIN_SHA = '\0' * 20 + + alt_path = os.path.join(path, 'alternates') + rdb = PureReferenceDB(alt_path) + assert len(rdb.databases()) == 0 + assert rdb.size() == 0 + assert len(list(rdb.sha_iter())) == 0 + + # try empty, non-existing + assert not rdb.has_object(NULL_BIN_SHA) + + + # setup alternate file + # add two, one is invalid + own_repo_path = fixture_path('../../../.git/objects') # use own repo + self.make_alt_file(alt_path, [own_repo_path, "invalid/path"]) + rdb.update_cache() + assert len(rdb.databases()) == 1 + + # we should now find a default revision of ours + gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976") + assert rdb.has_object(gitdb_sha) + + # remove valid + self.make_alt_file(alt_path, ["just/one/invalid/path"]) + rdb.update_cache() + assert len(rdb.databases()) == 0 + + # add valid + self.make_alt_file(alt_path, [own_repo_path]) + rdb.update_cache() + assert len(rdb.databases()) == 1 + + diff --git a/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b Binary files differnew file mode 100644 index 00000000..021c2db3 --- /dev/null +++ b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx Binary files differnew file mode 100644 index 00000000..fda5969b --- /dev/null +++ b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack Binary files differnew file mode 100644 index 00000000..a3209d2b --- /dev/null +++ b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx Binary files differnew file mode 100644 index 00000000..a7d6c717 --- /dev/null +++ b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack Binary files differnew file mode 100644 index 00000000..955c424c --- /dev/null +++ b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx Binary files differnew file mode 100644 index 00000000..87c635f4 --- /dev/null +++ b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack Binary files differnew file mode 100644 index 00000000..a69b28ac --- /dev/null +++ b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack diff --git a/git/test/lib/__init__.py b/git/test/lib/__init__.py index 77512794..b09a86b1 100644 --- a/git/test/lib/__init__.py +++ b/git/test/lib/__init__.py @@ -8,6 +8,7 @@ import inspect from mock import * from asserts import * from helper import * +from base import * __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/git/test/lib/base.py b/git/test/lib/base.py new file mode 100644 index 00000000..9224f5f6 --- /dev/null +++ b/git/test/lib/base.py @@ -0,0 +1,200 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of PureGitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Utilities used in ODB testing""" +from gitdb import OStream +from gitdb.db.py import PureGitDB +from gitdb.stream import ( + Sha1Writer, + ZippedStoreShaWriter + ) + +from gitdb.util import zlib + +import sys +import random +from array import array +from cStringIO import StringIO + +import glob +import unittest +import tempfile +import shutil +import os +import gc + + +#{ Decorators + +def with_rw_directory(func): + """Create a temporary directory which can be written to, remove it if the + test suceeds, but leave it otherwise to aid additional debugging""" + def wrapper(self): + path = tempfile.mktemp(prefix=func.__name__) + os.mkdir(path) + keep = False + try: + try: + return func(self, path) + except Exception: + print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path) + keep = True + raise + finally: + # Need to collect here to be sure all handles have been closed. It appears + # a windows-only issue. In fact things should be deleted, as well as + # memory maps closed, once objects go out of scope. For some reason + # though this is not the case here unless we collect explicitly. + if not keep: + gc.collect() + shutil.rmtree(path) + # END handle exception + # END wrapper + + wrapper.__name__ = func.__name__ + return wrapper + + +def with_rw_repo(func): + """Create a copy of our repository and put it into a writable location. It will + be removed if the test doesn't result in an error. + As we can currently only copy the fully working tree, tests must not rely on + being on a certain branch or on anything really except for the default tags + that should exist + Wrapped function obtains a git repository """ + def wrapper(self, path): + src_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + assert(os.path.isdir(path)) + os.rmdir(path) # created by wrapper, but must not exist for copy operation + shutil.copytree(src_dir, path) + target_gitdir = os.path.join(path, '.git') + assert os.path.isdir(target_gitdir) + return func(self, PureGitDB(target_gitdir)) + #END wrapper + wrapper.__name__ = func.__name__ + return with_rw_directory(wrapper) + + + +def with_packs_rw(func): + """Function that provides a path into which the packs for testing should be + copied. Will pass on the path to the actual function afterwards + + :note: needs with_rw_directory wrapped around it""" + def wrapper(self, path): + src_pack_glob = fixture_path('packs/*') + copy_files_globbed(src_pack_glob, path, hard_link_ok=True) + return func(self, path) + # END wrapper + + wrapper.__name__ = func.__name__ + return with_rw_directory(wrapper) + +#} END decorators + +#{ Routines + +def repo_dir(): + """:return: path to our own repository, being our own .git directory. + :note: doesn't work in bare repositories""" + base = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), '.git') + assert os.path.isdir(base) + return base + + +def maketemp(*args): + """Wrapper around default tempfile.mktemp to fix an osx issue""" + tdir = tempfile.mktemp(*args) + if sys.platform == 'darwin': + tdir = '/private' + tdir + return tdir + +def fixture_path(relapath=''): + """:return: absolute path into the fixture directory + :param relapath: relative path into the fixtures directory, or '' + to obtain the fixture directory itself""" + return os.path.join(os.path.dirname(__file__), 'fixtures', relapath) + +def copy_files_globbed(source_glob, target_dir, hard_link_ok=False): + """Copy all files found according to the given source glob into the target directory + :param hard_link_ok: if True, hard links will be created if possible. Otherwise + the files will be copied""" + for src_file in glob.glob(source_glob): + if hard_link_ok and hasattr(os, 'link'): + target = os.path.join(target_dir, os.path.basename(src_file)) + try: + os.link(src_file, target) + except OSError: + shutil.copy(src_file, target_dir) + # END handle cross device links ( and resulting failure ) + else: + shutil.copy(src_file, target_dir) + # END try hard link + # END for each file to copy + + +def make_bytes(size_in_bytes, randomize=False): + """:return: string with given size in bytes + :param randomize: try to produce a very random stream""" + actual_size = size_in_bytes / 4 + producer = xrange(actual_size) + if randomize: + producer = list(producer) + random.shuffle(producer) + # END randomize + a = array('i', producer) + return a.tostring() + +def make_object(type, data): + """:return: bytes resembling an uncompressed object""" + odata = "blob %i\0" % len(data) + return odata + data + +def make_memory_file(size_in_bytes, randomize=False): + """:return: tuple(size_of_stream, stream) + :param randomize: try to produce a very random stream""" + d = make_bytes(size_in_bytes, randomize) + return len(d), StringIO(d) + +#} END routines + +#{ Stream Utilities + +class DummyStream(object): + def __init__(self): + self.was_read = False + self.bytes = 0 + self.closed = False + + def read(self, size): + self.was_read = True + self.bytes = size + + def close(self): + self.closed = True + + def _assert(self): + assert self.was_read + + +class DeriveTest(OStream): + def __init__(self, sha, type, size, stream, *args, **kwargs): + self.myarg = kwargs.pop('myarg') + self.args = args + + def _assert(self): + assert self.args + assert self.myarg + +#} END stream utilitiess + +#{ Bases + +class TestBase(unittest.TestCase): + """Base class for all tests""" + # The non-database specific tests just provides a default pure git database + rorepo = PureGitDB(repo_dir()) + +#} END bases + diff --git a/git/test/performance/test_pack.py b/git/test/performance/test_pack.py new file mode 100644 index 00000000..da952b17 --- /dev/null +++ b/git/test/performance/test_pack.py @@ -0,0 +1,90 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Performance tests for object store""" +from lib import ( + TestBigRepoR + ) + +from gitdb.exc import UnsupportedOperation +from gitdb.db.pack import PackedDB + +import sys +import os +from time import time +import random + +class TestPackedDBPerformance(TestBigRepoR): + + def _test_pack_random_access(self): + pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) + + # sha lookup + st = time() + sha_list = list(pdb.sha_iter()) + elapsed = time() - st + ns = len(sha_list) + print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed) + + # sha lookup: best-case and worst case access + pdb_pack_info = pdb._pack_info + # END shuffle shas + st = time() + for sha in sha_list: + pdb_pack_info(sha) + # END for each sha to look up + elapsed = time() - st + + # discard cache + del(pdb._entities) + pdb.entities() + print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed) + # END for each random mode + + # query info and streams only + max_items = 10000 # can wait longer when testing memory + for pdb_fun in (pdb.info, pdb.stream): + st = time() + for sha in sha_list[:max_items]: + pdb_fun(sha) + elapsed = time() - st + print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed) + # END for each function + + # retrieve stream and read all + max_items = 5000 + pdb_stream = pdb.stream + total_size = 0 + st = time() + for sha in sha_list[:max_items]: + stream = pdb_stream(sha) + stream.read() + total_size += stream.size + elapsed = time() - st + total_kib = total_size / 1000 + print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed) + + def test_correctness(self): + pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) + # disabled for now as it used to work perfectly, checking big repositories takes a long time + print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)" + for crc in range(2): + count = 0 + st = time() + for entity in pdb.entities(): + pack_verify = entity.is_valid_stream + sha_by_index = entity.index().sha + for index in xrange(entity.index().size()): + try: + assert pack_verify(sha_by_index(index), use_crc=crc) + count += 1 + except UnsupportedOperation: + pass + # END ignore old indices + # END for each index + # END for each entity + elapsed = time() - st + print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed) + # END for each verify mode + diff --git a/git/test/performance/test_pack_streaming.py b/git/test/performance/test_pack_streaming.py new file mode 100644 index 00000000..795ed1e2 --- /dev/null +++ b/git/test/performance/test_pack_streaming.py @@ -0,0 +1,80 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Specific test for pack streams only""" +from lib import ( + TestBigRepoR + ) + +from gitdb.db.pack import PackedDB +from gitdb.stream import NullStream +from gitdb.pack import PackEntity + +import os +import sys +from time import time +from nose import SkipTest + +class CountedNullStream(NullStream): + __slots__ = '_bw' + def __init__(self): + self._bw = 0 + + def bytes_written(self): + return self._bw + + def write(self, d): + self._bw += NullStream.write(self, d) + + +class TestPackStreamingPerformance(TestBigRepoR): + + def test_pack_writing(self): + # see how fast we can write a pack from object streams. + # This will not be fast, as we take time for decompressing the streams as well + ostream = CountedNullStream() + pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) + + ni = 5000 + count = 0 + total_size = 0 + st = time() + objs = list() + for sha in pdb.sha_iter(): + count += 1 + objs.append(pdb.stream(sha)) + if count == ni: + break + #END gather objects for pack-writing + elapsed = time() - st + print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed) + + st = time() + PackEntity.write_pack(objs, ostream.write) + elapsed = time() - st + total_kb = ostream.bytes_written() / 1000 + print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed) + + + def test_stream_reading(self): + raise SkipTest() + pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack")) + + # streaming only, meant for --with-profile runs + ni = 5000 + count = 0 + pdb_stream = pdb.stream + total_size = 0 + st = time() + for sha in pdb.sha_iter(): + if count == ni: + break + stream = pdb_stream(sha) + stream.read() + total_size += stream.size + count += 1 + elapsed = time() - st + total_kib = total_size / 1000 + print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed) + diff --git a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py index 7f17d722..196e9003 100644 --- a/git/test/performance/test_streams.py +++ b/git/test/performance/test_streams.py @@ -1,9 +1,17 @@ """Performance data streaming performance""" +from gitdb.db.py import * +from gitdb.base import * +from gitdb.stream import * +from gitdb.util import ( + pool, + bin_to_hex + ) from git.test.lib import * from gitdb import * from gitdb.util import bin_to_hex +from cStringIO import StringIO from time import time import os import sys @@ -14,9 +22,35 @@ from gitdb.test.lib import make_memory_file from lib import ( TestBigRepoR + make_memory_file, + with_rw_directory ) +#{ Utilities +def read_chunked_stream(stream): + total = 0 + while True: + chunk = stream.read(chunk_size) + total += len(chunk) + if len(chunk) < chunk_size: + break + # END read stream loop + assert total == stream.size + return stream + + +class TestStreamReader(ChannelThreadTask): + """Expects input streams and reads them in chunks. It will read one at a time, + requireing a queue chunk of size 1""" + def __init__(self, *args): + super(TestStreamReader, self).__init__(*args) + self.fun = read_chunked_stream + self.max_chunksize = 1 + + +#} END utilities + class TestObjDBPerformance(TestBigRepoR): large_data_size_bytes = 1000*1000*10 # some MiB should do it @@ -129,3 +163,134 @@ class TestObjDBPerformance(TestBigRepoR): # compare print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc) # END for each randomization factor + + @with_rw_directory + def test_large_data_streaming(self, path): + ldb = PureLooseObjectODB(path) + string_ios = list() # list of streams we previously created + + # serial mode + for randomize in range(2): + desc = (randomize and 'random ') or '' + print >> sys.stderr, "Creating %s data ..." % desc + st = time() + size, stream = make_memory_file(self.large_data_size_bytes, randomize) + elapsed = time() - st + print >> sys.stderr, "Done (in %f s)" % elapsed + string_ios.append(stream) + + # writing - due to the compression it will seem faster than it is + st = time() + sha = ldb.store(IStream('blob', size, stream)).binsha + elapsed_add = time() - st + assert ldb.has_object(sha) + db_file = ldb.readable_db_object_path(bin_to_hex(sha)) + fsize_kib = os.path.getsize(db_file) / 1000 + + + size_kib = size / 1000 + print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) + + # reading all at once + st = time() + ostream = ldb.stream(sha) + shadata = ostream.read() + elapsed_readall = time() - st + + stream.seek(0) + assert shadata == stream.getvalue() + print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) + + + # reading in chunks of 1 MiB + cs = 512*1000 + chunks = list() + st = time() + ostream = ldb.stream(sha) + while True: + data = ostream.read(cs) + chunks.append(data) + if len(data) < cs: + break + # END read in chunks + elapsed_readchunks = time() - st + + stream.seek(0) + assert ''.join(chunks) == stream.getvalue() + + cs_kib = cs / 1000 + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) + + # del db file so we keep something to do + os.remove(db_file) + # END for each randomization factor + + + # multi-threaded mode + # want two, should be supported by most of todays cpus + pool.set_size(2) + total_kib = 0 + nsios = len(string_ios) + for stream in string_ios: + stream.seek(0) + total_kib += len(stream.getvalue()) / 1000 + # END rewind + + def istream_iter(): + for stream in string_ios: + stream.seek(0) + yield IStream(str_blob_type, len(stream.getvalue()), stream) + # END for each stream + # END util + + # write multiple objects at once, involving concurrent compression + reader = IteratorReader(istream_iter()) + istream_reader = ldb.store_async(reader) + istream_reader.task().max_chunksize = 1 + + st = time() + istreams = istream_reader.read(nsios) + assert len(istreams) == nsios + elapsed = time() - st + + print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed) + + # decompress multiple at once, by reading them + # chunk size is not important as the stream will not really be decompressed + + # until its read + istream_reader = IteratorReader(iter([ i.binsha for i in istreams ])) + ostream_reader = ldb.stream_async(istream_reader) + + chunk_task = TestStreamReader(ostream_reader, "chunker", None) + output_reader = pool.add_task(chunk_task) + output_reader.task().max_chunksize = 1 + + st = time() + assert len(output_reader.read(nsios)) == nsios + elapsed = time() - st + + print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed) + + # store the files, and read them back. For the reading, we use a task + # as well which is chunked into one item per task. Reading all will + # very quickly result in two threads handling two bytestreams of + # chained compression/decompression streams + reader = IteratorReader(istream_iter()) + istream_reader = ldb.store_async(reader) + istream_reader.task().max_chunksize = 1 + + istream_to_sha = lambda items: [ i.binsha for i in items ] + istream_reader.set_post_cb(istream_to_sha) + + ostream_reader = ldb.stream_async(istream_reader) + + chunk_task = TestStreamReader(ostream_reader, "chunker", None) + output_reader = pool.add_task(chunk_task) + output_reader.max_chunksize = 1 + + st = time() + assert len(output_reader.read(nsios)) == nsios + elapsed = time() - st + + print >> sys.stderr, "Threads(%i): Compressed and decompressed and read %i KiB of data in loose odb in %f s ( %f Combined KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed) diff --git a/git/test/test_base.py b/git/test/test_base.py index e630d151..408b9833 100644 --- a/git/test/test_base.py +++ b/git/test/test_base.py @@ -15,6 +15,22 @@ from git.objects.util import get_object_type_by_name from gitdb.util import hex_to_bin import tempfile +################## +from lib import ( + TestBase, + DummyStream, + DeriveTest, + ) + +from gitdb import * +from gitdb.util import ( + NULL_BIN_SHA + ) + +from gitdb.typ import ( + str_blob_type + ) + class TestBase(TestBase): type_tuples = ( ("blob", "8741fc1d09d61f02ffd8cded15ff603eff1ec070", "blob.py"), @@ -98,3 +114,85 @@ class TestBase(TestBase): assert not rw_repo.config_reader("repository").getboolean("core", "bare") assert rw_remote_repo.config_reader("repository").getboolean("core", "bare") assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib')) + + + +class TestBaseTypes(TestBase): + + def test_streams(self): + # test info + sha = NULL_BIN_SHA + s = 20 + blob_id = 3 + + info = OInfo(sha, str_blob_type, s) + assert info.binsha == sha + assert info.type == str_blob_type + assert info.type_id == blob_id + assert info.size == s + + # test pack info + # provides type_id + pinfo = OPackInfo(0, blob_id, s) + assert pinfo.type == str_blob_type + assert pinfo.type_id == blob_id + assert pinfo.pack_offset == 0 + + dpinfo = ODeltaPackInfo(0, blob_id, s, sha) + assert dpinfo.type == str_blob_type + assert dpinfo.type_id == blob_id + assert dpinfo.delta_info == sha + assert dpinfo.pack_offset == 0 + + + # test ostream + stream = DummyStream() + ostream = OStream(*(info + (stream, ))) + assert ostream.stream is stream + ostream.read(15) + stream._assert() + assert stream.bytes == 15 + ostream.read(20) + assert stream.bytes == 20 + + # test packstream + postream = OPackStream(*(pinfo + (stream, ))) + assert postream.stream is stream + postream.read(10) + stream._assert() + assert stream.bytes == 10 + + # test deltapackstream + dpostream = ODeltaPackStream(*(dpinfo + (stream, ))) + dpostream.stream is stream + dpostream.read(5) + stream._assert() + assert stream.bytes == 5 + + # derive with own args + DeriveTest(sha, str_blob_type, s, stream, 'mine',myarg = 3)._assert() + + # test istream + istream = IStream(str_blob_type, s, stream) + assert istream.binsha == None + istream.binsha = sha + assert istream.binsha == sha + + assert len(istream.binsha) == 20 + assert len(istream.hexsha) == 40 + + assert istream.size == s + istream.size = s * 2 + istream.size == s * 2 + assert istream.type == str_blob_type + istream.type = "something" + assert istream.type == "something" + assert istream.stream is stream + istream.stream = None + assert istream.stream is None + + assert istream.error is None + istream.error = Exception() + assert isinstance(istream.error, Exception) + + diff --git a/git/test/test_example.py b/git/test/test_example.py new file mode 100644 index 00000000..c2e78407 --- /dev/null +++ b/git/test/test_example.py @@ -0,0 +1,64 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module with examples from the tutorial section of the docs""" +from lib import * +from gitdb import IStream +from gitdb.db.py import PureLooseObjectODB +from gitdb.util import pool + +from cStringIO import StringIO + +from async import IteratorReader + +class TestExamples(TestBase): + + def test_base(self): + ldb = PureLooseObjectODB(fixture_path("../../../.git/objects")) + + for sha1 in ldb.sha_iter(): + oinfo = ldb.info(sha1) + ostream = ldb.stream(sha1) + assert oinfo[:3] == ostream[:3] + + assert len(ostream.read()) == ostream.size + assert ldb.has_object(oinfo.binsha) + # END for each sha in database + # assure we close all files + try: + del(ostream) + del(oinfo) + except UnboundLocalError: + pass + # END ignore exception if there are no loose objects + + data = "my data" + istream = IStream("blob", len(data), StringIO(data)) + + # the object does not yet have a sha + assert istream.binsha is None + ldb.store(istream) + # now the sha is set + assert len(istream.binsha) == 20 + assert ldb.has_object(istream.binsha) + + + # async operation + # Create a reader from an iterator + reader = IteratorReader(ldb.sha_iter()) + + # get reader for object streams + info_reader = ldb.stream_async(reader) + + # read one + info = info_reader.read(1)[0] + + # read all the rest until depletion + ostreams = info_reader.read() + + # set the pool to use two threads + pool.set_size(2) + + # synchronize the mode of operation + pool.set_size(0) diff --git a/git/test/test_pack.py b/git/test/test_pack.py new file mode 100644 index 00000000..4a7f1caf --- /dev/null +++ b/git/test/test_pack.py @@ -0,0 +1,247 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test everything about packs reading and writing""" +from lib import ( + TestBase, + with_rw_directory, + with_packs_rw, + fixture_path + ) +from gitdb.stream import DeltaApplyReader + +from gitdb.pack import ( + PackEntity, + PackIndexFile, + PackFile + ) + +from gitdb.base import ( + OInfo, + OStream, + ) + +from gitdb.fun import delta_types +from gitdb.exc import UnsupportedOperation +from gitdb.util import to_bin_sha +from itertools import izip, chain +from nose import SkipTest + +import os +import sys +import tempfile + + +#{ Utilities +def bin_sha_from_filename(filename): + return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:]) +#} END utilities + +class TestPack(TestBase): + + packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67) + packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30) + packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42) + packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2]) + packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2]) + packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2]) + + + def _assert_index_file(self, index, version, size): + assert index.packfile_checksum() != index.indexfile_checksum() + assert len(index.packfile_checksum()) == 20 + assert len(index.indexfile_checksum()) == 20 + assert index.version() == version + assert index.size() == size + assert len(index.offsets()) == size + + # get all data of all objects + for oidx in xrange(index.size()): + sha = index.sha(oidx) + assert oidx == index.sha_to_index(sha) + + entry = index.entry(oidx) + assert len(entry) == 3 + + assert entry[0] == index.offset(oidx) + assert entry[1] == sha + assert entry[2] == index.crc(oidx) + + # verify partial sha + for l in (4,8,11,17,20): + assert index.partial_sha_to_index(sha[:l], l*2) == oidx + + # END for each object index in indexfile + self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2) + + + def _assert_pack_file(self, pack, version, size): + assert pack.version() == 2 + assert pack.size() == size + assert len(pack.checksum()) == 20 + + num_obj = 0 + for obj in pack.stream_iter(): + num_obj += 1 + info = pack.info(obj.pack_offset) + stream = pack.stream(obj.pack_offset) + + assert info.pack_offset == stream.pack_offset + assert info.type_id == stream.type_id + assert hasattr(stream, 'read') + + # it should be possible to read from both streams + assert obj.read() == stream.read() + + streams = pack.collect_streams(obj.pack_offset) + assert streams + + # read the stream + try: + dstream = DeltaApplyReader.new(streams) + except ValueError: + # ignore these, old git versions use only ref deltas, + # which we havent resolved ( as we are without an index ) + # Also ignore non-delta streams + continue + # END get deltastream + + # read all + data = dstream.read() + assert len(data) == dstream.size + + # test seek + dstream.seek(0) + assert dstream.read() == data + + + # read chunks + # NOTE: the current implementation is safe, it basically transfers + # all calls to the underlying memory map + + # END for each object + assert num_obj == size + + + def test_pack_index(self): + # check version 1 and 2 + for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2): + index = PackIndexFile(indexfile) + self._assert_index_file(index, version, size) + # END run tests + + def test_pack(self): + # there is this special version 3, but apparently its like 2 ... + for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2): + pack = PackFile(packfile) + self._assert_pack_file(pack, version, size) + # END for each pack to test + + @with_rw_directory + def test_pack_entity(self, rw_dir): + pack_objs = list() + for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1), + (self.packfile_v2_2, self.packindexfile_v2), + (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)): + packfile, version, size = packinfo + indexfile, version, size = indexinfo + entity = PackEntity(packfile) + assert entity.pack().path() == packfile + assert entity.index().path() == indexfile + pack_objs.extend(entity.stream_iter()) + + count = 0 + for info, stream in izip(entity.info_iter(), entity.stream_iter()): + count += 1 + assert info.binsha == stream.binsha + assert len(info.binsha) == 20 + assert info.type_id == stream.type_id + assert info.size == stream.size + + # we return fully resolved items, which is implied by the sha centric access + assert not info.type_id in delta_types + + # try all calls + assert len(entity.collect_streams(info.binsha)) + oinfo = entity.info(info.binsha) + assert isinstance(oinfo, OInfo) + assert oinfo.binsha is not None + ostream = entity.stream(info.binsha) + assert isinstance(ostream, OStream) + assert ostream.binsha is not None + + # verify the stream + try: + assert entity.is_valid_stream(info.binsha, use_crc=True) + except UnsupportedOperation: + pass + # END ignore version issues + assert entity.is_valid_stream(info.binsha, use_crc=False) + # END for each info, stream tuple + assert count == size + + # END for each entity + + # pack writing - write all packs into one + # index path can be None + pack_path = tempfile.mktemp('', "pack", rw_dir) + index_path = tempfile.mktemp('', 'index', rw_dir) + iteration = 0 + def rewind_streams(): + for obj in pack_objs: + obj.stream.seek(0) + #END utility + for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)): + pfile = open(ppath, 'wb') + iwrite = None + if ipath: + ifile = open(ipath, 'wb') + iwrite = ifile.write + #END handle ip + + # make sure we rewind the streams ... we work on the same objects over and over again + if iteration > 0: + rewind_streams() + #END rewind streams + iteration += 1 + + pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj) + pfile.close() + assert os.path.getsize(ppath) > 100 + + # verify pack + pf = PackFile(ppath) + assert pf.size() == len(pack_objs) + assert pf.version() == PackFile.pack_version_default + assert pf.checksum() == pack_sha + + # verify index + if ipath is not None: + ifile.close() + assert os.path.getsize(ipath) > 100 + idx = PackIndexFile(ipath) + assert idx.version() == PackIndexFile.index_version_default + assert idx.packfile_checksum() == pack_sha + assert idx.indexfile_checksum() == index_sha + assert idx.size() == len(pack_objs) + #END verify files exist + #END for each packpath, indexpath pair + + # verify the packs throughly + rewind_streams() + entity = PackEntity.create(pack_objs, rw_dir) + count = 0 + for info in entity.info_iter(): + count += 1 + for use_crc in range(2): + assert entity.is_valid_stream(info.binsha, use_crc) + # END for each crc mode + #END for each info + assert count == len(pack_objs) + + + def test_pack_64(self): + # TODO: hex-edit a pack helping us to verify that we can handle 64 byte offsets + # of course without really needing such a huge pack + raise SkipTest() diff --git a/git/test/test_refs.py b/git/test/test_refs.py index 2338b4e4..649542f3 100644 --- a/git/test/test_refs.py +++ b/git/test/test_refs.py @@ -4,12 +4,13 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from mock import * -from git.test.lib import * -from git import * -import git.refs as refs -from git.util import Actor -from git.objects.tag import TagObject +from gitdb.test.lib import * +from gitdb.ref import * +import gitdb.ref as ref + +from gitdb.util import Actor +from gitdb.object.tag import TagObject + from itertools import chain import os @@ -17,7 +18,7 @@ class TestRefs(TestBase): def test_from_path(self): # should be able to create any reference directly - for ref_type in ( Reference, Head, TagReference, RemoteReference ): + for ref_type in (Reference, Head, TagReference, RemoteReference): for name in ('rela_name', 'path/rela_name'): full_path = ref_type.to_full_path(name) instance = ref_type.from_path(self.rorepo, full_path) @@ -27,20 +28,20 @@ class TestRefs(TestBase): def test_tag_base(self): tag_object_refs = list() - for tag in self.rorepo.tags: + for tag in TagReference.list_items(self.rorepo): assert "refs/tags" in tag.path assert tag.name - assert isinstance( tag.commit, Commit ) + assert isinstance(tag.commit, tag.CommitCls) if tag.tag is not None: - tag_object_refs.append( tag ) + tag_object_refs.append(tag) tagobj = tag.tag # have no dict self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1) - assert isinstance( tagobj, TagObject ) + assert isinstance(tagobj, TagObject) assert tagobj.tag == tag.name - assert isinstance( tagobj.tagger, Actor ) - assert isinstance( tagobj.tagged_date, int ) - assert isinstance( tagobj.tagger_tz_offset, int ) + assert isinstance(tagobj.tagger, Actor) + assert isinstance(tagobj.tagged_date, int) + assert isinstance(tagobj.tagger_tz_offset, int) assert tagobj.message assert tag.object == tagobj # can't assign the object @@ -48,15 +49,15 @@ class TestRefs(TestBase): # END if we have a tag object # END for tag in repo-tags assert tag_object_refs - assert isinstance(self.rorepo.tags['0.1.5'], TagReference) + assert isinstance(TagReference.list_items(self.rorepo)['0.5.0'], TagReference) def test_tags(self): # tag refs can point to tag objects or to commits s = set() ref_count = 0 - for ref in chain(self.rorepo.tags, self.rorepo.heads): + for ref in chain(TagReference.list_items(self.rorepo), Head.list_items(self.rorepo)): ref_count += 1 - assert isinstance(ref, refs.Reference) + assert isinstance(ref, Reference) assert str(ref) == ref.name assert repr(ref) assert ref == ref @@ -66,9 +67,9 @@ class TestRefs(TestBase): assert len(s) == ref_count assert len(s|s) == ref_count - @with_rw_repo('HEAD', bare=False) - def test_heads(self, rwrepo): - for head in rwrepo.heads: + @with_rw_repo + def test_heads(self, rw_repo): + for head in Head.iter_items(rw_repo): assert head.name assert head.path assert "refs/heads" in head.path @@ -88,7 +89,7 @@ class TestRefs(TestBase): # after the clone, we might still have a tracking branch setup head.set_tracking_branch(None) assert head.tracking_branch() is None - remote_ref = rwrepo.remotes[0].refs[0] + remote_ref = RemoteReference.list_items(rw_repo)[0] assert head.set_tracking_branch(remote_ref) is head assert head.tracking_branch() == remote_ref head.set_tracking_branch(None) @@ -96,7 +97,7 @@ class TestRefs(TestBase): # END for each head # verify REFLOG gets altered - head = rwrepo.head + head = HEAD(rw_repo) cur_head = head.ref cur_commit = cur_head.commit pcommit = cur_head.commit.parents[0].parents[0] @@ -130,7 +131,7 @@ class TestRefs(TestBase): assert len(cur_head.log()) == blog_len+2 # a new branch has just a single entry - other_head = Head.create(rwrepo, 'mynewhead', pcommit, logmsg='new head created') + other_head = Head.create(rw_repo, 'mynewhead', pcommit, logmsg='new head created') log = other_head.log() assert len(log) == 1 assert log[0].oldhexsha == pcommit.NULL_HEX_SHA @@ -139,24 +140,25 @@ class TestRefs(TestBase): def test_refs(self): types_found = set() - for ref in self.rorepo.refs: + for ref in Reference.list_items(self.rorepo): types_found.add(type(ref)) assert len(types_found) >= 3 def test_is_valid(self): assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False - assert self.rorepo.head.is_valid() - assert self.rorepo.head.reference.is_valid() + assert HEAD(self.rorepo).is_valid() + assert HEAD(self.rorepo).reference.is_valid() assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False def test_orig_head(self): - assert type(self.rorepo.head.orig_head()) == SymbolicReference + assert type(HEAD(self.rorepo).orig_head()) == SymbolicReference - @with_rw_repo('0.1.6') + @with_rw_repo def test_head_reset(self, rw_repo): - cur_head = rw_repo.head + cur_head = HEAD(rw_repo) old_head_commit = cur_head.commit new_head_commit = cur_head.ref.commit.parents[0] + cur_head.reset(new_head_commit, index=True) # index only assert cur_head.reference.commit == new_head_commit @@ -176,10 +178,9 @@ class TestRefs(TestBase): cur_head.reset(new_head_commit) rw_repo.index.checkout(["lib"], force=True)# - # now that we have a write write repo, change the HEAD reference - its # like git-reset --soft - heads = rw_repo.heads + heads = Head.list_items(rw_repo) assert heads for head in heads: cur_head.reference = head @@ -198,7 +199,7 @@ class TestRefs(TestBase): self.failUnlessRaises(TypeError, getattr, cur_head, "reference") # tags are references, hence we can point to them - some_tag = rw_repo.tags[0] + some_tag = TagReference.list_items(rw_repo)[0] cur_head.reference = some_tag assert not cur_head.is_detached assert cur_head.commit == some_tag.commit @@ -231,7 +232,7 @@ class TestRefs(TestBase): old_name = new_head.name assert new_head.rename("hello").name == "hello" - assert new_head.rename("hello/world").name == "hello/world" + assert new_head.rename("hello/world").name == "hello/world" # yes, this must work assert new_head.rename(old_name).name == old_name and new_head.path == old_path # rename with force @@ -414,7 +415,7 @@ class TestRefs(TestBase): symbol_ref_path = "refs/symbol_ref" symref = SymbolicReference(rw_repo, symbol_ref_path) assert symref.path == symbol_ref_path - symbol_ref_abspath = os.path.join(rw_repo.git_dir, symref.path) + symbol_ref_abspath = os.path.join(rw_repo.root_path(), symref.path) # set it symref.reference = new_head @@ -471,7 +472,7 @@ class TestRefs(TestBase): rw_repo.head.reference = Head.create(rw_repo, "master") # At least the head should still exist - assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD')) + assert os.path.isfile(os.path.join(rw_repo.root_path(), 'HEAD')) refs = list(SymbolicReference.iter_items(rw_repo)) assert len(refs) == 1 @@ -517,5 +518,5 @@ class TestRefs(TestBase): assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD') def test_reflog(self): - assert isinstance(self.rorepo.heads.master.log(), RefLog) + assert isinstance(Head.list_items(self.rorepo).master.log(), RefLog) diff --git a/git/test/test_stream.py b/git/test/test_stream.py new file mode 100644 index 00000000..b2d4bc14 --- /dev/null +++ b/git/test/test_stream.py @@ -0,0 +1,155 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test for object db""" +from lib import ( + TestBase, + DummyStream, + Sha1Writer, + make_bytes, + make_object, + fixture_path + ) + +from gitdb import * +from gitdb.util import ( + NULL_HEX_SHA, + hex_to_bin + ) + +from gitdb.util import zlib +from gitdb.typ import ( + str_blob_type + ) + +from gitdb.db.py import PureLooseObjectODB +import time +import tempfile +import os + + + + +class TestStream(TestBase): + """Test stream classes""" + + data_sizes = (15, 10000, 1000*1024+512) + + def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None): + """Make stream tests - the orig_stream is seekable, allowing it to be + rewound and reused + :param cdata: the data we expect to read from stream, the contents + :param rewind_stream: function called to rewind the stream to make it ready + for reuse""" + ns = 10 + assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata)) + + # read in small steps + ss = len(cdata) / ns + for i in range(ns): + data = stream.read(ss) + chunk = cdata[i*ss:(i+1)*ss] + assert data == chunk + # END for each step + rest = stream.read() + if rest: + assert rest == cdata[-len(rest):] + # END handle rest + + if isinstance(stream, DecompressMemMapReader): + assert len(stream.data()) == stream.compressed_bytes_read() + # END handle special type + + rewind_stream(stream) + + # read everything + rdata = stream.read() + assert rdata == cdata + + if isinstance(stream, DecompressMemMapReader): + assert len(stream.data()) == stream.compressed_bytes_read() + # END handle special type + + def test_decompress_reader(self): + for close_on_deletion in range(2): + for with_size in range(2): + for ds in self.data_sizes: + cdata = make_bytes(ds, randomize=False) + + # zdata = zipped actual data + # cdata = original content data + + # create reader + if with_size: + # need object data + zdata = zlib.compress(make_object(str_blob_type, cdata)) + type, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion) + assert size == len(cdata) + assert type == str_blob_type + + # even if we don't set the size, it will be set automatically on first read + test_reader = DecompressMemMapReader(zdata, close_on_deletion=False) + assert test_reader._s == len(cdata) + else: + # here we need content data + zdata = zlib.compress(cdata) + reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata)) + assert reader._s == len(cdata) + # END get reader + + self._assert_stream_reader(reader, cdata, lambda r: r.seek(0)) + + # put in a dummy stream for closing + dummy = DummyStream() + reader._m = dummy + + assert not dummy.closed + del(reader) + assert dummy.closed == close_on_deletion + # END for each datasize + # END whether size should be used + # END whether stream should be closed when deleted + + def test_sha_writer(self): + writer = Sha1Writer() + assert 2 == writer.write("hi") + assert len(writer.sha(as_hex=1)) == 40 + assert len(writer.sha(as_hex=0)) == 20 + + # make sure it does something ;) + prev_sha = writer.sha() + writer.write("hi again") + assert writer.sha() != prev_sha + + def test_compressed_writer(self): + for ds in self.data_sizes: + fd, path = tempfile.mkstemp() + ostream = FDCompressedSha1Writer(fd) + data = make_bytes(ds, randomize=False) + + # for now, just a single write, code doesn't care about chunking + assert len(data) == ostream.write(data) + ostream.close() + + # its closed already + self.failUnlessRaises(OSError, os.close, fd) + + # read everything back, compare to data we zip + fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0)) + written_data = os.read(fd, os.path.getsize(path)) + assert len(written_data) == os.path.getsize(path) + os.close(fd) + assert written_data == zlib.compress(data, 1) # best speed + + os.remove(path) + # END for each os + + def test_decompress_reader_special_case(self): + odb = PureLooseObjectODB(fixture_path('objects')) + ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b')) + + # if there is a bug, we will be missing one byte exactly ! + data = ostream.read() + assert len(data) == ostream.size + diff --git a/git/test/test_util.py b/git/test/test_util.py index e55a6d15..151fe5bc 100644 --- a/git/test/test_util.py +++ b/git/test/test_util.py @@ -7,7 +7,7 @@ import os import tempfile -from git.test.lib import * +from lib import TestBase from git.util import * from git.objects.util import * from git import * @@ -15,6 +15,14 @@ from git.cmd import dashify import time +from gitdb.util import ( + to_hex_sha, + to_bin_sha, + NULL_HEX_SHA, + LockedFD, + Actor + ) + class TestUtils(TestBase): def setup(self): @@ -107,3 +115,118 @@ class TestUtils(TestBase): assert isinstance(Actor.committer(cr), Actor) assert isinstance(Actor.author(cr), Actor) #END assure config reader is handled + + def test_basics(self): + assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA + assert len(to_bin_sha(NULL_HEX_SHA)) == 20 + assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA + + def _cmp_contents(self, file_path, data): + # raise if data from file at file_path + # does not match data string + fp = open(file_path, "rb") + try: + assert fp.read() == data + finally: + fp.close() + + def test_lockedfd(self): + my_file = tempfile.mktemp() + orig_data = "hello" + new_data = "world" + my_file_fp = open(my_file, "wb") + my_file_fp.write(orig_data) + my_file_fp.close() + + try: + lfd = LockedFD(my_file) + lockfilepath = lfd._lockfilepath() + + # cannot end before it was started + self.failUnlessRaises(AssertionError, lfd.rollback) + self.failUnlessRaises(AssertionError, lfd.commit) + + # open for writing + assert not os.path.isfile(lockfilepath) + wfd = lfd.open(write=True) + assert lfd._fd is wfd + assert os.path.isfile(lockfilepath) + + # write data and fail + os.write(wfd, new_data) + lfd.rollback() + assert lfd._fd is None + self._cmp_contents(my_file, orig_data) + assert not os.path.isfile(lockfilepath) + + # additional call doesnt fail + lfd.commit() + lfd.rollback() + + # test reading + lfd = LockedFD(my_file) + rfd = lfd.open(write=False) + assert os.read(rfd, len(orig_data)) == orig_data + + assert os.path.isfile(lockfilepath) + # deletion rolls back + del(lfd) + assert not os.path.isfile(lockfilepath) + + + # write data - concurrently + lfd = LockedFD(my_file) + olfd = LockedFD(my_file) + assert not os.path.isfile(lockfilepath) + wfdstream = lfd.open(write=True, stream=True) # this time as stream + assert os.path.isfile(lockfilepath) + # another one fails + self.failUnlessRaises(IOError, olfd.open) + + wfdstream.write(new_data) + lfd.commit() + assert not os.path.isfile(lockfilepath) + self._cmp_contents(my_file, new_data) + + # could test automatic _end_writing on destruction + finally: + os.remove(my_file) + # END final cleanup + + # try non-existing file for reading + lfd = LockedFD(tempfile.mktemp()) + try: + lfd.open(write=False) + except OSError: + assert not os.path.exists(lfd._lockfilepath()) + else: + self.fail("expected OSError") + # END handle exceptions + + +class TestActor(TestBase): + def test_from_string_should_separate_name_and_email(self): + a = Actor._from_string("Michael Trier <mtrier@example.com>") + assert "Michael Trier" == a.name + assert "mtrier@example.com" == a.email + + # base type capabilities + assert a == a + assert not ( a != a ) + m = set() + m.add(a) + m.add(a) + assert len(m) == 1 + + def test_from_string_should_handle_just_name(self): + a = Actor._from_string("Michael Trier") + assert "Michael Trier" == a.name + assert None == a.email + + def test_should_display_representation(self): + a = Actor._from_string("Michael Trier <mtrier@example.com>") + assert '<git.Actor "Michael Trier <mtrier@example.com>">' == repr(a) + + def test_str_should_alias_name(self): + a = Actor._from_string("Michael Trier <mtrier@example.com>") + assert a.name == str(a) |