Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work

author: Sebastian Thiel <byronimo@gmail.com> 2011-05-05 19:43:22 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2011-05-05 19:43:22 +0200
commit: 4177eefd7bdaea96a529b00ba9cf751924ede202 (patch)
tree: 958614c21bd97267e0d06f71bb18d4215ddd87b5 /git/test/db
parent: f54546a9b857ae728033482f3c5c18c9ff3393c3 (diff)
download: gitpython-4177eefd7bdaea96a529b00ba9cf751924ede202.tar.gz
8 files changed, 480 insertions, 0 deletions
diff --git a/git/test/db/__init__.py b/git/test/db/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/test/db/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/lib.py b/git/test/db/lib.py
new file mode 100644
index 00000000..5f4f9c36
--- /dev/null
+++ b/git/test/db/lib.py
@@ -0,0 +1,215 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Base classes for object db testing"""
+from gitdb.test.lib import (
+	with_rw_directory,
+	with_packs_rw,
+	ZippedStoreShaWriter,
+	fixture_path,
+	TestBase
+	)
+
+from gitdb.stream import Sha1Writer
+
+# import database types we want to support
+# they will be set to None if the respective library could not be loaded
+from gitdb.db.py import PureGitDB
+
+from gitdb.base import (
+							IStream,
+							OStream,
+							OInfo
+						)
+				
+from gitdb.exc import BadObject
+from gitdb.typ import str_blob_type
+
+from async import IteratorReader
+from cStringIO import StringIO
+from struct import pack
+
+
+__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')
+		
+class TestDBBase(TestBase):
+	"""Base class providing testing routines on databases"""
+	
+	# data
+	two_lines = "1234\nhello world"
+	all_data = (two_lines, )
+	
+	# all supported database types. Add your own type 
+	ref_db_types = (PureGitDB, )
+	
+	def _assert_object_writing_simple(self, db):
+		# write a bunch of objects and query their streams and info
+		null_objs = db.size()
+		ni = 250
+		for i in xrange(ni):
+			data = pack(">L", i)
+			istream = IStream(str_blob_type, len(data), StringIO(data))
+			new_istream = db.store(istream)
+			assert new_istream is istream
+			assert db.has_object(istream.binsha)
+			
+			info = db.info(istream.binsha)
+			assert isinstance(info, OInfo)
+			assert info.type == istream.type and info.size == istream.size
+			
+			stream = db.stream(istream.binsha)
+			assert isinstance(stream, OStream)
+			assert stream.binsha == info.binsha and stream.type == info.type
+			assert stream.read() == data
+		# END for each item
+		
+		assert db.size() == null_objs + ni
+		shas = list(db.sha_iter())
+		assert len(shas) == db.size()
+		assert len(shas[0]) == 20
+		
+	
+	def _assert_object_writing(self, db):
+		"""General tests to verify object writing, compatible to ObjectDBW
+		:note: requires write access to the database"""
+		# start in 'dry-run' mode, using a simple sha1 writer
+		ostreams = (ZippedStoreShaWriter, None)
+		for ostreamcls in ostreams:
+			for data in self.all_data:
+				dry_run = ostreamcls is not None
+				ostream = None
+				if ostreamcls is not None:
+					ostream = ostreamcls()
+					assert isinstance(ostream, Sha1Writer)
+				# END create ostream
+				
+				prev_ostream = db.set_ostream(ostream)
+				assert type(prev_ostream) in ostreams or prev_ostream in ostreams 
+					
+				istream = IStream(str_blob_type, len(data), StringIO(data))
+				
+				# store returns same istream instance, with new sha set
+				my_istream = db.store(istream)
+				sha = istream.binsha
+				assert my_istream is istream
+				assert db.has_object(sha) != dry_run
+				assert len(sha) == 20	
+				
+				# verify data - the slow way, we want to run code
+				if not dry_run:
+					info = db.info(sha)
+					assert str_blob_type == info.type
+					assert info.size == len(data)
+					
+					ostream = db.stream(sha)
+					assert ostream.read() == data
+					assert ostream.type == str_blob_type
+					assert ostream.size == len(data)
+				else:
+					self.failUnlessRaises(BadObject, db.info, sha)
+					self.failUnlessRaises(BadObject, db.stream, sha)
+					
+					# DIRECT STREAM COPY
+					# our data has been written in object format to the StringIO
+					# we passed as output stream. No physical database representation
+					# was created.
+					# Test direct stream copy of object streams, the result must be 
+					# identical to what we fed in
+					ostream.seek(0)
+					istream.stream = ostream
+					assert istream.binsha is not None
+					prev_sha = istream.binsha
+					
+					db.set_ostream(ZippedStoreShaWriter())
+					db.store(istream)
+					assert istream.binsha == prev_sha
+					new_ostream = db.ostream()
+					
+					# note: only works as long as our store write uses the same compression
+					# level, which is zip_best
+					assert ostream.getvalue() == new_ostream.getvalue()
+			# END for each data set
+		# END for each dry_run mode
+		
+	def _assert_object_writing_async(self, db):
+		"""Test generic object writing using asynchronous access"""
+		ni = 5000
+		def istream_generator(offset=0, ni=ni):
+			for data_src in xrange(ni):
+				data = str(data_src + offset)
+				yield IStream(str_blob_type, len(data), StringIO(data))
+			# END for each item
+		# END generator utility
+		
+		# for now, we are very trusty here as we expect it to work if it worked
+		# in the single-stream case
+		
+		# write objects
+		reader = IteratorReader(istream_generator())
+		istream_reader = db.store_async(reader)
+		istreams = istream_reader.read()		# read all
+		assert istream_reader.task().error() is None
+		assert len(istreams) == ni
+		
+		for stream in istreams:
+			assert stream.error is None
+			assert len(stream.binsha) == 20
+			assert isinstance(stream, IStream)
+		# END assert each stream
+		
+		# test has-object-async - we must have all previously added ones
+		reader = IteratorReader( istream.binsha for istream in istreams )
+		hasobject_reader = db.has_object_async(reader)
+		count = 0
+		for sha, has_object in hasobject_reader:
+			assert has_object
+			count += 1
+		# END for each sha
+		assert count == ni
+		
+		# read the objects we have just written
+		reader = IteratorReader( istream.binsha for istream in istreams )
+		ostream_reader = db.stream_async(reader)
+		
+		# read items individually to prevent hitting possible sys-limits
+		count = 0
+		for ostream in ostream_reader:
+			assert isinstance(ostream, OStream)
+			count += 1
+		# END for each ostream
+		assert ostream_reader.task().error() is None
+		assert count == ni
+		
+		# get info about our items
+		reader = IteratorReader( istream.binsha for istream in istreams )
+		info_reader = db.info_async(reader)
+		
+		count = 0
+		for oinfo in info_reader:
+			assert isinstance(oinfo, OInfo)
+			count += 1
+		# END for each oinfo instance
+		assert count == ni
+		
+		  
+		# combined read-write using a converter
+		# add 2500 items, and obtain their output streams
+		nni = 2500
+		reader = IteratorReader(istream_generator(offset=ni, ni=nni))
+		istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ]
+		
+		istream_reader = db.store_async(reader)
+		istream_reader.set_post_cb(istream_to_sha)
+		
+		ostream_reader = db.stream_async(istream_reader)
+		
+		count = 0
+		# read it individually, otherwise we might run into the ulimit
+		for ostream in ostream_reader:
+			assert isinstance(ostream, OStream)
+			count += 1
+		# END for each ostream
+		assert count == nni
+		
+		
diff --git a/git/test/db/test_base.py b/git/test/db/test_base.py
new file mode 100644
index 00000000..0a381beb
--- /dev/null
+++ b/git/test/db/test_base.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from gitdb.db import RefSpec
+
+class TestBase(TestDBBase):
+	
+	@with_rw_directory
+	def test_basics(self, path):
+		self.failUnlessRaises(ValueError, RefSpec, None, None)
+		rs = RefSpec(None, "something")
+		assert rs.force == False
+		assert rs.delete_destination()
+		assert rs.source is None
+		assert rs.destination == "something"
+		
diff --git a/git/test/db/test_git.py b/git/test/db/test_git.py
new file mode 100644
index 00000000..62f33bb1
--- /dev/null
+++ b/git/test/db/test_git.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from gitdb.exc import BadObject
+from gitdb.db.py import PureGitODB
+from gitdb.base import OStream, OInfo
+from gitdb.util import hex_to_bin, bin_to_hex
+		
+class TestGitDB(TestDBBase):
+	
+	def test_reading(self):
+		gdb = PureGitODB(fixture_path('../../../.git/objects'))
+		
+		# we have packs and loose objects, alternates doesn't necessarily exist
+		assert 1 < len(gdb.databases()) < 4
+		
+		# access should be possible
+		gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
+		assert isinstance(gdb.info(gitdb_sha), OInfo)
+		assert isinstance(gdb.stream(gitdb_sha), OStream)
+		assert gdb.size() > 200
+		sha_list = list(gdb.sha_iter())
+		assert len(sha_list) == gdb.size()
+		
+		
+		# This is actually a test for compound functionality, but it doesn't 
+		# have a separate test module
+		# test partial shas
+		# this one is uneven and quite short
+		assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc")
+		
+		# mix even/uneven hexshas
+		for i, binsha in enumerate(sha_list):
+			assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
+		# END for each sha
+		
+		self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
+		
+	@with_rw_directory
+	def test_writing(self, path):
+		gdb = PureGitODB(path)
+		
+		# it's possible to write objects
+		self._assert_object_writing(gdb)
+		self._assert_object_writing_async(gdb)
diff --git a/git/test/db/test_loose.py b/git/test/db/test_loose.py
new file mode 100644
index 00000000..b1d33fd6
--- /dev/null
+++ b/git/test/db/test_loose.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from gitdb.db.py import PureLooseObjectODB
+from gitdb.exc import BadObject
+from gitdb.util import bin_to_hex
+		
+class TestLooseDB(TestDBBase):
+	
+	@with_rw_directory
+	def test_basics(self, path):
+		ldb = PureLooseObjectODB(path)
+		
+		# write data
+		self._assert_object_writing(ldb)
+		self._assert_object_writing_async(ldb)
+	
+		# verify sha iteration and size
+		shas = list(ldb.sha_iter())
+		assert shas and len(shas[0]) == 20
+		
+		assert len(shas) == ldb.size()
+		
+		# verify find short object
+		long_sha = bin_to_hex(shas[-1])
+		for short_sha in (long_sha[:20], long_sha[:5]):
+			assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
+		# END for each sha
+		
+		self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
+		# raises if no object could be found
+		
diff --git a/git/test/db/test_mem.py b/git/test/db/test_mem.py
new file mode 100644
index 00000000..79005b50
--- /dev/null
+++ b/git/test/db/test_mem.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from gitdb.db.py import (
+						PureMemoryDB,
+						PureLooseObjectODB
+					)
+		
+class TestPureMemoryDB(TestDBBase):
+	
+	@with_rw_directory
+	def test_writing(self, path):
+		mdb = PureMemoryDB()
+		
+		# write data
+		self._assert_object_writing_simple(mdb)
+		
+		# test stream copy
+		ldb = PureLooseObjectODB(path)
+		assert ldb.size() == 0
+		num_streams_copied = mdb.stream_copy(mdb.sha_iter(), ldb)
+		assert num_streams_copied == mdb.size()
+		
+		assert ldb.size() == mdb.size()
+		for sha in mdb.sha_iter():
+			assert ldb.has_object(sha)
+			assert ldb.stream(sha).read() == mdb.stream(sha).read() 
+		# END verify objects were copied and are equal
diff --git a/git/test/db/test_pack.py b/git/test/db/test_pack.py
new file mode 100644
index 00000000..5456df41
--- /dev/null
+++ b/git/test/db/test_pack.py
@@ -0,0 +1,72 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from gitdb.db.py import PurePackedODB
+from gitdb.test.lib import fixture_path
+
+from gitdb.exc import BadObject, AmbiguousObjectName
+
+import os
+import random
+
+class TestPackDB(TestDBBase):
+	
+	@with_packs_rw
+	def test_writing(self, path):
+		pdb = PurePackedODB(path)
+		
+		# on demand, we init our pack cache
+		num_packs = len(pdb.entities())
+		assert pdb._st_mtime != 0
+		
+		# test pack directory changed: 
+		# packs removed - rename a file, should affect the glob
+		pack_path = pdb.entities()[0].pack().path()
+		new_pack_path = pack_path + "renamed"
+		os.rename(pack_path, new_pack_path)
+		
+		pdb.update_cache(force=True)
+		assert len(pdb.entities()) == num_packs - 1
+		
+		# packs added
+		os.rename(new_pack_path, pack_path)
+		pdb.update_cache(force=True)
+		assert len(pdb.entities()) == num_packs
+	
+		# bang on the cache
+		# access the Entities directly, as there is no iteration interface
+		# yet ( or required for now )
+		sha_list = list(pdb.sha_iter())
+		assert len(sha_list) == pdb.size()
+		
+		# hit all packs in random order
+		random.shuffle(sha_list)
+		
+		for sha in sha_list:
+			info = pdb.info(sha)
+			stream = pdb.stream(sha)
+		# END for each sha to query
+		
+		
+		# test short finding - be a bit more brutal here
+		max_bytes = 19
+		min_bytes = 2
+		num_ambiguous = 0
+		for i, sha in enumerate(sha_list):
+			short_sha = sha[:max((i % max_bytes), min_bytes)]
+			try:
+				assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha
+			except AmbiguousObjectName:
+				num_ambiguous += 1
+				pass # valid, we can have short objects
+			# END exception handling
+		# END for each sha to find
+		
+		# we should have at least one ambiguous, considering the small sizes
+		# but in our pack, there is no ambiguous ... 
+		# assert num_ambiguous
+		
+		# non-existing
+		self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4)
diff --git a/git/test/db/test_ref.py b/git/test/db/test_ref.py
new file mode 100644
index 00000000..330dab70
--- /dev/null
+++ b/git/test/db/test_ref.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from gitdb.db.py import PureReferenceDB
+
+from gitdb.util import (
+						NULL_BIN_SHA,
+						hex_to_bin
+						)
+
+import os
+		
+class TestPureReferenceDB(TestDBBase):
+	
+	def make_alt_file(self, alt_path, alt_list):
+		"""Create an alternates file which contains the given alternates.
+		The list can be empty"""
+		alt_file = open(alt_path, "wb")
+		for alt in alt_list:
+			alt_file.write(alt + "\n")
+		alt_file.close()
+	
+	@with_rw_directory
+	def test_writing(self, path):
+		NULL_BIN_SHA = '\0'  * 20
+		
+		alt_path = os.path.join(path, 'alternates')
+		rdb = PureReferenceDB(alt_path)
+		assert len(rdb.databases()) == 0
+		assert rdb.size() == 0
+		assert len(list(rdb.sha_iter())) == 0
+		
+		# try empty, non-existing
+		assert not rdb.has_object(NULL_BIN_SHA)
+		
+		
+		# setup alternate file
+		# add two, one is invalid
+		own_repo_path = fixture_path('../../../.git/objects')		# use own repo
+		self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
+		rdb.update_cache()
+		assert len(rdb.databases()) == 1
+		
+		# we should now find a default revision of ours
+		gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
+		assert rdb.has_object(gitdb_sha)
+		
+		# remove valid
+		self.make_alt_file(alt_path, ["just/one/invalid/path"])
+		rdb.update_cache()
+		assert len(rdb.databases()) == 0
+		
+		# add valid
+		self.make_alt_file(alt_path, [own_repo_path])
+		rdb.update_cache()
+		assert len(rdb.databases()) == 1
+		
+
author	Sebastian Thiel <byronimo@gmail.com>	2011-05-05 19:43:22 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2011-05-05 19:43:22 +0200
commit	4177eefd7bdaea96a529b00ba9cf751924ede202 (patch)
tree	958614c21bd97267e0d06f71bb18d4215ddd87b5 /git/test/db
parent	f54546a9b857ae728033482f3c5c18c9ff3393c3 (diff)
download	gitpython-4177eefd7bdaea96a529b00ba9cf751924ede202.tar.gz