Diffstat (limited to 'git/db/py')
-rw-r--r-- | git/db/py/__init__.py  |  13
-rw-r--r-- | git/db/py/base.py      | 351
-rw-r--r-- | git/db/py/git.py       | 113
-rw-r--r-- | git/db/py/loose.py     | 262
-rw-r--r-- | git/db/py/mem.py       | 113
-rw-r--r-- | git/db/py/pack.py      | 212
-rw-r--r-- | git/db/py/ref.py       |  77
-rw-r--r-- | git/db/py/resolve.py   | 297
-rw-r--r-- | git/db/py/transport.py |  89
9 files changed, 1527 insertions, 0 deletions
diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py
new file mode 100644
index 00000000..046c699d
--- /dev/null
+++ b/git/db/py/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from base import *
+from loose import *
+from mem import *
+from pack import *
+from git import *
+from ref import *
+from resolve import *
+from transport import *
diff --git a/git/db/py/base.py b/git/db/py/base.py
new file mode 100644
index 00000000..c378b10e
--- /dev/null
+++ b/git/db/py/base.py
@@ -0,0 +1,351 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic implementations for the interface building blocks"""
+
+from gitdb.db.interface import *
+
+from gitdb.util import (
+    pool,
+    join,
+    normpath,
+    abspath,
+    dirname,
+    LazyMixin,
+    hex_to_bin,
+    bin_to_hex,
+    expandvars,
+    expanduser,
+    exists,
+    is_git_dir
+    )
+
+from gitdb.config import GitConfigParser
+from gitdb.exc import (
+    BadObject,
+    AmbiguousObjectName,
+    InvalidDBRoot
+    )
+
+from async import ChannelThreadTask
+
+from itertools import chain
+import sys
+import os
+
+
+__all__ = ('PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB',
+           'PureConfigurationMixin', 'PureRepositoryPathsMixin')
+
+
+class PureObjectDBR(ObjectDBR):
+
+    #{ Query Interface
+
+    def has_object_async(self, reader):
+        task = ChannelThreadTask(reader, str(self.has_object_async),
+                                 lambda sha: (sha, self.has_object(sha)))
+        return pool.add_task(task)
+
+    def info_async(self, reader):
+        task = ChannelThreadTask(reader, str(self.info_async), self.info)
+        return pool.add_task(task)
+
+    def stream_async(self, reader):
+        # the base implementation just uses the stream method repeatedly
+        task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+        return pool.add_task(task)
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        len_partial_hexsha = len(partial_hexsha)
+        if len_partial_hexsha % 2 != 0:
+            partial_binsha = hex_to_bin(partial_hexsha + "0")
+        else:
+            partial_binsha = hex_to_bin(partial_hexsha)
+        # END assure successful binary conversion
+        return self.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+
+    #} END query interface
+
+
+class PureObjectDBW(ObjectDBW):
+
+    def __init__(self, *args, **kwargs):
+        super(PureObjectDBW, self).__init__(*args, **kwargs)
+        self._ostream = None
+
+    #{ Edit Interface
+    def set_ostream(self, stream):
+        cstream = self._ostream
+        self._ostream = stream
+        return cstream
+
+    def ostream(self):
+        return self._ostream
+
+    def store_async(self, reader):
+        task = ChannelThreadTask(reader, str(self.store_async), self.store)
+        return pool.add_task(task)
+
+    #} END edit interface
+
+
+class PureRootPathDB(RootPathDB):
+
+    def __init__(self, root_path):
+        super(PureRootPathDB, self).__init__(root_path)
+        self._root_path = root_path
+
+    #{ Interface
+    def root_path(self):
+        return self._root_path
+
+    def db_path(self, rela_path):
+        return join(self._root_path, rela_path)
+    #} END interface
+
+
+def _databases_recursive(database, output):
+    """Fill the output list with the databases contained in database, in order.
+    Deals with loose, packed and compound databases."""
+    if isinstance(database, CompoundDB):
+        dbs = database.databases()
+        output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+        for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+            _databases_recursive(cdb, output)
+    else:
+        output.append(database)
+    # END handle database type
+
+
+class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB):
+    def _set_cache_(self, attr):
+        if attr == '_dbs':
+            self._dbs = list()
+        elif attr == '_db_cache':
+            self._db_cache = dict()
+        else:
+            super(PureCompoundDB, self)._set_cache_(attr)
+
+    def _db_query(self, sha):
+        """:return: database containing the given 20 byte sha
+        :raise BadObject:"""
+        # most databases use binary representations, prevent converting
+        # it every time a database is being queried
+        try:
+            return self._db_cache[sha]
+        except KeyError:
+            pass
+        # END first level cache
+
+        for db in self._dbs:
+            if db.has_object(sha):
+                self._db_cache[sha] = db
+                return db
+        # END for each database
+        raise BadObject(sha)
+
+    #{ PureObjectDBR interface
+
+    def has_object(self, sha):
+        try:
+            self._db_query(sha)
+            return True
+        except BadObject:
+            return False
+        # END handle exceptions
+
+    def info(self, sha):
+        return self._db_query(sha).info(sha)
+
+    def stream(self, sha):
+        return self._db_query(sha).stream(sha)
+
+    def size(self):
+        return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)
+
+    def sha_iter(self):
+        return chain(*(db.sha_iter() for db in self._dbs))
+
+    #} END object DBR Interface
+
+    #{ Interface
+
+    def databases(self):
+        return tuple(self._dbs)
+
+    def update_cache(self, force=False):
+        # something might have changed, clear everything
+        self._db_cache.clear()
+        stat = False
+        for db in self._dbs:
+            if isinstance(db, CachingDB):
+                stat |= db.update_cache(force)
+            # END if is caching db
+        # END for each database to update
+        return stat
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        databases = self.databases()
+
+        len_partial_hexsha = len(partial_hexsha)
+        if len_partial_hexsha % 2 != 0:
+            partial_binsha = hex_to_bin(partial_hexsha + "0")
+        else:
+            partial_binsha = hex_to_bin(partial_hexsha)
+        # END assure successful binary conversion
+
+        candidate = None
+        for db in databases:
+            full_bin_sha = None
+            try:
+                if hasattr(db, 'partial_to_complete_sha_hex'):
+                    full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+                else:
+                    full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+                # END handle database type
+            except BadObject:
+                continue
+            # END ignore bad objects
+            if full_bin_sha:
+                if candidate and candidate != full_bin_sha:
+                    raise AmbiguousObjectName(partial_hexsha)
+                candidate = full_bin_sha
+            # END handle candidate
+        # END for each db
+        if not candidate:
+            raise BadObject(partial_binsha)
+        return candidate
+
+    def partial_to_complete_sha(self, partial_binsha, hex_len):
+        """Simple adaptor to feed into our implementation"""
+        return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
+    #} END interface
+
+
+class PureRepositoryPathsMixin(RepositoryPathsMixin):
+    # slots has no effect here, it's just to keep track of used attrs
+    __slots__ = ("_git_path", '_bare', '_working_tree_dir')
+
+    #{ Configuration
+    repo_dir = '.git'
+    objs_dir = 'objects'
+    #} END configuration
+
+    #{ Subclass Interface
+    def _initialize(self, path):
+        epath = abspath(expandvars(expanduser(path or os.getcwd())))
+
+        if not exists(epath):
+            raise InvalidDBRoot(epath)
+        # END check file
+
+        self._working_tree_dir = None
+        self._git_path = None
+        curpath = epath
+
+        # walk up the path to find the .git dir
+        while curpath:
+            if is_git_dir(curpath):
+                self._git_path = curpath
+                self._working_tree_dir = os.path.dirname(curpath)
+                break
+            gitpath = join(curpath, self.repo_dir)
+            if is_git_dir(gitpath):
+                self._git_path = gitpath
+                self._working_tree_dir = curpath
+                break
+            curpath, dummy = os.path.split(curpath)
+            if not dummy:
+                break
+        # END while curpath
+
+        if self._git_path is None:
+            raise InvalidDBRoot(epath)
+        # END path not found
+
+        self._bare = self._git_path.endswith(self.repo_dir)
+        if hasattr(self, 'config_reader'):
+            try:
+                self._bare = self.config_reader("repository").getboolean('core', 'bare')
+            except Exception:
+                # let's not assume the option exists, although it should
+                pass
+        # END check bare flag
+
+    #} END subclass interface
+
+    #{ Interface
+
+    def is_bare(self):
+        return self._bare
+
+    def git_path(self):
+        return self._git_path
+
+    def working_tree_path(self):
+        if self.is_bare():
+            raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_path())
+        # END assertion
+        return dirname(self.git_path())
+
+    def objects_path(self):
+        return join(self.git_path(), self.objs_dir)
+
+    def working_dir(self):
+        if self.is_bare():
+            return self.git_path()
+        else:
+            return self.working_tree_path()
+        # END handle bare state
+
+    #} END interface
+
+
+class PureConfigurationMixin(ConfigurationMixin):
+
+    #{ Configuration
+    system_config_file_name = "gitconfig"
+    repo_config_file_name = "config"
+    #} END configuration
+
+    def __init__(self, *args, **kwargs):
+        """Verify prerequisites, keeping the initialization chain intact"""
+        super(PureConfigurationMixin, self).__init__(*args, **kwargs)
+        assert hasattr(self, 'git_path')
+
+    def _path_at_level(self, level):
+        # we do not support an absolute path to the gitconfig on windows,
+        # use the global config instead
+        if sys.platform == "win32" and level == "system":
+            level = "global"
+        # END handle windows
+
+        if level == "system":
+            return "/etc/%s" % self.system_config_file_name
+        elif level == "global":
+            return normpath(expanduser("~/.%s" % self.system_config_file_name))
+        elif level == "repository":
+            return join(self.git_path(), self.repo_config_file_name)
+        # END handle level
+
+        raise ValueError("Invalid configuration level: %r" % level)
+
+    #{ Interface
+
+    def config_reader(self, config_level=None):
+        files = None
+        if config_level is None:
+            files = [self._path_at_level(f) for f in self.config_level]
+        else:
+            files = [self._path_at_level(config_level)]
+        # END handle level
+        return GitConfigParser(files, read_only=True)
+
+    def config_writer(self, config_level="repository"):
+        return GitConfigParser(self._path_at_level(config_level), read_only=False)
+
+    #} END interface
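The odd-length handling in partial_to_complete_sha_hex above is the subtle part: a hex string with an odd number of digits cannot be converted to whole bytes. A minimal standalone sketch of the same idea, using binascii in place of gitdb's hex_to_bin and a made-up sha prefix:

    import binascii

    def partial_hex_to_bin(partial_hexsha):
        # an odd number of hex digits cannot map to whole bytes, so pad with
        # a zero nibble; the original digit count is passed along separately
        # so the matcher knows how many nibbles are actually significant
        if len(partial_hexsha) % 2 != 0:
            return binascii.unhexlify(partial_hexsha + "0")
        return binascii.unhexlify(partial_hexsha)

    # seven digits become four bytes whose last nibble is padding
    assert partial_hex_to_bin("01234ab") == "\x01\x23\x4a\xb0"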
diff --git a/git/db/py/git.py b/git/db/py/git.py
new file mode 100644
index 00000000..bc148c6f
--- /dev/null
+++ b/git/db/py/git.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+    PureCompoundDB,
+    PureObjectDBW,
+    PureRootPathDB,
+    PureRepositoryPathsMixin,
+    PureConfigurationMixin,
+    )
+
+from resolve import PureReferencesMixin
+
+from loose import PureLooseObjectODB
+from pack import PurePackedODB
+from ref import PureReferenceDB
+
+from gitdb.util import (
+    LazyMixin,
+    normpath,
+    join,
+    dirname
+    )
+from gitdb.exc import (
+    InvalidDBRoot,
+    BadObject,
+    AmbiguousObjectName
+    )
+import os
+
+__all__ = ('PureGitODB', 'PureGitDB')
+
+
+class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB):
+    """A git-style object-only database, which contains all objects in the 'objects'
+    subdirectory.
+    :note: The type needs to be initialized on the ./objects directory to function,
+        as it deals solely with object lookup. Use a PureGitDB type if you need
+        reference and push support."""
+    # Configuration
+    PackDBCls = PurePackedODB
+    LooseDBCls = PureLooseObjectODB
+    PureReferenceDBCls = PureReferenceDB
+
+    # Directories
+    packs_dir = 'pack'
+    loose_dir = ''
+    alternates_dir = os.path.join('info', 'alternates')
+
+    def __init__(self, root_path):
+        """Initialize ourselves on a git ./objects directory"""
+        super(PureGitODB, self).__init__(root_path)
+
+    def _set_cache_(self, attr):
+        if attr == '_dbs' or attr == '_loose_db':
+            self._dbs = list()
+            loose_db = None
+            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+                                   (self.loose_dir, self.LooseDBCls),
+                                   (self.alternates_dir, self.PureReferenceDBCls)):
+                path = self.db_path(subpath)
+                if os.path.exists(path):
+                    self._dbs.append(dbcls(path))
+                    if dbcls is self.LooseDBCls:
+                        loose_db = self._dbs[-1]
+                    # END remember loose db
+                # END check path exists
+            # END for each db type
+
+            # should have at least one subdb
+            if not self._dbs:
+                raise InvalidDBRoot(self.root_path())
+            # END handle error
+
+            # the loose db handles all writes, hence it must provide the store method
+            assert loose_db is not None and hasattr(loose_db, 'store'), \
+                "First database needs store functionality"
+
+            # finally set the value
+            self._loose_db = loose_db
+        else:
+            super(PureGitODB, self)._set_cache_(attr)
+        # END handle attrs
+
+    #{ PureObjectDBW interface
+
+    def store(self, istream):
+        return self._loose_db.store(istream)
+
+    def ostream(self):
+        return self._loose_db.ostream()
+
+    def set_ostream(self, ostream):
+        return self._loose_db.set_ostream(ostream)
+
+    #} END objectdbw interface
+
+
+class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin):
+    """Git-like database with support for object lookup as well as reference
+    resolution, working on bare and non-bare repositories alike.
+
+    The root_path will be the git objects directory; use git_path() to obtain
+    the actual top-level git directory."""
+
+    def __init__(self, root_path):
+        """Initialize ourselves on a path pointing at or into a git repository,
+        from which the .git and .git/objects directories are derived."""
+        PureRepositoryPathsMixin._initialize(self, root_path)
+        super(PureGitDB, self).__init__(self.objects_path())
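A minimal usage sketch of the two types, assuming the package is importable as git.db.py as the file paths suggest (the repository path is hypothetical): PureGitODB must be handed the ./objects directory directly, while PureGitDB derives it from any path pointing at or into a repository.

    from git.db.py.git import PureGitODB, PureGitDB

    # object lookup only - initialized on the objects directory itself
    odb = PureGitODB("/path/to/repo/.git/objects")
    print odb.size()                # number of objects in all sub-databases

    # full repository database - initialized on the repository path
    db = PureGitDB("/path/to/repo")
    print db.git_path()             # /path/to/repo/.git
    print db.objects_path()         # /path/to/repo/.git/objects
    print db.is_bare()              # False for a repository with a working tree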
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 00000000..34e31da6
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,262 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+    PureRootPathDB,
+    PureObjectDBR,
+    PureObjectDBW
+    )
+
+from gitdb.exc import (
+    InvalidDBRoot,
+    BadObject,
+    AmbiguousObjectName
+    )
+
+from gitdb.stream import (
+    DecompressMemMapReader,
+    FDCompressedSha1Writer,
+    FDStream,
+    Sha1Writer
+    )
+
+from gitdb.base import (
+    OStream,
+    OInfo
+    )
+
+from gitdb.util import (
+    file_contents_ro_filepath,
+    ENOENT,
+    hex_to_bin,
+    bin_to_hex,
+    exists,
+    chmod,
+    isdir,
+    isfile,
+    remove,
+    mkdir,
+    rename,
+    dirname,
+    basename,
+    join
+    )
+
+from gitdb.fun import (
+    chunk_size,
+    loose_object_header_info,
+    write_object,
+    stream_copy
+    )
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ('PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+    """A database which operates on loose object files"""
+
+    # CONFIGURATION
+    # chunks in which data will be copied between streams
+    stream_chunk_size = chunk_size
+
+    # new objects are read-only by default; on windows we need to keep them
+    # writable, otherwise they cannot be removed
+    new_objects_mode = 0444
+    if os.name == 'nt':
+        new_objects_mode = 0644
+
+    def __init__(self, root_path):
+        super(PureLooseObjectODB, self).__init__(root_path)
+        self._hexsha_to_file = dict()
+        # Additional flags - might be set to 0 after the first failure
+        # Depending on the root, this might work for some mounts, for others not,
+        # which is why it is per instance
+        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+    #{ Interface
+    def object_path(self, hexsha):
+        """
+        :return: path at which the object with the given hexsha would be stored,
+            relative to the database root"""
+        return join(hexsha[:2], hexsha[2:])
+
+    def readable_db_object_path(self, hexsha):
+        """
+        :return: readable object path to the object identified by hexsha
+        :raise BadObject: If the object file does not exist"""
+        try:
+            return self._hexsha_to_file[hexsha]
+        except KeyError:
+            pass
+        # END ignore cache misses
+
+        # try filesystem
+        path = self.db_path(self.object_path(hexsha))
+        if exists(path):
+            self._hexsha_to_file[hexsha] = path
+            return path
+        # END handle cache
+        raise BadObject(hexsha)
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        """:return: 20 byte binary sha1 which matches the given name uniquely
+        :param partial_hexsha: hexadecimal partial name
+        :raise AmbiguousObjectName:
+        :raise BadObject:"""
+        candidate = None
+        for binsha in self.sha_iter():
+            if bin_to_hex(binsha).startswith(partial_hexsha):
+                # a second match means the name is ambiguous - the same
+                # object cannot be found twice
+                if candidate is not None:
+                    raise AmbiguousObjectName(partial_hexsha)
+                candidate = binsha
+        # END for each object
+        if candidate is None:
+            raise BadObject(partial_hexsha)
+        return candidate
+
+    #} END interface
+
+    def _map_loose_object(self, sha):
+        """
+        :return: memory map of that file to allow random read access
+        :raise BadObject: if the object could not be located"""
+        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+        try:
+            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+        except OSError, e:
+            if e.errno != ENOENT:
+                # the failure is likely caused by our additional flags (O_NOATIME),
+                # disable them for all future requests and try again without them
+                self._fd_open_flags = 0
+                try:
+                    return file_contents_ro_filepath(db_path)
+                except OSError:
+                    raise BadObject(sha)
+                # END retry without additional flags
+            else:
+                raise BadObject(sha)
+            # END handle error
+        # END exception handling
+
+    def set_ostream(self, stream):
+        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+        if stream is not None and not isinstance(stream, Sha1Writer):
+            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
+        return super(PureLooseObjectODB, self).set_ostream(stream)
+
+    def info(self, sha):
+        m = self._map_loose_object(sha)
+        try:
+            type, size = loose_object_header_info(m)
+            return OInfo(sha, type, size)
+        finally:
+            m.close()
+        # END assure release of system resources
+
+    def stream(self, sha):
+        m = self._map_loose_object(sha)
+        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
+        return OStream(sha, type, size, stream)
+
+    def has_object(self, sha):
+        try:
+            self.readable_db_object_path(bin_to_hex(sha))
+            return True
+        except BadObject:
+            return False
+        # END check existence
+
+    def store(self, istream):
+        """:note: The sha we produce will be hex by nature"""
+        tmp_path = None
+        writer = self.ostream()
+        if writer is None:
+            # open a tmp file to write the data to
+            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+            if istream.binsha is None:
+                writer = FDCompressedSha1Writer(fd)
+            else:
+                writer = FDStream(fd)
+            # END handle direct stream copies
+        # END handle custom writer
+
+        try:
+            try:
+                if istream.binsha is not None:
+                    # copy as much as possible, the actual uncompressed item size might
+                    # be smaller than the compressed version
+                    stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+                else:
+                    # write object with header, we have to make a new one
+                    write_object(istream.type, istream.size, istream.read, writer.write,
+                                 chunk_size=self.stream_chunk_size)
+                # END handle direct stream copies
+            finally:
+                if tmp_path:
+                    writer.close()
+            # END assure target stream is closed
+        except:
+            if tmp_path:
+                os.remove(tmp_path)
+            raise
+        # END assure tmpfile removal on error
+
+        hexsha = None
+        if istream.binsha:
+            hexsha = istream.hexsha
+        else:
+            hexsha = writer.sha(as_hex=True)
+        # END handle sha
+
+        if tmp_path:
+            obj_path = self.db_path(self.object_path(hexsha))
+            obj_dir = dirname(obj_path)
+            if not isdir(obj_dir):
+                mkdir(obj_dir)
+            # END handle destination directory
+            # rename onto existing doesn't work on windows
+            if os.name == 'nt' and isfile(obj_path):
+                remove(obj_path)
+            # END handle win32
+            rename(tmp_path, obj_path)
+
+            # make sure the object is readable for all - mkstemp created it
+            # rw for the owner only, but the database needs it world-readable
+            chmod(obj_path, self.new_objects_mode)
+        # END handle dry_run
+
+        istream.binsha = hex_to_bin(hexsha)
+        return istream
+
+    def sha_iter(self):
+        # find all files which look like an object, extract sha from there
+        for root, dirs, files in os.walk(self.root_path()):
+            root_base = basename(root)
+            if len(root_base) != 2:
+                continue
+
+            for f in files:
+                if len(f) != 38:
+                    continue
+                yield hex_to_bin(root_base + f)
+            # END for each file
+        # END for each walk iteration
+
+    def size(self):
+        return len(tuple(self.sha_iter()))
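To illustrate the store() path above, a hedged round-trip sketch; the target directory is hypothetical and must exist, and IStream comes from gitdb.base just as this module's own imports do:

    from cStringIO import StringIO
    from gitdb.base import IStream
    from git.db.py.loose import PureLooseObjectODB

    ldb = PureLooseObjectODB("/tmp/objects")    # hypothetical, existing directory
    data = "hello world"

    # no binsha given, so store() compresses, hashes and writes the object
    istream = ldb.store(IStream("blob", len(data), StringIO(data)))
    print istream.hexsha                   # sha1 over the encoded object
    print ldb.object_path(istream.hexsha)  # 'xx/yyyy...' - two-level fan-out
    assert ldb.has_object(istream.binsha)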
diff --git a/git/db/py/mem.py b/git/db/py/mem.py
new file mode 100644
index 00000000..ba922e96
--- /dev/null
+++ b/git/db/py/mem.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains the MemoryDatabase implementation"""
+from loose import PureLooseObjectODB
+from base import (
+    PureObjectDBR,
+    PureObjectDBW
+    )
+
+from gitdb.base import (
+    OStream,
+    IStream,
+    )
+
+from gitdb.exc import (
+    BadObject,
+    UnsupportedOperation
+    )
+from gitdb.stream import (
+    ZippedStoreShaWriter,
+    DecompressMemMapReader,
+    )
+
+from cStringIO import StringIO
+
+__all__ = ("PureMemoryDB", )
+
+
+class PureMemoryDB(PureObjectDBR, PureObjectDBW):
+    """A memory database which stores everything in memory, providing fast IO and
+    object retrieval. It should be used to buffer results and obtain SHAs before
+    writing them to the actual physical storage, as it allows one to query whether
+    an object already exists in the target storage before causing actual IO.
+
+    :note: the database is currently not threadsafe, hence the async methods
+        cannot be used for storing"""
+
+    def __init__(self):
+        super(PureMemoryDB, self).__init__()
+        self._db = PureLooseObjectODB("path/doesnt/matter")
+
+        # maps 20 byte shas to their OStream objects
+        self._cache = dict()
+
+    def set_ostream(self, stream):
+        raise UnsupportedOperation("PureMemoryDBs always stream into memory")
+
+    def store(self, istream):
+        zstream = ZippedStoreShaWriter()
+        self._db.set_ostream(zstream)
+
+        istream = self._db.store(istream)
+        zstream.close()        # close to flush
+        zstream.seek(0)
+
+        # don't provide a size, the stream is written in object format, hence the
+        # header needs decompression
+        decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+        self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+        return istream
+
+    def store_async(self, reader):
+        raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access")
+
+    def has_object(self, sha):
+        return sha in self._cache
+
+    def info(self, sha):
+        # we always return streams, which are infos as well
+        return self.stream(sha)
+
+    def stream(self, sha):
+        try:
+            ostream = self._cache[sha]
+            # rewind stream for the next one to read
+            ostream.stream.seek(0)
+            return ostream
+        except KeyError:
+            raise BadObject(sha)
+        # END exception handling
+
+    def size(self):
+        return len(self._cache)
+
+    def sha_iter(self):
+        return self._cache.iterkeys()
+
+    #{ Interface
+    def stream_copy(self, sha_iter, odb):
+        """Copy the streams as identified by shas yielded by sha_iter into the
+        given odb. The streams will be copied directly.
+        :note: an object will only be written if it did not yet exist in the target db
+        :return: number of streams actually copied into odb. If smaller than the
+            number of input shas, one or more objects already existed in odb"""
+        count = 0
+        for sha in sha_iter:
+            if odb.has_object(sha):
+                continue
+            # END check object existence
+
+            ostream = self.stream(sha)
+            # compressed data including header
+            sio = StringIO(ostream.stream.data())
+            istream = IStream(ostream.type, ostream.size, sio, sha)
+
+            odb.store(istream)
+            count += 1
+        # END for each sha
+        return count
+    #} END interface
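stream_copy above is the intended way to flush buffered objects into a persistent database; a sketch building on the loose-db example, with all paths hypothetical:

    from cStringIO import StringIO
    from gitdb.base import IStream
    from git.db.py.mem import PureMemoryDB
    from git.db.py.loose import PureLooseObjectODB

    mdb = PureMemoryDB()
    shas = [mdb.store(IStream("blob", len(s), StringIO(s))).binsha
            for s in ("first", "second")]

    # nothing has touched the disk so far; copy both objects into a real database
    ldb = PureLooseObjectODB("/tmp/objects")    # hypothetical target
    assert mdb.stream_copy(shas, ldb) == 2      # 0 on a second run - they exist now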
diff --git a/git/db/py/pack.py b/git/db/py/pack.py
new file mode 100644
index 00000000..1d0e9bfc
--- /dev/null
+++ b/git/db/py/pack.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing a database to deal with packs"""
+from gitdb.db import CachingDB
+from base import (
+    PureRootPathDB,
+    PureObjectDBR
+    )
+
+from gitdb.util import LazyMixin
+
+from gitdb.exc import (
+    BadObject,
+    UnsupportedOperation,
+    AmbiguousObjectName
+    )
+
+from gitdb.pack import PackEntity
+
+import os
+import glob
+
+__all__ = ('PurePackedODB', )
+
+
+class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin):
+    """A database operating on a set of object packs"""
+
+    # the type to use when instantiating a pack entity
+    PackEntityCls = PackEntity
+
+    # sort the priority list every N queries
+    # Higher values are better; performance tests don't show this has
+    # any effect, but it should have one
+    _sort_interval = 500
+
+    def __init__(self, root_path):
+        super(PurePackedODB, self).__init__(root_path)
+        # list of lists with three items:
+        # * hits - number of times the pack was hit with a request
+        # * entity - Pack entity instance
+        # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+        # self._entities = list()       # lazy loaded list
+        self._hit_count = 0             # amount of hits
+        self._st_mtime = 0              # last modification date of our root path
+
+    def _set_cache_(self, attr):
+        if attr == '_entities':
+            self._entities = list()
+            self.update_cache(force=True)
+        # END handle entities initialization
+
+    def _sort_entities(self):
+        self._entities.sort(key=lambda l: l[0], reverse=True)
+
+    def _pack_info(self, sha):
+        """:return: tuple(entity, index) for an item at the given sha
+        :param sha: 20 or 40 byte sha
+        :raise BadObject:
+        :note: This method is not thread-safe, but may be hit in multi-threaded
+            operation. The worst thing that can happen though is a counter that
+            was not incremented, or the list being in the wrong order, so we save
+            the time for locking here and see how that goes"""
+        # presort?
+        if self._hit_count % self._sort_interval == 0:
+            self._sort_entities()
+        # END update sorting
+
+        for item in self._entities:
+            index = item[2](sha)
+            if index is not None:
+                item[0] += 1            # one hit for you
+                self._hit_count += 1    # general hit count
+                return (item[1], index)
+            # END index found in pack
+        # END for each item
+
+        # no hit, see whether we have to update packs
+        # NOTE: considering packs don't change very often, we save this call
+        # and leave it to the super-caller to trigger that
+        raise BadObject(sha)
+
+    #{ Object DB Read
+
+    def has_object(self, sha):
+        try:
+            self._pack_info(sha)
+            return True
+        except BadObject:
+            return False
+        # END exception handling
+
+    def info(self, sha):
+        entity, index = self._pack_info(sha)
+        return entity.info_at_index(index)
+
+    def stream(self, sha):
+        entity, index = self._pack_info(sha)
+        return entity.stream_at_index(index)
+
+    def sha_iter(self):
+        for entity in self.entities():
+            index = entity.index()
+            sha_by_index = index.sha
+            for pos in xrange(index.size()):
+                yield sha_by_index(pos)
+            # END for each index position
+        # END for each entity
+
+    def size(self):
+        sizes = [item[1].index().size() for item in self._entities]
+        return reduce(lambda x, y: x + y, sizes, 0)
+
+    #} END object db read
+
+    #{ object db write
+
+    def store(self, istream):
+        """Storing individual objects is not feasible as a pack is designed to
+        hold multiple objects. Writing or rewriting packs for single objects is
+        inefficient"""
+        raise UnsupportedOperation()
+
+    def store_async(self, reader):
+        # TODO: add PureObjectDBRW before implementing this
+        raise NotImplementedError()
+
+    #} END object db write
+
+    #{ Interface
+
+    def update_cache(self, force=False):
+        """
+        Update our cache with the actually existing packs on disk. Add new ones,
+        and remove deleted ones. We keep the unchanged ones.
+
+        :param force: If True, the cache will be updated even though the directory
+            does not appear to have changed according to its modification timestamp.
+        :return: True if the packs have been updated so there is new information,
+            False if there was no change to the pack database"""
+        stat = os.stat(self.root_path())
+        if not force and stat.st_mtime <= self._st_mtime:
+            return False
+        # END abort early on no change
+        self._st_mtime = stat.st_mtime
+
+        # packs are supposed to be prefixed with pack- by git-convention
+        # get all pack files, figure out what changed
+        pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+        our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+        # new packs
+        for pack_file in (pack_files - our_pack_files):
+            # init the hit-counter/priority with the size, a good measure for hit-
+            # probability. It is implemented such that only 12 bytes will be read
+            entity = self.PackEntityCls(pack_file)
+            self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+        # END for each new packfile
+
+        # removed packs
+        for pack_file in (our_pack_files - pack_files):
+            del_index = -1
+            for i, item in enumerate(self._entities):
+                if item[1].pack().path() == pack_file:
+                    del_index = i
+                    break
+                # END found index
+            # END for each entity
+            assert del_index != -1
+            del(self._entities[del_index])
+        # END for each removed pack
+
+        # reinitialize priorities
+        self._sort_entities()
+        return True
+
+    def entities(self):
+        """:return: list of pack entities operated upon by this database"""
+        return [item[1] for item in self._entities]
+
+    def partial_to_complete_sha(self, partial_binsha, canonical_length):
+        """:return: 20 byte sha as inferred by the given partial binary sha
+        :param partial_binsha: binary sha with less than 20 bytes
+        :param canonical_length: length of the corresponding canonical representation.
+            It is required as binary shas cannot express whether the original hex sha
+            had an odd or even number of characters
+        :raise AmbiguousObjectName:
+        :raise BadObject:"""
+        candidate = None
+        for item in self._entities:
+            item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+            if item_index is not None:
+                sha = item[1].index().sha(item_index)
+                if candidate and candidate != sha:
+                    raise AmbiguousObjectName(partial_binsha)
+                candidate = sha
+            # END handle full sha could be found
+        # END for each entity
+
+        if candidate:
+            return candidate
+
+        # still not found?
+        raise BadObject(partial_binsha)
+
+    #} END interface
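The hit-counter bookkeeping in _pack_info amounts to a self-reordering priority list; a generic sketch of the pattern, independent of the pack internals (all names are illustrative):

    class PriorityList(object):
        SORT_INTERVAL = 500

        def __init__(self, items):
            # [hits, payload] pairs; frequently hit entries bubble to the front
            self._entries = [[0, item] for item in items]
            self._queries = 0

        def find(self, predicate):
            # resort only every SORT_INTERVAL queries to keep lookups cheap
            if self._queries % self.SORT_INTERVAL == 0:
                self._entries.sort(key=lambda e: e[0], reverse=True)
            self._queries += 1
            for entry in self._entries:
                if predicate(entry[1]):
                    entry[0] += 1    # reward the entry that served the query
                    return entry[1]
            return None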
diff --git a/git/db/py/ref.py b/git/db/py/ref.py
new file mode 100644
index 00000000..951f0437
--- /dev/null
+++ b/git/db/py/ref.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import PureCompoundDB
+
+import os
+
+__all__ = ('PureReferenceDB', )
+
+
+class PureReferenceDB(PureCompoundDB):
+    """A database consisting of databases referred to in a file"""
+
+    # Configuration
+    # Specifies the object database to use for the paths found in the alternates
+    # file. If None, it defaults to the PureGitODB
+    ObjectDBCls = None
+
+    def __init__(self, ref_file):
+        super(PureReferenceDB, self).__init__()
+        self._ref_file = ref_file
+
+    def _set_cache_(self, attr):
+        if attr == '_dbs':
+            self._dbs = list()
+            self._update_dbs_from_ref_file()
+        else:
+            super(PureReferenceDB, self)._set_cache_(attr)
+        # END handle attrs
+
+    def _update_dbs_from_ref_file(self):
+        dbcls = self.ObjectDBCls
+        if dbcls is None:
+            # late import
+            from git import PureGitODB
+            dbcls = PureGitODB
+        # END get db type
+
+        # try to get as many as possible, don't fail if some are unavailable
+        ref_paths = list()
+        try:
+            ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+        except (OSError, IOError):
+            pass
+        # END handle alternates
+
+        ref_paths_set = set(ref_paths)
+        cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+        # remove existing
+        for path in (cur_ref_paths_set - ref_paths_set):
+            for i, db in enumerate(self._dbs[:]):
+                if db.root_path() == path:
+                    del(self._dbs[i])
+                    break
+                # END del matching db
+            # END for each db
+        # END for each path to remove
+
+        # add new
+        # sort them to maintain order
+        added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+        for path in added_paths:
+            try:
+                db = dbcls(path)
+                # force an update to verify path
+                if isinstance(db, PureCompoundDB):
+                    db.databases()
+                # END verification
+                self._dbs.append(db)
+            except Exception:
+                # ignore invalid paths or issues
+                pass
+        # END for each path to add
+
+    def update_cache(self, force=False):
+        # re-read alternates and update databases
+        self._update_dbs_from_ref_file()
+        return super(PureReferenceDB, self).update_cache(force)
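The ref_file parsed above follows git's objects/info/alternates format: one object-database root per line. A hedged sketch with made-up paths:

    # one absolute object-database root per line, as in .git/objects/info/alternates
    open("/tmp/alternates", "w").write(
        "/path/to/other/repo/.git/objects\n"
        "/path/to/shared/objects\n")

    from git.db.py.ref import PureReferenceDB
    rdb = PureReferenceDB("/tmp/alternates")
    # one sub-database per listed path that could actually be opened
    print len(rdb.databases())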
diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py
new file mode 100644
index 00000000..86c1e594
--- /dev/null
+++ b/git/db/py/resolve.py
@@ -0,0 +1,297 @@
+"""Module with an implementation for rev-spec parsing. It is the pure-python
+version assuming a compatible interface for reference and object types"""
+
+from gitdb.db.interface import ReferencesMixin
+from gitdb.exc import BadObject
+from gitdb.ref import SymbolicReference
+from gitdb.object.base import Object
+from gitdb.util import (
+    join,
+    isdir,
+    isfile,
+    hex_to_bin,
+    bin_to_hex,
+    is_git_dir
+    )
+from string import digits
+import os
+import re
+
+__all__ = ["PureReferencesMixin"]
+
+#{ Utilities
+
+def short_to_long(odb, hexsha):
+    """:return: long hexadecimal sha1 from the given less-than-40 character hexsha,
+        or None if no candidate could be found
+    :param hexsha: hexsha with less than 40 characters"""
+    try:
+        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
+    except BadObject:
+        return None
+    # END exception handling
+
+
+def name_to_object(repo, name, return_ref=False):
+    """
+    :return: object specified by the given name - hexshas (short and long)
+        as well as references are supported
+    :param return_ref: if name specifies a reference, we will return the reference
+        instead of the object. Otherwise it will raise BadObject
+    """
+    hexsha = None
+
+    # is it a hexsha? Try the most common lengths, which are 7 to 40 characters
+    if repo.re_hexsha_shortened.match(name):
+        if len(name) != 40:
+            # find long sha for short sha
+            hexsha = short_to_long(repo.odb, name)
+        else:
+            hexsha = name
+        # END handle short shas
+    # END find sha if it matches
+
+    # if we couldn't find an object for what seemed to be a short hexsha,
+    # try to find it as reference anyway, it could be named 'aaa' for instance
+    if hexsha is None:
+        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
+                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
+            try:
+                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
+                if return_ref:
+                    return SymbolicReference(repo, base % name)
+                # END handle symbolic ref
+                break
+            except ValueError:
+                pass
+        # END for each base
+    # END handle hexsha
+
+    # didn't find any ref, this is an error
+    if return_ref:
+        raise BadObject("Couldn't find reference named %r" % name)
+    # END handle return ref
+
+    # tried everything? fail
+    if hexsha is None:
+        raise BadObject(name)
+    # END assert hexsha was found
+
+    return Object.new_from_sha(repo, hex_to_bin(hexsha))
+
+
+def deref_tag(tag):
+    """Recursively dereference a tag and return the resulting object"""
+    while True:
+        try:
+            tag = tag.object
+        except AttributeError:
+            break
+    # END dereference tag
+    return tag
+
+
+def to_commit(obj):
+    """Convert the given object to a commit if possible and return it"""
+    if obj.type == 'tag':
+        obj = deref_tag(obj)
+
+    if obj.type != "commit":
+        raise ValueError("Cannot convert object %r to type commit" % obj)
+    # END verify type
+    return obj
+
+
+def rev_parse(repo, rev):
+    """
+    :return: Object at the given revision, either Commit, Tag, Tree or Blob
+    :param rev: git-rev-parse compatible revision specification, please see
+        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+        for details
+    :note: Currently there is no access to the rev-log, rev-specs may only contain
+        topological tokens such as ~ and ^.
+    :raise BadObject: if the given revision could not be found
+    :raise ValueError: If rev couldn't be parsed
+    :raise IndexError: If an invalid reflog index is specified"""
+
+    # colon search mode?
+    if rev.startswith(':/'):
+        # colon search mode
+        raise NotImplementedError("commit by message search ( regex )")
+    # END handle search
+
+    obj = None
+    ref = None
+    output_type = "commit"
+    start = 0
+    parsed_to = 0
+    lr = len(rev)
+    while start < lr:
+        if rev[start] not in "^~:@":
+            start += 1
+            continue
+        # END handle start
+
+        token = rev[start]
+
+        if obj is None:
+            # token is a rev name
+            if start == 0:
+                ref = repo.head.ref
+            else:
+                if token == '@':
+                    ref = name_to_object(repo, rev[:start], return_ref=True)
+                else:
+                    obj = name_to_object(repo, rev[:start])
+                # END handle token
+            # END handle refname
+
+            if ref is not None:
+                obj = ref.commit
+            # END handle ref
+        # END initialize obj on first token
+
+        start += 1
+
+        # try to parse {type}
+        if start < lr and rev[start] == '{':
+            end = rev.find('}', start)
+            if end == -1:
+                raise ValueError("Missing closing brace to define type in %s" % rev)
+            output_type = rev[start+1:end]    # exclude brace
+
+            # handle type
+            if output_type == 'commit':
+                pass    # default
+            elif output_type == 'tree':
+                try:
+                    obj = to_commit(obj).tree
+                except (AttributeError, ValueError):
+                    pass    # error raised later
+                # END exception handling
+            elif output_type in ('', 'blob'):
+                if obj.type == 'tag':
+                    obj = deref_tag(obj)
+                else:
+                    # cannot do anything for non-tags
+                    pass
+                # END handle tag
+            elif token == '@':
+                # try single int
+                assert ref is not None, "Require Reference to access reflog"
+                revlog_index = None
+                try:
+                    # transform reversed index into the format of our revlog
+                    revlog_index = -(int(output_type)+1)
+                except ValueError:
+                    # TODO: Try to parse the other date options, using parse_date maybe
+                    raise NotImplementedError("Support for additional @{...} modes not implemented")
+                # END handle revlog index
+
+                try:
+                    entry = ref.log_entry(revlog_index)
+                except IndexError:
+                    raise IndexError("Invalid revlog index: %i" % revlog_index)
+                # END handle index out of bound
+
+                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))
+
+                # make it pass the following checks
+                output_type = None
+            else:
+                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
+            # END handle output type
+
+            # empty output types don't require any specific type, it's just about
+            # dereferencing tags
+            if output_type and obj.type != output_type:
+                raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
+            # END verify output type
+
+            start = end+1    # skip brace
+            parsed_to = start
+            continue
+        # END parse type
+
+        # try to parse a number
+        num = 0
+        if token != ":":
+            found_digit = False
+            while start < lr:
+                if rev[start] in digits:
+                    num = num * 10 + int(rev[start])
+                    start += 1
+                    found_digit = True
+                else:
+                    break
+                # END handle number
+            # END number parse loop
+
+            # no explicit number given - default to 1; an explicit 0 is allowed
+            if not found_digit:
+                num = 1
+            # END set default num
+        # END number parsing only if non-blob mode
+
+        parsed_to = start
+        # handle hierarchy walk
+        try:
+            if token == "~":
+                obj = to_commit(obj)
+                for item in xrange(num):
+                    obj = obj.parents[0]
+                # END for each history item to walk
+            elif token == "^":
+                obj = to_commit(obj)
+                # must be n'th parent
+                if num:
+                    obj = obj.parents[num-1]
+            elif token == ":":
+                if obj.type != "tree":
+                    obj = obj.tree
+                # END get tree type
+                obj = obj[rev[start:]]
+                parsed_to = lr
+            else:
+                raise ValueError("Invalid token: %r" % token)
+            # END handle token
+        except (IndexError, AttributeError):
+            raise BadObject("Invalid Revision in %s" % rev)
+        # END exception handling
+    # END parse loop
+
+    # still no obj? It's probably a simple name
+    if obj is None:
+        obj = name_to_object(repo, rev)
+        parsed_to = lr
+    # END handle simple name
+
+    if obj is None:
+        raise ValueError("Revision specifier could not be parsed: %s" % rev)
+
+    if parsed_to != lr:
+        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
+
+    return obj
+
+#} END utilities
+
+
+class PureReferencesMixin(ReferencesMixin):
+    """Pure-Python revision parsing implementation"""
+
+    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+
+    def resolve(self, name):
+        return rev_parse(self, name)
+
+    @property
+    def references(self):
+        raise NotImplementedError()
+
+    @property
+    def heads(self):
+        raise NotImplementedError()
+
+    @property
+    def tags(self):
+        raise NotImplementedError()
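The specifiers understood by rev_parse above cover the common git-rev-parse forms. Assuming repo is a PureGitDB-like object exposing resolve(), these illustrate the supported tokens (names and paths are made up):

    repo.resolve("0.1.6")            # tag or branch name, resolved via refs/
    repo.resolve("HEAD~4")           # fourth ancestor along the first-parent line
    repo.resolve("master^2")         # second parent of a merge commit
    repo.resolve("HEAD^{tree}")      # the {type} syntax: commit -> tree
    repo.resolve("HEAD@{0}")         # reflog entry 0, i.e. the most recent one
    repo.resolve("HEAD:dir/file")    # blob at a path within the commit's tree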
diff --git a/git/db/py/transport.py b/git/db/py/transport.py
new file mode 100644
index 00000000..783fb8d5
--- /dev/null
+++ b/git/db/py/transport.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Implement a transport compatible database which sends objects using the git protocol"""
+
+from gitdb.db.interface import (TransportDB,
+                                PushInfo,
+                                FetchInfo,
+                                RefSpec)
+
+__all__ = ["PureTransportDB"]
+
+
+class PurePushInfo(PushInfo):
+    """TODO: Implementation"""
+    __slots__ = tuple()
+
+
+class PureFetchInfo(FetchInfo):
+    """TODO"""
+    __slots__ = tuple()
+
+
+class PureTransportDB(TransportDB):
+    """A database which allows to transport objects from and to different locations
+    which are specified by urls (location) and refspecs (what to transport,
+    see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
+
+    At the beginning of a transport operation, it will be determined which objects
+    have to be sent (either by this side or by the other side).
+
+    Afterwards a pack with the required objects is sent (or received). If there is
+    nothing to send, the pack will be empty.
+
+    The communication itself is implemented using a protocol instance which deals
+    with the actual formatting of the lines sent.
+
+    As refspecs involve symbolic names for the references to be handled, we require
+    RefParse functionality. How this is done is up to the actual implementation."""
+    # The following variables need to be set by the derived class
+    #{ Configuration
+    protocol = None
+    #} END configuration
+
+    #{ Interface
+
+    def fetch(self, url, refspecs, progress=None, **kwargs):
+        """Fetch the objects defined by the given refspecs from the given url.
+        :param url: url identifying the source of the objects. It may also be
+            a symbol from which the respective url can be resolved, like the
+            name of the remote. The implementation should allow objects as input
+            as well, these are assumed to resolve to a meaningful string though.
+        :param refspecs: iterable of reference specifiers or RefSpec instances,
+            identifying the references to be fetched from the remote.
+        :param progress: callable which receives progress messages for user consumption
+        :param kwargs: may be used for additional parameters that the actual
+            implementation could find useful.
+        :return: List of PureFetchInfo compatible instances which provide information
+            about what was fetched, in the order of the input refspecs.
+        :note: even if the operation fails, some of the returned PureFetchInfo
+            instances may still contain error information, as a failure may concern
+            only part of the refspecs.
+        :raise: if any issue occurs during the transport, or if the url is not
+            supported by the protocol.
+        """
+        raise NotImplementedError()
+
+    def push(self, url, refspecs, progress=None, **kwargs):
+        """Transport the objects identified by the given refspecs to the remote
+        at the given url.
+        :param url: describes the location which is to receive the objects,
+            see fetch() for more details
+        :param refspecs: iterable of refspec strings or RefSpec instances
+            identifying the objects to push
+        :param progress: see fetch()
+        :param kwargs: additional arguments which may be provided by the caller
+            as they may be useful to the actual implementation
+        :todo: what to return?
+        :raise: if any issue arises during transport or if the url cannot be handled"""
+        raise NotImplementedError()
+
+    @property
+    def remotes(self):
+        """:return: An IterableList of Remote objects allowing to access and manipulate remotes
+        :note: Remote objects can also be used for the actual push or fetch operation"""
+        raise NotImplementedError()
+
+    #} END interface
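fetch() and push() are still abstract here, but the refspec strings they will accept are plain git syntax, for example:

    # left of the colon: remote reference; right: local reference to update;
    # a leading '+' allows non-fast-forward updates
    fetch_spec = "+refs/heads/*:refs/remotes/origin/*"   # track all branches
    push_spec = "refs/heads/master:refs/heads/master"    # push a single branch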