1 files changed, 262 insertions, 0 deletions
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 00000000..34e31da6
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,262 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+						PureRootPathDB, 
+						PureObjectDBR, 
+						PureObjectDBW
+				)
+
+
+from gitdb.exc import (
+	InvalidDBRoot, 
+	BadObject,
+	AmbiguousObjectName
+	)
+
+from gitdb.stream import (
+		DecompressMemMapReader,
+		FDCompressedSha1Writer,
+		FDStream,
+		Sha1Writer
+	)
+
+from gitdb.base import (
+							OStream,
+							OInfo
+						)
+
+from gitdb.util import (
+		file_contents_ro_filepath,
+		ENOENT,
+		hex_to_bin,
+		bin_to_hex,
+		exists,
+		chmod,
+		isdir,
+		isfile,
+		remove,
+		mkdir,
+		rename,
+		dirname,
+		basename,
+		join
+	)
+
+from gitdb.fun import ( 
+	chunk_size,
+	loose_object_header_info, 
+	write_object,
+	stream_copy
+	)
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ( 'PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+	"""A database which operates on loose object files"""
+	
+	# CONFIGURATION
+	# chunks in which data will be copied between streams
+	stream_chunk_size = chunk_size
+	
+	# Mode applied to newly written object files. On windows they need to stay
+	# writable, otherwise they cannot be removed either
+	new_objects_mode = 0444
+	if os.name == 'nt':
+		new_objects_mode = 0644
+			
+	
+	def __init__(self, root_path):
+		super(PureLooseObjectODB, self).__init__(root_path)
+		self._hexsha_to_file = dict()
+		# Additional Flags - might be set to 0 after the first failure
+		# Depending on the root, this might work for some mounts, for others not, which
+		# is why it is per instance
+		self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+	
+	#{ Interface 
+	def object_path(self, hexsha):
+		"""
+		:return: path at which the object with the given hexsha would be stored, 
+			relative to the database root"""
+		return join(hexsha[:2], hexsha[2:])
+	
+	def readable_db_object_path(self, hexsha):
+		"""
+		:return: readable object path to the object identified by hexsha
+		:raise BadObject: If the object file does not exist"""
+		try:
+			return self._hexsha_to_file[hexsha]
+		except KeyError:
+			pass
+		# END ignore cache misses 
+			
+		# try filesystem
+		path = self.db_path(self.object_path(hexsha))
+		if exists(path):
+			self._hexsha_to_file[hexsha] = path
+			return path
+		# END handle cache
+		raise BadObject(hexsha)
+		
+	def partial_to_complete_sha_hex(self, partial_hexsha):
+		""":return: 20 byte binary sha1 string which matches the given name uniquely
+		:param name: hexadecimal partial name
+		:raise AmbiguousObjectName: 
+		:raise BadObject: """
+		candidate = None
+		for binsha in self.sha_iter():
+			if bin_to_hex(binsha).startswith(partial_hexsha):
+				# it can't ever find the same object twice
+				if candidate is not None:
+					raise AmbiguousObjectName(partial_hexsha)
+				candidate = binsha
+		# END for each object
+		if candidate is None:
+			raise BadObject(partial_hexsha)
+		return candidate
+		
+	#} END interface
+	
+	def _map_loose_object(self, sha):
+		"""
+		:return: memory map of that file to allow random read access
+		:raise BadObject: if object could not be located"""
+		db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+		try:
+			return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+		except OSError,e:
+			if e.errno != ENOENT:
+				# try again without noatime
+				try:
+					return file_contents_ro_filepath(db_path)
+				except OSError:
+					raise BadObject(sha)
+				# didn't work because of our flag, don't try it again
+				self._fd_open_flags = 0
+			else:
+				raise BadObject(sha)
+			# END handle error
+		# END exception handling
+		try:
+			return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+		finally:
+			os.close(fd)
+		# END assure file is closed
+		
+	def set_ostream(self, stream):
+		""":raise TypeError: if the stream does not support the Sha1Writer interface"""
+		if stream is not None and not isinstance(stream, Sha1Writer):
+			raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+		return super(PureLooseObjectODB, self).set_ostream(stream)
+			
+	def info(self, sha):
+		m = self._map_loose_object(sha)
+		try:
+			type, size = loose_object_header_info(m)
+			return OInfo(sha, type, size)
+		finally:
+			m.close()
+		# END assure release of system resources
+		
+	def stream(self, sha):
+		m = self._map_loose_object(sha)
+		type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+		return OStream(sha, type, size, stream)
+		
+	def has_object(self, sha):
+		try:
+			self.readable_db_object_path(bin_to_hex(sha))
+			return True
+		except BadObject:
+			return False
+		# END check existance
+	
+	def store(self, istream):
+		"""note: The sha we produce will be hex by nature"""
+		tmp_path = None
+		writer = self.ostream()
+		if writer is None:
+			# open a tmp file to write the data to
+			fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+			
+			if istream.binsha is None:
+				writer = FDCompressedSha1Writer(fd)
+			else:
+				writer = FDStream(fd)
+			# END handle direct stream copies
+		# END handle custom writer
+	
+		try:
+			try:
+				if istream.binsha is not None:
+					# copy as much as possible, the actual uncompressed item size might
+					# be smaller than the compressed version
+					stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+				else:
+					# write object with header, we have to make a new one
+					write_object(istream.type, istream.size, istream.read, writer.write,
+									chunk_size=self.stream_chunk_size)
+				# END handle direct stream copies
+			finally:
+				if tmp_path:
+					writer.close()
+			# END assure target stream is closed
+		except:
+			if tmp_path:
+				os.remove(tmp_path)
+			raise
+		# END assure tmpfile removal on error
+		
+		hexsha = None
+		if istream.binsha:
+			hexsha = istream.hexsha
+		else:
+			hexsha = writer.sha(as_hex=True)
+		# END handle sha
+		
+		if tmp_path:
+			obj_path = self.db_path(self.object_path(hexsha))
+			obj_dir = dirname(obj_path)
+			if not isdir(obj_dir):
+				mkdir(obj_dir)
+			# END handle destination directory
+			# rename onto existing doesn't work on windows
+			if os.name == 'nt' and isfile(obj_path):
+				remove(obj_path)
+			# END handle win322
+			rename(tmp_path, obj_path)
+			
+			# make sure its readable for all ! It started out as rw-- tmp file
+			# but needs to be rwrr
+			chmod(obj_path, self.new_objects_mode)
+		# END handle dry_run
+		
+		istream.binsha = hex_to_bin(hexsha)
+		return istream
+		
+	def sha_iter(self):
+		# find all files which look like an object, extract sha from there
+		for root, dirs, files in os.walk(self.root_path()):
+			root_base = basename(root)
+			if len(root_base) != 2:
+				continue
+				
+			for f in files:
+				if len(f) != 38:
+					continue
+				yield hex_to_bin(root_base + f)
+			# END for each file
+		# END for each walk iteration
+		
+	def size(self):
+		return len(tuple(self.sha_iter()))
+