summaryrefslogtreecommitdiff
path: root/git/db/py/loose.py
diff options
context:
space:
mode:
Diffstat (limited to 'git/db/py/loose.py')
-rw-r--r--git/db/py/loose.py262
1 files changed, 262 insertions, 0 deletions
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 00000000..34e31da6
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,262 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+ PureRootPathDB,
+ PureObjectDBR,
+ PureObjectDBW
+ )
+
+
+from gitdb.exc import (
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
+
+from gitdb.stream import (
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ FDStream,
+ Sha1Writer
+ )
+
+from gitdb.base import (
+ OStream,
+ OInfo
+ )
+
+from gitdb.util import (
+ file_contents_ro_filepath,
+ ENOENT,
+ hex_to_bin,
+ bin_to_hex,
+ exists,
+ chmod,
+ isdir,
+ isfile,
+ remove,
+ mkdir,
+ rename,
+ dirname,
+ basename,
+ join
+ )
+
+from gitdb.fun import (
+ chunk_size,
+ loose_object_header_info,
+ write_object,
+ stream_copy
+ )
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ( 'PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+ # On windows we need to keep it writable, otherwise it cannot be removed
+ # either
+ new_objects_mode = 0444
+ if os.name == 'nt':
+ new_objects_mode = 0644
+
+
+ def __init__(self, root_path):
+ super(PureLooseObjectODB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: 20 byte binary sha1 string which matches the given name uniquely
+ :param name: hexadecimal partial name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for binsha in self.sha_iter():
+ if bin_to_hex(binsha).startswith(partial_hexsha):
+ # it can't ever find the same object twice
+ if candidate is not None:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = binsha
+ # END for each object
+ if candidate is None:
+ raise BadObject(partial_hexsha)
+ return candidate
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+ try:
+ return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # try again without noatime
+ try:
+ return file_contents_ro_filepath(db_path)
+ except OSError:
+ raise BadObject(sha)
+ # didn't work because of our flag, don't try it again
+ self._fd_open_flags = 0
+ else:
+ raise BadObject(sha)
+ # END handle error
+ # END exception handling
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ finally:
+ os.close(fd)
+ # END assure file is closed
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+ raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+ return super(PureLooseObjectODB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(bin_to_hex(sha))
+ return True
+ except BadObject:
+ return False
+ # END check existance
+
+ def store(self, istream):
+ """note: The sha we produce will be hex by nature"""
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+ if istream.binsha is None:
+ writer = FDCompressedSha1Writer(fd)
+ else:
+ writer = FDStream(fd)
+ # END handle direct stream copies
+ # END handle custom writer
+
+ try:
+ try:
+ if istream.binsha is not None:
+ # copy as much as possible, the actual uncompressed item size might
+ # be smaller than the compressed version
+ stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+ else:
+ # write object with header, we have to make a new one
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ # END handle direct stream copies
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+ hexsha = None
+ if istream.binsha:
+ hexsha = istream.hexsha
+ else:
+ hexsha = writer.sha(as_hex=True)
+ # END handle sha
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(hexsha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ # rename onto existing doesn't work on windows
+ if os.name == 'nt' and isfile(obj_path):
+ remove(obj_path)
+ # END handle win322
+ rename(tmp_path, obj_path)
+
+ # make sure its readable for all ! It started out as rw-- tmp file
+ # but needs to be rwrr
+ chmod(obj_path, self.new_objects_mode)
+ # END handle dry_run
+
+ istream.binsha = hex_to_bin(hexsha)
+ return istream
+
+ def sha_iter(self):
+ # find all files which look like an object, extract sha from there
+ for root, dirs, files in os.walk(self.root_path()):
+ root_base = basename(root)
+ if len(root_base) != 2:
+ continue
+
+ for f in files:
+ if len(f) != 38:
+ continue
+ yield hex_to_bin(root_base + f)
+ # END for each file
+ # END for each walk iteration
+
+ def size(self):
+ return len(tuple(self.sha_iter()))
+