Diffstat (limited to 'git/db/py')
-rw-r--r-- | git/db/py/__init__.py  |  13
-rw-r--r-- | git/db/py/base.py      | 351
-rw-r--r-- | git/db/py/git.py       | 113
-rw-r--r-- | git/db/py/loose.py     | 262
-rw-r--r-- | git/db/py/mem.py       | 113
-rw-r--r-- | git/db/py/pack.py      | 212
-rw-r--r-- | git/db/py/ref.py       |  77
-rw-r--r-- | git/db/py/resolve.py   | 297
-rw-r--r-- | git/db/py/transport.py |  89
9 files changed, 1527 insertions, 0 deletions
diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py
new file mode 100644
index 00000000..046c699d
--- /dev/null
+++ b/git/db/py/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from base import *
+from loose import *
+from mem import *
+from pack import *
+from git import *
+from ref import *
+from resolve import *
+from transport import *
diff --git a/git/db/py/base.py b/git/db/py/base.py
new file mode 100644
index 00000000..c378b10e
--- /dev/null
+++ b/git/db/py/base.py
@@ -0,0 +1,351 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic implementations for the interface building blocks"""
+
+from gitdb.db.interface import *
+
+from gitdb.util import (
+    pool,
+    join,
+    normpath,
+    abspath,
+    dirname,
+    LazyMixin,
+    hex_to_bin,
+    bin_to_hex,
+    expandvars,
+    expanduser,
+    exists,
+    is_git_dir
+    )
+
+from gitdb.config import GitConfigParser
+from gitdb.exc import (
+    BadObject,
+    AmbiguousObjectName,
+    InvalidDBRoot
+    )
+
+from async import ChannelThreadTask
+
+from itertools import chain
+import sys
+import os
+
+
+__all__ = ('PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB',
+           'PureConfigurationMixin', 'PureRepositoryPathsMixin')
+
+
+class PureObjectDBR(ObjectDBR):
+
+    #{ Query Interface
+
+    def has_object_async(self, reader):
+        task = ChannelThreadTask(reader, str(self.has_object_async),
+                                 lambda sha: (sha, self.has_object(sha)))
+        return pool.add_task(task)
+
+    def info_async(self, reader):
+        task = ChannelThreadTask(reader, str(self.info_async), self.info)
+        return pool.add_task(task)
+
+    def stream_async(self, reader):
+        # the base implementation just uses the stream method repeatedly
+        task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+        return pool.add_task(task)
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        len_partial_hexsha = len(partial_hexsha)
+        if len_partial_hexsha % 2 != 0:
+            partial_binsha = hex_to_bin(partial_hexsha + "0")
+        else:
+            partial_binsha = hex_to_bin(partial_hexsha)
+        # END assure successful binary conversion
+        return self.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+
+    #} END query interface
+
+
+class PureObjectDBW(ObjectDBW):
+
+    def __init__(self, *args, **kwargs):
+        super(PureObjectDBW, self).__init__(*args, **kwargs)
+        self._ostream = None
+
+    #{ Edit Interface
+    def set_ostream(self, stream):
+        cstream = self._ostream
+        self._ostream = stream
+        return cstream
+
+    def ostream(self):
+        return self._ostream
+
+    def store_async(self, reader):
+        task = ChannelThreadTask(reader, str(self.store_async), self.store)
+        return pool.add_task(task)
+
+    #} END edit interface
+
+
+class PureRootPathDB(RootPathDB):
+
+    def __init__(self, root_path):
+        super(PureRootPathDB, self).__init__(root_path)
+        self._root_path = root_path
+
+    #{ Interface
+    def root_path(self):
+        return self._root_path
+
+    def db_path(self, rela_path):
+        return join(self._root_path, rela_path)
+    #} END interface
+
+
+def _databases_recursive(database, output):
+    """Fill the output list with the databases contained in database, in order.
+    Deals with loose, packed and compound databases."""
+    if isinstance(database, CompoundDB):
+        dbs = database.databases()
+        output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+        for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+            _databases_recursive(cdb, output)
+    else:
+        output.append(database)
+    # END handle database type
+
+
+class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB):
+    def _set_cache_(self, attr):
+        if attr == '_dbs':
+            self._dbs = list()
+        elif attr == '_db_cache':
+            self._db_cache = dict()
+        else:
+            super(PureCompoundDB, self)._set_cache_(attr)
+
+    def _db_query(self, sha):
+        """:return: database containing the given 20 byte sha
+        :raise BadObject:"""
+        # most databases use binary representations, prevent converting
+        # it every time a database is being queried
+        try:
+            return self._db_cache[sha]
+        except KeyError:
+            pass
+        # END first level cache
+
+        for db in self._dbs:
+            if db.has_object(sha):
+                self._db_cache[sha] = db
+                return db
+        # END for each database
+        raise BadObject(sha)
+
+    #{ PureObjectDBR interface
+
+    def has_object(self, sha):
+        try:
+            self._db_query(sha)
+            return True
+        except BadObject:
+            return False
+        # END handle exceptions
+
+    def info(self, sha):
+        return self._db_query(sha).info(sha)
+
+    def stream(self, sha):
+        return self._db_query(sha).stream(sha)
+
+    def size(self):
+        return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)
+
+    def sha_iter(self):
+        return chain(*(db.sha_iter() for db in self._dbs))
+
+    #} END object DBR Interface
+
+    #{ Interface
+
+    def databases(self):
+        return tuple(self._dbs)
+
+    def update_cache(self, force=False):
+        # something might have changed, clear everything
+        self._db_cache.clear()
+        stat = False
+        for db in self._dbs:
+            if isinstance(db, CachingDB):
+                stat |= db.update_cache(force)
+            # END if is caching db
+        # END for each database to update
+        return stat
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        databases = self.databases()
+
+        len_partial_hexsha = len(partial_hexsha)
+        if len_partial_hexsha % 2 != 0:
+            partial_binsha = hex_to_bin(partial_hexsha + "0")
+        else:
+            partial_binsha = hex_to_bin(partial_hexsha)
+        # END assure successful binary conversion
+
+        candidate = None
+        for db in databases:
+            full_bin_sha = None
+            try:
+                if hasattr(db, 'partial_to_complete_sha_hex'):
+                    full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+                else:
+                    full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+                # END handle database type
+            except BadObject:
+                continue
+            # END ignore bad objects
+            if full_bin_sha:
+                if candidate and candidate != full_bin_sha:
+                    raise AmbiguousObjectName(partial_hexsha)
+                candidate = full_bin_sha
+            # END handle candidate
+        # END for each db
+        if not candidate:
+            raise BadObject(partial_binsha)
+        return candidate
+
+    def partial_to_complete_sha(self, partial_binsha, hex_len):
+        """Simple adaptor to feed into our implementation"""
+        return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
+    #} END interface
+
+
+class PureRepositoryPathsMixin(RepositoryPathsMixin):
+    # slots has no effect here, it's just to keep track of used attrs
+    __slots__ = ("_git_path", '_bare', '_working_tree_dir')
+
+    #{ Configuration
+    repo_dir = '.git'
+    objs_dir = 'objects'
+    #} END configuration
+
+    #{ Subclass Interface
+    def _initialize(self, path):
+        epath = abspath(expandvars(expanduser(path or os.getcwd())))
+
+        if not exists(epath):
+            raise InvalidDBRoot(epath)
+        # END check file
+
+        self._working_tree_dir = None
+        self._git_path = None
+        curpath = epath
+
+        # walk up the path to find the .git dir
+        while curpath:
+            if is_git_dir(curpath):
+                self._git_path = curpath
+                self._working_tree_dir = os.path.dirname(curpath)
+                break
+            gitpath = join(curpath, self.repo_dir)
+            if is_git_dir(gitpath):
+                self._git_path = gitpath
+                self._working_tree_dir = curpath
+                break
+            curpath, dummy = os.path.split(curpath)
+            if not dummy:
+                break
+        # END while curpath
+
+        if self._git_path is None:
+            raise InvalidDBRoot(epath)
+        # END path not found
+
+        self._bare = self._git_path.endswith(self.repo_dir)
+        if hasattr(self, 'config_reader'):
+            try:
+                self._bare = self.config_reader("repository").getboolean('core', 'bare')
+            except Exception:
+                # let's not assume the option exists, although it should
+                pass
+        # END check bare flag
+
+    #} END subclass interface
+
+    #{ Interface
+
+    def is_bare(self):
+        return self._bare
+
+    def git_path(self):
+        return self._git_path
+
+    def working_tree_path(self):
+        if self.is_bare():
+            raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_path())
+        # END assertion
+        return dirname(self.git_path())
+
+    def objects_path(self):
+        return join(self.git_path(), self.objs_dir)
+
+    def working_dir(self):
+        if self.is_bare():
+            return self.git_path()
+        else:
+            return self.working_tree_path()
+        # END handle bare state
+
+    #} END interface
+
+
+class PureConfigurationMixin(ConfigurationMixin):
+
+    #{ Configuration
+    system_config_file_name = "gitconfig"
+    repo_config_file_name = "config"
+    #} END configuration
+
+    def __init__(self, *args, **kwargs):
+        """Verify prerequisites, keeping the initialization chain intact"""
+        super(PureConfigurationMixin, self).__init__(*args, **kwargs)
+        assert hasattr(self, 'git_path')
+
+    def _path_at_level(self, level):
+        # we do not support an absolute path to the gitconfig on windows,
+        # use the global config instead
+        if sys.platform == "win32" and level == "system":
+            level = "global"
+        # END handle windows
+
+        if level == "system":
+            return "/etc/%s" % self.system_config_file_name
+        elif level == "global":
+            return normpath(expanduser("~/.%s" % self.system_config_file_name))
+        elif level == "repository":
+            return join(self.git_path(), self.repo_config_file_name)
+        # END handle level
+
+        raise ValueError("Invalid configuration level: %r" % level)
+
+    #{ Interface
+
+    def config_reader(self, config_level=None):
+        files = None
+        if config_level is None:
+            files = [self._path_at_level(f) for f in self.config_level]
+        else:
+            files = [self._path_at_level(config_level)]
+        # END handle level
+        return GitConfigParser(files, read_only=True)
+
+    def config_writer(self, config_level="repository"):
+        return GitConfigParser(self._path_at_level(config_level), read_only=False)
+
+    #} END interface
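The odd-length handling in partial_to_complete_sha_hex above is the subtle part: a hex string with an odd number of digits cannot be converted to whole bytes. A minimal standalone sketch of the same idea, using binascii in place of gitdb's hex_to_bin and a made-up sha prefix:

    import binascii

    def partial_hex_to_bin(partial_hexsha):
        # an odd number of hex digits cannot map to whole bytes, so pad with
        # a zero nibble; the original digit count is passed along separately
        # so the matcher knows how many nibbles are actually significant
        if len(partial_hexsha) % 2 != 0:
            return binascii.unhexlify(partial_hexsha + "0")
        return binascii.unhexlify(partial_hexsha)

    # seven digits become four bytes whose last nibble is padding
    assert partial_hex_to_bin("01234ab") == "\x01\x23\x4a\xb0"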
diff --git a/git/db/py/git.py b/git/db/py/git.py
new file mode 100644
index 00000000..bc148c6f
--- /dev/null
+++ b/git/db/py/git.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+    PureCompoundDB,
+    PureObjectDBW,
+    PureRootPathDB,
+    PureRepositoryPathsMixin,
+    PureConfigurationMixin,
+    )
+
+from resolve import PureReferencesMixin
+
+from loose import PureLooseObjectODB
+from pack import PurePackedODB
+from ref import PureReferenceDB
+
+from gitdb.util import (
+    LazyMixin,
+    normpath,
+    join,
+    dirname
+    )
+from gitdb.exc import (
+    InvalidDBRoot,
+    BadObject,
+    AmbiguousObjectName
+    )
+import os
+
+__all__ = ('PureGitODB', 'PureGitDB')
+
+
+class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB):
+    """A git-style object-only database, which contains all objects in the 'objects'
+    subdirectory.
+    :note: The type needs to be initialized on the ./objects directory to function,
+        as it deals solely with object lookup. Use a PureGitDB type if you need
+        reference and push support."""
+    # Configuration
+    PackDBCls = PurePackedODB
+    LooseDBCls = PureLooseObjectODB
+    PureReferenceDBCls = PureReferenceDB
+
+    # Directories
+    packs_dir = 'pack'
+    loose_dir = ''
+    alternates_dir = os.path.join('info', 'alternates')
+
+    def __init__(self, root_path):
+        """Initialize ourselves on a git ./objects directory"""
+        super(PureGitODB, self).__init__(root_path)
+
+    def _set_cache_(self, attr):
+        if attr == '_dbs' or attr == '_loose_db':
+            self._dbs = list()
+            loose_db = None
+            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+                                   (self.loose_dir, self.LooseDBCls),
+                                   (self.alternates_dir, self.PureReferenceDBCls)):
+                path = self.db_path(subpath)
+                if os.path.exists(path):
+                    self._dbs.append(dbcls(path))
+                    if dbcls is self.LooseDBCls:
+                        loose_db = self._dbs[-1]
+                    # END remember loose db
+                # END check path exists
+            # END for each db type
+
+            # should have at least one subdb
+            if not self._dbs:
+                raise InvalidDBRoot(self.root_path())
+            # END handle error
+
+            # the loose db handles all writes, hence it must provide the store method
+            assert loose_db is not None and hasattr(loose_db, 'store'), \
+                "First database needs store functionality"
+
+            # finally set the value
+            self._loose_db = loose_db
+        else:
+            super(PureGitODB, self)._set_cache_(attr)
+        # END handle attrs
+
+    #{ PureObjectDBW interface
+
+    def store(self, istream):
+        return self._loose_db.store(istream)
+
+    def ostream(self):
+        return self._loose_db.ostream()
+
+    def set_ostream(self, ostream):
+        return self._loose_db.set_ostream(ostream)
+
+    #} END objectdbw interface
+
+
+class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin):
+    """Git-like database with support for object lookup as well as reference
+    resolution, working on bare and non-bare repositories alike.
+
+    The root_path will be the git objects directory; use git_path() to obtain
+    the actual top-level git directory."""
+
+    def __init__(self, root_path):
+        """Initialize ourselves on a path pointing at or into a git repository,
+        from which the .git and .git/objects directories are derived."""
+        PureRepositoryPathsMixin._initialize(self, root_path)
+        super(PureGitDB, self).__init__(self.objects_path())
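A minimal usage sketch of the two types, assuming the package is importable as git.db.py as the file paths suggest (the repository path is hypothetical): PureGitODB must be handed the ./objects directory directly, while PureGitDB derives it from any path pointing at or into a repository.

    from git.db.py.git import PureGitODB, PureGitDB

    # object lookup only - initialized on the objects directory itself
    odb = PureGitODB("/path/to/repo/.git/objects")
    print odb.size()                # number of objects in all sub-databases

    # full repository database - initialized on the repository path
    db = PureGitDB("/path/to/repo")
    print db.git_path()             # /path/to/repo/.git
    print db.objects_path()         # /path/to/repo/.git/objects
    print db.is_bare()              # False for a repository with a working tree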
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 00000000..34e31da6
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,262 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+    PureRootPathDB,
+    PureObjectDBR,
+    PureObjectDBW
+    )
+
+from gitdb.exc import (
+    InvalidDBRoot,
+    BadObject,
+    AmbiguousObjectName
+    )
+
+from gitdb.stream import (
+    DecompressMemMapReader,
+    FDCompressedSha1Writer,
+    FDStream,
+    Sha1Writer
+    )
+
+from gitdb.base import (
+    OStream,
+    OInfo
+    )
+
+from gitdb.util import (
+    file_contents_ro_filepath,
+    ENOENT,
+    hex_to_bin,
+    bin_to_hex,
+    exists,
+    chmod,
+    isdir,
+    isfile,
+    remove,
+    mkdir,
+    rename,
+    dirname,
+    basename,
+    join
+    )
+
+from gitdb.fun import (
+    chunk_size,
+    loose_object_header_info,
+    write_object,
+    stream_copy
+    )
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ('PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+    """A database which operates on loose object files"""
+
+    # CONFIGURATION
+    # chunks in which data will be copied between streams
+    stream_chunk_size = chunk_size
+
+    # new objects are read-only by default; on windows we need to keep them
+    # writable, otherwise they cannot be removed
+    new_objects_mode = 0444
+    if os.name == 'nt':
+        new_objects_mode = 0644
+
+    def __init__(self, root_path):
+        super(PureLooseObjectODB, self).__init__(root_path)
+        self._hexsha_to_file = dict()
+        # Additional flags - might be set to 0 after the first failure
+        # Depending on the root, this might work for some mounts, for others not,
+        # which is why it is per instance
+        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+    #{ Interface
+    def object_path(self, hexsha):
+        """
+        :return: path at which the object with the given hexsha would be stored,
+            relative to the database root"""
+        return join(hexsha[:2], hexsha[2:])
+
+    def readable_db_object_path(self, hexsha):
+        """
+        :return: readable object path to the object identified by hexsha
+        :raise BadObject: If the object file does not exist"""
+        try:
+            return self._hexsha_to_file[hexsha]
+        except KeyError:
+            pass
+        # END ignore cache misses
+
+        # try filesystem
+        path = self.db_path(self.object_path(hexsha))
+        if exists(path):
+            self._hexsha_to_file[hexsha] = path
+            return path
+        # END handle cache
+        raise BadObject(hexsha)
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        """:return: 20 byte binary sha1 which matches the given name uniquely
+        :param partial_hexsha: hexadecimal partial name
+        :raise AmbiguousObjectName:
+        :raise BadObject:"""
+        candidate = None
+        for binsha in self.sha_iter():
+            if bin_to_hex(binsha).startswith(partial_hexsha):
+                # a second match means the name is ambiguous - the same
+                # object cannot be found twice
+                if candidate is not None:
+                    raise AmbiguousObjectName(partial_hexsha)
+                candidate = binsha
+        # END for each object
+        if candidate is None:
+            raise BadObject(partial_hexsha)
+        return candidate
+
+    #} END interface
+
+    def _map_loose_object(self, sha):
+        """
+        :return: memory map of that file to allow random read access
+        :raise BadObject: if the object could not be located"""
+        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+        try:
+            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+        except OSError, e:
+            if e.errno != ENOENT:
+                # the failure is likely caused by our additional flags (O_NOATIME),
+                # disable them for all future requests and try again without them
+                self._fd_open_flags = 0
+                try:
+                    return file_contents_ro_filepath(db_path)
+                except OSError:
+                    raise BadObject(sha)
+                # END retry without additional flags
+            else:
+                raise BadObject(sha)
+            # END handle error
+        # END exception handling
+
+    def set_ostream(self, stream):
+        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+        if stream is not None and not isinstance(stream, Sha1Writer):
+            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
+        return super(PureLooseObjectODB, self).set_ostream(stream)
+
+    def info(self, sha):
+        m = self._map_loose_object(sha)
+        try:
+            type, size = loose_object_header_info(m)
+            return OInfo(sha, type, size)
+        finally:
+            m.close()
+        # END assure release of system resources
+
+    def stream(self, sha):
+        m = self._map_loose_object(sha)
+        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
+        return OStream(sha, type, size, stream)
+
+    def has_object(self, sha):
+        try:
+            self.readable_db_object_path(bin_to_hex(sha))
+            return True
+        except BadObject:
+            return False
+        # END check existence
+
+    def store(self, istream):
+        """:note: The sha we produce will be hex by nature"""
+        tmp_path = None
+        writer = self.ostream()
+        if writer is None:
+            # open a tmp file to write the data to
+            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+            if istream.binsha is None:
+                writer = FDCompressedSha1Writer(fd)
+            else:
+                writer = FDStream(fd)
+            # END handle direct stream copies
+        # END handle custom writer
+
+        try:
+            try:
+                if istream.binsha is not None:
+                    # copy as much as possible, the actual uncompressed item size might
+                    # be smaller than the compressed version
+                    stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+                else:
+                    # write object with header, we have to make a new one
+                    write_object(istream.type, istream.size, istream.read, writer.write,
+                                 chunk_size=self.stream_chunk_size)
+                # END handle direct stream copies
+            finally:
+                if tmp_path:
+                    writer.close()
+            # END assure target stream is closed
+        except:
+            if tmp_path:
+                os.remove(tmp_path)
+            raise
+        # END assure tmpfile removal on error
+
+        hexsha = None
+        if istream.binsha:
+            hexsha = istream.hexsha
+        else:
+            hexsha = writer.sha(as_hex=True)
+        # END handle sha
+
+        if tmp_path:
+            obj_path = self.db_path(self.object_path(hexsha))
+            obj_dir = dirname(obj_path)
+            if not isdir(obj_dir):
+                mkdir(obj_dir)
+            # END handle destination directory
+            # rename onto existing doesn't work on windows
+            if os.name == 'nt' and isfile(obj_path):
+                remove(obj_path)
+            # END handle win32
+            rename(tmp_path, obj_path)
+
+            # make sure the object is readable for all - mkstemp created it
+            # rw for the owner only, but the database needs it world-readable
+            chmod(obj_path, self.new_objects_mode)
+        # END handle dry_run
+
+        istream.binsha = hex_to_bin(hexsha)
+        return istream
+
+    def sha_iter(self):
+        # find all files which look like an object, extract sha from there
+        for root, dirs, files in os.walk(self.root_path()):
+            root_base = basename(root)
+            if len(root_base) != 2:
+                continue
+
+            for f in files:
+                if len(f) != 38:
+                    continue
+                yield hex_to_bin(root_base + f)
+            # END for each file
+        # END for each walk iteration
+
+    def size(self):
+        return len(tuple(self.sha_iter()))
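To illustrate the store() path above, a hedged round-trip sketch; the target directory is hypothetical and must exist, and IStream comes from gitdb.base just as this module's own imports do:

    from cStringIO import StringIO
    from gitdb.base import IStream
    from git.db.py.loose import PureLooseObjectODB

    ldb = PureLooseObjectODB("/tmp/objects")    # hypothetical, existing directory
    data = "hello world"

    # no binsha given, so store() compresses, hashes and writes the object
    istream = ldb.store(IStream("blob", len(data), StringIO(data)))
    print istream.hexsha                   # sha1 over the encoded object
    print ldb.object_path(istream.hexsha)  # 'xx/yyyy...' - two-level fan-out
    assert ldb.has_object(istream.binsha)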
diff --git a/git/db/py/mem.py b/git/db/py/mem.py
new file mode 100644
index 00000000..ba922e96
--- /dev/null
+++ b/git/db/py/mem.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains the MemoryDatabase implementation"""
+from loose import PureLooseObjectODB
+from base import (
+    PureObjectDBR,
+    PureObjectDBW
+    )
+
+from gitdb.base import (
+    OStream,
+    IStream,
+    )
+
+from gitdb.exc import (
+    BadObject,
+    UnsupportedOperation
+    )
+from gitdb.stream import (
+    ZippedStoreShaWriter,
+    DecompressMemMapReader,
+    )
+
+from cStringIO import StringIO
+
+__all__ = ("PureMemoryDB", )
+
+
+class PureMemoryDB(PureObjectDBR, PureObjectDBW):
+    """A memory database which stores everything in memory, providing fast IO and
+    object retrieval. It should be used to buffer results and obtain SHAs before
+    writing them to the actual physical storage, as it allows one to query whether
+    an object already exists in the target storage before causing actual IO.
+
+    :note: the database is currently not threadsafe, hence the async methods
+        cannot be used for storing"""
+
+    def __init__(self):
+        super(PureMemoryDB, self).__init__()
+        self._db = PureLooseObjectODB("path/doesnt/matter")
+
+        # maps 20 byte shas to their OStream objects
+        self._cache = dict()
+
+    def set_ostream(self, stream):
+        raise UnsupportedOperation("PureMemoryDBs always stream into memory")
+
+    def store(self, istream):
+        zstream = ZippedStoreShaWriter()
+        self._db.set_ostream(zstream)
+
+        istream = self._db.store(istream)
+        zstream.close()        # close to flush
+        zstream.seek(0)
+
+        # don't provide a size, the stream is written in object format, hence the
+        # header needs decompression
+        decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+        self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+        return istream
+
+    def store_async(self, reader):
+        raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access")
+
+    def has_object(self, sha):
+        return sha in self._cache
+
+    def info(self, sha):
+        # we always return streams, which are infos as well
+        return self.stream(sha)
+
+    def stream(self, sha):
+        try:
+            ostream = self._cache[sha]
+            # rewind stream for the next one to read
+            ostream.stream.seek(0)
+            return ostream
+        except KeyError:
+            raise BadObject(sha)
+        # END exception handling
+
+    def size(self):
+        return len(self._cache)
+
+    def sha_iter(self):
+        return self._cache.iterkeys()
+
+    #{ Interface
+    def stream_copy(self, sha_iter, odb):
+        """Copy the streams as identified by shas yielded by sha_iter into the
+        given odb. The streams will be copied directly.
+        :note: an object will only be written if it did not yet exist in the target db
+        :return: number of streams actually copied into odb. If smaller than the
+            number of input shas, one or more objects already existed in odb"""
+        count = 0
+        for sha in sha_iter:
+            if odb.has_object(sha):
+                continue
+            # END check object existence
+
+            ostream = self.stream(sha)
+            # compressed data including header
+            sio = StringIO(ostream.stream.data())
+            istream = IStream(ostream.type, ostream.size, sio, sha)
+
+            odb.store(istream)
+            count += 1
+        # END for each sha
+        return count
+    #} END interface
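stream_copy above is the intended way to flush buffered objects into a persistent database; a sketch building on the loose-db example, with all paths hypothetical:

    from cStringIO import StringIO
    from gitdb.base import IStream
    from git.db.py.mem import PureMemoryDB
    from git.db.py.loose import PureLooseObjectODB

    mdb = PureMemoryDB()
    shas = [mdb.store(IStream("blob", len(s), StringIO(s))).binsha
            for s in ("first", "second")]

    # nothing has touched the disk so far; copy both objects into a real database
    ldb = PureLooseObjectODB("/tmp/objects")    # hypothetical target
    assert mdb.stream_copy(shas, ldb) == 2      # 0 on a second run - they exist now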
diff --git a/git/db/py/pack.py b/git/db/py/pack.py
new file mode 100644
index 00000000..1d0e9bfc
--- /dev/null
+++ b/git/db/py/pack.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing a database to deal with packs"""
+from gitdb.db import CachingDB
+from base import (
+    PureRootPathDB,
+    PureObjectDBR
+    )
+
+from gitdb.util import LazyMixin
+
+from gitdb.exc import (
+    BadObject,
+    UnsupportedOperation,
+    AmbiguousObjectName
+    )
+
+from gitdb.pack import PackEntity
+
+import os
+import glob
+
+__all__ = ('PurePackedODB', )
+
+
+class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin):
+    """A database operating on a set of object packs"""
+
+    # the type to use when instantiating a pack entity
+    PackEntityCls = PackEntity
+
+    # sort the priority list every N queries
+    # Higher values are better; performance tests don't show this has
+    # any effect, but it should have one
+    _sort_interval = 500
+
+    def __init__(self, root_path):
+        super(PurePackedODB, self).__init__(root_path)
+        # list of lists with three items:
+        # * hits - number of times the pack was hit with a request
+        # * entity - Pack entity instance
+        # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+        # self._entities = list()       # lazy loaded list
+        self._hit_count = 0             # amount of hits
+        self._st_mtime = 0              # last modification date of our root path
+
+    def _set_cache_(self, attr):
+        if attr == '_entities':
+            self._entities = list()
+            self.update_cache(force=True)
+        # END handle entities initialization
+
+    def _sort_entities(self):
+        self._entities.sort(key=lambda l: l[0], reverse=True)
+
+    def _pack_info(self, sha):
+        """:return: tuple(entity, index) for an item at the given sha
+        :param sha: 20 or 40 byte sha
+        :raise BadObject:
+        :note: This method is not thread-safe, but may be hit in multi-threaded
+            operation. The worst thing that can happen though is a counter that
+            was not incremented, or the list being in the wrong order, so we save
+            the time for locking here and see how that goes"""
+        # presort?
+        if self._hit_count % self._sort_interval == 0:
+            self._sort_entities()
+        # END update sorting
+
+        for item in self._entities:
+            index = item[2](sha)
+            if index is not None:
+                item[0] += 1            # one hit for you
+                self._hit_count += 1    # general hit count
+                return (item[1], index)
+            # END index found in pack
+        # END for each item
+
+        # no hit, see whether we have to update packs
+        # NOTE: considering packs don't change very often, we save this call
+        # and leave it to the super-caller to trigger that
+        raise BadObject(sha)
+
+    #{ Object DB Read
+
+    def has_object(self, sha):
+        try:
+            self._pack_info(sha)
+            return True
+        except BadObject:
+            return False
+        # END exception handling
+
+    def info(self, sha):
+        entity, index = self._pack_info(sha)
+        return entity.info_at_index(index)
+
+    def stream(self, sha):
+        entity, index = self._pack_info(sha)
+        return entity.stream_at_index(index)
+
+    def sha_iter(self):
+        for entity in self.entities():
+            index = entity.index()
+            sha_by_index = index.sha
+            for pos in xrange(index.size()):
+                yield sha_by_index(pos)
+            # END for each index position
+        # END for each entity
+
+    def size(self):
+        sizes = [item[1].index().size() for item in self._entities]
+        return reduce(lambda x, y: x + y, sizes, 0)
+
+    #} END object db read
+
+    #{ object db write
+
+    def store(self, istream):
+        """Storing individual objects is not feasible as a pack is designed to
+        hold multiple objects. Writing or rewriting packs for single objects is
+        inefficient"""
+        raise UnsupportedOperation()
+
+    def store_async(self, reader):
+        # TODO: add PureObjectDBRW before implementing this
+        raise NotImplementedError()
+
+    #} END object db write
+
+    #{ Interface
+
+    def update_cache(self, force=False):
+        """
+        Update our cache with the actually existing packs on disk. Add new ones,
+        and remove deleted ones. We keep the unchanged ones.
+
+        :param force: If True, the cache will be updated even though the directory
+            does not appear to have changed according to its modification timestamp.
+        :return: True if the packs have been updated so there is new information,
+            False if there was no change to the pack database"""
+        stat = os.stat(self.root_path())
+        if not force and stat.st_mtime <= self._st_mtime:
+            return False
+        # END abort early on no change
+        self._st_mtime = stat.st_mtime
+
+        # packs are supposed to be prefixed with pack- by git-convention
+        # get all pack files, figure out what changed
+        pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+        our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+        # new packs
+        for pack_file in (pack_files - our_pack_files):
+            # init the hit-counter/priority with the size, a good measure for hit-
+            # probability. It is implemented such that only 12 bytes will be read
+            entity = self.PackEntityCls(pack_file)
+            self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+        # END for each new packfile
+
+        # removed packs
+        for pack_file in (our_pack_files - pack_files):
+            del_index = -1
+            for i, item in enumerate(self._entities):
+                if item[1].pack().path() == pack_file:
+                    del_index = i
+                    break
+                # END found index
+            # END for each entity
+            assert del_index != -1
+            del(self._entities[del_index])
+        # END for each removed pack
+
+        # reinitialize priorities
+        self._sort_entities()
+        return True
+
+    def entities(self):
+        """:return: list of pack entities operated upon by this database"""
+        return [item[1] for item in self._entities]
+
+    def partial_to_complete_sha(self, partial_binsha, canonical_length):
+        """:return: 20 byte sha as inferred by the given partial binary sha
+        :param partial_binsha: binary sha with less than 20 bytes
+        :param canonical_length: length of the corresponding canonical representation.
+            It is required as binary shas cannot express whether the original hex sha
+            had an odd or even number of characters
+        :raise AmbiguousObjectName:
+        :raise BadObject:"""
+        candidate = None
+        for item in self._entities:
+            item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+            if item_index is not None:
+                sha = item[1].index().sha(item_index)
+                if candidate and candidate != sha:
+                    raise AmbiguousObjectName(partial_binsha)
+                candidate = sha
+            # END handle full sha could be found
+        # END for each entity
+
+        if candidate:
+            return candidate
+
+        # still not found?
+        raise BadObject(partial_binsha)
+
+    #} END interface
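The hit-counter bookkeeping in _pack_info amounts to a self-reordering priority list; a generic sketch of the pattern, independent of the pack internals (all names are illustrative):

    class PriorityList(object):
        SORT_INTERVAL = 500

        def __init__(self, items):
            # [hits, payload] pairs; frequently hit entries bubble to the front
            self._entries = [[0, item] for item in items]
            self._queries = 0

        def find(self, predicate):
            # resort only every SORT_INTERVAL queries to keep lookups cheap
            if self._queries % self.SORT_INTERVAL == 0:
                self._entries.sort(key=lambda e: e[0], reverse=True)
            self._queries += 1
            for entry in self._entries:
                if predicate(entry[1]):
                    entry[0] += 1    # reward the entry that served the query
                    return entry[1]
            return None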
diff --git a/git/db/py/ref.py b/git/db/py/ref.py
new file mode 100644
index 00000000..951f0437
--- /dev/null
+++ b/git/db/py/ref.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import PureCompoundDB
+
+import os
+
+__all__ = ('PureReferenceDB', )
+
+
+class PureReferenceDB(PureCompoundDB):
+    """A database consisting of databases referred to in a file"""
+
+    # Configuration
+    # Specifies the object database to use for the paths found in the alternates
+    # file. If None, it defaults to the PureGitODB
+    ObjectDBCls = None
+
+    def __init__(self, ref_file):
+        super(PureReferenceDB, self).__init__()
+        self._ref_file = ref_file
+
+    def _set_cache_(self, attr):
+        if attr == '_dbs':
+            self._dbs = list()
+            self._update_dbs_from_ref_file()
+        else:
+            super(PureReferenceDB, self)._set_cache_(attr)
+        # END handle attrs
+
+    def _update_dbs_from_ref_file(self):
+        dbcls = self.ObjectDBCls
+        if dbcls is None:
+            # late import
+            from git import PureGitODB
+            dbcls = PureGitODB
+        # END get db type
+
+        # try to get as many as possible, don't fail if some are unavailable
+        ref_paths = list()
+        try:
+            ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+        except (OSError, IOError):
+            pass
+        # END handle alternates
+
+        ref_paths_set = set(ref_paths)
+        cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+        # remove existing
+        for path in (cur_ref_paths_set - ref_paths_set):
+            for i, db in enumerate(self._dbs[:]):
+                if db.root_path() == path:
+                    del(self._dbs[i])
+                    break
+                # END del matching db
+            # END for each db
+        # END for each path to remove
+
+        # add new
+        # sort them to maintain order
+        added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+        for path in added_paths:
+            try:
+                db = dbcls(path)
+                # force an update to verify path
+                if isinstance(db, PureCompoundDB):
+                    db.databases()
+                # END verification
+                self._dbs.append(db)
+            except Exception:
+                # ignore invalid paths or issues
+                pass
+        # END for each path to add
+
+    def update_cache(self, force=False):
+        # re-read alternates and update databases
+        self._update_dbs_from_ref_file()
+        return super(PureReferenceDB, self).update_cache(force)
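The ref_file parsed above follows git's objects/info/alternates format: one object-database root per line. A hedged sketch with made-up paths:

    # one absolute object-database root per line, as in .git/objects/info/alternates
    open("/tmp/alternates", "w").write(
        "/path/to/other/repo/.git/objects\n"
        "/path/to/shared/objects\n")

    from git.db.py.ref import PureReferenceDB
    rdb = PureReferenceDB("/tmp/alternates")
    # one sub-database per listed path that could actually be opened
    print len(rdb.databases())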
diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py
new file mode 100644
index 00000000..86c1e594
--- /dev/null
+++ b/git/db/py/resolve.py
@@ -0,0 +1,297 @@
+"""Module with an implementation for rev-spec parsing. It is the pure-python
+version assuming a compatible interface for reference and object types"""
+
+from gitdb.db.interface import ReferencesMixin
+from gitdb.exc import BadObject
+from gitdb.ref import SymbolicReference
+from gitdb.object.base import Object
+from gitdb.util import (
+    join,
+    isdir,
+    isfile,
+    hex_to_bin,
+    bin_to_hex,
+    is_git_dir
+    )
+from string import digits
+import os
+import re
+
+__all__ = ["PureReferencesMixin"]
+
+#{ Utilities
+
+def short_to_long(odb, hexsha):
+    """:return: long hexadecimal sha1 from the given less-than-40 character hexsha,
+        or None if no candidate could be found
+    :param hexsha: hexsha with less than 40 characters"""
+    try:
+        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
+    except BadObject:
+        return None
+    # END exception handling
+
+
+def name_to_object(repo, name, return_ref=False):
+    """
+    :return: object specified by the given name - hexshas (short and long)
+        as well as references are supported
+    :param return_ref: if name specifies a reference, we will return the reference
+        instead of the object. Otherwise it will raise BadObject
+    """
+    hexsha = None
+
+    # is it a hexsha? Try the most common lengths, which are 7 to 40 characters
+    if repo.re_hexsha_shortened.match(name):
+        if len(name) != 40:
+            # find long sha for short sha
+            hexsha = short_to_long(repo.odb, name)
+        else:
+            hexsha = name
+        # END handle short shas
+    # END find sha if it matches
+
+    # if we couldn't find an object for what seemed to be a short hexsha,
+    # try to find it as reference anyway, it could be named 'aaa' for instance
+    if hexsha is None:
+        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
+                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
+            try:
+                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
+                if return_ref:
+                    return SymbolicReference(repo, base % name)
+                # END handle symbolic ref
+                break
+            except ValueError:
+                pass
+        # END for each base
+    # END handle hexsha
+
+    # didn't find any ref, this is an error
+    if return_ref:
+        raise BadObject("Couldn't find reference named %r" % name)
+    # END handle return ref
+
+    # tried everything? fail
+    if hexsha is None:
+        raise BadObject(name)
+    # END assert hexsha was found
+
+    return Object.new_from_sha(repo, hex_to_bin(hexsha))
+
+
+def deref_tag(tag):
+    """Recursively dereference a tag and return the resulting object"""
+    while True:
+        try:
+            tag = tag.object
+        except AttributeError:
+            break
+    # END dereference tag
+    return tag
+
+
+def to_commit(obj):
+    """Convert the given object to a commit if possible and return it"""
+    if obj.type == 'tag':
+        obj = deref_tag(obj)
+
+    if obj.type != "commit":
+        raise ValueError("Cannot convert object %r to type commit" % obj)
+    # END verify type
+    return obj
+
+
+def rev_parse(repo, rev):
+    """
+    :return: Object at the given revision, either Commit, Tag, Tree or Blob
+    :param rev: git-rev-parse compatible revision specification, please see
+        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+        for details
+    :note: Currently there is no access to the rev-log, rev-specs may only contain
+        topological tokens such as ~ and ^.
+    :raise BadObject: if the given revision could not be found
+    :raise ValueError: If rev couldn't be parsed
+    :raise IndexError: If an invalid reflog index is specified"""
+
+    # colon search mode?
+    if rev.startswith(':/'):
+        # colon search mode
+        raise NotImplementedError("commit by message search ( regex )")
+    # END handle search
+
+    obj = None
+    ref = None
+    output_type = "commit"
+    start = 0
+    parsed_to = 0
+    lr = len(rev)
+    while start < lr:
+        if rev[start] not in "^~:@":
+            start += 1
+            continue
+        # END handle start
+
+        token = rev[start]
+
+        if obj is None:
+            # token is a rev name
+            if start == 0:
+                ref = repo.head.ref
+            else:
+                if token == '@':
+                    ref = name_to_object(repo, rev[:start], return_ref=True)
+                else:
+                    obj = name_to_object(repo, rev[:start])
+                # END handle token
+            # END handle refname
+
+            if ref is not None:
+                obj = ref.commit
+            # END handle ref
+        # END initialize obj on first token
+
+        start += 1
+
+        # try to parse {type}
+        if start < lr and rev[start] == '{':
+            end = rev.find('}', start)
+            if end == -1:
+                raise ValueError("Missing closing brace to define type in %s" % rev)
+            output_type = rev[start+1:end]    # exclude brace
+
+            # handle type
+            if output_type == 'commit':
+                pass    # default
+            elif output_type == 'tree':
+                try:
+                    obj = to_commit(obj).tree
+                except (AttributeError, ValueError):
+                    pass    # error raised later
+                # END exception handling
+            elif output_type in ('', 'blob'):
+                if obj.type == 'tag':
+                    obj = deref_tag(obj)
+                else:
+                    # cannot do anything for non-tags
+                    pass
+                # END handle tag
+            elif token == '@':
+                # try single int
+                assert ref is not None, "Require Reference to access reflog"
+                revlog_index = None
+                try:
+                    # transform reversed index into the format of our revlog
+                    revlog_index = -(int(output_type)+1)
+                except ValueError:
+                    # TODO: Try to parse the other date options, using parse_date maybe
+                    raise NotImplementedError("Support for additional @{...} modes not implemented")
+                # END handle revlog index
+
+                try:
+                    entry = ref.log_entry(revlog_index)
+                except IndexError:
+                    raise IndexError("Invalid revlog index: %i" % revlog_index)
+                # END handle index out of bound
+
+                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))
+
+                # make it pass the following checks
+                output_type = None
+            else:
+                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
+            # END handle output type
+
+            # empty output types don't require any specific type, it's just about
+            # dereferencing tags
+            if output_type and obj.type != output_type:
+                raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
+            # END verify output type
+
+            start = end+1    # skip brace
+            parsed_to = start
+            continue
+        # END parse type
+
+        # try to parse a number
+        num = 0
+        if token != ":":
+            found_digit = False
+            while start < lr:
+                if rev[start] in digits:
+                    num = num * 10 + int(rev[start])
+                    start += 1
+                    found_digit = True
+                else:
+                    break
+                # END handle number
+            # END number parse loop
+
+            # no explicit number given - default to 1; an explicit 0 is allowed
+            if not found_digit:
+                num = 1
+            # END set default num
+        # END number parsing only if non-blob mode
+
+        parsed_to = start
+        # handle hierarchy walk
+        try:
+            if token == "~":
+                obj = to_commit(obj)
+                for item in xrange(num):
+                    obj = obj.parents[0]
+                # END for each history item to walk
+            elif token == "^":
+                obj = to_commit(obj)
+                # must be n'th parent
+                if num:
+                    obj = obj.parents[num-1]
+            elif token == ":":
+                if obj.type != "tree":
+                    obj = obj.tree
+                # END get tree type
+                obj = obj[rev[start:]]
+                parsed_to = lr
+            else:
+                raise ValueError("Invalid token: %r" % token)
+            # END handle token
+        except (IndexError, AttributeError):
+            raise BadObject("Invalid Revision in %s" % rev)
+        # END exception handling
+    # END parse loop
+
+    # still no obj? It's probably a simple name
+    if obj is None:
+        obj = name_to_object(repo, rev)
+        parsed_to = lr
+    # END handle simple name
+
+    if obj is None:
+        raise ValueError("Revision specifier could not be parsed: %s" % rev)
+
+    if parsed_to != lr:
+        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
+
+    return obj
+
+#} END utilities
+
+
+class PureReferencesMixin(ReferencesMixin):
+    """Pure-Python revision parsing implementation"""
+
+    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+
+    def resolve(self, name):
+        return rev_parse(self, name)
+
+    @property
+    def references(self):
+        raise NotImplementedError()
+
+    @property
+    def heads(self):
+        raise NotImplementedError()
+
+    @property
+    def tags(self):
+        raise NotImplementedError()
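The specifiers understood by rev_parse above cover the common git-rev-parse forms. Assuming repo is a PureGitDB-like object exposing resolve(), these illustrate the supported tokens (names and paths are made up):

    repo.resolve("0.1.6")            # tag or branch name, resolved via refs/
    repo.resolve("HEAD~4")           # fourth ancestor along the first-parent line
    repo.resolve("master^2")         # second parent of a merge commit
    repo.resolve("HEAD^{tree}")      # the {type} syntax: commit -> tree
    repo.resolve("HEAD@{0}")         # reflog entry 0, i.e. the most recent one
    repo.resolve("HEAD:dir/file")    # blob at a path within the commit's tree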
diff --git a/git/db/py/transport.py b/git/db/py/transport.py
new file mode 100644
index 00000000..783fb8d5
--- /dev/null
+++ b/git/db/py/transport.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Implement a transport compatible database which sends objects using the git protocol"""
+
+from gitdb.db.interface import (TransportDB,
+                                PushInfo,
+                                FetchInfo,
+                                RefSpec)
+
+__all__ = ["PureTransportDB"]
+
+
+class PurePushInfo(PushInfo):
+    """TODO: Implementation"""
+    __slots__ = tuple()
+
+
+class PureFetchInfo(FetchInfo):
+    """TODO"""
+    __slots__ = tuple()
+
+
+class PureTransportDB(TransportDB):
+    """A database which allows to transport objects from and to different locations
+    which are specified by urls (location) and refspecs (what to transport,
+    see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
+
+    At the beginning of a transport operation, it will be determined which objects
+    have to be sent (either by this side or by the other side).
+
+    Afterwards a pack with the required objects is sent (or received). If there is
+    nothing to send, the pack will be empty.
+
+    The communication itself is implemented using a protocol instance which deals
+    with the actual formatting of the lines sent.
+
+    As refspecs involve symbolic names for the references to be handled, we require
+    RefParse functionality. How this is done is up to the actual implementation."""
+    # The following variables need to be set by the derived class
+    #{ Configuration
+    protocol = None
+    #} END configuration
+
+    #{ Interface
+
+    def fetch(self, url, refspecs, progress=None, **kwargs):
+        """Fetch the objects defined by the given refspecs from the given url.
+        :param url: url identifying the source of the objects. It may also be
+            a symbol from which the respective url can be resolved, like the
+            name of the remote. The implementation should allow objects as input
+            as well, these are assumed to resolve to a meaningful string though.
+        :param refspecs: iterable of reference specifiers or RefSpec instances,
+            identifying the references to be fetched from the remote.
+        :param progress: callable which receives progress messages for user consumption
+        :param kwargs: may be used for additional parameters that the actual
+            implementation could find useful.
+        :return: List of PureFetchInfo compatible instances which provide information
+            about what was fetched, in the order of the input refspecs.
+        :note: even if the operation fails, some of the returned PureFetchInfo
+            instances may still contain error information, as a failure may concern
+            only part of the refspecs.
+        :raise: if any issue occurs during the transport, or if the url is not
+            supported by the protocol.
+        """
+        raise NotImplementedError()
+
+    def push(self, url, refspecs, progress=None, **kwargs):
+        """Transport the objects identified by the given refspecs to the remote
+        at the given url.
+        :param url: describes the location which is to receive the objects,
+            see fetch() for more details
+        :param refspecs: iterable of refspec strings or RefSpec instances
+            identifying the objects to push
+        :param progress: see fetch()
+        :param kwargs: additional arguments which may be provided by the caller
+            as they may be useful to the actual implementation
+        :todo: what to return?
+        :raise: if any issue arises during transport or if the url cannot be handled"""
+        raise NotImplementedError()
+
+    @property
+    def remotes(self):
+        """:return: An IterableList of Remote objects allowing to access and manipulate remotes
+        :note: Remote objects can also be used for the actual push or fetch operation"""
+        raise NotImplementedError()
+
+    #} END interface
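fetch() and push() are still abstract here, but the refspec strings they will accept are plain git syntax, for example:

    # left of the colon: remote reference; right: local reference to update;
    # a leading '+' allows non-fast-forward updates
    fetch_spec = "+refs/heads/*:refs/remotes/origin/*"   # track all branches
    push_spec = "refs/heads/master:refs/heads/master"    # push a single branch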