author | Sebastian Thiel <byronimo@gmail.com> | 2011-05-05 19:43:22 +0200 |
committer | Sebastian Thiel <byronimo@gmail.com> | 2011-05-05 19:43:22 +0200 |
commit | 4177eefd7bdaea96a529b00ba9cf751924ede202 (patch) |
tree | 958614c21bd97267e0d06f71bb18d4215ddd87b5 /git/objects |
parent | f54546a9b857ae728033482f3c5c18c9ff3393c3 (diff) |
download | gitpython-4177eefd7bdaea96a529b00ba9cf751924ede202.tar.gz |
Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work
Diffstat (limited to 'git/objects')
-rw-r--r-- | git/objects/base.py | 173 |
-rw-r--r-- | git/objects/blob.py | 27 |
-rw-r--r-- | git/objects/commit.py | 259 |
-rw-r--r-- | git/objects/fun.py | 199 |
-rw-r--r-- | git/objects/submodule/base.py | 3 |
-rw-r--r-- | git/objects/tag.py | 73 |
-rw-r--r-- | git/objects/tree.py | 282 |
-rw-r--r-- | git/objects/util.py | 1 |
8 files changed, 993 insertions, 24 deletions
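The diff below vendors gitdb's object model directly into `git/objects`, with `base.py` carrying the new `Object`/`IndexObject` base classes. As a rough orientation before reading it, here is a minimal sketch of how that API is meant to be consumed once the port is wired up. It is hedged deliberately: the commit message itself says the code does not generally work yet, the `describe` helper and its `odb`/`binsha` parameters are hypothetical names, and `odb` stands for a gitdb-style object database exposing `info(sha)` and `stream(sha)` as `base.Object` expects. Only `Object.new_from_sha`, `type`, `hexsha` and `size` come from the diff itself.

```python
# Hedged sketch, not runnable against this exact revision: how the new
# base.Object API (git/objects/base.py below) is intended to be used.
# `odb` is assumed to be a gitdb-style database with info(sha) and stream(sha).
from git.objects.base import Object

def describe(odb, binsha):
    """Print type, hex sha and size of any object given its 20-byte binary sha."""
    obj = Object.new_from_sha(odb, binsha)  # picks Blob/Tree/Commit/TagObject by type
    print obj.type, obj.hexsha, obj.size    # size is filled lazily via odb.info()
    return obj
```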
diff --git a/git/objects/base.py b/git/objects/base.py index 42d7b600..24967e7b 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -3,6 +3,177 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from gitdb.object.base import Object, IndexObject + +from util import get_object_type_by_name +from git.util import ( + hex_to_bin, + bin_to_hex, + dirname, + basename, + LazyMixin, + join_path_native, + stream_copy + ) + +from git.typ import ObjectType + +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + __all__ = ("Object", "IndexObject") +class Object(LazyMixin): + """Implements an Object which may be Blobs, Trees, Commits and Tags""" + NULL_HEX_SHA = '0'*40 + NULL_BIN_SHA = '\0'*20 + + TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag) + __slots__ = ("odb", "binsha", "size" ) + + type = None # to be set by subclass + type_id = None # to be set by subclass + + def __init__(self, odb, binsha): + """Initialize an object by identifying it by its binary sha. + All keyword arguments will be set on demand if None. + + :param odb: repository this object is located in + + :param binsha: 20 byte SHA1""" + super(Object,self).__init__() + self.odb = odb + self.binsha = binsha + assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) + + @classmethod + def new(cls, odb, id): + """ + :return: New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a binsha even though + the input id may have been a Reference or Rev-Spec + + :param id: reference, rev-spec, or hexsha + + :note: This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a binsha.""" + return odb.rev_parse(str(id)) + + @classmethod + def new_from_sha(cls, odb, sha1): + """ + :return: new object instance of a type appropriate to represent the given + binary sha1 + :param sha1: 20 byte binary sha1""" + if sha1 == cls.NULL_BIN_SHA: + # the NULL binsha is always the root commit + return get_object_type_by_name('commit')(odb, sha1) + #END handle special case + oinfo = odb.info(sha1) + inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha) + inst.size = oinfo.size + return inst + + def _set_cache_(self, attr): + """Retrieve object information""" + if attr == "size": + oinfo = self.odb.info(self.binsha) + self.size = oinfo.size + # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """:return: True if the objects have the same SHA1""" + return self.binsha == other.binsha + + def __ne__(self, other): + """:return: True if the objects do not have the same SHA1 """ + return self.binsha != other.binsha + + def __hash__(self): + """:return: Hash of our id allowing objects to be used in dicts and sets""" + return hash(self.binsha) + + def __str__(self): + """:return: string of our SHA1 as understood by all git commands""" + return bin_to_hex(self.binsha) + + def __repr__(self): + """:return: string with pythonic representation of our object""" + return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) + + @property + def hexsha(self): + """:return: 40 byte hex version of our 20 byte binary sha""" + return bin_to_hex(self.binsha) + + @property + def data_stream(self): + """ :return: File Object 
compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + return self.odb.stream(self.binsha) + + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. + :return: self""" + istream = self.odb.stream(self.binsha) + stream_copy(istream, ostream) + return self + + +class IndexObject(Object): + """Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects""" + __slots__ = ("path", "mode") + + # for compatability with iterable lists + _id_attribute_ = 'path' + + def __init__(self, odb, binsha, mode=None, path=None): + """Initialize a newly instanced IndexObject + :param odb: is the object database we are located in + :param binsha: 20 byte sha1 + :param mode: is the stat compatible file mode as int, use the stat module + to evaluate the infomration + :param path: + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + :note: + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree.""" + super(IndexObject, self).__init__(odb, binsha) + if mode is not None: + self.mode = mode + if path is not None: + self.path = path + + def __hash__(self): + """:return: + Hash of our path as index items are uniquely identifyable by path, not + by their data !""" + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + # END hanlde slot attribute + + @property + def name(self): + """:return: Name portion of the path, effectively being the basename""" + return basename(self.path) + + @property + def abspath(self): + """ + :return: + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. """ + assert False, "Only works if repository is not bare - provide this check in an interface" + return join_path_native(dirname(self.odb.root_path()), self.path) + diff --git a/git/objects/blob.py b/git/objects/blob.py index 38834436..326c5459 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -5,9 +5,32 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.blob import Blob as GitDB_Blob +from mimetypes import guess_type +from gitdb.typ import ObjectType + +import base __all__ = ('Blob', ) -class Blob(GitDB_Blob, RepoAliasMixin): +class Blob(base.IndexObject, RepoAliasMixin): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = ObjectType.blob + type_id = ObjectType.blob_id + + # valid blob modes + executable_mode = 0100755 + file_mode = 0100644 + link_mode = 0120000 + __slots__ = tuple() + + @property + def mime_type(self): + """ + :return: String describing the mime type of this file (based on the filename) + :note: Defaults to 'text/plain' in case the actual file type is unknown. 
""" + guesses = None + if self.path: + guesses = guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index d932ab1a..30dcaa0a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,28 +3,68 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import RepoAliasMixin -from gitdb.object.commit import Commit as GitDB_Commit -from git.diff import Diffable +import base + +from gitdb.typ import ObjectType +from tree import Tree +from cStringIO import StringIO + from gitdb.util import ( + hex_to_bin, + Actor, + RepoAliasMixin, Iterable, Actor ) -from gitdb import IStream +from util import ( + Traversable, + Serializable, + altz_to_utctz_str, + parse_actor_and_date + ) +from git.diff import Diffable +from gitdb.base import IStream from cStringIO import StringIO from util import parse_date from time import altzone import os +import sys __all__ = ('Commit', ) -class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): - """Provides additional git-command based functionality to the default gitdb commit object""" +class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): + """Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary.""" __slots__ = tuple() + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_date = "GIT_AUTHOR_DATE" + env_committer_date = "GIT_COMMITTER_DATE" + + # CONFIGURATION KEYS + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = ObjectType.commit + type_id = ObjectType.commit_id + + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "binsha" + + def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -221,4 +261,211 @@ class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): return new_commit + def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... 
) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone.""" + super(Commit,self).__init__(odb, binsha) + if tree is not None: + assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + binsha, typename, self.size, stream = self.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) + else: + super(Commit, self)._set_cache_(attr) + # END handle attrs + + @property + def summary(self): + """:return: First line of the commit message""" + return self.message.split('\n', 1)[0] + + @classmethod + def _iter_from_process_or_stream(cls, odb, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield cls(odb, hex_to_bin(hexsha)) + # END for each line in stream + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = 
aname.encode(self.encoding) + # END handle unicode in name + + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))) + + # encode committer + aname = c.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + write(fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % self.encoding) + + write("\n") + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1]))) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) + # END handle author's encoding + + # decode committer name + try: + self.committer.name = self.committer.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) + # END handle author's encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) + # END exception handling + return self + #} END serializable implementation + diff --git a/git/objects/fun.py b/git/objects/fun.py index 2443bad7..6f2eaaad 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,4 +1,201 @@ """Module with functions which are supposed to be as fast as possible""" -from gitdb.object.fun import * +from stat import S_ISDIR + +__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', + 'traverse_tree_recursive') + + + + +def tree_to_stream(entries, write): + """Write the give list of entries into a stream using its write method + :param entries: **sorted** list of tuples 
with (binsha, mode, name) + :param write: write method which takes a data string""" + ord_zero = ord('0') + bit_mask = 7 # 3 bits set + + for binsha, mode, name in entries: + mode_str = '' + for i in xrange(6): + mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str + # END for each 8 octal value + + # git slices away the first octal if its zero + if mode_str[0] == '0': + mode_str = mode_str[1:] + # END save a byte + + # here it comes: if the name is actually unicode, the replacement below + # will not work as the binsha is not part of the ascii unicode encoding - + # hence we must convert to an utf8 string for it to work properly. + # According to my tests, this is exactly what git does, that is it just + # takes the input literally, which appears to be utf8 on linux. + if isinstance(name, unicode): + name = name.encode("utf8") + write("%s %s\0%s" % (mode_str, name, binsha)) + # END for each item + + +def tree_entries_from_data(data): + """Reads the binary representation of a tree and returns tuples of Tree items + :param data: data block with tree data + :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + ord_zero = ord('0') + len_data = len(data) + i = 0 + out = list() + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded + name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + out.append((sha, mode, name)) + # END for each byte in data stream + return out + + +def _find_by_name(tree_data, name, is_dir, start_at): + """return data entry matching the given name and tree mode + or None. + Before the item is returned, the respective data item is set + None in the tree_data list to mark it done""" + try: + item = tree_data[start_at] + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[start_at] = None + return item + except IndexError: + pass + # END exception handling + for index, item in enumerate(tree_data): + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[index] = None + return item + # END if item matches + # END for each item + return None + +def _to_full_path(item, path_prefix): + """Rebuild entry with given path prefix""" + if not item: + return item + return (item[0], item[1], path_prefix+item[2]) + +def traverse_trees_recursive(odb, tree_shas, path_prefix): + """ + :return: list with entries according to the given binary tree-shas. + The result is encoded in a list + of n tuple|None per blob/commit, (n == len(tree_shas)), where + * [0] == 20 byte sha + * [1] == mode as int + * [2] == path relative to working tree root + The entry tuple is None if the respective blob/commit did not + exist in the given tree. + :param tree_shas: iterable of shas pointing to trees. All trees must + be on the same level. 
A tree-sha may be None in which case None + :param path_prefix: a prefix to be added to the returned paths on this level, + set it '' for the first iteration + :note: The ordering of the returned items will be partially lost""" + trees_data = list() + nt = len(tree_shas) + for tree_sha in tree_shas: + if tree_sha is None: + data = list() + else: + data = tree_entries_from_data(odb.stream(tree_sha).read()) + # END handle muted trees + trees_data.append(data) + # END for each sha to get data for + + out = list() + out_append = out.append + + # find all matching entries and recursively process them together if the match + # is a tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None + for ti, tree_data in enumerate(trees_data): + for ii, item in enumerate(tree_data): + if not item: + continue + # END skip already done items + entries = [ None for n in range(nt) ] + entries[ti] = item + sha, mode, name = item # its faster to unpack + is_dir = S_ISDIR(mode) # type mode bits + + # find this item in all other tree data items + # wrap around, but stop one before our current index, hence + # ti+nt, not ti+1+nt + for tio in range(ti+1, ti+nt): + tio = tio % nt + entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) + # END for each other item data + + # if we are a directory, enter recursion + if is_dir: + out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) + else: + out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) + # END handle recursion + + # finally mark it done + tree_data[ii] = None + # END for each item + + # we are done with one tree, set all its data empty + del(tree_data[:]) + # END for each tree_data chunk + return out + +def traverse_tree_recursive(odb, tree_sha, path_prefix): + """ + :return: list of entries of the tree pointed to by the binary tree_sha. An entry + has the following format: + * [0] 20 byte sha + * [1] mode as int + * [2] path relative to the repository + :param path_prefix: prefix to prepend to the front of all returned paths""" + entries = list() + data = tree_entries_from_data(odb.stream(tree_sha).read()) + + # unpacking/packing is faster than accessing individual items + for sha, mode, name in data: + if S_ISDIR(mode): + entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) + else: + entries.append((sha, mode, path_prefix+name)) + # END for each item + + return entries diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 7997e5e5..9b45d9b6 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -73,6 +73,9 @@ class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin): # this is a bogus type for base class compatability type = 'submodule' + # this type doesn't really have a type id + type_id = 0 + __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') _cache_attrs = ('path', '_url', '_branch_path') diff --git a/git/objects/tag.py b/git/objects/tag.py index 59b2362e..0bd1d20c 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -4,10 +4,77 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. 
""" +import base from git.util import RepoAliasMixin -from gitdb.object.tag import TagObject as GitDB_TagObject +from gitdb.util import hex_to_bin +from util import ( + get_object_type_by_name, + parse_actor_and_date + ) +from gitdb.typ import ObjectType + __all__ = ("TagObject", ) -class TagObject(GitDB_TagObject, RepoAliasMixin): +class TagObject(base.Object, RepoAliasMixin): """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - __slots__ = tuple() + type = ObjectType.tag + type_id = ObjectType.tag_id + + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) + + def __init__(self, odb, binsha, object=None, tag=None, + tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): + """Initialize a tag object with additional data + + :param odb: repository this object is located in + :param binsha: 20 byte SHA1 + :param object: Object instance of object we are pointing to + :param tag: name of this tag + :param tagger: Actor identifying the tagger + :param tagged_date: int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the + authored_date is in, in a format similar to time.altzone""" + super(TagObject, self).__init__(odb, binsha ) + if object is not None: + self.object = object + if tag is not None: + self.tag = tag + if tagger is not None: + self.tagger = tagger + if tagged_date is not None: + self.tagged_date = tagged_date + if tagger_tz_offset is not None: + self.tagger_tz_offset = tagger_tz_offset + if message is not None: + self.message = message + + def _set_cache_(self, attr): + """Cache all our attributes at once""" + if attr in TagObject.__slots__: + ostream = self.odb.stream(self.binsha) + lines = ostream.read().splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha)) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3][7:]# tagger <actor> <date> + self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in case there really is no message, it would not exist. 
Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + + diff --git a/git/objects/tree.py b/git/objects/tree.py index 00ef07fc..1b5f7561 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -4,26 +4,286 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.tree import Tree as GitDB_Tree -from gitdb.object.tree import TreeModifier import git.diff as diff - +from gitdb.typ import ObjectType +from base import IndexObject from blob import Blob -from submodule.base import Submodule +from submodule import Submodule + +from fun import ( + tree_entries_from_data, + tree_to_stream + ) + +from gitdb.util import ( + to_bin_sha, + join_path + ) +import util __all__ = ("TreeModifier", "Tree") -class Tree(GitDB_Tree, diff.Diffable): - """As opposed to the default GitDB tree implementation, this one can be diffed - and returns our own types""" - __slots__ = tuple() +class TreeModifier(object): + """A utility class providing methods to alter the underlying cache in a list-like fashion. + + Once all adjustments are complete, the _cache, which really is a refernce to + the cache of a tree, will be sorted. Assuring it will be in a serializable state""" + __slots__ = '_cache' + + def __init__(self, cache): + self._cache = cache + + def _index_by_name(self, name): + """:return: index of an item with name, or -1 if not found""" + for i, t in enumerate(self._cache): + if t[2] == name: + return i + # END found item + # END for each item in cache + return -1 + + #{ Interface + def set_done(self): + """Call this method once you are done modifying the tree information. + It may be called several times, but be aware that each call will cause + a sort operation + :return self:""" + self._cache.sort(key=lambda t: t[2]) # sort by name + return self + #} END interface + + #{ Mutators + def add(self, sha, mode, name, force=False): + """Add the given item to the tree. If an item with the given name already + exists, nothing will be done, but a ValueError will be raised if the + sha and mode of the existing item do not match the one you add, unless + force is True + + :param sha: The 20 or 40 byte sha of the item to add + :param mode: int representing the stat compatible mode of the item + :param force: If True, an item with your name and information will overwrite + any existing item with the same name, no matter which information it has + :return: self""" + if '/' in name: + raise ValueError("Name must not contain '/' characters") + if (mode >> 12) not in Tree._map_id_to_type: + raise ValueError("Invalid object type according to mode %o" % mode) + + sha = to_bin_sha(sha) + index = self._index_by_name(name) + item = (sha, mode, name) + if index == -1: + self._cache.append(item) + else: + if force: + self._cache[index] = item + else: + ex_item = self._cache[index] + if ex_item[0] != sha or ex_item[1] != mode: + raise ValueError("Item %r existed with different properties" % name) + # END handle mismatch + # END handle force + # END handle name exists + return self + + def add_unchecked(self, binsha, mode, name): + """Add the given item to the tree, its correctness is assumed, which + puts the caller into responsibility to assure the input is correct. 
+ For more information on the parameters, see ``add`` + :param binsha: 20 byte binary sha""" + self._cache.append((binsha, mode, name)) + + def __delitem__(self, name): + """Deletes an item with the given name if it exists""" + index = self._index_by_name(name) + if index > -1: + del(self._cache[index]) + + #} END mutators + + +class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin): + """Tree objects represent an ordered list of Blobs and other Trees. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + """ + + type = ObjectType.tree + type_id = ObjectType.tree_id + + __slots__ = "_cache" + + # actual integer ids for comparison + commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link + blob_id = 010 + symlink_id = 012 + tree_id = 004 + #{ Configuration + + # override in subclass if you would like your own types to be instantiated instead _map_id_to_type = { - GitDB_Tree.commit_id : Submodule, - GitDB_Tree.blob_id : Blob, - GitDB_Tree.symlink_id : Blob + commit_id : Submodule, + blob_id : Blob, + symlink_id : Blob # tree id added once Tree is defined } + #} end configuration + + + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): + super(Tree, self).__init__(repo, binsha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return tuple(index_object._iter_convert_to_object(index_object._cache)) + return tuple() + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + ostream = self.odb.stream(self.binsha) + self._cache = tree_entries_from_data(ostream.read()) + else: + super(Tree, self)._set_cache_(attr) + # END handle attribute + + def _iter_convert_to_object(self, iterable): + """Iterable yields tuples of (binsha, mode, name), which will be converted + to the respective object representation""" + for binsha, mode, name in iterable: + path = join_path(self.path, name) + try: + yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) + except KeyError: + raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) + # END for each item + + def __div__(self, file): + """Find the named object in this tree's contents + :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` + + :raise KeyError: if given file or tree does not exist in tree""" + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for info in self._cache: + if info[2] == file: # [2] == name + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + @property + def trees(self): + """:return: list(Tree, ...) list of trees directly below this tree""" + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """:return: list(Blob, ...) 
list of blobs directly below this tree""" + return [ i for i in self if i.type == "blob" ] + + @property + def cache(self): + """ + :return: An object allowing to modify the internal cache. This can be used + to change the tree's contents. When done, make sure you call ``set_done`` + on the tree modifier, or serialization behaviour will be incorrect. + See the ``TreeModifier`` for more information on how to alter the cache""" + return TreeModifier(self._cache) + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see util.Traversable.traverse + Trees are set to visit_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self, i, j): + return list(self._iter_convert_to_object(self._cache[i:j])) + + def __iter__(self): + return self._iter_convert_to_object(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self, item): + if isinstance(item, int): + info = self._cache[item] + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self, item): + if isinstance(item, IndexObject): + for info in self._cache: + if item.binsha == info[0]: + return True + # END compare sha + # END for each entry + # END handle item is index object + # compatability + + # treat item as repo-relative path + path = self.path + for info in self._cache: + if item == join_path(path, info[2]): + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._iter_convert_to_object(self._cache)) + + def _serialize(self, stream): + """Serialize this tree into the stream. Please note that we will assume + our tree data to be in a sorted state. If this is not the case, serialization + will not generate a correct tree representation as these are assumed to be sorted + by algorithms""" + tree_to_stream(self._cache, stream.write) + return self + + def _deserialize(self, stream): + self._cache = tree_entries_from_data(stream.read()) + return self + + +# END tree + # finalize map definition Tree._map_id_to_type[Tree.tree_id] = Tree diff --git a/git/objects/util.py b/git/objects/util.py index 4c9323b8..8ac590f2 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -20,6 +20,7 @@ __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', 'verify_utctz', 'Actor') + #{ Functions def mode_str_to_int(modestr): |
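The pure helpers added in `git/objects/fun.py` can be exercised without a repository at all. The sketch below round-trips a small entry list through `tree_to_stream` and `tree_entries_from_data`, both taken from the diff above; the 20-byte shas and the `README`/`docs` names are made-up placeholders, and the entry layout `(binsha, mode, name)` is the one those functions document.

```python
# Hedged sketch: round-trip git's raw binary tree format through the new helpers.
from cStringIO import StringIO
from git.objects.fun import tree_to_stream, tree_entries_from_data

entries = [
    ('\x01' * 20, 0100644, 'README'),   # regular file (blob), dummy sha
    ('\x02' * 20, 0040000, 'docs'),     # directory (sub-tree), dummy sha
]
buf = StringIO()
tree_to_stream(entries, buf.write)       # serialize to git's binary tree layout
assert tree_entries_from_data(buf.getvalue()) == entries
```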
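Finally, a sketch of the list-like `Tree` interface from `git/objects/tree.py`. This one assumes more than the commit delivers: it presumes the surrounding `Repo` wiring already hands out objects backed by the vendored classes (which this commit explicitly has not finished), and the `README` path is only an illustrative name. The `/` look-up, `traverse()` and `mime_type` are from the diff above.

```python
# Hedged sketch of the Tree access patterns added above; assumes `repo` already
# yields objects built on the vendored classes, which is still work in progress here.
from git import Repo

repo = Repo('.')                      # any existing, non-bare repository
tree = repo.head.commit.tree          # root Tree of the current HEAD commit
readme = tree / 'README'              # __div__ look-up, raises KeyError if missing
print readme.mime_type                # Blob property, guessed from the file name
for item in tree.traverse():          # recursive walk over blobs, trees, submodules
    print item.type, item.path
```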