author    Sebastian Thiel <byronimo@gmail.com>    2011-05-05 19:43:22 +0200
committer Sebastian Thiel <byronimo@gmail.com>    2011-05-05 19:43:22 +0200
commit    4177eefd7bdaea96a529b00ba9cf751924ede202 (patch)
tree      958614c21bd97267e0d06f71bb18d4215ddd87b5 /git/objects
parent    f54546a9b857ae728033482f3c5c18c9ff3393c3 (diff)
download  gitpython-4177eefd7bdaea96a529b00ba9cf751924ede202.tar.gz
Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work
Diffstat (limited to 'git/objects')
-rw-r--r--   git/objects/base.py              173
-rw-r--r--   git/objects/blob.py               27
-rw-r--r--   git/objects/commit.py            259
-rw-r--r--   git/objects/fun.py               199
-rw-r--r--   git/objects/submodule/base.py      3
-rw-r--r--   git/objects/tag.py                73
-rw-r--r--   git/objects/tree.py              282
-rw-r--r--   git/objects/util.py                1
8 files changed, 993 insertions, 24 deletions
diff --git a/git/objects/base.py b/git/objects/base.py
index 42d7b600..24967e7b 100644
--- a/git/objects/base.py
+++ b/git/objects/base.py
@@ -3,6 +3,177 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from gitdb.object.base import Object, IndexObject
+
+from util import get_object_type_by_name
+from git.util import (
+ hex_to_bin,
+ bin_to_hex,
+ dirname,
+ basename,
+ LazyMixin,
+ join_path_native,
+ stream_copy
+ )
+
+from git.typ import ObjectType
+
+_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r"
+
__all__ = ("Object", "IndexObject")
+class Object(LazyMixin):
+ """Implements an Object which may be Blobs, Trees, Commits and Tags"""
+ NULL_HEX_SHA = '0'*40
+ NULL_BIN_SHA = '\0'*20
+
+ TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag)
+ __slots__ = ("odb", "binsha", "size" )
+
+ type = None # to be set by subclass
+ type_id = None # to be set by subclass
+
+ def __init__(self, odb, binsha):
+ """Initialize an object by identifying it by its binary sha.
+ All keyword arguments will be set on demand if None.
+
+ :param odb: repository this object is located in
+
+ :param binsha: 20 byte SHA1"""
+ super(Object,self).__init__()
+ self.odb = odb
+ self.binsha = binsha
+ assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
+
+ @classmethod
+ def new(cls, odb, id):
+ """
+ :return: New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a binsha even though
+ the input id may have been a Reference or Rev-Spec
+
+ :param id: reference, rev-spec, or hexsha
+
+ :note: This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a binsha."""
+ return odb.rev_parse(str(id))
+
+ @classmethod
+ def new_from_sha(cls, odb, sha1):
+ """
+ :return: new object instance of a type appropriate to represent the given
+ binary sha1
+ :param sha1: 20 byte binary sha1"""
+ if sha1 == cls.NULL_BIN_SHA:
+ # the NULL binsha is always the root commit
+ return get_object_type_by_name('commit')(odb, sha1)
+ #END handle special case
+ oinfo = odb.info(sha1)
+ inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha)
+ inst.size = oinfo.size
+ return inst
+
+ def _set_cache_(self, attr):
+ """Retrieve object information"""
+ if attr == "size":
+ oinfo = self.odb.info(self.binsha)
+ self.size = oinfo.size
+ # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
+
+ def __eq__(self, other):
+ """:return: True if the objects have the same SHA1"""
+ return self.binsha == other.binsha
+
+ def __ne__(self, other):
+ """:return: True if the objects do not have the same SHA1 """
+ return self.binsha != other.binsha
+
+ def __hash__(self):
+ """:return: Hash of our id allowing objects to be used in dicts and sets"""
+ return hash(self.binsha)
+
+ def __str__(self):
+ """:return: string of our SHA1 as understood by all git commands"""
+ return bin_to_hex(self.binsha)
+
+ def __repr__(self):
+ """:return: string with pythonic representation of our object"""
+ return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
+
+ @property
+ def hexsha(self):
+ """:return: 40 byte hex version of our 20 byte binary sha"""
+ return bin_to_hex(self.binsha)
+
+ @property
+ def data_stream(self):
+ """ :return: File Object compatible stream to the uncompressed raw data of the object
+ :note: returned streams must be read in order"""
+ return self.odb.stream(self.binsha)
+
+ def stream_data(self, ostream):
+ """Writes our data directly to the given output stream
+ :param ostream: File object compatible stream object.
+ :return: self"""
+ istream = self.odb.stream(self.binsha)
+ stream_copy(istream, ostream)
+ return self
+
+
+class IndexObject(Object):
+ """Base for all objects that can be part of the index file , namely Tree, Blob and
+ SubModule objects"""
+ __slots__ = ("path", "mode")
+
+ # for compatibility with iterable lists
+ _id_attribute_ = 'path'
+
+ def __init__(self, odb, binsha, mode=None, path=None):
+ """Initialize a newly instanced IndexObject
+ :param odb: is the object database we are located in
+ :param binsha: 20 byte sha1
+ :param mode: is the stat compatible file mode as int, use the stat module
+ to evaluate the information
+ :param path:
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+ :note:
+ Path may not be set if the index object has been created directly, as it cannot
+ be retrieved without knowing the parent tree."""
+ super(IndexObject, self).__init__(odb, binsha)
+ if mode is not None:
+ self.mode = mode
+ if path is not None:
+ self.path = path
+
+ def __hash__(self):
+ """:return:
+ Hash of our path as index items are uniquely identifyable by path, not
+ by their data !"""
+ return hash(self.path)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved later on (not without searching for them)
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+ # END handle slot attribute
+
+ @property
+ def name(self):
+ """:return: Name portion of the path, effectively being the basename"""
+ return basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ :return:
+ Absolute path to this index object in the file system (as opposed to the
+ .path field which is a path relative to the git repository).
+
+ The returned path will be native to the system and contains '\\' on Windows."""
+ assert False, "Only works if repository is not bare - provide this check in an interface"
+ return join_path_native(dirname(self.odb.root_path()), self.path)
+
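As a usage sketch (not part of this patch): resolving an object through the new base class. It assumes `odb` is a gitdb-style object database exposing the info()/stream() calls the code above relies on; the sha used is that of this very commit.

    from git.objects.base import Object
    from git.util import hex_to_bin        # import location assumed, mirroring the diff above

    binsha = hex_to_bin("4177eefd7bdaea96a529b00ba9cf751924ede202")
    obj = Object.new_from_sha(odb, binsha) # picks the concrete subclass via odb.info()
    print obj.type, obj.hexsha             # 'commit', 40 character hex sha
    print obj.size                         # set by new_from_sha, otherwise looked up lazily
    with open("/tmp/raw_commit", "wb") as fp:
        obj.stream_data(fp)                # copy the uncompressed object data into the file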
diff --git a/git/objects/blob.py b/git/objects/blob.py
index 38834436..326c5459 100644
--- a/git/objects/blob.py
+++ b/git/objects/blob.py
@@ -5,9 +5,32 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
from git.util import RepoAliasMixin
-from gitdb.object.blob import Blob as GitDB_Blob
+from mimetypes import guess_type
+from gitdb.typ import ObjectType
+
+import base
__all__ = ('Blob', )
-class Blob(GitDB_Blob, RepoAliasMixin):
+class Blob(base.IndexObject, RepoAliasMixin):
+ """A Blob encapsulates a git blob object"""
+ DEFAULT_MIME_TYPE = "text/plain"
+ type = ObjectType.blob
+ type_id = ObjectType.blob_id
+
+ # valid blob modes
+ executable_mode = 0100755
+ file_mode = 0100644
+ link_mode = 0120000
+
__slots__ = tuple()
+
+ @property
+ def mime_type(self):
+ """
+ :return: String describing the mime type of this file (based on the filename)
+ :note: Defaults to 'text/plain' in case the actual file type is unknown. """
+ guesses = None
+ if self.path:
+ guesses = guess_type(self.path)
+ return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
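A small sketch of the mime_type property added above; it only inspects the path, so no object database access happens. The `odb` handle and the all-zero binsha are placeholders.

    from git.objects.blob import Blob

    blob = Blob(odb, '\0' * 20, mode=Blob.file_mode, path='docs/index.html')
    print blob.mime_type      # 'text/html', guessed from the file name
    readme = Blob(odb, '\0' * 20, mode=Blob.file_mode, path='README')
    print readme.mime_type    # falls back to Blob.DEFAULT_MIME_TYPE ('text/plain')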
diff --git a/git/objects/commit.py b/git/objects/commit.py
index d932ab1a..30dcaa0a 100644
--- a/git/objects/commit.py
+++ b/git/objects/commit.py
@@ -3,28 +3,68 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.util import RepoAliasMixin
-from gitdb.object.commit import Commit as GitDB_Commit
-from git.diff import Diffable
+import base
+
+from gitdb.typ import ObjectType
+from tree import Tree
+from cStringIO import StringIO
+
from gitdb.util import (
+ hex_to_bin,
+ Actor,
+ RepoAliasMixin,
Iterable,
Actor
)
-from gitdb import IStream
+from util import (
+ Traversable,
+ Serializable,
+ altz_to_utctz_str,
+ parse_actor_and_date
+ )
+from git.diff import Diffable
+from gitdb.base import IStream
from cStringIO import StringIO
from util import parse_date
from time import altzone
import os
+import sys
__all__ = ('Commit', )
-class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin):
- """Provides additional git-command based functionality to the default gitdb commit object"""
+class Commit(base.Object, Diffable, Iterable, RepoAliasMixin, Traversable, Serializable):
+ """Wraps a git Commit object.
+
+ This class will act lazily on some of its attributes and will query the
+ value on demand only if it involves calling the git binary."""
__slots__ = tuple()
+ # ENVIRONMENT VARIABLES
+ # read when creating new commits
+ env_author_date = "GIT_AUTHOR_DATE"
+ env_committer_date = "GIT_COMMITTER_DATE"
+
+ # CONFIGURATION KEYS
+ conf_encoding = 'i18n.commitencoding'
+
+ # INVARIANTS
+ default_encoding = "UTF-8"
+
+
+ # object configuration
+ type = ObjectType.commit
+ type_id = ObjectType.commit_id
+
+ __slots__ = ("tree",
+ "author", "authored_date", "author_tz_offset",
+ "committer", "committed_date", "committer_tz_offset",
+ "message", "parents", "encoding")
+ _id_attribute_ = "binsha"
+
+
def count(self, paths='', **kwargs):
"""Count the number of commits reachable from this commit
@@ -221,4 +261,211 @@ class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin):
return new_commit
+ def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set on first query.
+
+ :param binsha: 20 byte sha1
+ :param parents: tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits which are our parent(s) in the commit dependency graph
+ :param tree: Tree
+ Tree object
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+ is the timezone that the committed_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone."""
+ super(Commit,self).__init__(odb, binsha)
+ if tree is not None:
+ assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
+ if tree is not None:
+ self.tree = tree
+ if author is not None:
+ self.author = author
+ if authored_date is not None:
+ self.authored_date = authored_date
+ if author_tz_offset is not None:
+ self.author_tz_offset = author_tz_offset
+ if committer is not None:
+ self.committer = committer
+ if committed_date is not None:
+ self.committed_date = committed_date
+ if committer_tz_offset is not None:
+ self.committer_tz_offset = committer_tz_offset
+ if message is not None:
+ self.message = message
+ if parents is not None:
+ self.parents = parents
+ if encoding is not None:
+ self.encoding = encoding
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ if attr in Commit.__slots__:
+ # read the data in a chunk, it's faster - then provide a file wrapper
+ binsha, typename, self.size, stream = self.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
+ else:
+ super(Commit, self)._set_cache_(attr)
+ # END handle attrs
+
+ @property
+ def summary(self):
+ """:return: First line of the commit message"""
+ return self.message.split('\n', 1)[0]
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, odb, proc_or_stream):
+ """Parse out commit information into a list of Commit objects
+ We expect one line per commit, and parse the actual commit information directly
+ from our lightning fast object database
+
+ :param proc: git-rev-list process instance - one sha per line
+ :return: iterator returning Commit objects"""
+ stream = proc_or_stream
+ if not hasattr(stream,'readline'):
+ stream = proc_or_stream.stdout
+
+ readline = stream.readline
+ while True:
+ line = readline()
+ if not line:
+ break
+ hexsha = line.strip()
+ if len(hexsha) > 40:
+ # split additional information, as returned by bisect for instance
+ hexsha, rest = line.split(None, 1)
+ # END handle extra info
+
+ assert len(hexsha) == 40, "Invalid line: %s" % hexsha
+ yield cls(odb, hex_to_bin(hexsha))
+ # END for each line in stream
+
+ #{ Serializable Implementation
+
+ def _serialize(self, stream):
+ write = stream.write
+ write("tree %s\n" % self.tree)
+ for p in self.parents:
+ write("parent %s\n" % p)
+
+ a = self.author
+ aname = a.name
+ if isinstance(aname, unicode):
+ aname = aname.encode(self.encoding)
+ # END handle unicode in name
+
+ c = self.committer
+ fmt = "%s %s <%s> %s %s\n"
+ write(fmt % ("author", aname, a.email,
+ self.authored_date,
+ altz_to_utctz_str(self.author_tz_offset)))
+
+ # encode committer
+ aname = c.name
+ if isinstance(aname, unicode):
+ aname = aname.encode(self.encoding)
+ # END handle unicode in name
+ write(fmt % ("committer", aname, c.email,
+ self.committed_date,
+ altz_to_utctz_str(self.committer_tz_offset)))
+
+ if self.encoding != self.default_encoding:
+ write("encoding %s\n" % self.encoding)
+
+ write("\n")
+
+ # write plain bytes, be sure it's encoded according to our encoding
+ if isinstance(self.message, unicode):
+ write(self.message.encode(self.encoding))
+ else:
+ write(self.message)
+ # END handle encoding
+ return self
+
+ def _deserialize(self, stream):
+ """:param from_rev_list: if true, the stream format is coming from the rev-list command
+ Otherwise it is assumed to be a plain data stream from our object"""
+ readline = stream.readline
+ self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
+
+ self.parents = list()
+ next_line = None
+ while True:
+ parent_line = readline()
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
+ # END for each parent line
+ self.parents = tuple(self.parents)
+
+ self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
+
+
+ # now we can have the encoding line, or an empty line followed by the optional
+ # message.
+ self.encoding = self.default_encoding
+ # read encoding or empty line to separate message
+ enc = readline()
+ enc = enc.strip()
+ if enc:
+ self.encoding = enc[enc.find(' ')+1:]
+ # now comes the message separator
+ readline()
+ # END handle encoding
+
+ # decode the authors name
+ try:
+ self.author.name = self.author.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
+ # END handle author's encoding
+
+ # decode committer name
+ try:
+ self.committer.name = self.committer.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
+ # END handle author's encoding
+
+ # a stream from our data simply gives us the plain message
+ # The end of our message stream is marked with a newline that we strip
+ self.message = stream.read()
+ try:
+ self.message = self.message.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
+ # END exception handling
+ return self
+
#} END serializable implementation
+
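To illustrate the lazy attribute behaviour described in the class docstring, a hedged example using the regular Repo API; the checkout path is hypothetical, the sha is this patch's commit.

    from git import Repo

    repo = Repo("/path/to/gitpython")
    commit = repo.commit("4177eefd7bdaea96a529b00ba9cf751924ede202")
    print commit.summary                   # first line of the message
    print commit.author.name, commit.authored_date, commit.author_tz_offset
    for parent in commit.parents:          # parents are Commit instances themselves
        print parent.hexsha

Accessing any attribute listed in __slots__ triggers _set_cache_, which reads the raw object once and fills all attributes via _deserialize.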
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 2443bad7..6f2eaaad 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -1,4 +1,201 @@
"""Module with functions which are supposed to be as fast as possible"""
-from gitdb.object.fun import *
+from stat import S_ISDIR
+
+__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
+ 'traverse_tree_recursive')
+
+
+
+
+def tree_to_stream(entries, write):
+ """Write the give list of entries into a stream using its write method
+ :param entries: **sorted** list of tuples with (binsha, mode, name)
+ :param write: write method which takes a data string"""
+ ord_zero = ord('0')
+ bit_mask = 7 # 3 bits set
+
+ for binsha, mode, name in entries:
+ mode_str = ''
+ for i in xrange(6):
+ mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
+ # END for each of the 6 octal digits
+
+ # git slices away the first octal digit if it's zero
+ if mode_str[0] == '0':
+ mode_str = mode_str[1:]
+ # END save a byte
+
+ # here it comes: if the name is actually unicode, the replacement below
+ # will not work as the binsha is not part of the ascii unicode encoding -
+ # hence we must convert to a utf8 string for it to work properly.
+ # According to my tests, this is exactly what git does, that is it just
+ # takes the input literally, which appears to be utf8 on linux.
+ if isinstance(name, unicode):
+ name = name.encode("utf8")
+ write("%s %s\0%s" % (mode_str, name, binsha))
+ # END for each item
+
+
+def tree_entries_from_data(data):
+ """Reads the binary representation of a tree and returns tuples of Tree items
+ :param data: data block with tree data
+ :return: list(tuple(binsha, mode, tree_relative_path), ...)"""
+ ord_zero = ord('0')
+ len_data = len(data)
+ i = 0
+ out = list()
+ while i < len_data:
+ mode = 0
+
+ # read mode
+ # Some git versions truncate the leading 0, some don't
+ # The type will be extracted from the mode later
+ while data[i] != ' ':
+ # shift the existing mode up by one octal digit (3 bits)
+ # and add the numeric value of the current digit character
+ mode = (mode << 3) + (ord(data[i]) - ord_zero)
+ i += 1
+ # END while reading mode
+
+ # byte is space now, skip it
+ i += 1
+
+ # parse name, it is NULL separated
+
+ ns = i
+ while data[i] != '\0':
+ i += 1
+ # END while not reached NULL
+
+ # default encoding for strings in git is utf8
+ # Only use the respective unicode object if the byte stream was encoded
+ name = data[ns:i]
+ name_enc = name.decode("utf-8")
+ if len(name) > len(name_enc):
+ name = name_enc
+ # END handle encoding
+
+ # byte is NULL, get next 20
+ i += 1
+ sha = data[i:i+20]
+ i = i + 20
+ out.append((sha, mode, name))
+ # END for each byte in data stream
+ return out
+
+
+def _find_by_name(tree_data, name, is_dir, start_at):
+ """return data entry matching the given name and tree mode
+ or None.
+ Before the item is returned, the respective data item is set to
+ None in the tree_data list to mark it done"""
+ try:
+ item = tree_data[start_at]
+ if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+ tree_data[start_at] = None
+ return item
+ except IndexError:
+ pass
+ # END exception handling
+ for index, item in enumerate(tree_data):
+ if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+ tree_data[index] = None
+ return item
+ # END if item matches
+ # END for each item
+ return None
+
+def _to_full_path(item, path_prefix):
+ """Rebuild entry with given path prefix"""
+ if not item:
+ return item
+ return (item[0], item[1], path_prefix+item[2])
+
+def traverse_trees_recursive(odb, tree_shas, path_prefix):
+ """
+ :return: list with entries according to the given binary tree-shas.
+ The result is encoded in a list
+ of n tuple|None per blob/commit, (n == len(tree_shas)), where
+ * [0] == 20 byte sha
+ * [1] == mode as int
+ * [2] == path relative to working tree root
+ The entry tuple is None if the respective blob/commit did not
+ exist in the given tree.
+ :param tree_shas: iterable of shas pointing to trees. All trees must
+ be on the same level. A tree-sha may be None, in which case None is
+ put at its position in the result tuples.
+ :param path_prefix: a prefix to be added to the returned paths on this level,
+ set it to '' for the first iteration
+ :note: The ordering of the returned items will be partially lost"""
+ trees_data = list()
+ nt = len(tree_shas)
+ for tree_sha in tree_shas:
+ if tree_sha is None:
+ data = list()
+ else:
+ data = tree_entries_from_data(odb.stream(tree_sha).read())
+ # END handle muted trees
+ trees_data.append(data)
+ # END for each sha to get data for
+
+ out = list()
+ out_append = out.append
+
+ # find all matching entries and recursively process them together if the match
+ # is a tree. If the match is a non-tree item, put it into the result.
+ # Processed items will be set None
+ for ti, tree_data in enumerate(trees_data):
+ for ii, item in enumerate(tree_data):
+ if not item:
+ continue
+ # END skip already done items
+ entries = [ None for n in range(nt) ]
+ entries[ti] = item
+ sha, mode, name = item # its faster to unpack
+ is_dir = S_ISDIR(mode) # type mode bits
+
+ # find this item in all other tree data items
+ # wrap around, but stop one before our current index, hence
+ # ti+nt, not ti+1+nt
+ for tio in range(ti+1, ti+nt):
+ tio = tio % nt
+ entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
+ # END for each other item data
+
+ # if we are a directory, enter recursion
+ if is_dir:
+ out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/'))
+ else:
+ out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
+ # END handle recursion
+
+ # finally mark it done
+ tree_data[ii] = None
+ # END for each item
+
+ # we are done with one tree, set all its data empty
+ del(tree_data[:])
+ # END for each tree_data chunk
+ return out
+
+def traverse_tree_recursive(odb, tree_sha, path_prefix):
+ """
+ :return: list of entries of the tree pointed to by the binary tree_sha. An entry
+ has the following format:
+ * [0] 20 byte sha
+ * [1] mode as int
+ * [2] path relative to the repository
+ :param path_prefix: prefix to prepend to the front of all returned paths"""
+ entries = list()
+ data = tree_entries_from_data(odb.stream(tree_sha).read())
+
+ # unpacking/packing is faster than accessing individual items
+ for sha, mode, name in data:
+ if S_ISDIR(mode):
+ entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
+ else:
+ entries.append((sha, mode, path_prefix+name))
+ # END for each item
+
+ return entries
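A sketch of how the traversal helpers above can be used; `odb` is assumed to be an object database providing the stream() call the functions rely on, and `tree_binsha` a 20 byte tree sha.

    from git.objects.fun import tree_entries_from_data, traverse_tree_recursive

    # direct children of one tree
    for binsha, mode, name in tree_entries_from_data(odb.stream(tree_binsha).read()):
        print "%06o %s" % (mode, name)

    # all blobs/commits below the tree, with repository-relative paths
    for binsha, mode, path in traverse_tree_recursive(odb, tree_binsha, ''):
        print "%06o %s" % (mode, path)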
diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py
index 7997e5e5..9b45d9b6 100644
--- a/git/objects/submodule/base.py
+++ b/git/objects/submodule/base.py
@@ -73,6 +73,9 @@ class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin):
# this is a bogus type for base class compatibility
type = 'submodule'
+ # this type doesn't really have a type id
+ type_id = 0
+
__slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__')
_cache_attrs = ('path', '_url', '_branch_path')
diff --git a/git/objects/tag.py b/git/objects/tag.py
index 59b2362e..0bd1d20c 100644
--- a/git/objects/tag.py
+++ b/git/objects/tag.py
@@ -4,10 +4,77 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
""" Module containing all object based types. """
+import base
from git.util import RepoAliasMixin
-from gitdb.object.tag import TagObject as GitDB_TagObject
+from gitdb.util import hex_to_bin
+from util import (
+ get_object_type_by_name,
+ parse_actor_and_date
+ )
+from gitdb.typ import ObjectType
+
__all__ = ("TagObject", )
-class TagObject(GitDB_TagObject, RepoAliasMixin):
+class TagObject(base.Object, RepoAliasMixin):
"""Non-Lightweight tag carrying additional information about an object we are pointing to."""
- __slots__ = tuple()
+ type = ObjectType.tag
+ type_id = ObjectType.tag_id
+
+ __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
+
+ def __init__(self, odb, binsha, object=None, tag=None,
+ tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
+ """Initialize a tag object with additional data
+
+ :param odb: repository this object is located in
+ :param binsha: 20 byte SHA1
+ :param object: Object instance of object we are pointing to
+ :param tag: name of this tag
+ :param tagger: Actor identifying the tagger
+ :param tagged_date: int_seconds_since_epoch
+ is the DateTime of the tag creation - use time.gmtime to convert
+ it into a different format
+ :param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
+ tagged_date is in, in a format similar to time.altzone"""
+ super(TagObject, self).__init__(odb, binsha )
+ if object is not None:
+ self.object = object
+ if tag is not None:
+ self.tag = tag
+ if tagger is not None:
+ self.tagger = tagger
+ if tagged_date is not None:
+ self.tagged_date = tagged_date
+ if tagger_tz_offset is not None:
+ self.tagger_tz_offset = tagger_tz_offset
+ if message is not None:
+ self.message = message
+
+ def _set_cache_(self, attr):
+ """Cache all our attributes at once"""
+ if attr in TagObject.__slots__:
+ ostream = self.odb.stream(self.binsha)
+ lines = ostream.read().splitlines()
+
+ obj, hexsha = lines[0].split(" ") # object <hexsha>
+ type_token, type_name = lines[1].split(" ") # type <type_name>
+ self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha))
+
+ self.tag = lines[2][4:] # tag <tag name>
+
+ tagger_info = lines[3][7:]# tagger <actor> <date>
+ self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
+
+ # line 4 empty - it could mark the beginning of the next header
+ # in case there really is no message, it would not exist. Otherwise
+ # a newline separates header from message
+ if len(lines) > 5:
+ self.message = "\n".join(lines[5:])
+ else:
+ self.message = ''
+ # END check our attributes
+ else:
+ super(TagObject, self)._set_cache_(attr)
+
+
+
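A brief, hedged example of the lazy TagObject parsing above, going through the ordinary tag reference API; the repository path and tag name are placeholders.

    from git import Repo

    repo = Repo("/path/to/repo")
    tagobj = repo.tags['v0.1.0'].tag           # TagObject for annotated tags, None for lightweight ones
    if tagobj is not None:
        print tagobj.tag, tagobj.object.hexsha # first attribute access triggers _set_cache_
        print tagobj.tagger.name, tagobj.tagged_date
        print tagobj.message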
diff --git a/git/objects/tree.py b/git/objects/tree.py
index 00ef07fc..1b5f7561 100644
--- a/git/objects/tree.py
+++ b/git/objects/tree.py
@@ -4,26 +4,286 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
from git.util import RepoAliasMixin
-from gitdb.object.tree import Tree as GitDB_Tree
-from gitdb.object.tree import TreeModifier
import git.diff as diff
-
+from gitdb.typ import ObjectType
+from base import IndexObject
from blob import Blob
-from submodule.base import Submodule
+from submodule import Submodule
+
+from fun import (
+ tree_entries_from_data,
+ tree_to_stream
+ )
+
+from gitdb.util import (
+ to_bin_sha,
+ join_path
+ )
+import util
__all__ = ("TreeModifier", "Tree")
-class Tree(GitDB_Tree, diff.Diffable):
- """As opposed to the default GitDB tree implementation, this one can be diffed
- and returns our own types"""
- __slots__ = tuple()
+class TreeModifier(object):
+ """A utility class providing methods to alter the underlying cache in a list-like fashion.
+
+ Once all adjustments are complete, the _cache, which really is a reference to
+ the cache of a tree, will be sorted, assuring it will be in a serializable state"""
+ __slots__ = '_cache'
+
+ def __init__(self, cache):
+ self._cache = cache
+
+ def _index_by_name(self, name):
+ """:return: index of an item with name, or -1 if not found"""
+ for i, t in enumerate(self._cache):
+ if t[2] == name:
+ return i
+ # END found item
+ # END for each item in cache
+ return -1
+
+ #{ Interface
+ def set_done(self):
+ """Call this method once you are done modifying the tree information.
+ It may be called several times, but be aware that each call will cause
+ a sort operation
+ :return: self"""
+ self._cache.sort(key=lambda t: t[2]) # sort by name
+ return self
+ #} END interface
+
+ #{ Mutators
+ def add(self, sha, mode, name, force=False):
+ """Add the given item to the tree. If an item with the given name already
+ exists, nothing will be done, but a ValueError will be raised if the
+ sha and mode of the existing item do not match the one you add, unless
+ force is True
+
+ :param sha: The 20 or 40 byte sha of the item to add
+ :param mode: int representing the stat compatible mode of the item
+ :param force: If True, an item with your name and information will overwrite
+ any existing item with the same name, no matter which information it has
+ :return: self"""
+ if '/' in name:
+ raise ValueError("Name must not contain '/' characters")
+ if (mode >> 12) not in Tree._map_id_to_type:
+ raise ValueError("Invalid object type according to mode %o" % mode)
+
+ sha = to_bin_sha(sha)
+ index = self._index_by_name(name)
+ item = (sha, mode, name)
+ if index == -1:
+ self._cache.append(item)
+ else:
+ if force:
+ self._cache[index] = item
+ else:
+ ex_item = self._cache[index]
+ if ex_item[0] != sha or ex_item[1] != mode:
+ raise ValueError("Item %r existed with different properties" % name)
+ # END handle mismatch
+ # END handle force
+ # END handle name exists
+ return self
+
+ def add_unchecked(self, binsha, mode, name):
+ """Add the given item to the tree, its correctness is assumed, which
+ puts the caller into responsibility to assure the input is correct.
+ For more information on the parameters, see ``add``
+ :param binsha: 20 byte binary sha"""
+ self._cache.append((binsha, mode, name))
+
+ def __delitem__(self, name):
+ """Deletes an item with the given name if it exists"""
+ index = self._index_by_name(name)
+ if index > -1:
+ del(self._cache[index])
+
+ #} END mutators
+
+
+class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin):
+ """Tree objects represent an ordered list of Blobs and other Trees.
+
+ ``Tree as a list``::
+
+ Access a specific blob using the
+ tree['filename'] notation.
+
+ You may as well access by index
+ blob = tree[0]
+ """
+
+ type = ObjectType.tree
+ type_id = ObjectType.tree_id
+
+ __slots__ = "_cache"
+
+ # actual integer ids for comparison
+ commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link
+ blob_id = 010
+ symlink_id = 012
+ tree_id = 004
+ #{ Configuration
+
+ # override in subclass if you would like your own types to be instantiated instead
_map_id_to_type = {
- GitDB_Tree.commit_id : Submodule,
- GitDB_Tree.blob_id : Blob,
- GitDB_Tree.symlink_id : Blob
+ commit_id : Submodule,
+ blob_id : Blob,
+ symlink_id : Blob
# tree id added once Tree is defined
}
+ #} end configuration
+
+
+ def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
+ super(Tree, self).__init__(repo, binsha, mode, path)
+
+ @classmethod
+ def _get_intermediate_items(cls, index_object):
+ if index_object.type == "tree":
+ return tuple(index_object._iter_convert_to_object(index_object._cache))
+ return tuple()
+
+ def _set_cache_(self, attr):
+ if attr == "_cache":
+ # Set the data when we need it
+ ostream = self.odb.stream(self.binsha)
+ self._cache = tree_entries_from_data(ostream.read())
+ else:
+ super(Tree, self)._set_cache_(attr)
+ # END handle attribute
+
+ def _iter_convert_to_object(self, iterable):
+ """Iterable yields tuples of (binsha, mode, name), which will be converted
+ to the respective object representation"""
+ for binsha, mode, name in iterable:
+ path = join_path(self.path, name)
+ try:
+ yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
+ except KeyError:
+ raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
+ # END for each item
+
+ def __div__(self, file):
+ """Find the named object in this tree's contents
+ :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
+
+ :raise KeyError: if given file or tree does not exist in tree"""
+ msg = "Blob or Tree named %r not found"
+ if '/' in file:
+ tree = self
+ item = self
+ tokens = file.split('/')
+ for i,token in enumerate(tokens):
+ item = tree[token]
+ if item.type == 'tree':
+ tree = item
+ else:
+ # safety assertion - blobs are at the end of the path
+ if i != len(tokens)-1:
+ raise KeyError(msg % file)
+ return item
+ # END handle item type
+ # END for each token of split path
+ if item == self:
+ raise KeyError(msg % file)
+ return item
+ else:
+ for info in self._cache:
+ if info[2] == file: # [2] == name
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
+ # END for each obj
+ raise KeyError( msg % file )
+ # END handle long paths
+
+
+ @property
+ def trees(self):
+ """:return: list(Tree, ...) list of trees directly below this tree"""
+ return [ i for i in self if i.type == "tree" ]
+
+ @property
+ def blobs(self):
+ """:return: list(Blob, ...) list of blobs directly below this tree"""
+ return [ i for i in self if i.type == "blob" ]
+
+ @property
+ def cache(self):
+ """
+ :return: An object allowing you to modify the internal cache. This can be used
+ to change the tree's contents. When done, make sure you call ``set_done``
+ on the tree modifier, or serialization behaviour will be incorrect.
+ See the ``TreeModifier`` for more information on how to alter the cache"""
+ return TreeModifier(self._cache)
+
+ def traverse( self, predicate = lambda i,d: True,
+ prune = lambda i,d: False, depth = -1, branch_first=True,
+ visit_once = False, ignore_self=1 ):
+ """For documentation, see util.Traversable.traverse
+ Trees are set to visit_once = False to gain more performance in the traversal"""
+ return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
+
+ # List protocol
+ def __getslice__(self, i, j):
+ return list(self._iter_convert_to_object(self._cache[i:j]))
+
+ def __iter__(self):
+ return self._iter_convert_to_object(self._cache)
+
+ def __len__(self):
+ return len(self._cache)
+
+ def __getitem__(self, item):
+ if isinstance(item, int):
+ info = self._cache[item]
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
+
+ if isinstance(item, basestring):
+ # compatibility
+ return self.__div__(item)
+ # END index is basestring
+
+ raise TypeError( "Invalid index type: %r" % item )
+
+
+ def __contains__(self, item):
+ if isinstance(item, IndexObject):
+ for info in self._cache:
+ if item.binsha == info[0]:
+ return True
+ # END compare sha
+ # END for each entry
+ # END handle item is index object
+ # compatability
+
+ # treat item as repo-relative path
+ path = self.path
+ for info in self._cache:
+ if item == join_path(path, info[2]):
+ return True
+ # END for each item
+ return False
+
+ def __reversed__(self):
+ return reversed(self._iter_convert_to_object(self._cache))
+
+ def _serialize(self, stream):
+ """Serialize this tree into the stream. Please note that we will assume
+ our tree data to be in a sorted state. If this is not the case, serialization
+ will not generate a correct tree representation as these are assumed to be sorted
+ by algorithms"""
+ tree_to_stream(self._cache, stream.write)
+ return self
+
+ def _deserialize(self, stream):
+ self._cache = tree_entries_from_data(stream.read())
+ return self
+
+
+# END tree
+
# finalize map definition
Tree._map_id_to_type[Tree.tree_id] = Tree
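Finally, a short sketch of the list- and path-style access the Tree class now implements, plus the TreeModifier facade; the repository path is a placeholder.

    from git import Repo

    repo = Repo("/path/to/repo")
    tree = repo.head.commit.tree
    print len(tree), len(tree.trees), len(tree.blobs)   # list protocol and helper properties
    blob = tree['git/objects/base.py']                  # slash paths are resolved via __div__
    first = tree[0]                                     # plain index access

    # modify the cached entries, then re-sort so serialization stays correct
    mod = tree.cache
    mod.add(blob.hexsha, blob.mode, 'copy_of_base.py')
    mod.set_done()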
diff --git a/git/objects/util.py b/git/objects/util.py
index 4c9323b8..8ac590f2 100644
--- a/git/objects/util.py
+++ b/git/objects/util.py
@@ -20,6 +20,7 @@ __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date',
'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
'verify_utctz', 'Actor')
+
#{ Functions
def mode_str_to_int(modestr):