summary refs log tree commit diff
path: root/lib/git/objects
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- lib/git/objects/base.py 196
-rw-r--r-- lib/git/objects/blob.py 34
-rw-r--r-- lib/git/objects/commit.py 187
-rw-r--r-- lib/git/objects/fun.py 7
-rw-r--r-- lib/git/objects/submodule.py 1
-rw-r--r-- lib/git/objects/tag.py 126
-rw-r--r-- lib/git/objects/tree.py 75
-rw-r--r-- lib/git/objects/utils.py 62
8 files changed, 284 insertions, 404 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 90aa8ca2..118bc3ca 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -3,179 +3,140 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import os
from git.utils import LazyMixin, join_path_native, stream_copy
-import utils
+from utils import get_object_type_by_name
+from gitdb.util import (
+ hex_to_bin,
+ bin_to_hex,
+ basename
+ )
+
+import gitdb.typ as dbtyp
_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
+__all__ = ("Object", "IndexObject")
+
class Object(LazyMixin):
- """
- Implements an Object which may be Blobs, Trees, Commits and Tags
-
- This Object also serves as a constructor for instances of the correct type::
-
- inst = Object.new(repo,id)
- inst.sha # objects sha in hex
- inst.size # objects uncompressed data size
- inst.data # byte string containing the whole data of the object
- """
+ """Implements an Object which may be Blobs, Trees, Commits and Tags"""
NULL_HEX_SHA = '0'*40
NULL_BIN_SHA = '\0'*20
- TYPES = ("blob", "tree", "commit", "tag")
- __slots__ = ("repo", "sha", "size", "data" )
+
+ TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
+ __slots__ = ("repo", "binsha", "size" )
type = None # to be set by subclass
- def __init__(self, repo, id):
- """
- Initialize an object by identifying it by its id. All keyword arguments
- will be set on demand if None.
+ def __init__(self, repo, binsha):
+ """Initialize an object by identifying it by its binary sha.
+ All keyword arguments will be set on demand if None.
- ``repo``
- repository this object is located in
+ :param repo: repository this object is located in
- ``id``
- SHA1 or ref suitable for git-rev-parse
- """
+ :param binsha: 20 byte SHA1"""
super(Object,self).__init__()
self.repo = repo
- self.sha = id
+ self.binsha = binsha
@classmethod
def new(cls, repo, id):
"""
- Return
- New Object instance of a type appropriate to the object type behind
- id. The id of the newly created object will be a hexsha even though
+ :return: New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a binsha even though
the input id may have been a Reference or Rev-Spec
- Note
- This cannot be a __new__ method as it would always call __init__
- with the input id which is not necessarily a hexsha.
- """
+ :param id: reference, rev-spec, or hexsha
+
+ :note: This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a binsha."""
hexsha, typename, size = repo.git.get_object_header(id)
- obj_type = utils.get_object_type_by_name(typename)
- inst = obj_type(repo, hexsha)
+ inst = get_object_type_by_name(typename)(repo, hex_to_bin(hexsha))
inst.size = size
return inst
def _set_self_from_args_(self, args_dict):
- """
- Initialize attributes on self from the given dict that was retrieved
+ """Initialize attributes on self from the given dict that was retrieved
from locals() in the calling method.
Will only set an attribute on self if the corresponding value in args_dict
- is not None
- """
+ is not None"""
for attr, val in args_dict.items():
if attr != "self" and val is not None:
setattr( self, attr, val )
# END set all non-None attributes
def _set_cache_(self, attr):
- """
- Retrieve object information
- """
+ """Retrieve object information"""
if attr == "size":
- oinfo = self.repo.odb.info(self.sha)
+ oinfo = self.repo.odb.info(self.binsha)
self.size = oinfo.size
- assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type)
- elif attr == "data":
- ostream = self.repo.odb.stream(self.sha)
- self.size = ostream.size
- self.data = ostream.read()
- assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type)
+ # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
else:
super(Object,self)._set_cache_(attr)
def __eq__(self, other):
- """
- Returns
- True if the objects have the same SHA1
- """
- return self.sha == other.sha
+ """:return: True if the objects have the same SHA1"""
+ return self.binsha == other.binsha
def __ne__(self, other):
- """
- Returns
- True if the objects do not have the same SHA1
- """
- return self.sha != other.sha
+ """:return: True if the objects do not have the same SHA1 """
+ return self.binsha != other.binsha
def __hash__(self):
- """
- Returns
- Hash of our id allowing objects to be used in dicts and sets
- """
- return hash(self.sha)
+ """:return: Hash of our id allowing objects to be used in dicts and sets"""
+ return hash(self.binsha)
def __str__(self):
- """
- Returns
- string of our SHA1 as understood by all git commands
- """
- return self.sha
+ """:return: string of our SHA1 as understood by all git commands"""
+ return bin_to_hex(self.binsha)
def __repr__(self):
- """
- Returns
- string with pythonic representation of our object
- """
- return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
+ """:return: string with pythonic representation of our object"""
+ return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
+
+ @property
+ def hexsha(self):
+ """:return: 40 byte hex version of our 20 byte binary sha"""
+ return bin_to_hex(self.binsha)
@property
def data_stream(self):
""" :return: File Object compatible stream to the uncompressed raw data of the object
:note: returned streams must be read in order"""
- return self.repo.odb.stream(self.sha)
+ return self.repo.odb.stream(self.binsha)
def stream_data(self, ostream):
"""Writes our data directly to the given output stream
:param ostream: File object compatible stream object.
:return: self"""
- istream = self.repo.odb.stream(self.sha)
+ istream = self.repo.odb.stream(self.binsha)
stream_copy(istream, ostream)
return self
class IndexObject(Object):
- """
- Base for all objects that can be part of the index file , namely Tree, Blob and
- SubModule objects
- """
+ """Base for all objects that can be part of the index file , namely Tree, Blob and
+ SubModule objects"""
__slots__ = ("path", "mode")
- def __init__(self, repo, sha, mode=None, path=None):
- """
- Initialize a newly instanced IndexObject
- ``repo``
- is the Repo we are located in
-
- ``sha`` : string
- is the git object id as hex sha
-
- ``mode`` : int
- is the file mode as int, use the stat module to evaluate the infomration
-
- ``path`` : str
+ def __init__(self, repo, binsha, mode=None, path=None):
+ """Initialize a newly instanced IndexObject
+ :param repo: is the Repo we are located in
+ :param binsha: 20 byte sha1
+ :param mode: is the stat compatible file mode as int, use the stat module
+ to evaluate the information
+ :param path:
is the path to the file in the file system, relative to the git repository root, i.e.
file.ext or folder/other.ext
-
- NOTE
+ :note:
Path may not be set if the index object has been created directly as it cannot
- be retrieved without knowing the parent tree.
- """
- super(IndexObject, self).__init__(repo, sha)
+ be retrieved without knowing the parent tree."""
+ super(IndexObject, self).__init__(repo, binsha)
self._set_self_from_args_(locals())
- if isinstance(mode, basestring):
- self.mode = self._mode_str_to_int(mode)
def __hash__(self):
- """
- Returns
+ """:return:
Hash of our path as index items are uniquely identifiable by path, not
- by their data !
- """
+ by their data !"""
return hash(self.path)
def _set_cache_(self, attr):
@@ -184,41 +145,20 @@ class IndexObject(Object):
raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
else:
super(IndexObject, self)._set_cache_(attr)
+ # END handle slot attribute
- @classmethod
- def _mode_str_to_int(cls, modestr):
- """
- ``modestr``
- string like 755 or 644 or 100644 - only the last 6 chars will be used
-
- Returns
- String identifying a mode compatible to the mode methods ids of the
- stat module regarding the rwx permissions for user, group and other,
- special flags and file system flags, i.e. whether it is a symlink
- for example.
- """
- mode = 0
- for iteration, char in enumerate(reversed(modestr[-6:])):
- mode += int(char) << iteration*3
- # END for each char
- return mode
-
@property
def name(self):
- """
- Returns
- Name portion of the path, effectively being the basename
- """
- return os.path.basename(self.path)
+ """:return: Name portion of the path, effectively being the basename"""
+ return basename(self.path)
@property
def abspath(self):
"""
- Returns
+ :return:
Absolute path to this index object in the file system ( as opposed to the
.path field which is a path relative to the git repository ).
- The returned path will be native to the system and contains '\' on windows.
- """
+ The returned path will be native to the system and contains '\' on windows. """
return join_path_native(self.repo.working_tree_dir, self.path)
diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py
index 3f91d078..ed7a8d04 100644
--- a/lib/git/objects/blob.py
+++ b/lib/git/objects/blob.py
@@ -4,33 +4,33 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import mimetypes
+from mimetypes import guess_type
import base
+__all__ = ('Blob', )
+
class Blob(base.IndexObject):
"""A Blob encapsulates a git blob object"""
DEFAULT_MIME_TYPE = "text/plain"
type = "blob"
- __slots__ = tuple()
+ __slots__ = "data"
+
+ def _set_cache_(self, attr):
+ if attr == "data":
+ ostream = self.repo.odb.stream(self.binsha)
+ self.size = ostream.size
+ self.data = ostream.read()
+ # assert ostream.type == self.type, _assertion_msg_format % (self.binsha, ostream.type, self.type)
+ else:
+ super(Blob, self)._set_cache_(attr)
+ # END handle data
-
@property
def mime_type(self):
- """
- The mime type of this file (based on the filename)
-
- Returns
- str
-
- NOTE
- Defaults to 'text/plain' in case the actual file type is unknown.
- """
+ """ :return: String describing the mime type of this file (based on the filename)
+ :note: Defaults to 'text/plain' in case the actual file type is unknown. """
guesses = None
if self.path:
- guesses = mimetypes.guess_type(self.path)
+ guesses = guess_type(self.path)
return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
-
-
- def __repr__(self):
- return '<git.Blob "%s">' % self.sha
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index f30a6dea..3bf1fbc4 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -8,24 +8,36 @@ from git.utils import (
Iterable,
Stats,
)
-
-import git.diff as diff
+from git.diff import Diffable
from tree import Tree
from gitdb import IStream
from cStringIO import StringIO
+
import base
-import utils
-import time
-import os
+from gitdb.util import (
+ hex_to_bin
+ )
+from utils import (
+ Traversable,
+ Serializable,
+ get_user_id,
+ parse_date,
+ Actor,
+ altz_to_utctz_str
+ parse_actor_and_date
+ )
+from time import (
+ time,
+ altzone
+ )
+__all__ = ('Commit', )
-class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
- """
- Wraps a git Commit object.
+class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
+ """Wraps a git Commit object.
This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary.
- """
+ value on demand only if it involves calling the git binary."""
# ENVIRONMENT VARIABLES
# read when creating new commits
@@ -52,22 +64,19 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
"author", "authored_date", "author_tz_offset",
"committer", "committed_date", "committer_tz_offset",
"message", "parents", "encoding")
- _id_attribute_ = "sha"
+ _id_attribute_ = "binsha"
- def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
committer=None, committed_date=None, committer_tz_offset=None,
message=None, parents=None, encoding=None):
- """
- Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set if id names a valid sha.
+ """Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set on first query.
- The parameter documentation indicates the type of the argument after a colon ':'.
-
- :param sha: is the sha id of the commit or a ref
+ :param binsha: 20 byte sha1
:param parents: tuple( Commit, ... )
is a tuple of commit ids or actual Commits
:param tree: Tree
- is the corresponding tree id or an actual Tree
+ 20 byte tree sha
:param author: Actor
is the author string ( will be implicitly converted into an Actor object )
:param authored_date: int_seconds_since_epoch
@@ -86,12 +95,14 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
is the commit message
:param encoding: string
encoding of the message, defaults to UTF-8
+ :param parents:
+ List or tuple of Commit objects which are our parent(s) in the commit
+ dependency graph
:return: git.Commit
:note: Timezone information is in the same format and in the same sign
as what time.altzone returns. The sign is inverted compared to git's
- UTC timezone.
- """
+ UTC timezone."""
super(Commit,self).__init__(repo, sha)
self._set_self_from_args_(locals())
@@ -100,80 +111,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return commit.parents
def _set_cache_(self, attr):
- """ Called by LazyMixin superclass when the given uninitialized member needs
- to be set.
- We set all values at once. """
if attr in Commit.__slots__:
# read the data in a chunk, it's faster - then provide a file wrapper
- # Could use self.data, but lets try to get it with less calls
- hexsha, typename, size, data = self.repo.git.get_object_data(self)
- self._deserialize(StringIO(data))
+ binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
else:
super(Commit, self)._set_cache_(attr)
+ # END handle attrs
@property
def summary(self):
- """
- Returns
- First line of the commit message.
- """
+ """:return: First line of the commit message"""
return self.message.split('\n', 1)[0]
def count(self, paths='', **kwargs):
- """
- Count the number of commits reachable from this commit
+ """Count the number of commits reachable from this commit
- ``paths``
+ :param paths:
is an optional path or a list of paths restricting the return value
to commits actually containing the paths
- ``kwargs``
+ :param kwargs:
Additional options to be passed to git-rev-list. They must not alter
the output style of the command, or parsing will yield incorrect results
- Returns
- int
- """
+ :return: int defining the number of reachable commits"""
# yes, it makes a difference whether empty paths are given or not in our case
# as the empty paths version will ignore merge commits for some reason.
if paths:
- return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
+ return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines())
else:
- return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
+ return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines())
@property
def name_rev(self):
"""
- Returns
+ :return:
String describing the commits hex sha based on the closest Reference.
- Mostly useful for UI purposes
- """
+ Mostly useful for UI purposes"""
return self.repo.git.name_rev(self)
@classmethod
def iter_items(cls, repo, rev, paths='', **kwargs):
- """
- Find all commits matching the given criteria.
-
- ``repo``
- is the Repo
-
- ``rev``
- revision specifier, see git-rev-parse for viable options
+ """Find all commits matching the given criteria.
- ``paths``
+ :param repo: is the Repo
+ :param rev: revision specifier, see git-rev-parse for viable options
+ :param paths:
is an optional path or list of paths, if set only Commits that include the path
or paths will be considered
-
- ``kwargs``
+ :param kwargs:
optional keyword arguments to git rev-list where
``max_count`` is the maximum number of commits to fetch
``skip`` is the number of commits to skip
``since`` all commits since i.e. '1970-01-01'
-
- Returns
- iterator yielding Commit items
- """
+ :return: iterator yielding Commit items"""
if 'pretty' in kwargs:
raise ValueError("--pretty cannot be used as parsing expects single sha's only")
# END handle pretty
@@ -186,45 +178,36 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return cls._iter_from_process_or_stream(repo, proc)
def iter_parents(self, paths='', **kwargs):
- """
- Iterate _all_ parents of this commit.
-
- ``paths``
+ """Iterate _all_ parents of this commit.
+ :param paths:
Optional path or list of paths limiting the Commits to those that
contain at least one of the paths
-
- ``kwargs``
- All arguments allowed by git-rev-list
+ :param kwargs: All arguments allowed by git-rev-list
- Return:
- Iterator yielding Commit objects which are parents of self
- """
+ :return: Iterator yielding Commit objects which are parents of self """
# skip ourselves
skip = kwargs.get("skip", 1)
if skip == 0: # skip ourselves
skip = 1
kwargs['skip'] = skip
- return self.iter_items( self.repo, self, paths, **kwargs )
+ return self.iter_items(self.repo, self, paths, **kwargs)
@property
def stats(self):
- """
- Create a git stat from changes between this commit and its first parent
+ """Create a git stat from changes between this commit and its first parent
or from all changes done if this is the very first commit.
- Return
- git.Stats
- """
+ :return: git.Stats"""
if not self.parents:
- text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
+ text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True)
text2 = ""
for line in text.splitlines()[1:]:
(insertions, deletions, filename) = line.split("\t")
text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
text = text2
else:
- text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
+ text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
return Stats._list_from_string(self.repo, text)
@classmethod
@@ -260,7 +243,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
"""Commit the given tree, creating a commit object.
:param repo: Repo object the commit should be part of
- :param tree: Sha of a tree or a tree object to become the tree of the new commit
+ :param tree: Tree object or hex or bin sha
+ the tree of the new commit
:param message: Commit message. It may be an empty string if no message is provided.
It will be converted to a string in any case.
:param parent_commits:
@@ -279,8 +263,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
:note:
Additional information about the committer and Author are taken from the
environment or from the git configuration, see git-commit-tree for
- more information
- """
+ more information"""
parents = parent_commits
if parent_commits is None:
try:
@@ -299,8 +282,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
# COMMITTER AND AUTHOR INFO
cr = repo.config_reader()
- env = os.environ
- default_email = utils.get_user_id()
+ env = environ
+ default_email = get_user_id()
default_name = default_email.split('@')[0]
conf_name = cr.get_value('user', cls.conf_name, default_name)
@@ -313,19 +296,19 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
committer_email = env.get(cls.env_committer_email, conf_email)
# PARSE THE DATES
- unix_time = int(time.time())
- offset = time.altzone
+ unix_time = int(time())
+ offset = altzone
author_date_str = env.get(cls.env_author_date, '')
if author_date_str:
- author_time, author_offset = utils.parse_date(author_date_str)
+ author_time, author_offset = parse_date(author_date_str)
else:
author_time, author_offset = unix_time, offset
# END set author time
committer_date_str = env.get(cls.env_committer_date, '')
if committer_date_str:
- committer_time, committer_offset = utils.parse_date(committer_date_str)
+ committer_time, committer_offset = parse_date(committer_date_str)
else:
committer_time, committer_offset = unix_time, offset
# END set committer time
@@ -334,12 +317,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
enc_section, enc_option = cls.conf_encoding.split('.')
conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
- author = utils.Actor(author_name, author_email)
- committer = utils.Actor(committer_name, committer_email)
+ author = Actor(author_name, author_email)
+ committer = Actor(committer_name, committer_email)
+
+ # if the tree is no object, make sure we create one - otherwise
+ # the created commit object is invalid
+ if isinstance(tree, str):
+ tree = repo.tree(tree)
+ # END tree conversion
# CREATE NEW COMMIT
- new_commit = cls(repo, cls.NULL_HEX_SHA, tree,
+ new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
author, author_time, author_offset,
committer, committer_time, committer_offset,
message, parent_commits, conf_encoding)
@@ -350,7 +339,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
stream.seek(0)
istream = repo.odb.store(IStream(cls.type, streamlen, stream))
- new_commit.sha = istream.sha
+ new_commit.binsha = istream.binsha
if head:
try:
@@ -366,14 +355,6 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return new_commit
-
- def __str__(self):
- """ Convert commit to string which is SHA1 """
- return self.sha
-
- def __repr__(self):
- return '<git.Commit "%s">' % self.sha
-
#{ Serializable Implementation
def _serialize(self, stream):
@@ -387,11 +368,11 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
fmt = "%s %s <%s> %s %s\n"
write(fmt % ("author", a.name, a.email,
self.authored_date,
- utils.altz_to_utctz_str(self.author_tz_offset)))
+ altz_to_utctz_str(self.author_tz_offset)))
write(fmt % ("committer", c.name, c.email,
self.committed_date,
- utils.altz_to_utctz_str(self.committer_tz_offset)))
+ altz_to_utctz_str(self.committer_tz_offset)))
if self.encoding != self.default_encoding:
write("encoding %s\n" % self.encoding)
@@ -404,7 +385,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
readline = stream.readline
- self.tree = Tree(self.repo, readline().split()[1], Tree.tree_id<<12, '')
+ self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
self.parents = list()
next_line = None
@@ -414,12 +395,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
next_line = parent_line
break
# END abort reading parents
- self.parents.append(type(self)(self.repo, parent_line.split()[-1]))
+ self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
# END for each parent line
self.parents = tuple(self.parents)
- self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
- self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline())
+ self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
# now we can have the encoding line, or an empty line followed by the optional
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
index 5b39ab0c..2d0fd634 100644
--- a/lib/git/objects/fun.py
+++ b/lib/git/objects/fun.py
@@ -1,9 +1,10 @@
"""Module with functions which are supposed to be as fast as possible"""
+from stat import S_ISDIR
__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
'traverse_tree_recursive')
-from stat import S_ISDIR
+
def tree_to_stream(entries, write):
@@ -99,7 +100,7 @@ def _to_full_path(item, path_prefix):
def traverse_trees_recursive(odb, tree_shas, path_prefix):
"""
- :return: list with entries according to the given tree-shas.
+ :return: list with entries according to the given binary tree-shas.
The result is encoded in a list
of n tuple|None per blob/commit, (n == len(tree_shas)), where
* [0] == 20 byte sha
@@ -165,7 +166,7 @@ def traverse_trees_recursive(odb, tree_shas, path_prefix):
def traverse_tree_recursive(odb, tree_sha, path_prefix):
"""
- :return: list of entries of the tree pointed to by tree_sha. An entry
+ :return: list of entries of the tree pointed to by the binary tree_sha. An entry
has the following format:
* [0] 20 byte sha
* [1] mode as int
diff --git a/lib/git/objects/submodule.py b/lib/git/objects/submodule.py
index 4742d448..1f571a48 100644
--- a/lib/git/objects/submodule.py
+++ b/lib/git/objects/submodule.py
@@ -1,5 +1,6 @@
import base
+__all__ = ("Submodule", )
class Submodule(base.IndexObject):
"""Implements access to a git submodule. They are special in that their sha
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
index 96363db6..2e6ec878 100644
--- a/lib/git/objects/tag.py
+++ b/lib/git/objects/tag.py
@@ -3,77 +3,63 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-"""
-Module containing all object based types.
-"""
+""" Module containing all object based types. """
import base
-import utils
+from gitdb.util import hex_to_bin
+from utils import (
+ get_object_type_by_name,
+ parse_actor_and_date
+ )
-class TagObject(base.Object):
- """
- Non-Lightweight tag carrying additional information about an object we are pointing
- to.
- """
- type = "tag"
- __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
-
- def __init__(self, repo, sha, object=None, tag=None,
- tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
- """
- Initialize a tag object with additional data
-
- ``repo``
- repository this object is located in
-
- ``sha``
- SHA1 or ref suitable for git-rev-parse
-
- ``object``
- Object instance of object we are pointing to
-
- ``tag``
- name of this tag
-
- ``tagger``
- Actor identifying the tagger
-
- ``tagged_date`` : int_seconds_since_epoch
- is the DateTime of the tag creation - use time.gmtime to convert
- it into a different format
-
- ``tagged_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
+__all__ = ("TagObject", )
- """
- super(TagObject, self).__init__(repo, sha )
- self._set_self_from_args_(locals())
-
- def _set_cache_(self, attr):
- """
- Cache all our attributes at once
- """
- if attr in TagObject.__slots__:
- lines = self.data.splitlines()
-
- obj, hexsha = lines[0].split(" ") # object <hexsha>
- type_token, type_name = lines[1].split(" ") # type <type_name>
- self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha)
-
- self.tag = lines[2][4:] # tag <tag name>
-
- tagger_info = lines[3][7:]# tagger <actor> <date>
- self.tagger, self.tagged_date, self.tagger_tz_offset = utils.parse_actor_and_date(tagger_info)
-
- # line 4 empty - it could mark the beginning of the next header
- # in csse there really is no message, it would not exist. Otherwise
- # a newline separates header from message
- if len(lines) > 5:
- self.message = "\n".join(lines[5:])
- else:
- self.message = ''
- # END check our attributes
- else:
- super(TagObject, self)._set_cache_(attr)
-
-
+class TagObject(base.Object):
+ """Non-Lightweight tag carrying additional information about an object we are pointing to."""
+ type = "tag"
+ __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
+
+ def __init__(self, repo, binsha, object=None, tag=None,
+ tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
+ """Initialize a tag object with additional data
+
+ :param repo: repository this object is located in
+ :param binsha: 20 byte SHA1
+ :param object: Object instance of object we are pointing to
+ :param tag: name of this tag
+ :param tagger: Actor identifying the tagger
+ :param tagged_date: int_seconds_since_epoch
+ is the DateTime of the tag creation - use time.gmtime to convert
+ it into a different format
+ :param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
+ tagged_date is in, in a format similar to time.altzone"""
+ super(TagObject, self).__init__(repo, sha )
+ self._set_self_from_args_(locals())
+
+ def _set_cache_(self, attr):
+ """Cache all our attributes at once"""
+ if attr in TagObject.__slots__:
+ ostream = self.repo.odb.stream(self.binsha)
+ lines = ostream.read().splitlines()
+
+ obj, hexsha = lines[0].split(" ") # object <hexsha>
+ type_token, type_name = lines[1].split(" ") # type <type_name>
+ self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
+
+ self.tag = lines[2][4:] # tag <tag name>
+
+ tagger_info = lines[3][7:]# tagger <actor> <date>
+ self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
+
+ # line 4 empty - it could mark the beginning of the next header
+ # in case there really is no message, it would not exist. Otherwise
+ # a newline separates header from message
+ if len(lines) > 5:
+ self.message = "\n".join(lines[5:])
+ else:
+ self.message = ''
+ # END check our attributes
+ else:
+ super(TagObject, self)._set_cache_(attr)
+
+
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index 6b1d13c1..b6902fbb 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -3,15 +3,12 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
import utils
-import base
+from base import IndexObject
from blob import Blob
from submodule import Submodule
import git.diff as diff
-join = os.path.join
from fun import (
tree_entries_from_data,
@@ -19,7 +16,8 @@ from fun import (
)
from gitdb.util import to_bin_sha
-from binascii import b2a_hex
+
+__all__ = ("TreeModifier", "Tree")
class TreeModifier(object):
"""A utility class providing methods to alter the underlying cache in a list-like
@@ -63,7 +61,7 @@ class TreeModifier(object):
:return: self"""
if '/' in name:
raise ValueError("Name must not contain '/' characters")
- if (mode >> 12) not in Tree._map_id_to_type:
+ if (mode >> 12) not in self._map_id_to_type:
raise ValueError("Invalid object type according to mode %o" % mode)
sha = to_bin_sha(sha)
@@ -99,12 +97,8 @@ class TreeModifier(object):
#} END mutators
-class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializable):
- """
- Tress represent a ordered list of Blobs and other Trees. Hence it can be
- accessed like a list.
-
- Tree's will cache their contents after first retrieval to improve efficiency.
+class Tree(IndexObject, diff.Diffable, utils.Traversable, utils.Serializable):
+ """Tree objects represent an ordered list of Blobs and other Trees.
``Tree as a list``::
@@ -113,8 +107,6 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
You may as well access by index
blob = tree[0]
-
-
"""
type = "tree"
@@ -134,8 +126,8 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
}
- def __init__(self, repo, sha, mode=tree_id<<12, path=None):
- super(Tree, self).__init__(repo, sha, mode, path)
+ def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
+ super(Tree, self).__init__(repo, binsha, mode, path)
@classmethod
def _get_intermediate_items(cls, index_object):
@@ -146,39 +138,28 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def _set_cache_(self, attr):
if attr == "_cache":
# Set the data when we need it
- self._cache = tree_entries_from_data(self.data)
+ ostream = self.repo.odb.stream(self.binsha)
+ self._cache = tree_entries_from_data(ostream.read())
else:
super(Tree, self)._set_cache_(attr)
+ # END handle attribute
def _iter_convert_to_object(self, iterable):
- """Iterable yields tuples of (hexsha, mode, name), which will be converted
+ """Iterable yields tuples of (binsha, mode, name), which will be converted
to the respective object representation"""
for binsha, mode, name in iterable:
path = join(self.path, name)
- type_id = mode >> 12
try:
- yield self._map_id_to_type[type_id](self.repo, b2a_hex(binsha), mode, path)
+ yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) # type id is mode >> 12; the full mode is passed on
except KeyError:
- raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path))
+ raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
# END for each item
def __div__(self, file):
- """
- Find the named object in this tree's contents
-
- Examples::
-
- >>> Repo('/path/to/python-git').tree/'lib'
- <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e">
- >>> Repo('/path/to/python-git').tree/'README.txt'
- <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
-
- Returns
- ``git.Blob`` or ``git.Tree``
+ """Find the named object in this tree's contents
+ :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
- Raise
- KeyError if given file or tree does not exist in tree
- """
+ :raise KeyError: if given file or tree does not exist in tree"""
msg = "Blob or Tree named %r not found"
if '/' in file:
tree = self
@@ -201,29 +182,20 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
else:
for info in self._cache:
if info[2] == file: # [2] == name
- return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
# END for each obj
raise KeyError( msg % file )
# END handle long paths
- def __repr__(self):
- return '<git.Tree "%s">' % self.sha
-
@property
def trees(self):
- """
- Returns
- list(Tree, ...) list of trees directly below this tree
- """
+ """:return: list(Tree, ...) list of trees directly below this tree"""
return [ i for i in self if i.type == "tree" ]
@property
def blobs(self):
- """
- Returns
- list(Blob, ...) list of blobs directly below this tree
- """
+ """:return: list(Blob, ...) list of blobs directly below this tree"""
return [ i for i in self if i.type == "blob" ]
@property
@@ -238,7 +210,6 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
prune = lambda i,d: False, depth = -1, branch_first=True,
visit_once = False, ignore_self=1 ):
"""For documentation, see utils.Traversable.traverse
-
Trees are set to visit_once = False to gain more performance in the traversal"""
return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
@@ -255,7 +226,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def __getitem__(self, item):
if isinstance(item, int):
info = self._cache[item]
- return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
if isinstance(item, basestring):
# compatability
@@ -266,9 +237,9 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def __contains__(self, item):
- if isinstance(item, base.IndexObject):
+ if isinstance(item, IndexObject):
for info in self._cache:
- if item.sha == info[0]:
+ if item.binsha == info[0]:
return True
# END compare sha
# END for each entry
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 072662ee..c0ddd6e6 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -3,9 +3,7 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-"""
-Module for general utility functions
-"""
+"""Module for general utility functions"""
import re
from collections import deque as Deque
import platform
@@ -20,18 +18,28 @@ __all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_
#{ Functions
+def mode_str_to_int(modestr):
+ """
+ :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used
+ :return:
+ Integer identifying a mode compatible with the mode ids of the
+ stat module regarding the rwx permissions for user, group and other,
+ special flags and file system flags, for example whether it is
+ a symlink."""
+ mode = 0
+ for iteration, char in enumerate(reversed(modestr[-6:])):
+ mode += int(char) << iteration*3
+ # END for each char
+ return mode
+
def get_object_type_by_name(object_type_name):
"""
- Returns
- type suitable to handle the given object type name.
+ :return: type suitable to handle the given object type name.
Use the type to create new instances.
- ``object_type_name``
- Member of TYPES
+ :param object_type_name: Member of TYPES
- Raises
- ValueError: In case object_type_name is unknown
- """
+ :raise ValueError: In case object_type_name is unknown"""
if object_type_name == "commit":
import commit
return commit.Commit
@@ -169,14 +177,11 @@ def parse_date(string_date):
_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
def parse_actor_and_date(line):
- """
- Parse out the actor (author or committer) info from a line like::
+ """Parse out the actor (author or committer) info from a line like::
- author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
- Returns
- [Actor, int_seconds_since_epoch, int_timezone_offset]
- """
+ :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
m = _re_actor_epoch.search(line)
actor, epoch, offset = m.groups()
return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
@@ -238,13 +243,11 @@ class Actor(object):
class ProcessStreamAdapter(object):
- """
- Class wireing all calls to the contained Process instance.
+ """Class wiring all calls to the contained Process instance.
Use this type to hide the underlying process to provide access only to a specified
stream. The process is usually wrapped into an AutoInterrupt class to kill
- it if the instance goes out of scope.
- """
+ it if the instance goes out of scope."""
__slots__ = ("_proc", "_stream")
def __init__(self, process, stream_name):
self._proc = process
@@ -274,36 +277,33 @@ class Traversable(object):
def traverse( self, predicate = lambda i,d: True,
prune = lambda i,d: False, depth = -1, branch_first=True,
visit_once = True, ignore_self=1, as_edge = False ):
- """
- ``Returns``
- iterator yieling of items found when traversing self
+ """:return: iterator yielding the items found when traversing self
- ``predicate``
- f(i,d) returns False if item i at depth d should not be included in the result
+ :param predicate: f(i,d) returns False if item i at depth d should not be included in the result
- ``prune``
+ :param prune:
f(i,d) return True if the search should stop at item i at depth d.
Item i will not be returned.
- ``depth``
+ :param depth:
define at which level the iteration should not go deeper
if -1, there is no limit
if 0, you would effectively only get self, the root of the iteration
i.e. if 1, you would only get the first level of predessessors/successors
- ``branch_first``
+ :param branch_first:
if True, items will be returned branch first, otherwise depth first
- ``visit_once``
+ :param visit_once:
if True, items will only be returned once, although they might be encountered
several times. Loops are prevented that way.
- ``ignore_self``
+ :param ignore_self:
if True, self will be ignored and automatically pruned from
the result. Otherwise it will be the first item to be returned.
If as_edge is True, the source of the first edge is None
- ``as_edge``
+ :param as_edge:
if True, return a pair of items, first being the source, second the
destinatination, i.e. tuple(src, dest) with the edge spanning from
source to destination"""