summary | refs | log | tree | commit | diff
path: root/lib/git/objects
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2010-06-25 23:58:24 +0200
committer Sebastian Thiel <byronimo@gmail.com> 2010-06-25 23:58:24 +0200
commit 47e3138ee978ce708a41f38a0d874376d7ae5c78 (patch)
tree 0880f5c8f2a375e718c69fcffd15e87b7f4aecae /lib/git/objects
parent 58fb1187b7b8f1e62d3930bdba9be5aba47a52c6 (diff)
download gitpython-47e3138ee978ce708a41f38a0d874376d7ae5c78.tar.gz
Adjusted all files to (hopefully) deal with the fact that all objects now use 20-byte SHAs internally, as it is closer to the GitDB implementation
Switched all remaining files back to tabs. Adjusted all remaining docstrings to suit the Sphinx doc convention - it's likely that there are many docstring syntax errors, though
Diffstat (limited to 'lib/git/objects')
-rw-r--r--lib/git/objects/base.py196
-rw-r--r--lib/git/objects/blob.py34
-rw-r--r--lib/git/objects/commit.py187
-rw-r--r--lib/git/objects/fun.py7
-rw-r--r--lib/git/objects/submodule.py1
-rw-r--r--lib/git/objects/tag.py126
-rw-r--r--lib/git/objects/tree.py75
-rw-r--r--lib/git/objects/utils.py62
8 files changed, 284 insertions, 404 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 90aa8ca2..118bc3ca 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -3,179 +3,140 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import os
from git.utils import LazyMixin, join_path_native, stream_copy
-import utils
+from utils import get_object_type_by_name
+from gitdb.util import (
+ hex_to_bin,
+ bin_to_hex,
+ basename
+ )
+
+import gitdb.typ as dbtyp
_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
+__all__ = ("Object", "IndexObject")
+
class Object(LazyMixin):
- """
- Implements an Object which may be Blobs, Trees, Commits and Tags
-
- This Object also serves as a constructor for instances of the correct type::
-
- inst = Object.new(repo,id)
- inst.sha # objects sha in hex
- inst.size # objects uncompressed data size
- inst.data # byte string containing the whole data of the object
- """
+ """Implements an Object which may be Blobs, Trees, Commits and Tags"""
NULL_HEX_SHA = '0'*40
NULL_BIN_SHA = '\0'*20
- TYPES = ("blob", "tree", "commit", "tag")
- __slots__ = ("repo", "sha", "size", "data" )
+
+ TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
+ __slots__ = ("repo", "binsha", "size" )
type = None # to be set by subclass
- def __init__(self, repo, id):
- """
- Initialize an object by identifying it by its id. All keyword arguments
- will be set on demand if None.
+ def __init__(self, repo, binsha):
+ """Initialize an object by identifying it by its binary sha.
+ All keyword arguments will be set on demand if None.
- ``repo``
- repository this object is located in
+ :param repo: repository this object is located in
- ``id``
- SHA1 or ref suitable for git-rev-parse
- """
+ :param binsha: 20 byte SHA1"""
super(Object,self).__init__()
self.repo = repo
- self.sha = id
+ self.binsha = binsha
@classmethod
def new(cls, repo, id):
"""
- Return
- New Object instance of a type appropriate to the object type behind
- id. The id of the newly created object will be a hexsha even though
+ :return: New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a binsha even though
the input id may have been a Reference or Rev-Spec
- Note
- This cannot be a __new__ method as it would always call __init__
- with the input id which is not necessarily a hexsha.
- """
+ :param id: reference, rev-spec, or hexsha
+
+ :note: This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a binsha."""
hexsha, typename, size = repo.git.get_object_header(id)
- obj_type = utils.get_object_type_by_name(typename)
- inst = obj_type(repo, hexsha)
+ inst = get_object_type_by_name(typename)(repo, hex_to_bin(hexsha))
inst.size = size
return inst
def _set_self_from_args_(self, args_dict):
- """
- Initialize attributes on self from the given dict that was retrieved
+ """Initialize attributes on self from the given dict that was retrieved
from locals() in the calling method.
Will only set an attribute on self if the corresponding value in args_dict
- is not None
- """
+ is not None"""
for attr, val in args_dict.items():
if attr != "self" and val is not None:
setattr( self, attr, val )
# END set all non-None attributes
def _set_cache_(self, attr):
- """
- Retrieve object information
- """
+ """Retrieve object information"""
if attr == "size":
- oinfo = self.repo.odb.info(self.sha)
+ oinfo = self.repo.odb.info(self.binsha)
self.size = oinfo.size
- assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type)
- elif attr == "data":
- ostream = self.repo.odb.stream(self.sha)
- self.size = ostream.size
- self.data = ostream.read()
- assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type)
+ # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
else:
super(Object,self)._set_cache_(attr)
def __eq__(self, other):
- """
- Returns
- True if the objects have the same SHA1
- """
- return self.sha == other.sha
+ """:return: True if the objects have the same SHA1"""
+ return self.binsha == other.binsha
def __ne__(self, other):
- """
- Returns
- True if the objects do not have the same SHA1
- """
- return self.sha != other.sha
+ """:return: True if the objects do not have the same SHA1 """
+ return self.binsha != other.binsha
def __hash__(self):
- """
- Returns
- Hash of our id allowing objects to be used in dicts and sets
- """
- return hash(self.sha)
+ """:return: Hash of our id allowing objects to be used in dicts and sets"""
+ return hash(self.binsha)
def __str__(self):
- """
- Returns
- string of our SHA1 as understood by all git commands
- """
- return self.sha
+ """:return: string of our SHA1 as understood by all git commands"""
+ return bin_to_hex(self.binsha)
def __repr__(self):
- """
- Returns
- string with pythonic representation of our object
- """
- return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
+ """:return: string with pythonic representation of our object"""
+ return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
+
+ @property
+ def hexsha(self):
+ """:return: 40 byte hex version of our 20 byte binary sha"""
+ return bin_to_hex(self.binsha)
@property
def data_stream(self):
""" :return: File Object compatible stream to the uncompressed raw data of the object
:note: returned streams must be read in order"""
- return self.repo.odb.stream(self.sha)
+ return self.repo.odb.stream(self.binsha)
def stream_data(self, ostream):
"""Writes our data directly to the given output stream
:param ostream: File object compatible stream object.
:return: self"""
- istream = self.repo.odb.stream(self.sha)
+ istream = self.repo.odb.stream(self.binsha)
stream_copy(istream, ostream)
return self
class IndexObject(Object):
- """
- Base for all objects that can be part of the index file , namely Tree, Blob and
- SubModule objects
- """
+ """Base for all objects that can be part of the index file , namely Tree, Blob and
+ SubModule objects"""
__slots__ = ("path", "mode")
- def __init__(self, repo, sha, mode=None, path=None):
- """
- Initialize a newly instanced IndexObject
- ``repo``
- is the Repo we are located in
-
- ``sha`` : string
- is the git object id as hex sha
-
- ``mode`` : int
- is the file mode as int, use the stat module to evaluate the infomration
-
- ``path`` : str
+ def __init__(self, repo, binsha, mode=None, path=None):
+ """Initialize a newly instanced IndexObject
+ :param repo: is the Repo we are located in
+ :param binsha: 20 byte sha1
+ :param mode: is the stat compatible file mode as int, use the stat module
+ to evaluate the infomration
+ :param path:
is the path to the file in the file system, relative to the git repository root, i.e.
file.ext or folder/other.ext
-
- NOTE
+ :note:
Path may not be set of the index object has been created directly as it cannot
- be retrieved without knowing the parent tree.
- """
- super(IndexObject, self).__init__(repo, sha)
+ be retrieved without knowing the parent tree."""
+ super(IndexObject, self).__init__(repo, binsha)
self._set_self_from_args_(locals())
- if isinstance(mode, basestring):
- self.mode = self._mode_str_to_int(mode)
def __hash__(self):
- """
- Returns
+ """:return:
Hash of our path as index items are uniquely identifyable by path, not
- by their data !
- """
+ by their data !"""
return hash(self.path)
def _set_cache_(self, attr):
@@ -184,41 +145,20 @@ class IndexObject(Object):
raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
else:
super(IndexObject, self)._set_cache_(attr)
+ # END hanlde slot attribute
- @classmethod
- def _mode_str_to_int(cls, modestr):
- """
- ``modestr``
- string like 755 or 644 or 100644 - only the last 6 chars will be used
-
- Returns
- String identifying a mode compatible to the mode methods ids of the
- stat module regarding the rwx permissions for user, group and other,
- special flags and file system flags, i.e. whether it is a symlink
- for example.
- """
- mode = 0
- for iteration, char in enumerate(reversed(modestr[-6:])):
- mode += int(char) << iteration*3
- # END for each char
- return mode
-
@property
def name(self):
- """
- Returns
- Name portion of the path, effectively being the basename
- """
- return os.path.basename(self.path)
+ """:return: Name portion of the path, effectively being the basename"""
+ return basename(self.path)
@property
def abspath(self):
"""
- Returns
+ :return:
Absolute path to this index object in the file system ( as opposed to the
.path field which is a path relative to the git repository ).
- The returned path will be native to the system and contains '\' on windows.
- """
+ The returned path will be native to the system and contains '\' on windows. """
return join_path_native(self.repo.working_tree_dir, self.path)
diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py
index 3f91d078..ed7a8d04 100644
--- a/lib/git/objects/blob.py
+++ b/lib/git/objects/blob.py
@@ -4,33 +4,33 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import mimetypes
+from mimetypes import guess_type
import base
+__all__ = ('Blob', )
+
class Blob(base.IndexObject):
"""A Blob encapsulates a git blob object"""
DEFAULT_MIME_TYPE = "text/plain"
type = "blob"
- __slots__ = tuple()
+ __slots__ = "data"
+
+ def _set_cache_(self, attr):
+ if attr == "data":
+ ostream = self.repo.odb.stream(self.binsha)
+ self.size = ostream.size
+ self.data = ostream.read()
+ # assert ostream.type == self.type, _assertion_msg_format % (self.binsha, ostream.type, self.type)
+ else:
+ super(Blob, self)._set_cache_(attr)
+ # END handle data
-
@property
def mime_type(self):
- """
- The mime type of this file (based on the filename)
-
- Returns
- str
-
- NOTE
- Defaults to 'text/plain' in case the actual file type is unknown.
- """
+ """ :return:String describing the mime type of this file (based on the filename)
+ :note: Defaults to 'text/plain' in case the actual file type is unknown. """
guesses = None
if self.path:
- guesses = mimetypes.guess_type(self.path)
+ guesses = guess_type(self.path)
return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
-
-
- def __repr__(self):
- return '<git.Blob "%s">' % self.sha
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index f30a6dea..3bf1fbc4 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -8,24 +8,36 @@ from git.utils import (
Iterable,
Stats,
)
-
-import git.diff as diff
+from git.diff import Diffable
from tree import Tree
from gitdb import IStream
from cStringIO import StringIO
+
import base
-import utils
-import time
-import os
+from gitdb.util import (
+ hex_to_bin
+ )
+from utils import (
+ Traversable,
+ Serializable,
+ get_user_id,
+ parse_date,
+ Actor,
+ altz_to_utctz_str
+ parse_actor_and_date
+ )
+from time import (
+ time,
+ altzone
+ )
+__all__ = ('Commit', )
-class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
- """
- Wraps a git Commit object.
+class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
+ """Wraps a git Commit object.
This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary.
- """
+ value on demand only if it involves calling the git binary."""
# ENVIRONMENT VARIABLES
# read when creating new commits
@@ -52,22 +64,19 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
"author", "authored_date", "author_tz_offset",
"committer", "committed_date", "committer_tz_offset",
"message", "parents", "encoding")
- _id_attribute_ = "sha"
+ _id_attribute_ = "binsha"
- def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
committer=None, committed_date=None, committer_tz_offset=None,
message=None, parents=None, encoding=None):
- """
- Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set if id names a valid sha.
+ """Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set on first query.
- The parameter documentation indicates the type of the argument after a colon ':'.
-
- :param sha: is the sha id of the commit or a ref
+ :param binsha: 20 byte sha1
:param parents: tuple( Commit, ... )
is a tuple of commit ids or actual Commits
:param tree: Tree
- is the corresponding tree id or an actual Tree
+ 20 byte tree sha
:param author: Actor
is the author string ( will be implicitly converted into an Actor object )
:param authored_date: int_seconds_since_epoch
@@ -86,12 +95,14 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
is the commit message
:param encoding: string
encoding of the message, defaults to UTF-8
+ :param parents:
+ List or tuple of Commit objects which are our parent(s) in the commit
+ dependency graph
:return: git.Commit
:note: Timezone information is in the same format and in the same sign
as what time.altzone returns. The sign is inverted compared to git's
- UTC timezone.
- """
+ UTC timezone."""
super(Commit,self).__init__(repo, sha)
self._set_self_from_args_(locals())
@@ -100,80 +111,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return commit.parents
def _set_cache_(self, attr):
- """ Called by LazyMixin superclass when the given uninitialized member needs
- to be set.
- We set all values at once. """
if attr in Commit.__slots__:
# read the data in a chunk, its faster - then provide a file wrapper
- # Could use self.data, but lets try to get it with less calls
- hexsha, typename, size, data = self.repo.git.get_object_data(self)
- self._deserialize(StringIO(data))
+ binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
else:
super(Commit, self)._set_cache_(attr)
+ # END handle attrs
@property
def summary(self):
- """
- Returns
- First line of the commit message.
- """
+ """:return: First line of the commit message"""
return self.message.split('\n', 1)[0]
def count(self, paths='', **kwargs):
- """
- Count the number of commits reachable from this commit
+ """Count the number of commits reachable from this commit
- ``paths``
+ :param paths:
is an optinal path or a list of paths restricting the return value
to commits actually containing the paths
- ``kwargs``
+ :param kwargs:
Additional options to be passed to git-rev-list. They must not alter
the ouput style of the command, or parsing will yield incorrect results
- Returns
- int
- """
+ :return: int defining the number of reachable commits"""
# yes, it makes a difference whether empty paths are given or not in our case
# as the empty paths version will ignore merge commits for some reason.
if paths:
- return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
+ return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines())
else:
- return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
+ return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines())
@property
def name_rev(self):
"""
- Returns
+ :return:
String describing the commits hex sha based on the closest Reference.
- Mostly useful for UI purposes
- """
+ Mostly useful for UI purposes"""
return self.repo.git.name_rev(self)
@classmethod
def iter_items(cls, repo, rev, paths='', **kwargs):
- """
- Find all commits matching the given criteria.
-
- ``repo``
- is the Repo
-
- ``rev``
- revision specifier, see git-rev-parse for viable options
+ """Find all commits matching the given criteria.
- ``paths``
+ :param repo: is the Repo
+ :param rev: revision specifier, see git-rev-parse for viable options
+ :param paths:
is an optinal path or list of paths, if set only Commits that include the path
or paths will be considered
-
- ``kwargs``
+ :param kwargs:
optional keyword arguments to git rev-list where
``max_count`` is the maximum number of commits to fetch
``skip`` is the number of commits to skip
``since`` all commits since i.e. '1970-01-01'
-
- Returns
- iterator yielding Commit items
- """
+ :return: iterator yielding Commit items"""
if 'pretty' in kwargs:
raise ValueError("--pretty cannot be used as parsing expects single sha's only")
# END handle pretty
@@ -186,45 +178,36 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return cls._iter_from_process_or_stream(repo, proc)
def iter_parents(self, paths='', **kwargs):
- """
- Iterate _all_ parents of this commit.
-
- ``paths``
+ """Iterate _all_ parents of this commit.
+ :param paths:
Optional path or list of paths limiting the Commits to those that
contain at least one of the paths
-
- ``kwargs``
- All arguments allowed by git-rev-list
+ :param kwargs: All arguments allowed by git-rev-list
- Return:
- Iterator yielding Commit objects which are parents of self
- """
+ :return: Iterator yielding Commit objects which are parents of self """
# skip ourselves
skip = kwargs.get("skip", 1)
if skip == 0: # skip ourselves
skip = 1
kwargs['skip'] = skip
- return self.iter_items( self.repo, self, paths, **kwargs )
+ return self.iter_items(self.repo, self, paths, **kwargs)
@property
def stats(self):
- """
- Create a git stat from changes between this commit and its first parent
+ """Create a git stat from changes between this commit and its first parent
or from all changes done if this is the very first commit.
- Return
- git.Stats
- """
+ :return: git.Stats"""
if not self.parents:
- text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
+ text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True)
text2 = ""
for line in text.splitlines()[1:]:
(insertions, deletions, filename) = line.split("\t")
text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
text = text2
else:
- text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
+ text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
return Stats._list_from_string(self.repo, text)
@classmethod
@@ -260,7 +243,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
"""Commit the given tree, creating a commit object.
:param repo: Repo object the commit should be part of
- :param tree: Sha of a tree or a tree object to become the tree of the new commit
+ :param tree: Tree object or hex or bin sha
+ the tree of the new commit
:param message: Commit message. It may be an empty string if no message is provided.
It will be converted to a string in any case.
:param parent_commits:
@@ -279,8 +263,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
:note:
Additional information about the committer and Author are taken from the
environment or from the git configuration, see git-commit-tree for
- more information
- """
+ more information"""
parents = parent_commits
if parent_commits is None:
try:
@@ -299,8 +282,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
# COMMITER AND AUTHOR INFO
cr = repo.config_reader()
- env = os.environ
- default_email = utils.get_user_id()
+ env = environ
+ default_email = get_user_id()
default_name = default_email.split('@')[0]
conf_name = cr.get_value('user', cls.conf_name, default_name)
@@ -313,19 +296,19 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
committer_email = env.get(cls.env_committer_email, conf_email)
# PARSE THE DATES
- unix_time = int(time.time())
- offset = time.altzone
+ unix_time = int(time())
+ offset = altzone
author_date_str = env.get(cls.env_author_date, '')
if author_date_str:
- author_time, author_offset = utils.parse_date(author_date_str)
+ author_time, author_offset = parse_date(author_date_str)
else:
author_time, author_offset = unix_time, offset
# END set author time
committer_date_str = env.get(cls.env_committer_date, '')
if committer_date_str:
- committer_time, committer_offset = utils.parse_date(committer_date_str)
+ committer_time, committer_offset = parse_date(committer_date_str)
else:
committer_time, committer_offset = unix_time, offset
# END set committer time
@@ -334,12 +317,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
enc_section, enc_option = cls.conf_encoding.split('.')
conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
- author = utils.Actor(author_name, author_email)
- committer = utils.Actor(committer_name, committer_email)
+ author = Actor(author_name, author_email)
+ committer = Actor(committer_name, committer_email)
+
+ # if the tree is no object, make sure we create one - otherwise
+ # the created commit object is invalid
+ if isinstance(tree, str):
+ tree = repo.tree(tree)
+ # END tree conversion
# CREATE NEW COMMIT
- new_commit = cls(repo, cls.NULL_HEX_SHA, tree,
+ new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
author, author_time, author_offset,
committer, committer_time, committer_offset,
message, parent_commits, conf_encoding)
@@ -350,7 +339,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
stream.seek(0)
istream = repo.odb.store(IStream(cls.type, streamlen, stream))
- new_commit.sha = istream.sha
+ new_commit.binsha = istream.binsha
if head:
try:
@@ -366,14 +355,6 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return new_commit
-
- def __str__(self):
- """ Convert commit to string which is SHA1 """
- return self.sha
-
- def __repr__(self):
- return '<git.Commit "%s">' % self.sha
-
#{ Serializable Implementation
def _serialize(self, stream):
@@ -387,11 +368,11 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
fmt = "%s %s <%s> %s %s\n"
write(fmt % ("author", a.name, a.email,
self.authored_date,
- utils.altz_to_utctz_str(self.author_tz_offset)))
+ altz_to_utctz_str(self.author_tz_offset)))
write(fmt % ("committer", c.name, c.email,
self.committed_date,
- utils.altz_to_utctz_str(self.committer_tz_offset)))
+ altz_to_utctz_str(self.committer_tz_offset)))
if self.encoding != self.default_encoding:
write("encoding %s\n" % self.encoding)
@@ -404,7 +385,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
readline = stream.readline
- self.tree = Tree(self.repo, readline().split()[1], Tree.tree_id<<12, '')
+ self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
self.parents = list()
next_line = None
@@ -414,12 +395,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
next_line = parent_line
break
# END abort reading parents
- self.parents.append(type(self)(self.repo, parent_line.split()[-1]))
+ self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
# END for each parent line
self.parents = tuple(self.parents)
- self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
- self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline())
+ self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
# now we can have the encoding line, or an empty line followed by the optional
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
index 5b39ab0c..2d0fd634 100644
--- a/lib/git/objects/fun.py
+++ b/lib/git/objects/fun.py
@@ -1,9 +1,10 @@
"""Module with functions which are supposed to be as fast as possible"""
+from stat import S_ISDIR
__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
'traverse_tree_recursive')
-from stat import S_ISDIR
+
def tree_to_stream(entries, write):
@@ -99,7 +100,7 @@ def _to_full_path(item, path_prefix):
def traverse_trees_recursive(odb, tree_shas, path_prefix):
"""
- :return: list with entries according to the given tree-shas.
+ :return: list with entries according to the given binary tree-shas.
The result is encoded in a list
of n tuple|None per blob/commit, (n == len(tree_shas)), where
* [0] == 20 byte sha
@@ -165,7 +166,7 @@ def traverse_trees_recursive(odb, tree_shas, path_prefix):
def traverse_tree_recursive(odb, tree_sha, path_prefix):
"""
- :return: list of entries of the tree pointed to by tree_sha. An entry
+ :return: list of entries of the tree pointed to by the binary tree_sha. An entry
has the following format:
* [0] 20 byte sha
* [1] mode as int
diff --git a/lib/git/objects/submodule.py b/lib/git/objects/submodule.py
index 4742d448..1f571a48 100644
--- a/lib/git/objects/submodule.py
+++ b/lib/git/objects/submodule.py
@@ -1,5 +1,6 @@
import base
+__all__ = ("Submodule", )
class Submodule(base.IndexObject):
"""Implements access to a git submodule. They are special in that their sha
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
index 96363db6..2e6ec878 100644
--- a/lib/git/objects/tag.py
+++ b/lib/git/objects/tag.py
@@ -3,77 +3,63 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-"""
-Module containing all object based types.
-"""
+""" Module containing all object based types. """
import base
-import utils
+from gitdb.util import hex_to_bin
+from utils import (
+ get_object_type_by_name,
+ parse_actor_and_date
+ )
-class TagObject(base.Object):
- """
- Non-Lightweight tag carrying additional information about an object we are pointing
- to.
- """
- type = "tag"
- __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
-
- def __init__(self, repo, sha, object=None, tag=None,
- tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
- """
- Initialize a tag object with additional data
-
- ``repo``
- repository this object is located in
-
- ``sha``
- SHA1 or ref suitable for git-rev-parse
-
- ``object``
- Object instance of object we are pointing to
-
- ``tag``
- name of this tag
-
- ``tagger``
- Actor identifying the tagger
-
- ``tagged_date`` : int_seconds_since_epoch
- is the DateTime of the tag creation - use time.gmtime to convert
- it into a different format
-
- ``tagged_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
+__all__ = ("TagObject", )
- """
- super(TagObject, self).__init__(repo, sha )
- self._set_self_from_args_(locals())
-
- def _set_cache_(self, attr):
- """
- Cache all our attributes at once
- """
- if attr in TagObject.__slots__:
- lines = self.data.splitlines()
-
- obj, hexsha = lines[0].split(" ") # object <hexsha>
- type_token, type_name = lines[1].split(" ") # type <type_name>
- self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha)
-
- self.tag = lines[2][4:] # tag <tag name>
-
- tagger_info = lines[3][7:]# tagger <actor> <date>
- self.tagger, self.tagged_date, self.tagger_tz_offset = utils.parse_actor_and_date(tagger_info)
-
- # line 4 empty - it could mark the beginning of the next header
- # in csse there really is no message, it would not exist. Otherwise
- # a newline separates header from message
- if len(lines) > 5:
- self.message = "\n".join(lines[5:])
- else:
- self.message = ''
- # END check our attributes
- else:
- super(TagObject, self)._set_cache_(attr)
-
-
+class TagObject(base.Object):
+ """Non-Lightweight tag carrying additional information about an object we are pointing to."""
+ type = "tag"
+ __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
+
+ def __init__(self, repo, binsha, object=None, tag=None,
+ tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
+ """Initialize a tag object with additional data
+
+ :param repo: repository this object is located in
+ :param binsha: 20 byte SHA1
+ :param object: Object instance of object we are pointing to
+ :param tag: name of this tag
+ :param tagger: Actor identifying the tagger
+ :param tagged_date: int_seconds_since_epoch
+ is the DateTime of the tag creation - use time.gmtime to convert
+ it into a different format
+ :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the
+ authored_date is in, in a format similar to time.altzone"""
+ super(TagObject, self).__init__(repo, sha )
+ self._set_self_from_args_(locals())
+
+ def _set_cache_(self, attr):
+ """Cache all our attributes at once"""
+ if attr in TagObject.__slots__:
+ ostream = self.repo.odb.stream(self.binsha)
+ lines = ostream.read().splitlines()
+
+ obj, hexsha = lines[0].split(" ") # object <hexsha>
+ type_token, type_name = lines[1].split(" ") # type <type_name>
+ self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
+
+ self.tag = lines[2][4:] # tag <tag name>
+
+ tagger_info = lines[3][7:]# tagger <actor> <date>
+ self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
+
+ # line 4 empty - it could mark the beginning of the next header
+ # in case there really is no message, it would not exist. Otherwise
+ # a newline separates header from message
+ if len(lines) > 5:
+ self.message = "\n".join(lines[5:])
+ else:
+ self.message = ''
+ # END check our attributes
+ else:
+ super(TagObject, self)._set_cache_(attr)
+
+
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index 6b1d13c1..b6902fbb 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -3,15 +3,12 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
import utils
-import base
+from base import IndexObject
from blob import Blob
from submodule import Submodule
import git.diff as diff
-join = os.path.join
from fun import (
tree_entries_from_data,
@@ -19,7 +16,8 @@ from fun import (
)
from gitdb.util import to_bin_sha
-from binascii import b2a_hex
+
+__all__ = ("TreeModifier", "Tree")
class TreeModifier(object):
"""A utility class providing methods to alter the underlying cache in a list-like
@@ -63,7 +61,7 @@ class TreeModifier(object):
:return: self"""
if '/' in name:
raise ValueError("Name must not contain '/' characters")
- if (mode >> 12) not in Tree._map_id_to_type:
+ if (mode >> 12) not in self._map_id_to_type:
raise ValueError("Invalid object type according to mode %o" % mode)
sha = to_bin_sha(sha)
@@ -99,12 +97,8 @@ class TreeModifier(object):
#} END mutators
-class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializable):
- """
- Tress represent a ordered list of Blobs and other Trees. Hence it can be
- accessed like a list.
-
- Tree's will cache their contents after first retrieval to improve efficiency.
+class Tree(IndexObject, diff.Diffable, utils.Traversable, utils.Serializable):
+ """Tree objects represent an ordered list of Blobs and other Trees.
``Tree as a list``::
@@ -113,8 +107,6 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
You may as well access by index
blob = tree[0]
-
-
"""
type = "tree"
@@ -134,8 +126,8 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
}
- def __init__(self, repo, sha, mode=tree_id<<12, path=None):
- super(Tree, self).__init__(repo, sha, mode, path)
+ def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
+ super(Tree, self).__init__(repo, binsha, mode, path)
@classmethod
def _get_intermediate_items(cls, index_object):
@@ -146,39 +138,28 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def _set_cache_(self, attr):
if attr == "_cache":
# Set the data when we need it
- self._cache = tree_entries_from_data(self.data)
+ ostream = self.repo.odb.stream(self.binsha)
+ self._cache = tree_entries_from_data(ostream.read())
else:
super(Tree, self)._set_cache_(attr)
+ # END handle attribute
def _iter_convert_to_object(self, iterable):
- """Iterable yields tuples of (hexsha, mode, name), which will be converted
+ """Iterable yields tuples of (binsha, mode, name), which will be converted
to the respective object representation"""
for binsha, mode, name in iterable:
path = join(self.path, name)
- type_id = mode >> 12
try:
- yield self._map_id_to_type[type_id](self.repo, b2a_hex(binsha), mode, path)
+ yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) # type id selects the class, the object keeps the full mode
except KeyError:
- raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path))
+ raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
# END for each item
def __div__(self, file):
- """
- Find the named object in this tree's contents
-
- Examples::
-
- >>> Repo('/path/to/python-git').tree/'lib'
- <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e">
- >>> Repo('/path/to/python-git').tree/'README.txt'
- <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
-
- Returns
- ``git.Blob`` or ``git.Tree``
+ """Find the named object in this tree's contents
+ :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
- Raise
- KeyError if given file or tree does not exist in tree
- """
+ :raise KeyError: if given file or tree does not exist in tree"""
msg = "Blob or Tree named %r not found"
if '/' in file:
tree = self
@@ -201,29 +182,20 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
else:
for info in self._cache:
if info[2] == file: # [2] == name
- return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
# END for each obj
raise KeyError( msg % file )
# END handle long paths
- def __repr__(self):
- return '<git.Tree "%s">' % self.sha
-
@property
def trees(self):
- """
- Returns
- list(Tree, ...) list of trees directly below this tree
- """
+ """:return: list(Tree, ...) list of trees directly below this tree"""
return [ i for i in self if i.type == "tree" ]
@property
def blobs(self):
- """
- Returns
- list(Blob, ...) list of blobs directly below this tree
- """
+ """:return: list(Blob, ...) list of blobs directly below this tree"""
return [ i for i in self if i.type == "blob" ]
@property
@@ -238,7 +210,6 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
prune = lambda i,d: False, depth = -1, branch_first=True,
visit_once = False, ignore_self=1 ):
"""For documentation, see utils.Traversable.traverse
-
Trees are set to visit_once = False to gain more performance in the traversal"""
return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
@@ -255,7 +226,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def __getitem__(self, item):
if isinstance(item, int):
info = self._cache[item]
- return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
+ return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
if isinstance(item, basestring):
# compatability
@@ -266,9 +237,9 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def __contains__(self, item):
- if isinstance(item, base.IndexObject):
+ if isinstance(item, IndexObject):
for info in self._cache:
- if item.sha == info[0]:
+ if item.binsha == info[0]:
return True
# END compare sha
# END for each entry
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 072662ee..c0ddd6e6 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -3,9 +3,7 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-"""
-Module for general utility functions
-"""
+"""Module for general utility functions"""
import re
from collections import deque as Deque
import platform
@@ -20,18 +18,28 @@ __all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_
#{ Functions
+def mode_str_to_int(modestr):
+ """
+ :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used
+ :return:
+ Integer identifying a mode compatible to the mode methods ids of the
+ stat module regarding the rwx permissions for user, group and other,
+ special flags and file system flags, i.e. whether it is a symlink
+ for example."""
+ mode = 0
+ for iteration, char in enumerate(reversed(modestr[-6:])):
+ mode += int(char) << iteration*3 # each digit contributes 3 bits, last digit first
+ # END for each char
+ return mode
+
def get_object_type_by_name(object_type_name):
"""
- Returns
- type suitable to handle the given object type name.
+ :return: type suitable to handle the given object type name.
Use the type to create new instances.
- ``object_type_name``
- Member of TYPES
+ :param object_type_name: Member of TYPES
- Raises
- ValueError: In case object_type_name is unknown
- """
+ :raise ValueError: In case object_type_name is unknown"""
if object_type_name == "commit":
import commit
return commit.Commit
@@ -169,14 +177,11 @@ def parse_date(string_date):
_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
def parse_actor_and_date(line):
- """
- Parse out the actor (author or committer) info from a line like::
+ """Parse out the actor (author or committer) info from a line like::
- author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
- Returns
- [Actor, int_seconds_since_epoch, int_timezone_offset]
- """
+ :return: tuple(Actor, int_seconds_since_epoch, int_timezone_offset)"""
m = _re_actor_epoch.search(line)
actor, epoch, offset = m.groups()
return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
@@ -238,13 +243,11 @@ class Actor(object):
class ProcessStreamAdapter(object):
- """
- Class wireing all calls to the contained Process instance.
+ """Class wiring all calls to the contained Process instance.
Use this type to hide the underlying process to provide access only to a specified
stream. The process is usually wrapped into an AutoInterrupt class to kill
- it if the instance goes out of scope.
- """
+ it if the instance goes out of scope."""
__slots__ = ("_proc", "_stream")
def __init__(self, process, stream_name):
self._proc = process
@@ -274,36 +277,33 @@ class Traversable(object):
def traverse( self, predicate = lambda i,d: True,
prune = lambda i,d: False, depth = -1, branch_first=True,
visit_once = True, ignore_self=1, as_edge = False ):
- """
- ``Returns``
- iterator yieling of items found when traversing self
+ """:return: iterator yielding the items found when traversing self
- ``predicate``
- f(i,d) returns False if item i at depth d should not be included in the result
+ :param predicate: f(i,d) returns False if item i at depth d should not be included in the result
- ``prune``
+ :param prune:
f(i,d) return True if the search should stop at item i at depth d.
Item i will not be returned.
- ``depth``
+ :param depth:
define at which level the iteration should not go deeper
if -1, there is no limit
if 0, you would effectively only get self, the root of the iteration
i.e. if 1, you would only get the first level of predessessors/successors
- ``branch_first``
+ :param branch_first:
if True, items will be returned branch first, otherwise depth first
- ``visit_once``
+ :param visit_once:
if True, items will only be returned once, although they might be encountered
several times. Loops are prevented that way.
- ``ignore_self``
+ :param ignore_self:
if True, self will be ignored and automatically pruned from
the result. Otherwise it will be the first item to be returned.
If as_edge is True, the source of the first edge is None
- ``as_edge``
+ :param as_edge:
if True, return a pair of items, first being the source, second the
destinatination, i.e. tuple(src, dest) with the edge spanning from
source to destination"""