summaryrefslogtreecommitdiff
path: root/lib/git/objects
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git/objects')
-rw-r--r--lib/git/objects/__init__.py1
-rw-r--r--lib/git/objects/base.py103
-rw-r--r--lib/git/objects/blob.py2
-rw-r--r--lib/git/objects/commit.py140
-rw-r--r--lib/git/objects/tag.py15
-rw-r--r--lib/git/objects/tree.py60
-rw-r--r--lib/git/objects/utils.py18
7 files changed, 190 insertions, 149 deletions
diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py
index 39e650b7..192750e3 100644
--- a/lib/git/objects/__init__.py
+++ b/lib/git/objects/__init__.py
@@ -2,6 +2,7 @@
Import all submodules main classes into the package space
"""
import inspect
+from base import *
from tag import *
from blob import *
from tree import *
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 3b48e066..b0989a43 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -15,22 +15,16 @@ class Object(LazyMixin):
This Object also serves as a constructor for instances of the correct type::
- inst = Object(repo,id)
+ inst = Object.new(repo,id)
+ inst.sha # object's sha in hex
+ inst.size # object's uncompressed data size
+ inst.data # byte string containing the whole data of the object
"""
+ NULL_HEX_SHA = '0'*40
TYPES = ("blob", "tree", "commit", "tag")
- __slots__ = ("repo", "id", "size", "data" )
+ __slots__ = ("repo", "sha", "size", "data" )
type = None # to be set by subclass
- def __new__(cls, repo, id, *args, **kwargs):
- if cls is Object:
- hexsha, typename, size = repo.git.get_object_header(id)
- obj_type = utils.get_object_type_by_name(typename)
- inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs)
- inst.size = size
- return inst
- else:
- return super(Object,cls).__new__(cls, repo, id, *args, **kwargs)
-
def __init__(self, repo, id):
"""
Initialize an object by identifying it by its id. All keyword arguments
@@ -44,8 +38,26 @@ class Object(LazyMixin):
"""
super(Object,self).__init__()
self.repo = repo
- self.id = id
-
+ self.sha = id
+
+ @classmethod
+ def new(cls, repo, id):
+ """
+ Return
+ New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a hexsha even though
+ the input id may have been a Reference or Rev-Spec
+
+ Note
+ This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a hexsha.
+ """
+ hexsha, typename, size = repo.git.get_object_header(id)
+ obj_type = utils.get_object_type_by_name(typename)
+ inst = obj_type(repo, hexsha)
+ inst.size = size
+ return inst
+
def _set_self_from_args_(self, args_dict):
"""
Initialize attributes on self from the given dict that was retrieved
@@ -64,11 +76,11 @@ class Object(LazyMixin):
Retrieve object information
"""
if attr == "size":
- hexsha, typename, self.size = self.repo.git.get_object_header(self.id)
- assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type)
+ hexsha, typename, self.size = self.repo.git.get_object_header(self.sha)
+ assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
elif attr == "data":
- hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id)
- assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type)
+ hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha)
+ assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
else:
super(Object,self)._set_cache_(attr)
@@ -77,36 +89,57 @@ class Object(LazyMixin):
Returns
True if the objects have the same SHA1
"""
- return self.id == other.id
+ return self.sha == other.sha
def __ne__(self, other):
"""
Returns
True if the objects do not have the same SHA1
"""
- return self.id != other.id
+ return self.sha != other.sha
def __hash__(self):
"""
Returns
Hash of our id allowing objects to be used in dicts and sets
"""
- return hash(self.id)
+ return hash(self.sha)
def __str__(self):
"""
Returns
string of our SHA1 as understood by all git commands
"""
- return self.id
+ return self.sha
def __repr__(self):
"""
Returns
string with pythonic representation of our object
"""
- return '<git.%s "%s">' % (self.__class__.__name__, self.id)
+ return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
+
+ @property
+ def data_stream(self):
+ """
+ Returns
+ File Object compatible stream to the uncompressed raw data of the object
+ """
+ proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
+ return utils.ProcessStreamAdapter(proc, "stdout")
+ def stream_data(self, ostream):
+ """
+ Writes our data directly to the given output stream
+
+ ``ostream``
+ File object compatible stream object.
+
+ Returns
+ self
+ """
+ self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
+ return self
class IndexObject(Object):
"""
@@ -115,13 +148,13 @@ class IndexObject(Object):
"""
__slots__ = ("path", "mode")
- def __init__(self, repo, id, mode=None, path=None):
+ def __init__(self, repo, sha, mode=None, path=None):
"""
Initialize a newly instanced IndexObject
``repo``
is the Repo we are located in
- ``id`` : string
+ ``sha`` : string
is the git object id as hex sha
``mode`` : int
@@ -135,7 +168,7 @@ class IndexObject(Object):
Path may not be set of the index object has been created directly as it cannot
be retrieved without knowing the parent tree.
"""
- super(IndexObject, self).__init__(repo, id)
+ super(IndexObject, self).__init__(repo, sha)
self._set_self_from_args_(locals())
if isinstance(mode, basestring):
self.mode = self._mode_str_to_int(mode)
@@ -162,5 +195,21 @@ class IndexObject(Object):
mode += int(char) << iteration*3
# END for each char
return mode
-
+
+ @property
+ def name(self):
+ """
+ Returns
+ Name portion of the path, effectively being the basename
+ """
+ return os.path.basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ Returns
+ Absolute path to this index object in the file system ( as opposed to the
+ .path field which is a path relative to the git repository )
+ """
+ return os.path.join(self.repo.git.git_dir, self.path)
diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py
index 88ca73d6..11dee323 100644
--- a/lib/git/objects/blob.py
+++ b/lib/git/objects/blob.py
@@ -33,4 +33,4 @@ class Blob(base.IndexObject):
def __repr__(self):
- return '<git.Blob "%s">' % self.id
+ return '<git.Blob "%s">' % self.sha
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 847f4dec..80b3ad23 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -11,7 +11,7 @@ from tree import Tree
import base
import utils
-class Commit(base.Object, Iterable):
+class Commit(base.Object, Iterable, diff.Diffable):
"""
Wraps a git Commit object.
@@ -23,8 +23,9 @@ class Commit(base.Object, Iterable):
type = "commit"
__slots__ = ("tree", "author", "authored_date", "committer", "committed_date",
"message", "parents")
+ _id_attribute_ = "sha"
- def __init__(self, repo, id, tree=None, author=None, authored_date=None,
+ def __init__(self, repo, sha, tree=None, author=None, authored_date=None,
committer=None, committed_date=None, message=None, parents=None):
"""
Instantiate a new Commit. All keyword arguments taking None as default will
@@ -32,7 +33,7 @@ class Commit(base.Object, Iterable):
The parameter documentation indicates the type of the argument after a colon ':'.
- ``id``
+ ``sha``
is the sha id of the commit or a ref
``parents`` : tuple( Commit, ... )
@@ -61,15 +62,15 @@ class Commit(base.Object, Iterable):
Returns
git.Commit
"""
- super(Commit,self).__init__(repo, id)
+ super(Commit,self).__init__(repo, sha)
self._set_self_from_args_(locals())
if parents is not None:
self.parents = tuple( self.__class__(repo, p) for p in parents )
# END for each parent to convert
- if self.id and tree is not None:
- self.tree = Tree(repo, id=tree, path='')
+ if self.sha and tree is not None:
+ self.tree = Tree(repo, tree, path='')
# END id to tree conversion
def _set_cache_(self, attr):
@@ -81,8 +82,8 @@ class Commit(base.Object, Iterable):
if attr in Commit.__slots__:
# prepare our data lines to match rev-list
data_lines = self.data.splitlines()
- data_lines.insert(0, "commit %s" % self.id)
- temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next()
+ data_lines.insert(0, "commit %s" % self.sha)
+ temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
self.parents = temp.parents
self.tree = temp.tree
self.author = temp.author
@@ -101,27 +102,30 @@ class Commit(base.Object, Iterable):
"""
return self.message.split('\n', 1)[0]
- @classmethod
- def count(cls, repo, rev, paths='', **kwargs):
+ def count(self, paths='', **kwargs):
"""
- Count the number of commits reachable from this revision
-
- ``repo``
- is the Repo
-
- ``rev``
- revision specifier, see git-rev-parse for viable options
+ Count the number of commits reachable from this commit
``paths``
is an optinal path or a list of paths restricting the return value
to commits actually containing the paths
``kwargs``
- Additional options to be passed to git-rev-list
+ Additional options to be passed to git-rev-list. They must not alter
+ the output style of the command, or parsing will yield incorrect results
Returns
int
"""
- return len(repo.git.rev_list(rev, '--', paths, **kwargs).strip().splitlines())
+ return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).strip().splitlines())
+
+ @property
+ def name_rev(self):
+ """
+ Returns
+ String describing the commit's hex sha based on the closest Reference.
+ Mostly useful for UI purposes
+ """
+ return self.repo.git.name_rev(self)
@classmethod
def iter_items(cls, repo, rev, paths='', **kwargs):
@@ -150,9 +154,8 @@ class Commit(base.Object, Iterable):
options = {'pretty': 'raw', 'as_process' : True }
options.update(kwargs)
- # the test system might confront us with string values -
proc = repo.git.rev_list(rev, '--', paths, **options)
- return cls._iter_from_process_or_stream(repo, proc)
+ return cls._iter_from_process_or_stream(repo, proc, True)
def iter_parents(self, paths='', **kwargs):
"""
@@ -176,60 +179,6 @@ class Commit(base.Object, Iterable):
return self.iter_items( self.repo, self, paths, **kwargs )
- @classmethod
- def diff(cls, repo, a, b=None, paths=None):
- """
- Creates diffs between a tree and the index or between two trees:
-
- ``repo``
- is the Repo
-
- ``a``
- is a named commit
-
- ``b``
- is an optional named commit. Passing a list assumes you
- wish to omit the second named commit and limit the diff to the
- given paths.
-
- ``paths``
- is a list of paths to limit the diff to.
-
- Returns
- git.Diff[]::
-
- between tree and the index if only a is given
- between two trees if a and b are given and are commits
- """
- paths = paths or []
-
- if isinstance(b, list):
- paths = b
- b = None
-
- if paths:
- paths.insert(0, "--")
-
- if b:
- paths.insert(0, b)
- paths.insert(0, a)
- text = repo.git.diff('-M', full_index=True, *paths)
- return diff.Diff._list_from_string(repo, text)
-
- @property
- def diffs(self):
- """
- Returns
- git.Diff[]
- Diffs between this commit and its first parent or all changes if this
- commit is the first commit and has no parent.
- """
- if not self.parents:
- d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw')
- return diff.Diff._list_from_string(self.repo, d)
- else:
- return self.diff(self.repo, self.parents[0].id, self.id)
-
@property
def stats(self):
"""
@@ -240,18 +189,18 @@ class Commit(base.Object, Iterable):
git.Stats
"""
if not self.parents:
- text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True)
+ text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
text2 = ""
for line in text.splitlines()[1:]:
(insertions, deletions, filename) = line.split("\t")
text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
text = text2
else:
- text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True)
+ text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
return stats.Stats._list_from_string(self.repo, text)
@classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
"""
Parse out commit information into a list of Commit objects
@@ -261,6 +210,9 @@ class Commit(base.Object, Iterable):
``proc``
git-rev-list process instance (raw format)
+ ``from_rev_list``
+ If True, the stream was created by rev-list in which case we parse
+ the message differently
Returns
iterator returning Commit objects
"""
@@ -269,8 +221,9 @@ class Commit(base.Object, Iterable):
stream = proc_or_stream.stdout
for line in stream:
- id = line.split()[1]
- assert line.split()[0] == "commit"
+ commit_tokens = line.split()
+ id = commit_tokens[1]
+ assert commit_tokens[0] == "commit"
tree = stream.next().split()[1]
parents = []
@@ -290,24 +243,31 @@ class Commit(base.Object, Iterable):
stream.next()
message_lines = []
- next_line = None
- for msg_line in stream:
- if not msg_line.startswith(' '):
- break
- # END abort message reading
- message_lines.append(msg_line.strip())
- # END while there are message lines
+ if from_rev_list:
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ # and forget about this empty marker
+ break
+ # END abort message reading
+ # strip leading 4 spaces
+ message_lines.append(msg_line[4:])
+ # END while there are message lines
+ else:
+ # a stream from our data simply gives us the plain message
+ for msg_line in stream:
+ message_lines.append(msg_line)
+ # END message parsing
message = '\n'.join(message_lines)
- yield Commit(repo, id=id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date,
+ yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date,
committer=committer, committed_date=committed_date, message=message)
# END for each line in stream
def __str__(self):
""" Convert commit to string which is SHA1 """
- return self.id
+ return self.sha
def __repr__(self):
- return '<git.Commit "%s">' % self.id
+ return '<git.Commit "%s">' % self.sha
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
index f54d4b64..c329edf7 100644
--- a/lib/git/objects/tag.py
+++ b/lib/git/objects/tag.py
@@ -17,7 +17,7 @@ class TagObject(base.Object):
type = "tag"
__slots__ = ( "object", "tag", "tagger", "tagged_date", "message" )
- def __init__(self, repo, id, object=None, tag=None,
+ def __init__(self, repo, sha, object=None, tag=None,
tagger=None, tagged_date=None, message=None):
"""
Initialize a tag object with additional data
@@ -25,7 +25,7 @@ class TagObject(base.Object):
``repo``
repository this object is located in
- ``id``
+ ``sha``
SHA1 or ref suitable for git-rev-parse
``object``
@@ -41,7 +41,7 @@ class TagObject(base.Object):
is the DateTime of the tag creation - use time.gmtime to convert
it into a different format
"""
- super(TagObject, self).__init__(repo, id )
+ super(TagObject, self).__init__(repo, sha )
self._set_self_from_args_(locals())
def _set_cache_(self, attr):
@@ -60,8 +60,13 @@ class TagObject(base.Object):
tagger_info = lines[3][7:]# tagger <actor> <date>
self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info)
- # line 4 empty - check git source to figure out purpose
- self.message = "\n".join(lines[5:])
+ # line 4 empty - it could mark the beginning of the next header
+ # in case there really is no message, it would not exist. Otherwise
+ # a newline separates header from message
+ if len(lines) > 5:
+ self.message = "\n".join(lines[5:])
+ else:
+ self.message = ''
# END check our attributes
else:
super(TagObject, self)._set_cache_(attr)
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index abfa9622..27bd84d0 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -8,6 +8,7 @@ import os
import blob
import base
import binascii
+import git.diff as diff
def sha_to_hex(sha):
"""Takes a string and returns the hex of the sha within"""
@@ -15,7 +16,7 @@ def sha_to_hex(sha):
assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
return hexsha
-class Tree(base.IndexObject):
+class Tree(base.IndexObject, diff.Diffable):
"""
Tress represent a ordered list of Blobs and other Trees. Hence it can be
accessed like a list.
@@ -37,13 +38,14 @@ class Tree(base.IndexObject):
__slots__ = "_cache"
# using ascii codes for comparison
- ascii_commit_id = (0x31 << 4) + 0x36
- ascii_blob_id = (0x31 << 4) + 0x30
- ascii_tree_id = (0x34 << 4) + 0x30
+ commit_id = 016
+ blob_id = 010
+ symlink_id = 012
+ tree_id = 040
- def __init__(self, repo, id, mode=0, path=None):
- super(Tree, self).__init__(repo, id, mode, path)
+ def __init__(self, repo, sha, mode=0, path=None):
+ super(Tree, self).__init__(repo, sha, mode, path)
def _set_cache_(self, attr):
if attr == "_cache":
@@ -87,8 +89,8 @@ class Tree(base.IndexObject):
mode = 0
mode_boundary = i + 6
- # keep it ascii - we compare against the respective values
- type_id = (ord(data[i])<<4) + ord(data[i+1])
+ # read type
+ type_id = ((ord(data[i])-ord_zero)<<3) + (ord(data[i+1])-ord_zero)
i += 2
while data[i] != ' ':
@@ -108,18 +110,20 @@ class Tree(base.IndexObject):
i += 1
# END while not reached NULL
name = data[ns:i]
+ path = os.path.join(self.path, name)
# byte is NULL, get next 20
i += 1
sha = data[i:i+20]
i = i + 20
+ mode |= type_id<<12
hexsha = sha_to_hex(sha)
- if type_id == self.ascii_blob_id:
- yield blob.Blob(self.repo, hexsha, mode, name)
- elif type_id == self.ascii_tree_id:
- yield Tree(self.repo, hexsha, mode, name)
- elif type_id == self.ascii_commit_id:
+ if type_id == self.blob_id or type_id == self.symlink_id:
+ yield blob.Blob(self.repo, hexsha, mode, path)
+ elif type_id == self.tree_id:
+ yield Tree(self.repo, hexsha, mode, path)
+ elif type_id == self.commit_id:
# todo
yield None
else:
@@ -148,29 +152,28 @@ class Tree(base.IndexObject):
def __repr__(self):
- return '<git.Tree "%s">' % self.id
+ return '<git.Tree "%s">' % self.sha
@classmethod
- def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ):
+ def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate, prune ):
for obj in tree:
- # adjust path to be complete
- obj.path = os.path.join(tree.path, obj.path)
- if not predicate(obj):
- continue
- yield obj
- if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ):
- for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ):
+ if predicate(obj):
+ yield obj
+ if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ) and not prune(obj):
+ for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate, prune ):
yield recursive_obj
# END for each recursive object
# END if we may enter recursion
# END for each object
- def traverse(self, max_depth=-1, predicate = lambda i: True):
+ def traverse(self, max_depth=-1, predicate = lambda i: True, prune = lambda t: False):
"""
Returns
+
Iterator to traverse the tree recursively up to the given level.
- The iterator returns Blob and Tree objects
+ The iterator returns Blob and Tree objects with paths relative to their
+ repository.
``max_depth``
@@ -181,8 +184,13 @@ class Tree(base.IndexObject):
``predicate``
If predicate(item) returns True, item will be returned by iterator
+
+ ``prune``
+
+ If prune(tree) returns True, the traversal will not continue into the
+ given tree object.
"""
- return self._iter_recursive( self.repo, self, 0, max_depth, predicate )
+ return self._iter_recursive( self.repo, self, 0, max_depth, predicate, prune )
@property
def trees(self):
@@ -218,7 +226,7 @@ class Tree(base.IndexObject):
if isinstance(item, basestring):
# compatability
for obj in self._cache:
- if obj.path == item:
+ if obj.name == item:
return obj
# END for each obj
raise KeyError( "Blob or Tree named %s not found" % item )
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 367ed2b7..7bb4e8e2 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -52,3 +52,21 @@ def parse_actor_and_date(line):
m = _re_actor_epoch.search(line)
actor, epoch = m.groups()
return (Actor._from_string(actor), int(epoch))
+
+
+
+class ProcessStreamAdapter(object):
+ """
+ Class wiring all calls to the contained Process instance.
+
+ Use this type to hide the underlying process to provide access only to a specified
+ stream. The process is usually wrapped into an AutoInterrupt class to kill
+ it if the instance goes out of scope.
+ """
+ __slots__ = ("_proc", "_stream")
+ def __init__(self, process, stream_name):
+ self._proc = process
+ self._stream = getattr(process, stream_name)
+
+ def __getattr__(self, attr):
+ return getattr(self._stream, attr)