diff options
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- | lib/git/objects/__init__.py | 1 | ||||
-rw-r--r-- | lib/git/objects/base.py | 103 | ||||
-rw-r--r-- | lib/git/objects/blob.py | 2 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 140 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 15 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 60 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 18 |
7 files changed, 190 insertions, 149 deletions
diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py index 39e650b7..192750e3 100644 --- a/lib/git/objects/__init__.py +++ b/lib/git/objects/__init__.py @@ -2,6 +2,7 @@ Import all submodules main classes into the package space """ import inspect +from base import * from tag import * from blob import * from tree import * diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 3b48e066..b0989a43 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -15,22 +15,16 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: - inst = Object(repo,id) + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object """ + NULL_HEX_SHA = '0'*40 TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size", "data" ) + __slots__ = ("repo", "sha", "size", "data" ) type = None # to be set by subclass - def __new__(cls, repo, id, *args, **kwargs): - if cls is Object: - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs) - inst.size = size - return inst - else: - return super(Object,cls).__new__(cls, repo, id, *args, **kwargs) - def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments @@ -44,8 +38,26 @@ class Object(LazyMixin): """ super(Object,self).__init__() self.repo = repo - self.id = id - + self.sha = id + + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + def _set_self_from_args_(self, args_dict): """ Initialize attributes on self from the given dict that was retrieved @@ -64,11 +76,11 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.id) - assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) - assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) else: super(Object,self)._set_cache_(attr) @@ -77,36 +89,57 @@ class Object(LazyMixin): Returns True if the objects have the same SHA1 """ - return self.id == other.id + return self.sha == other.sha def __ne__(self, other): """ Returns True if the objects do not have the same SHA1 """ - return self.id != other.id + return self.sha != other.sha def __hash__(self): """ Returns Hash of our id allowing objects to be used in dicts and sets """ - return hash(self.id) + return hash(self.sha) def __str__(self): """ Returns string of our SHA1 as understood by all git commands """ - return self.id + return self.sha def __repr__(self): """ Returns string with pythonic representation of our object """ - return '<git.%s "%s">' % (self.__class__.__name__, self.id) + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + return self class IndexObject(Object): """ @@ -115,13 +148,13 @@ class IndexObject(Object): """ __slots__ = ("path", "mode") - def __init__(self, repo, id, mode=None, path=None): + def __init__(self, repo, sha, mode=None, path=None): """ Initialize a newly instanced IndexObject ``repo`` is the Repo we are located in - ``id`` : string + ``sha`` : string is the git object id as hex sha ``mode`` : int @@ -135,7 +168,7 @@ class IndexObject(Object): Path may not be set of the index object has been created directly as it cannot be retrieved without knowing the parent tree. """ - super(IndexObject, self).__init__(repo, id) + super(IndexObject, self).__init__(repo, sha) self._set_self_from_args_(locals()) if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) @@ -162,5 +195,21 @@ class IndexObject(Object): mode += int(char) << iteration*3 # END for each char return mode - + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ) + """ + return os.path.join(self.repo.git.git_dir, self.path) diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py index 88ca73d6..11dee323 100644 --- a/lib/git/objects/blob.py +++ b/lib/git/objects/blob.py @@ -33,4 +33,4 @@ class Blob(base.IndexObject): def __repr__(self): - return '<git.Blob "%s">' % self.id + return '<git.Blob "%s">' % self.sha diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 847f4dec..80b3ad23 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -11,7 +11,7 @@ from tree import Tree import base import utils -class Commit(base.Object, Iterable): +class Commit(base.Object, Iterable, diff.Diffable): """ Wraps a git Commit object. @@ -23,8 +23,9 @@ class Commit(base.Object, Iterable): type = "commit" __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", "message", "parents") + _id_attribute_ = "sha" - def __init__(self, repo, id, tree=None, author=None, authored_date=None, + def __init__(self, repo, sha, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): """ Instantiate a new Commit. All keyword arguments taking None as default will @@ -32,7 +33,7 @@ class Commit(base.Object, Iterable): The parameter documentation indicates the type of the argument after a colon ':'. - ``id`` + ``sha`` is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... ) @@ -61,15 +62,15 @@ class Commit(base.Object, Iterable): Returns git.Commit """ - super(Commit,self).__init__(repo, id) + super(Commit,self).__init__(repo, sha) self._set_self_from_args_(locals()) if parents is not None: self.parents = tuple( self.__class__(repo, p) for p in parents ) # END for each parent to convert - if self.id and tree is not None: - self.tree = Tree(repo, id=tree, path='') + if self.sha and tree is not None: + self.tree = Tree(repo, tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -81,8 +82,8 @@ class Commit(base.Object, Iterable): if attr in Commit.__slots__: # prepare our data lines to match rev-list data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.id) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() + data_lines.insert(0, "commit %s" % self.sha) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -101,27 +102,30 @@ class Commit(base.Object, Iterable): """ return self.message.split('\n', 1)[0] - @classmethod - def count(cls, repo, rev, paths='', **kwargs): + def count(self, paths='', **kwargs): """ - Count the number of commits reachable from this revision - - ``repo`` - is the Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options + Count the number of commits reachable from this commit ``paths`` is an optinal path or a list of paths restricting the return value to commits actually containing the paths ``kwargs`` - Additional options to be passed to git-rev-list + Additional options to be passed to git-rev-list. They must not alter + the ouput style of the command, or parsing will yield incorrect results Returns int """ - return len(repo.git.rev_list(rev, '--', paths, **kwargs).strip().splitlines()) + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).strip().splitlines()) + + @property + def name_rev(self): + """ + Returns + String describing the commits hex sha based on the closest Reference. + Mostly useful for UI purposes + """ + return self.repo.git.name_rev(self) @classmethod def iter_items(cls, repo, rev, paths='', **kwargs): @@ -150,9 +154,8 @@ class Commit(base.Object, Iterable): options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - # the test system might confront us with string values - proc = repo.git.rev_list(rev, '--', paths, **options) - return cls._iter_from_process_or_stream(repo, proc) + return cls._iter_from_process_or_stream(repo, proc, True) def iter_parents(self, paths='', **kwargs): """ @@ -176,60 +179,6 @@ class Commit(base.Object, Iterable): return self.iter_items( self.repo, self, paths, **kwargs ) - @classmethod - def diff(cls, repo, a, b=None, paths=None): - """ - Creates diffs between a tree and the index or between two trees: - - ``repo`` - is the Repo - - ``a`` - is a named commit - - ``b`` - is an optional named commit. Passing a list assumes you - wish to omit the second named commit and limit the diff to the - given paths. - - ``paths`` - is a list of paths to limit the diff to. - - Returns - git.Diff[]:: - - between tree and the index if only a is given - between two trees if a and b are given and are commits - """ - paths = paths or [] - - if isinstance(b, list): - paths = b - b = None - - if paths: - paths.insert(0, "--") - - if b: - paths.insert(0, b) - paths.insert(0, a) - text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff._list_from_string(repo, text) - - @property - def diffs(self): - """ - Returns - git.Diff[] - Diffs between this commit and its first parent or all changes if this - commit is the first commit and has no parent. - """ - if not self.parents: - d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff._list_from_string(self.repo, d) - else: - return self.diff(self.repo, self.parents[0].id, self.id) - @property def stats(self): """ @@ -240,18 +189,18 @@ class Commit(base.Object, Iterable): git.Stats """ if not self.parents: - text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) + text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) text2 = "" for line in text.splitlines()[1:]: (insertions, deletions, filename) = line.split("\t") text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) text = text2 else: - text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) + text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) return stats.Stats._list_from_string(self.repo, text) @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): + def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): """ Parse out commit information into a list of Commit objects @@ -261,6 +210,9 @@ class Commit(base.Object, Iterable): ``proc`` git-rev-list process instance (raw format) + ``from_rev_list`` + If True, the stream was created by rev-list in which case we parse + the message differently Returns iterator returning Commit objects """ @@ -269,8 +221,9 @@ class Commit(base.Object, Iterable): stream = proc_or_stream.stdout for line in stream: - id = line.split()[1] - assert line.split()[0] == "commit" + commit_tokens = line.split() + id = commit_tokens[1] + assert commit_tokens[0] == "commit" tree = stream.next().split()[1] parents = [] @@ -290,24 +243,31 @@ class Commit(base.Object, Iterable): stream.next() message_lines = [] - next_line = None - for msg_line in stream: - if not msg_line.startswith(' '): - break - # END abort message reading - message_lines.append(msg_line.strip()) - # END while there are message lines + if from_rev_list: + for msg_line in stream: + if not msg_line.startswith(' '): + # and forget about this empty marker + break + # END abort message reading + # strip leading 4 spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + else: + # a stream from our data simply gives us the plain message + for msg_line in stream: + message_lines.append(msg_line) + # END message parsing message = '\n'.join(message_lines) - yield Commit(repo, id=id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, + yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, committer=committer, committed_date=committed_date, message=message) # END for each line in stream def __str__(self): """ Convert commit to string which is SHA1 """ - return self.id + return self.sha def __repr__(self): - return '<git.Commit "%s">' % self.id + return '<git.Commit "%s">' % self.sha diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index f54d4b64..c329edf7 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -17,7 +17,7 @@ class TagObject(base.Object): type = "tag" __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - def __init__(self, repo, id, object=None, tag=None, + def __init__(self, repo, sha, object=None, tag=None, tagger=None, tagged_date=None, message=None): """ Initialize a tag object with additional data @@ -25,7 +25,7 @@ class TagObject(base.Object): ``repo`` repository this object is located in - ``id`` + ``sha`` SHA1 or ref suitable for git-rev-parse ``object`` @@ -41,7 +41,7 @@ class TagObject(base.Object): is the DateTime of the tag creation - use time.gmtime to convert it into a different format """ - super(TagObject, self).__init__(repo, id ) + super(TagObject, self).__init__(repo, sha ) self._set_self_from_args_(locals()) def _set_cache_(self, attr): @@ -60,8 +60,13 @@ class TagObject(base.Object): tagger_info = lines[3][7:]# tagger <actor> <date> self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) - # line 4 empty - check git source to figure out purpose - self.message = "\n".join(lines[5:]) + # line 4 empty - it could mark the beginning of the next header + # in csse there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' # END check our attributes else: super(TagObject, self)._set_cache_(attr) diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index abfa9622..27bd84d0 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -8,6 +8,7 @@ import os import blob import base import binascii +import git.diff as diff def sha_to_hex(sha): """Takes a string and returns the hex of the sha within""" @@ -15,7 +16,7 @@ def sha_to_hex(sha): assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha return hexsha -class Tree(base.IndexObject): +class Tree(base.IndexObject, diff.Diffable): """ Tress represent a ordered list of Blobs and other Trees. Hence it can be accessed like a list. @@ -37,13 +38,14 @@ class Tree(base.IndexObject): __slots__ = "_cache" # using ascii codes for comparison - ascii_commit_id = (0x31 << 4) + 0x36 - ascii_blob_id = (0x31 << 4) + 0x30 - ascii_tree_id = (0x34 << 4) + 0x30 + commit_id = 016 + blob_id = 010 + symlink_id = 012 + tree_id = 040 - def __init__(self, repo, id, mode=0, path=None): - super(Tree, self).__init__(repo, id, mode, path) + def __init__(self, repo, sha, mode=0, path=None): + super(Tree, self).__init__(repo, sha, mode, path) def _set_cache_(self, attr): if attr == "_cache": @@ -87,8 +89,8 @@ class Tree(base.IndexObject): mode = 0 mode_boundary = i + 6 - # keep it ascii - we compare against the respective values - type_id = (ord(data[i])<<4) + ord(data[i+1]) + # read type + type_id = ((ord(data[i])-ord_zero)<<3) + (ord(data[i+1])-ord_zero) i += 2 while data[i] != ' ': @@ -108,18 +110,20 @@ class Tree(base.IndexObject): i += 1 # END while not reached NULL name = data[ns:i] + path = os.path.join(self.path, name) # byte is NULL, get next 20 i += 1 sha = data[i:i+20] i = i + 20 + mode |= type_id<<12 hexsha = sha_to_hex(sha) - if type_id == self.ascii_blob_id: - yield blob.Blob(self.repo, hexsha, mode, name) - elif type_id == self.ascii_tree_id: - yield Tree(self.repo, hexsha, mode, name) - elif type_id == self.ascii_commit_id: + if type_id == self.blob_id or type_id == self.symlink_id: + yield blob.Blob(self.repo, hexsha, mode, path) + elif type_id == self.tree_id: + yield Tree(self.repo, hexsha, mode, path) + elif type_id == self.commit_id: # todo yield None else: @@ -148,29 +152,28 @@ class Tree(base.IndexObject): def __repr__(self): - return '<git.Tree "%s">' % self.id + return '<git.Tree "%s">' % self.sha @classmethod - def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate, prune ): for obj in tree: - # adjust path to be complete - obj.path = os.path.join(tree.path, obj.path) - if not predicate(obj): - continue - yield obj - if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): - for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + if predicate(obj): + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ) and not prune(obj): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate, prune ): yield recursive_obj # END for each recursive object # END if we may enter recursion # END for each object - def traverse(self, max_depth=-1, predicate = lambda i: True): + def traverse(self, max_depth=-1, predicate = lambda i: True, prune = lambda t: False): """ Returns + Iterator to traverse the tree recursively up to the given level. - The iterator returns Blob and Tree objects + The iterator returns Blob and Tree objects with paths relative to their + repository. ``max_depth`` @@ -181,8 +184,13 @@ class Tree(base.IndexObject): ``predicate`` If predicate(item) returns True, item will be returned by iterator + + ``prune`` + + If prune(tree) returns True, the traversal will not continue into the + given tree object. """ - return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + return self._iter_recursive( self.repo, self, 0, max_depth, predicate, prune ) @property def trees(self): @@ -218,7 +226,7 @@ class Tree(base.IndexObject): if isinstance(item, basestring): # compatability for obj in self._cache: - if obj.path == item: + if obj.name == item: return obj # END for each obj raise KeyError( "Blob or Tree named %s not found" % item ) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 367ed2b7..7bb4e8e2 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -52,3 +52,21 @@ def parse_actor_and_date(line): m = _re_actor_epoch.search(line) actor, epoch = m.groups() return (Actor._from_string(actor), int(epoch)) + + + +class ProcessStreamAdapter(object): + """ + Class wireing all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. + """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) |