diff options
Diffstat (limited to 'lib/git')
-rw-r--r-- | lib/git/__init__.py | 2 | ||||
-rw-r--r-- | lib/git/diff.py | 203 | ||||
-rw-r--r-- | lib/git/objects/base.py | 33 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 57 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 4 | ||||
-rw-r--r-- | lib/git/refs.py | 2 | ||||
-rw-r--r-- | lib/git/repo.py | 40 |
7 files changed, 227 insertions, 114 deletions
diff --git a/lib/git/__init__.py b/lib/git/__init__.py index 6f482128..e2adac62 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -12,7 +12,7 @@ __version__ = 'git' from git.objects import * from git.refs import * from git.actor import Actor -from git.diff import Diff +from git.diff import * from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError from git.cmd import Git from git.repo import Repo diff --git a/lib/git/diff.py b/lib/git/diff.py index 0db83b4f..1774597a 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -7,9 +7,128 @@ import re import objects.blob as blob + +class Diffable(object): + """ + Common interface for all object that can be diffed against another object of compatible type. + + NOTE: + Subclasses require a repo member as it is the case for Object instances, for practical + reasons we do not derive from Object. + """ + __slots__ = tuple() + + # subclasses provide additional arguments to the git-diff comamnd by supplynig + # them in this tuple + _diff_args = tuple() + + # Temporary standin for Index type until we have a real index type + class Index(object): + pass + + def diff(self, other=None, paths=None, create_patch=False, **kwargs): + """ + Creates diffs between two items being trees, trees and index or an + index and the working tree. + + ``other`` + Is the item to compare us with. + If None, we will be compared to the working tree. + If Index ( type ), it will be compared against the index + + ``paths`` + is a list of paths or a single path to limit the diff to. + It will only include at least one of the givne path or paths. + + ``create_patch`` + If True, the returned Diff contains a detailed patch that if applied + makes the self to other. Patches are somwhat costly as blobs have to be read + and diffed. + + ``kwargs`` + Additional arguments passed to git-diff, such as + R=True to swap both sides of the diff. + + Returns + git.DiffIndex + + Note + Rename detection will only work if create_patch is True + """ + args = list(self._diff_args[:]) + args.append( "--abbrev=40" ) # we need full shas + args.append( "--full-index" ) # get full index paths, not only filenames + + if create_patch: + args.append("-p") + args.append("-M") # check for renames + else: + args.append("--raw") + + if paths is not None and not isinstance(paths, (tuple,list)): + paths = [ paths ] + + if other is not None and other is not self.Index: + args.insert(0, other) + if other is self.Index: + args.insert(0, "--cached") + + args.insert(0,self) + + # paths is list here or None + if paths: + args.append("--") + args.extend(paths) + # END paths handling + + kwargs['as_process'] = True + proc = self.repo.git.diff(*args, **kwargs) + + diff_method = Diff._index_from_raw_format + if create_patch: + diff_method = Diff._index_from_patch_format + return diff_method(self.repo, proc.stdout) + + +class DiffIndex(list): + """ + Implements an Index for diffs, allowing a list of Diffs to be queried by + the diff properties. + + The class improves the diff handling convenience + """ + # change type invariant identifying possible ways a blob can have changed + # A = Added + # D = Deleted + # R = Renamed + # NOTE: 'Modified' mode is impllied as it wouldn't be listed as a diff otherwise + change_type = ("A", "D", "R") + + + def iter_change_type(self, change_type): + """ + Return + iterator yieling Diff instances that match the given change_type + + ``change_type`` + Member of DiffIndex.change_type + """ + if change_type not in self.change_type: + raise ValueError( "Invalid change type: %s" % change_type ) + + for diff in self: + if change_type == "A" and diff.new_file: + yield diff + elif change_type == "D" and diff.deleted_file: + yield diff + elif change_type == "R" and diff.renamed: + yield diff + # END for each diff + + class Diff(object): """ - A Diff contains diff information between two commits. + A Diff contains diff information between two Trees. It contains two sides a and b of the diff, members are prefixed with "a" and "b" respectively to inidcate that. @@ -27,7 +146,7 @@ class Diff(object): ``Deleted File``:: b_mode is None - b_blob is NOne + b_blob is None """ # precompiled regex @@ -46,7 +165,7 @@ class Diff(object): """, re.VERBOSE | re.MULTILINE) re_is_null_hexsha = re.compile( r'^0{40}$' ) __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", - "rename_from", "rename_to", "renamed", "diff") + "rename_from", "rename_to", "diff") def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode, new_file, deleted_file, rename_from, @@ -62,31 +181,45 @@ class Diff(object): self.a_mode = a_mode self.b_mode = b_mode + if self.a_mode: self.a_mode = blob.Blob._mode_str_to_int( self.a_mode ) if self.b_mode: self.b_mode = blob.Blob._mode_str_to_int( self.b_mode ) + self.new_file = new_file self.deleted_file = deleted_file - self.rename_from = rename_from - self.rename_to = rename_to - self.renamed = rename_from != rename_to + + # be clear and use None instead of empty strings + self.rename_from = rename_from or None + self.rename_to = rename_to or None + self.diff = diff + @property + def renamed(self): + """ + Returns: + True if the blob of our diff has been renamed + """ + return self.rename_from != self.rename_to + @classmethod - def _list_from_string(cls, repo, text): + def _index_from_patch_format(cls, repo, stream): """ - Create a new diff object from the given text + Create a new DiffIndex from the given text which must be in patch format ``repo`` is the repository we are operating on - it is required - ``text`` - result of 'git diff' between two commits or one commit and the index + ``stream`` + result of 'git diff' as a stream (supporting file protocol) Returns - git.Diff[] + git.DiffIndex """ - diffs = [] + # for now, we have to bake the stream + text = stream.read() + index = DiffIndex() diff_header = cls.re_header.match for diff in ('\n' + text).split('\ndiff --git')[1:]: @@ -97,9 +230,51 @@ class Diff(object): a_blob_id, b_blob_id, b_mode = header.groups() new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, + index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - return diffs + return index + + @classmethod + def _index_from_raw_format(cls, repo, stream): + """ + Create a new DiffIndex from the given stream which must be in raw format. + + NOTE: + This format is inherently incapable of detecting renames, hence we only + modify, delete and add files + + Returns + git.DiffIndex + """ + # handles + # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore + index = DiffIndex() + for line in stream: + if not line.startswith(":"): + continue + # END its not a valid diff line + old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split() + a_path = path + b_path = path + deleted_file = False + new_file = False + + # NOTE: We cannot conclude from the existance of a blob to change type + # as diffs with the working do not have blobs yet + if change_type == 'D': + b_path = None + deleted_file = True + elif change_type == 'A': + a_path = None + new_file = True + # END add/remove handling + + diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, + new_file, deleted_file, None, None, '') + index.append(diff) + # END for each line + + return index diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 3b48e066..ab1da7b0 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -15,22 +15,12 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: - inst = Object(repo,id) + inst = Object.new(repo,id) """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass - def __new__(cls, repo, id, *args, **kwargs): - if cls is Object: - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs) - inst.size = size - return inst - else: - return super(Object,cls).__new__(cls, repo, id, *args, **kwargs) - def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments @@ -45,7 +35,25 @@ class Object(LazyMixin): super(Object,self).__init__() self.repo = repo self.id = id - + + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + def _set_self_from_args_(self, args_dict): """ Initialize attributes on self from the given dict that was retrieved @@ -162,5 +170,4 @@ class IndexObject(Object): mode += int(char) << iteration*3 # END for each char return mode - diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 847f4dec..181cbb52 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -11,7 +11,7 @@ from tree import Tree import base import utils -class Commit(base.Object, Iterable): +class Commit(base.Object, Iterable, diff.Diffable): """ Wraps a git Commit object. @@ -176,60 +176,6 @@ class Commit(base.Object, Iterable): return self.iter_items( self.repo, self, paths, **kwargs ) - @classmethod - def diff(cls, repo, a, b=None, paths=None): - """ - Creates diffs between a tree and the index or between two trees: - - ``repo`` - is the Repo - - ``a`` - is a named commit - - ``b`` - is an optional named commit. Passing a list assumes you - wish to omit the second named commit and limit the diff to the - given paths. - - ``paths`` - is a list of paths to limit the diff to. - - Returns - git.Diff[]:: - - between tree and the index if only a is given - between two trees if a and b are given and are commits - """ - paths = paths or [] - - if isinstance(b, list): - paths = b - b = None - - if paths: - paths.insert(0, "--") - - if b: - paths.insert(0, b) - paths.insert(0, a) - text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff._list_from_string(repo, text) - - @property - def diffs(self): - """ - Returns - git.Diff[] - Diffs between this commit and its first parent or all changes if this - commit is the first commit and has no parent. - """ - if not self.parents: - d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff._list_from_string(self.repo, d) - else: - return self.diff(self.repo, self.parents[0].id, self.id) - @property def stats(self): """ @@ -268,6 +214,7 @@ class Commit(base.Object, Iterable): if not hasattr(stream,'next'): stream = proc_or_stream.stdout + for line in stream: id = line.split()[1] assert line.split()[0] == "commit" diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index abfa9622..c35c075e 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -8,6 +8,7 @@ import os import blob import base import binascii +import git.diff as diff def sha_to_hex(sha): """Takes a string and returns the hex of the sha within""" @@ -15,7 +16,7 @@ def sha_to_hex(sha): assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha return hexsha -class Tree(base.IndexObject): +class Tree(base.IndexObject, diff.Diffable): """ Tress represent a ordered list of Blobs and other Trees. Hence it can be accessed like a list. @@ -169,6 +170,7 @@ class Tree(base.IndexObject): def traverse(self, max_depth=-1, predicate = lambda i: True): """ Returns + Iterator to traverse the tree recursively up to the given level. The iterator returns Blob and Tree objects diff --git a/lib/git/refs.py b/lib/git/refs.py index a4d7bbb1..4445f252 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -72,7 +72,7 @@ class Reference(LazyMixin, Iterable): """ # have to be dynamic here as we may be a tag which can point to anything # Our path will be resolved to the hexsha which will be used accordingly - return Object(self.repo, self.path) + return Object.new(self.repo, self.path) @classmethod def iter_items(cls, repo, common_path = "refs", **kwargs): diff --git a/lib/git/repo.py b/lib/git/repo.py index 6edb7f62..cc4a6c6b 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -107,6 +107,15 @@ class Repo(object): branches = heads @property + def head(self): + """ + Return + Head Object, reference pointing to the current head of the repository + """ + return Head(self,'HEAD') + + + @property def tags(self): """ A list of ``Tag`` objects that are available in this repo @@ -129,7 +138,7 @@ class Repo(object): if rev is None: rev = self.active_branch - c = Object(self, rev) + c = Object.new(self, rev) assert c.type == "commit", "Revision %s did not point to a commit, but to %s" % (rev, c) return c @@ -299,7 +308,7 @@ class Repo(object): ignored files will not appear here, i.e. files mentioned in .gitignore """ # make sure we get all files, no only untracked directores - proc = self.git.commit(untracked_files=True, as_process=True) + proc = self.git.status(untracked_files=True, as_process=True) stream = iter(proc.stdout) untracked_files = list() for line in stream: @@ -327,34 +336,7 @@ class Repo(object): """ return Head( self, self.git.symbolic_ref('HEAD').strip() ) - - def diff(self, a, b, *paths): - """ - The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) - - ``a`` - is the base commit - ``b`` - is the other commit - - ``paths`` - is an optional list of file paths on which to restrict the diff - Returns - ``str`` - """ - return self.git.diff(a, b, '--', *paths) - - def commit_diff(self, commit): - """ - The commit diff for the given commit - ``commit`` is the commit name/id - - Returns - ``git.Diff[]`` - """ - return Commit.diff(self, commit) - def blame(self, rev, file): """ The blame information for the given file at the given revision. |