diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/git/__init__.py | 10 | ||||
-rw-r--r-- | lib/git/actor.py | 70 | ||||
-rw-r--r-- | lib/git/blob.py | 161 | ||||
-rw-r--r-- | lib/git/cmd.py | 507 | ||||
-rw-r--r-- | lib/git/commit.py | 296 | ||||
-rw-r--r-- | lib/git/diff.py | 169 | ||||
-rw-r--r-- | lib/git/errors.py | 34 | ||||
-rw-r--r-- | lib/git/head.py | 118 | ||||
-rw-r--r-- | lib/git/lazy.py | 32 | ||||
-rw-r--r-- | lib/git/objects/__init__.py | 11 | ||||
-rw-r--r-- | lib/git/objects/base.py | 151 | ||||
-rw-r--r-- | lib/git/objects/blob.py | 36 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 299 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 70 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 242 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 36 | ||||
-rw-r--r-- | lib/git/refs.py | 238 | ||||
-rw-r--r-- | lib/git/repo.py | 1010 | ||||
-rw-r--r-- | lib/git/stats.py | 103 | ||||
-rw-r--r-- | lib/git/tag.py | 92 | ||||
-rw-r--r-- | lib/git/tree.py | 108 | ||||
-rw-r--r-- | lib/git/utils.py | 89 |
22 files changed, 2215 insertions, 1667 deletions
diff --git a/lib/git/__init__.py b/lib/git/__init__.py index 28d14d0c..6f482128 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -9,19 +9,17 @@ import inspect __version__ = 'git' +from git.objects import * +from git.refs import * from git.actor import Actor -from git.blob import Blob -from git.commit import Commit from git.diff import Diff from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError from git.cmd import Git -from git.head import Head from git.repo import Repo from git.stats import Stats -from git.tag import Tag -from git.tree import Tree from git.utils import dashify from git.utils import touch + __all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ] + if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/lib/git/actor.py b/lib/git/actor.py index bc1a4479..fe4a47e5 100644 --- a/lib/git/actor.py +++ b/lib/git/actor.py @@ -7,36 +7,40 @@ import re class Actor(object): - """Actors hold information about a person acting on the repository. They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" - def __init__(self, name, email): - self.name = name - self.email = email - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.Actor "%s <%s>">' % (self.name, self.email) - - @classmethod - def from_string(cls, string): - """ - Create an Actor from a string. - - ``str`` - is the string, which is expected to be in regular git format - - Format - John Doe <jdoe@example.com> - - Returns - Actor - """ - if re.search(r'<.+>', string): - m = re.search(r'(.*) <(.+?)>', string) - name, email = m.groups() - return Actor(name, email) - else: - return Actor(string, None) + """Actors hold information about a person acting on the repository. They + can be committers and authors or anything with a name and an email as + mentioned in the git log entries.""" + # precompiled regex + name_only_regex = re.compile( r'<.+>' ) + name_email_regex = re.compile( r'(.*) <(.+?)>' ) + + def __init__(self, name, email): + self.name = name + self.email = email + + def __str__(self): + return self.name + + def __repr__(self): + return '<git.Actor "%s <%s>">' % (self.name, self.email) + + @classmethod + def _from_string(cls, string): + """ + Create an Actor from a string. + + ``str`` + is the string, which is expected to be in regular git format + + Format + John Doe <jdoe@example.com> + + Returns + Actor + """ + if cls.name_only_regex.search(string): + m = cls.name_email_regex.search(string) + name, email = m.groups() + return Actor(name, email) + else: + return Actor(string, None) diff --git a/lib/git/blob.py b/lib/git/blob.py deleted file mode 100644 index 82a41f73..00000000 --- a/lib/git/blob.py +++ /dev/null @@ -1,161 +0,0 @@ -# blob.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import mimetypes -import os -import re -import time -from actor import Actor -from commit import Commit - -class Blob(object): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - - def __init__(self, repo, id, mode=None, name=None): - """ - Create an unbaked Blob containing just the specified attributes - - ``repo`` - is the Repo - - ``id`` - is the git object id - - ``mode`` - is the file mode - - ``name`` - is the file name - - Returns - git.Blob - """ - self.repo = repo - self.id = id - self.mode = mode - self.name = name - - self._size = None - self.data_stored = None - - @property - def size(self): - """ - The size of this blob in bytes - - Returns - int - - NOTE - The size will be cached after the first access - """ - if self._size is None: - self._size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) - return self._size - - @property - def data(self): - """ - The binary contents of this blob. - - Returns - str - - NOTE - The data will be cached after the first access. - """ - self.data_stored = self.data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - return self.data_stored - - @property - def mime_type(self): - """ - The mime type of this file (based on the filename) - - Returns - str - - NOTE - Defaults to 'text/plain' in case the actual file type is unknown. - """ - guesses = None - if self.name: - guesses = mimetypes.guess_type(self.name) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE - - @property - def basename(self): - """ - Returns - The basename of the Blobs file name - """ - return os.path.basename(self.name) - - @classmethod - def blame(cls, repo, commit, file): - """ - The blame information for the given file at the given commit - - Returns - list: [git.Commit, list: [<line>]] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance. - """ - data = repo.git.blame(commit, '--', file, p=True) - commits = {} - blames = [] - info = None - - for line in data.splitlines(): - parts = re.split(r'\s+', line, 1) - if re.search(r'^[0-9A-Fa-f]{40}$', parts[0]): - if re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line): - m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line) - id, origin_line, final_line, group_lines = m.groups() - info = {'id': id} - blames.append([None, []]) - elif re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line): - m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line) - id, origin_line, final_line = m.groups() - info = {'id': id} - elif re.search(r'^(author|committer)', parts[0]): - if re.search(r'^(.+)-mail$', parts[0]): - m = re.search(r'^(.+)-mail$', parts[0]) - info["%s_email" % m.groups()[0]] = parts[-1] - elif re.search(r'^(.+)-time$', parts[0]): - m = re.search(r'^(.+)-time$', parts[0]) - info["%s_date" % m.groups()[0]] = time.gmtime(int(parts[-1])) - elif re.search(r'^(author|committer)$', parts[0]): - m = re.search(r'^(author|committer)$', parts[0]) - info[m.groups()[0]] = parts[-1] - elif re.search(r'^filename', parts[0]): - info['filename'] = parts[-1] - elif re.search(r'^summary', parts[0]): - info['summary'] = parts[-1] - elif parts[0] == '': - if info: - c = commits.has_key(info['id']) and commits[info['id']] - if not c: - c = Commit(repo, id=info['id'], - author=Actor.from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[info['id']] = c - - m = re.search(r'^\t(.*)$', line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - - return blames - - def __repr__(self): - return '<git.Blob "%s">' % self.id diff --git a/lib/git/cmd.py b/lib/git/cmd.py index aef53350..2965eb8b 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -6,7 +6,6 @@ import os, sys import subprocess -import re from utils import * from errors import GitCommandError @@ -14,208 +13,326 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output') + 'with_exceptions', 'with_raw_output', 'as_process') extra = {} if sys.platform == 'win32': - extra = {'shell': True} + extra = {'shell': True} class Git(object): - """ - The Git class manages communication with the Git binary. - + """ + The Git class manages communication with the Git binary. + It provides a convenient interface to calling the Git binary, such as in:: g = Git( git_dir ) - g.init() # calls 'git init' program + g.init() # calls 'git init' program rval = g.ls_files() # calls 'git ls-files' program ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - def __init__(self, git_dir=None): - """ - Initialize this instance with: - - ``git_dir`` - Git directory we should work in. If None, we always work in the current - directory as returned by os.getcwd() - """ - super(Git, self).__init__() - self.git_dir = git_dir - - def __getattr__(self, name): - """ - A convenience method as it allows to call the command as if it was - an object. - Returns - Callable object that will execute call _call_process with your arguments. - """ - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - - @property - def get_dir(self): - """ - Returns - Git directory we are working on - """ - return self.git_dir - - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - with_raw_output=False, - ): - """ - Handles executing the command on the shell and consumes and returns - the returned information (stdout) - - ``command`` - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. - - ``istream`` - Standard input filehandle passed to subprocess.Popen. - - ``with_keep_cwd`` - Whether to use the current working directory from os.getcwd(). - GitPython uses get_work_tree() as its working directory by - default and get_git_dir() for bare repositories. - - ``with_extended_output`` - Whether to return a (status, stdout, stderr) tuple. - - ``with_exceptions`` - Whether to raise an exception when git returns a non-zero status. - - ``with_raw_output`` - Whether to avoid stripping off trailing whitespace. - - Returns:: - - str(output) # extended_output = False (Default) - tuple(int(status), str(stdout), str(stderr)) # extended_output = True - - Raise - GitCommandError - - NOTE - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module. - """ - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) - - # Allow the user to have the command executed in their working dir. - if with_keep_cwd or self.git_dir is None: - cwd = os.getcwd() - else: - cwd=self.git_dir - - # Start the process - proc = subprocess.Popen(command, - cwd=cwd, - stdin=istream, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - **extra - ) - - # Wait for the process to return - try: - stdout_value = proc.stdout.read() - stderr_value = proc.stderr.read() - status = proc.wait() - finally: - proc.stdout.close() - proc.stderr.close() - - # Strip off trailing whitespace by default - if not with_raw_output: - stdout_value = stdout_value.rstrip() - stderr_value = stderr_value.rstrip() - - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) - - if GIT_PYTHON_TRACE == 'full': - if stderr_value: - print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d: '%s'" % (command, status, stdout_value) - else: - print "%s -> %d" % (command, status) - - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value - - def transform_kwargs(self, **kwargs): - """ - Transforms Python style kwargs into git command line options. - """ - args = [] - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args - - def _call_process(self, method, *args, **kwargs): - """ - Run the given git command with the specified arguments and return - the result as a String - - ``method`` - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. - - ``args`` - is the list of arguments - - ``kwargs`` - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). - - Examples:: - git.rev_list('master', max_count=10, header=True) - - Returns - Same as execute() - """ - - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. - _kwargs = {} - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass - - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - ext_args = map(str, args) - args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object + """ + __slots__= "proc" + + def __init__(self, proc ): + self.proc = proc + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid)) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + + def __init__(self, git_dir=None): + """ + Initialize this instance with: + + ``git_dir`` + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd() + """ + super(Git, self).__init__() + self.git_dir = git_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None + + def __getattr__(self, name): + """ + A convenience method as it allows to call the command as if it was + an object. + Returns + Callable object that will execute call _call_process with your arguments. + """ + if name[:1] == '_': + raise AttributeError(name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + + @property + def get_dir(self): + """ + Returns + Git directory we are working on + """ + return self.git_dir + + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + with_raw_output=False, + as_process=False + ): + """ + Handles executing the command on the shell and consumes and returns + the returned information (stdout) + + ``command`` + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. + + ``istream`` + Standard input filehandle passed to subprocess.Popen. + + ``with_keep_cwd`` + Whether to use the current working directory from os.getcwd(). + GitPython uses get_work_tree() as its working directory by + default and get_git_dir() for bare repositories. + + ``with_extended_output`` + Whether to return a (status, stdout, stderr) tuple. + + ``with_exceptions`` + Whether to raise an exception when git returns a non-zero status. + + ``with_raw_output`` + Whether to avoid stripping off trailing whitespace. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output, + with_exceptions and with_raw_output ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + Returns:: + + str(output) # extended_output = False (Default) + tuple(int(status), str(stdout), str(stderr)) # extended_output = True + + Raise + GitCommandError + + NOTE + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module. + """ + if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + print ' '.join(command) + + # Allow the user to have the command executed in their working dir. + if with_keep_cwd or self.git_dir is None: + cwd = os.getcwd() + else: + cwd=self.git_dir + + # Start the process + proc = subprocess.Popen(command, + cwd=cwd, + stdin=istream, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + **extra + ) + + if as_process: + return self.AutoInterrupt(proc) + + # Wait for the process to return + status = 0 + try: + stdout_value = proc.stdout.read() + stderr_value = proc.stderr.read() + status = proc.wait() + finally: + proc.stdout.close() + proc.stderr.close() + + # Strip off trailing whitespace by default + if not with_raw_output: + stdout_value = stdout_value.rstrip() + stderr_value = stderr_value.rstrip() + + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) + + if GIT_PYTHON_TRACE == 'full': + if stderr_value: + print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d: '%s'" % (command, status, stdout_value) + else: + print "%s -> %d" % (command, status) + + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value + + def transform_kwargs(self, **kwargs): + """ + Transforms Python style kwargs into git command line options. + """ + args = [] + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args + + def _call_process(self, method, *args, **kwargs): + """ + Run the given git command with the specified arguments and return + the result as a String + + ``method`` + is the command. Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. + + ``args`` + is the list of arguments + + ``kwargs`` + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). + + Examples:: + git.rev_list('master', max_count=10, header=True) + + Returns + Same as execute() + """ + + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. + _kwargs = {} + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass + + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + ext_args = map(str, args) + args = opt_args + ext_args + + call = ["git", dashify(method)] + call.extend(args) + + return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + <hex_sha> type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) + + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ + Use this method to quickly examine the type and size of the object behind + the given ref. + + NOTE + The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + Return: + (hexsha, type_string, size_as_int) + """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ + As get_object_header, but returns object data as well + + Return: + (hexsha, type_string, size_as_int,data_string) + """ + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + data = cmd.stdout.read(size) + cmd.stdout.read(1) # finishing newlines + + return (hexsha, typename, size, data) diff --git a/lib/git/commit.py b/lib/git/commit.py deleted file mode 100644 index edfe47ca..00000000 --- a/lib/git/commit.py +++ /dev/null @@ -1,296 +0,0 @@ -# commit.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import re -import time - -from actor import Actor -from lazy import LazyMixin -from tree import Tree -import diff -import stats - -class Commit(LazyMixin): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - def __init__(self, repo, id, tree=None, author=None, authored_date=None, - committer=None, committed_date=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``id`` - is the sha id of the commit - - ``parents`` : list( Commit, ... ) - is a list of commit ids - - ``tree`` : Tree - is the corresponding tree id - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) - is the authored DateTime - - ``committer`` : Actor - is the committer string - - ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the committed DateTime - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - LazyMixin.__init__(self) - - self.repo = repo - self.id = id - self.parents = None - self.tree = None - self.author = author - self.authored_date = authored_date - self.committer = committer - self.committed_date = committed_date - self.message = message - - if self.id: - if parents is not None: - self.parents = [Commit(repo, p) for p in parents] - if tree is not None: - self.tree = Tree(repo, id=tree) - - def __eq__(self, other): - return self.id == other.id - - def __ne__(self, other): - return self.id != other.id - - def __bake__(self): - """ - Called by LazyMixin superclass when the first uninitialized member needs - to be set as it is queried. - """ - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message - - @property - def id_abbrev(self): - """ - Returns - First 7 bytes of the commit's sha id as an abbreviation of the full string. - """ - return self.id[0:7] - - @property - def summary(self): - """ - Returns - First line of the commit message. - """ - return self.message.split('\n', 1)[0] - - @classmethod - def count(cls, repo, ref, path=''): - """ - Count the number of commits reachable from this ref - - ``repo`` - is the Repo - - ``ref`` - is the ref from which to begin (SHA1 or name) - - ``path`` - is an optinal path - - Returns - int - """ - return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) - - @classmethod - def find_all(cls, repo, ref, path='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``ref`` - is the ref from which to begin (SHA1 or name) - - ``path`` - is an optinal path, if set only Commits that include the path - will be considered - - ``kwargs`` - optional keyword arguments to git where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - - Returns - git.Commit[] - """ - options = {'pretty': 'raw'} - options.update(kwargs) - - output = repo.git.rev_list(ref, '--', path, **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``text`` - is the text output from the git-rev-list command (raw format) - - Returns - git.Commit[] - """ - lines = [l for l in text.splitlines() if l.strip('\r\n')] - - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] - - parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - author, authored_date = cls.actor(lines.pop(0)) - committer, committed_date = cls.actor(lines.pop(0)) - - messages = [] - while lines and lines[0].startswith(' '): - messages.append(lines.pop(0).strip()) - - message = '\n'.join(messages) - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - - return commits - - @classmethod - def diff(cls, repo, a, b=None, paths=None): - """ - Creates diffs between a tree and the index or between two trees: - - ``repo`` - is the Repo - - ``a`` - is a named commit - - ``b`` - is an optional named commit. Passing a list assumes you - wish to omit the second named commit and limit the diff to the - given paths. - - ``paths`` - is a list of paths to limit the diff to. - - Returns - git.Diff[]:: - - between tree and the index if only a is given - between two trees if a and b are given and are commits - """ - paths = paths or [] - - if isinstance(b, list): - paths = b - b = None - - if paths: - paths.insert(0, "--") - - if b: - paths.insert(0, b) - paths.insert(0, a) - text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff.list_from_string(repo, text) - - @property - def diffs(self): - """ - Returns - git.Diff[] - Diffs between this commit and its first parent or all changes if this - commit is the first commit and has no parent. - """ - if not self.parents: - d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - if re.search(r'diff --git a', d): - if not re.search(r'^diff --git a', d): - p = re.compile(r'.+?(diff --git a)', re.MULTILINE | re.DOTALL) - d = p.sub(r'diff --git a', d, 1) - else: - d = '' - return diff.Diff.list_from_string(self.repo, d) - else: - return self.diff(self.repo, self.parents[0].id, self.id) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. - - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) - return stats.Stats.list_from_string(self.repo, text) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.id - - def __repr__(self): - return '<git.Commit "%s">' % self.id - - @classmethod - def actor(cls, line): - """ - Parse out the actor (author or committer) info - - Returns - [Actor, gmtime(acted at time)] - """ - m = re.search(r'^.+? (.*) (\d+) .*$', line) - actor, epoch = m.groups() - return [Actor.from_string(actor), time.gmtime(int(epoch))] diff --git a/lib/git/diff.py b/lib/git/diff.py index 44f55602..0db83b4f 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -5,94 +5,101 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import re -import blob +import objects.blob as blob class Diff(object): - """ - A Diff contains diff information between two commits. - - It contains two sides a and b of the diff, members are prefixed with - "a" and "b" respectively to inidcate that. - - Diffs keep information about the changed blob objects, the file mode, renames, - deletions and new files. - - There are a few cases where None has to be expected as member variable value: - - ``New File``:: - - a_mode is None - a_blob is None - - ``Deleted File``:: - - b_mode is None - b_blob is NOne - """ + """ + A Diff contains diff information between two commits. + + It contains two sides a and b of the diff, members are prefixed with + "a" and "b" respectively to inidcate that. + + Diffs keep information about the changed blob objects, the file mode, renames, + deletions and new files. + + There are a few cases where None has to be expected as member variable value: + + ``New File``:: + + a_mode is None + a_blob is None + + ``Deleted File``:: + + b_mode is None + b_blob is NOne + """ + + # precompiled regex + re_header = re.compile(r""" + #^diff[ ]--git + [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n + (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n + ^rename[ ]from[ ](?P<rename_from>\S+)\n + ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? + (?:^old[ ]mode[ ](?P<old_mode>\d+)\n + ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? + (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? + (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? + (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) + \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? + """, re.VERBOSE | re.MULTILINE) + re_is_null_hexsha = re.compile( r'^0{40}$' ) + __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", + "rename_from", "rename_to", "renamed", "diff") - def __init__(self, repo, a_path, b_path, a_blob, b_blob, a_mode, - b_mode, new_file, deleted_file, rename_from, - rename_to, diff): - self.repo = repo + def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, + b_mode, new_file, deleted_file, rename_from, + rename_to, diff): + if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id): + self.a_blob = None + else: + self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path) + if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id): + self.b_blob = None + else: + self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path) - if not a_blob or re.search(r'^0{40}$', a_blob): - self.a_blob = None - else: - self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, name=a_path) - if not b_blob or re.search(r'^0{40}$', b_blob): - self.b_blob = None - else: - self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, name=b_path) + self.a_mode = a_mode + self.b_mode = b_mode + if self.a_mode: + self.a_mode = blob.Blob._mode_str_to_int( self.a_mode ) + if self.b_mode: + self.b_mode = blob.Blob._mode_str_to_int( self.b_mode ) + self.new_file = new_file + self.deleted_file = deleted_file + self.rename_from = rename_from + self.rename_to = rename_to + self.renamed = rename_from != rename_to + self.diff = diff - self.a_mode = a_mode - self.b_mode = b_mode - self.new_file = new_file - self.deleted_file = deleted_file - self.rename_from = rename_from - self.rename_to = rename_to - self.renamed = rename_from != rename_to - self.diff = diff + @classmethod + def _list_from_string(cls, repo, text): + """ + Create a new diff object from the given text + ``repo`` + is the repository we are operating on - it is required + + ``text`` + result of 'git diff' between two commits or one commit and the index + + Returns + git.Diff[] + """ + diffs = [] - @classmethod - def list_from_string(cls, repo, text): - """ - Create a new diff object from the given text - ``repo`` - is the repository we are operating on - it is required - - ``text`` - result of 'git diff' between two commits or one commit and the index - - Returns - git.Diff[] - """ - diffs = [] + diff_header = cls.re_header.match + for diff in ('\n' + text).split('\ndiff --git')[1:]: + header = diff_header(diff) - diff_header = re.compile(r""" - #^diff[ ]--git - [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n - (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n - ^rename[ ]from[ ](?P<rename_from>\S+)\n - ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? - (?:^old[ ]mode[ ](?P<old_mode>\d+)\n - ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? - (?:^index[ ](?P<a_blob>[0-9A-Fa-f]+) - \.\.(?P<b_blob>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? - """, re.VERBOSE | re.MULTILINE).match + a_path, b_path, similarity_index, rename_from, rename_to, \ + old_mode, new_mode, new_file_mode, deleted_file_mode, \ + a_blob_id, b_blob_id, b_mode = header.groups() + new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - for diff in ('\n' + text).split('\ndiff --git')[1:]: - header = diff_header(diff) + diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, + old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, + new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - a_path, b_path, similarity_index, rename_from, rename_to, \ - old_mode, new_mode, new_file_mode, deleted_file_mode, \ - a_blob, b_blob, b_mode = header.groups() - new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - - diffs.append(Diff(repo, a_path, b_path, a_blob, b_blob, - old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, - new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - - return diffs + return diffs diff --git a/lib/git/errors.py b/lib/git/errors.py index 2632d5f3..e9a637c0 100644 --- a/lib/git/errors.py +++ b/lib/git/errors.py @@ -8,25 +8,25 @@ Module containing all exceptions thrown througout the git package, """ class InvalidGitRepositoryError(Exception): - """ - Thrown if the given repository appears to have an invalid format. - """ + """ + Thrown if the given repository appears to have an invalid format. + """ -class NoSuchPathError(Exception): - """ - Thrown if a path could not be access by the system. - """ +class NoSuchPathError(OSError): + """ + Thrown if a path could not be access by the system. + """ class GitCommandError(Exception): - """ - Thrown if execution of the git command fails with non-zero status code. - """ - def __init__(self, command, status, stderr=None): - self.stderr = stderr - self.status = status - self.command = command + """ + Thrown if execution of the git command fails with non-zero status code. + """ + def __init__(self, command, status, stderr=None): + self.stderr = stderr + self.status = status + self.command = command - def __str__(self): - return repr("%s returned exit status %d" % - (str(self.command), self.status)) + def __str__(self): + return repr("%s returned exit status %d" % + (str(self.command), self.status)) diff --git a/lib/git/head.py b/lib/git/head.py deleted file mode 100644 index 639cee40..00000000 --- a/lib/git/head.py +++ /dev/null @@ -1,118 +0,0 @@ -# head.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import commit - -class Head(object): - """ - A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. - - Examples:: - - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] - - >>> head.name - 'master' - - >>> head.commit - <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> - - >>> head.commit.id - '1c09f116cbc2cb4100fb6935bb162daa4723f455' - """ - - def __init__(self, name, commit): - """ - Initialize a newly instanced Head - - `name` - is the name of the head - - `commit` - is the Commit object that the head points to - """ - self.name = name - self.commit = commit - - @classmethod - def find_all(cls, repo, **kwargs): - """ - Find all Heads in the repository - - `repo` - is the Repo - - `kwargs` - Additional options given as keyword arguments, will be passed - to git-for-each-ref - - Returns - git.Head[] - - List is sorted by committerdate - """ - - options = {'sort': "committerdate", - 'format': "%(refname)%00%(objectname)"} - options.update(kwargs) - - output = repo.git.for_each_ref("refs/heads", **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out head information into a list of head objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - - Returns - git.Head[] - """ - heads = [] - - for line in text.splitlines(): - heads.append(cls.from_string(repo, line)) - - return heads - - @classmethod - def from_string(cls, repo, line): - """ - Create a new Head instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted head information - - Format:: - - name: [a-zA-Z_/]+ - <null byte> - id: [0-9A-Fa-f]{40} - - Returns - git.Head - """ - full_name, ids = line.split("\x00") - - if full_name.startswith('refs/heads/'): - name = full_name[len('refs/heads/'):] - else: - name = full_name - - c = commit.Commit(repo, id=ids) - return Head(name, c) - - def __repr__(self): - return '<git.Head "%s">' % self.name diff --git a/lib/git/lazy.py b/lib/git/lazy.py deleted file mode 100644 index 5e470181..00000000 --- a/lib/git/lazy.py +++ /dev/null @@ -1,32 +0,0 @@ -# lazy.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -class LazyMixin(object): - lazy_properties = [] - - def __init__(self): - self.__baked__ = False - - def __getattribute__(self, attr): - val = object.__getattribute__(self, attr) - if val is not None: - return val - else: - self.__prebake__() - return object.__getattribute__(self, attr) - - def __bake__(self): - """ This method should be overridden in the derived class. """ - raise NotImplementedError(" '__bake__' method has not been implemented.") - - def __prebake__(self): - if self.__baked__: - return - self.__bake__() - self.__baked__ = True - - def __bake_it__(self): - self.__baked__ = True diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py new file mode 100644 index 00000000..39e650b7 --- /dev/null +++ b/lib/git/objects/__init__.py @@ -0,0 +1,11 @@ +""" +Import all submodules main classes into the package space +""" +import inspect +from tag import * +from blob import * +from tree import * +from commit import * + +__all__ = [ name for name, obj in locals().items() + if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py new file mode 100644 index 00000000..07538ada --- /dev/null +++ b/lib/git/objects/base.py @@ -0,0 +1,151 @@ +# base.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +import os +from git.utils import LazyMixin + +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + +class Object(LazyMixin): + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + """ + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "id", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.id = id + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. + + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + hexsha, typename, self.size = self.repo.git.get_object_header(self.id) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + elif attr == "data": + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.id == other.id + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.id != other.id + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.id) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.id + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '<git.%s "%s">' % (self.__class__.__name__, self.id) + + +class IndexObject(Object): + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, id, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in + + ``id`` : string + is the git object id as hex sha + + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration + + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, id) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 3 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-3:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py new file mode 100644 index 00000000..88ca73d6 --- /dev/null +++ b/lib/git/objects/blob.py @@ -0,0 +1,36 @@ +# blob.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import mimetypes +import base + +class Blob(base.IndexObject): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = "blob" + + __slots__ = tuple() + + + @property + def mime_type(self): + """ + The mime type of this file (based on the filename) + + Returns + str + + NOTE + Defaults to 'text/plain' in case the actual file type is unknown. + """ + guesses = None + if self.path: + guesses = mimetypes.guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE + + + def __repr__(self): + return '<git.Blob "%s">' % self.id diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py new file mode 100644 index 00000000..101014ab --- /dev/null +++ b/lib/git/objects/commit.py @@ -0,0 +1,299 @@ +# commit.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import re +import time +from git.utils import Iterable +from git.actor import Actor +import git.diff as diff +import git.stats as stats +from tree import Tree +import base + +class Commit(base.Object, Iterable): + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + # precompiled regex + re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') + + # object configuration + type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") + + def __init__(self, repo, id, tree=None, author=None, authored_date=None, + committer=None, committed_date=None, message=None, parents=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + ``id`` + is the sha id of the commit or a ref + + ``parents`` : tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + + ``tree`` : Tree + is the corresponding tree id or an actual Tree + + ``author`` : Actor + is the author string ( will be implicitly converted into an Actor object ) + + ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) + is the authored DateTime + + ``committer`` : Actor + is the committer string + + ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the committed DateTime + + ``message`` : string + is the commit message + + Returns + git.Commit + """ + super(Commit,self).__init__(repo, id) + self._set_self_from_args_(locals()) + + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.id and tree is not None: + self.tree = Tree(repo, id=tree, path='') + # END id to tree conversion + + def _set_cache_(self, attr): + """ + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. + """ + if attr in Commit.__slots__: + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.id) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. + """ + return self.message.split('\n', 1)[0] + + @classmethod + def count(cls, repo, ref, path=''): + """ + Count the number of commits reachable from this ref + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path + + Returns + int + """ + return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) + + @classmethod + def iter_items(cls, repo, ref, path='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1, Head or name) + + ``path`` + is an optinal path, if set only Commits that include the path + will be considered + + ``kwargs`` + optional keyword arguments to git where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + + Returns + iterator yielding Commit items + """ + options = {'pretty': 'raw', 'as_process' : True } + options.update(kwargs) + + # the test system might confront us with string values - + proc = repo.git.rev_list(ref, '--', path, **options) + return cls._iter_from_process_or_stream(repo, proc) + + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``proc`` + git-rev-list process instance (raw format) + + Returns + iterator returning Commit objects + """ + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] + + parents = [] + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = cls._actor(next_line) + committer, committed_date = cls._actor(stream.next()) + + # empty line + stream.next() + + message_lines = [] + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) + # END while there are message lines + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream + + @classmethod + def diff(cls, repo, a, b=None, paths=None): + """ + Creates diffs between a tree and the index or between two trees: + + ``repo`` + is the Repo + + ``a`` + is a named commit + + ``b`` + is an optional named commit. Passing a list assumes you + wish to omit the second named commit and limit the diff to the + given paths. + + ``paths`` + is a list of paths to limit the diff to. + + Returns + git.Diff[]:: + + between tree and the index if only a is given + between two trees if a and b are given and are commits + """ + paths = paths or [] + + if isinstance(b, list): + paths = b + b = None + + if paths: + paths.insert(0, "--") + + if b: + paths.insert(0, b) + paths.insert(0, a) + text = repo.git.diff('-M', full_index=True, *paths) + return diff.Diff._list_from_string(repo, text) + + @property + def diffs(self): + """ + Returns + git.Diff[] + Diffs between this commit and its first parent or all changes if this + commit is the first commit and has no parent. + """ + if not self.parents: + d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') + return diff.Diff._list_from_string(self.repo, d) + else: + return self.diff(self.repo, self.parents[0].id, self.id) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. + + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) + return stats.Stats._list_from_string(self.repo, text) + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.id + + def __repr__(self): + return '<git.Commit "%s">' % self.id + + @classmethod + def _actor(cls, line): + """ + Parse out the actor (author or committer) info + + Returns + [Actor, gmtime(acted at time)] + """ + m = cls.re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor._from_string(actor), time.gmtime(int(epoch))) diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py new file mode 100644 index 00000000..ecf6349d --- /dev/null +++ b/lib/git/objects/tag.py @@ -0,0 +1,70 @@ +# objects.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module containing all object based types. +""" +import base +import commit +from utils import get_object_type_by_name + +class TagObject(base.Object): + """ + Non-Lightweight tag carrying additional information about an object we are pointing + to. + """ + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) + + def __init__(self, repo, id, object=None, tag=None, + tagger=None, tagged_date=None, message=None): + """ + Initialize a tag object with additional data + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + + ``object`` + Object instance of object we are pointing to + + ``tag`` + name of this tag + + ``tagger`` + Actor identifying the tagger + + ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the DateTime of the tag creation + """ + super(TagObject, self).__init__(repo, id ) + self._set_self_from_args_(locals()) + + def _set_cache_(self, attr): + """ + Cache all our attributes at once + """ + if attr in TagObject.__slots__: + lines = self.data.splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = get_object_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3][7:]# tagger <actor> <date> + self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + + # line 4 empty - check git source to figure out purpose + self.message = "\n".join(lines[5:]) + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + + diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py new file mode 100644 index 00000000..abfa9622 --- /dev/null +++ b/lib/git/objects/tree.py @@ -0,0 +1,242 @@ +# tree.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import os +import blob +import base +import binascii + +def sha_to_hex(sha): + """Takes a string and returns the hex of the sha within""" + hexsha = binascii.hexlify(sha) + assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha + return hexsha + +class Tree(base.IndexObject): + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ + + type = "tree" + __slots__ = "_cache" + + # using ascii codes for comparison + ascii_commit_id = (0x31 << 4) + 0x36 + ascii_blob_id = (0x31 << 4) + 0x30 + ascii_tree_id = (0x34 << 4) + 0x30 + + + def __init__(self, repo, id, mode=0, path=None): + super(Tree, self).__init__(repo, id, mode, path) + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache() + else: + super(Tree, self)._set_cache_(attr) + + def _get_tree_cache(self): + """ + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for obj in self._iter_from_data(): + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + def _iter_from_data(self): + """ + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + + Returns + list(IndexObject, ...) + """ + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + mode_boundary = i + 6 + + # keep it ascii - we compare against the respective values + type_id = (ord(data[i])<<4) + ord(data[i+1]) + i += 2 + + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.ascii_blob_id: + yield blob.Blob(self.repo, hexsha, mode, name) + elif type_id == self.ascii_tree_id: + yield Tree(self.repo, hexsha, mode, name) + elif type_id == self.ascii_commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data: %i" % type_id ) + # END for each byte in data stream + + + def __div__(self, file): + """ + Find the named object in this tree's contents + + Examples:: + + >>> Repo('/path/to/python-git').tree/'lib' + <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> + >>> Repo('/path/to/python-git').tree/'README.txt' + <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> + + Returns + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree + """ + return self[file] + + + def __repr__(self): + return '<git.Tree "%s">' % self.id + + @classmethod + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + + for obj in tree: + # adjust path to be complete + obj.path = os.path.join(tree.path, obj.path) + if not predicate(obj): + continue + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + yield recursive_obj + # END for each recursive object + # END if we may enter recursion + # END for each object + + def traverse(self, max_depth=-1, predicate = lambda i: True): + """ + Returns + Iterator to traverse the tree recursively up to the given level. + The iterator returns Blob and Tree objects + + ``max_depth`` + + if -1, the whole tree will be traversed + if 0, only the first level will be traversed which is the same as + the default non-recursive iterator + + ``predicate`` + + If predicate(item) returns True, item will be returned by iterator + """ + return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] + + + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + + def __iter__(self): + return iter(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + for obj in self._cache: + if obj.path == item: + return obj + # END for each obj + raise KeyError( "Blob or Tree named %s not found" % item ) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py new file mode 100644 index 00000000..15c1d114 --- /dev/null +++ b/lib/git/objects/utils.py @@ -0,0 +1,36 @@ +# util.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module for general utility functions +""" +import commit, tag, blob, tree + +def get_object_type_by_name(object_type_name): + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) diff --git a/lib/git/refs.py b/lib/git/refs.py new file mode 100644 index 00000000..3c9eb817 --- /dev/null +++ b/lib/git/refs.py @@ -0,0 +1,238 @@ +# refs.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module containing all ref based objects +""" +from objects.base import Object +from objects.utils import get_object_type_by_name +from utils import LazyMixin, Iterable + +class Ref(LazyMixin, Iterable): + """ + Represents a named reference to any object + """ + __slots__ = ("repo", "path") + + def __init__(self, repo, path, object = None): + """ + Initialize this instance + ``repo`` + Our parent repository + + ``path`` + Path relative to the .git/ directory pointing to the ref in question, i.e. + refs/heads/master + + ``object`` + Object instance, will be retrieved on demand if None + """ + self.repo = repo + self.path = path + if object is not None: + self.object = object + + def __str__(self): + return self.name + + def __repr__(self): + return '<git.%s "%s">' % (self.__class__.__name__, self.path) + + def __eq__(self, other): + return self.path == other.path and self.object == other.object + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.path) + + @property + def name(self): + """ + Returns + (shortest) Name of this reference - it may contain path components + """ + # first two path tokens are can be removed as they are + # refs/heads or refs/tags or refs/remotes + tokens = self.path.split('/') + if len(tokens) < 3: + return self.path # could be refs/HEAD + + return '/'.join(tokens[2:]) + + @property + def object(self): + """ + Returns + The object our ref currently refers to. Refs can be cached, they will + always point to the actual object as it gets re-created on each query + """ + # have to be dynamic here as we may be a tag which can point to anything + hexsha, typename, size = self.repo.git.get_object_header(self.path) + return get_object_type_by_name(typename)(self.repo, hexsha) + + @classmethod + def iter_items(cls, repo, common_path = "refs", **kwargs): + """ + Find all refs in the repository + + ``repo`` + is the Repo + + ``common_path`` + Optional keyword argument to the path which is to be shared by all + returned Ref objects + + ``kwargs`` + Additional options given as keyword arguments, will be passed + to git-for-each-ref + + Returns + git.Ref[] + + List is sorted by committerdate + The returned objects are compatible to the Ref base, but represent the + actual type, such as Head or Tag + """ + + options = {'sort': "committerdate", + 'format': "%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"} + + options.update(kwargs) + + output = repo.git.for_each_ref(common_path, **options) + return cls._iter_from_stream(repo, iter(output.splitlines())) + + @classmethod + def _iter_from_stream(cls, repo, stream): + """ Parse out ref information into a list of Ref compatible objects + Returns git.Ref[] list of Ref objects """ + heads = [] + + for line in stream: + heads.append(cls._from_string(repo, line)) + + return heads + + @classmethod + def _from_string(cls, repo, line): + """ Create a new Ref instance from the given string. + Format + name: [a-zA-Z_/]+ + <null byte> + id: [0-9A-Fa-f]{40} + Returns git.Head """ + full_path, hexsha, type_name, object_size = line.split("\x00") + + # No, we keep the object dynamic by allowing it to be retrieved by + # our path on demand - due to perstent commands it is fast. + # This reduces the risk that the object does not match + # the changed ref anymore in case it changes in the meanwhile + return cls(repo, full_path) + + # obj = get_object_type_by_name(type_name)(repo, hexsha) + # obj.size = object_size + # return cls(repo, full_path, obj) + + +class Head(Ref): + """ + A Head is a named reference to a Commit. Every Head instance contains a name + and a Commit object. + + Examples:: + + >>> repo = Repo("/path/to/repo") + >>> head = repo.heads[0] + + >>> head.name + 'master' + + >>> head.commit + <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> + + >>> head.commit.id + '1c09f116cbc2cb4100fb6935bb162daa4723f455' + """ + + @property + def commit(self): + """ + Returns + Commit object the head points to + """ + return self.object + + @classmethod + def iter_items(cls, repo, common_path = "refs/heads", **kwargs): + """ + Returns + Iterator yielding Head items + + For more documentation, please refer to git.base.Ref.list_items + """ + return super(Head,cls).iter_items(repo, common_path, **kwargs) + + def __repr__(self): + return '<git.Head "%s">' % self.name + + + +class TagRef(Ref): + """ + Class representing a lightweight tag reference which either points to a commit + or to a tag object. In the latter case additional information, like the signature + or the tag-creator, is available. + + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagRef.list_items(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message + """ + + __slots__ = tuple() + + @property + def commit(self): + """ + Returns + Commit object the tag ref points to + """ + if self.object.type == "commit": + return self.object + elif self.object.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return self.object.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + + @property + def tag(self): + """ + Returns + Tag object this tag ref points to or None in case + we are a light weight tag + """ + if self.object.type == "tag": + return self.object + return None + + @classmethod + def iter_items(cls, repo, common_path = "refs/tags", **kwargs): + """ + Returns + Iterator yielding commit items + + For more documentation, please refer to git.base.Ref.list_items + """ + return super(TagRef,cls).iter_items(repo, common_path, **kwargs) + + +# provide an alias +Tag = TagRef diff --git a/lib/git/repo.py b/lib/git/repo.py index 1c4b4095..c74c7e8d 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -8,506 +8,588 @@ import os import re import gzip import StringIO +import time + from errors import InvalidGitRepositoryError, NoSuchPathError from utils import touch, is_git_dir from cmd import Git -from head import Head -from blob import Blob -from tag import Tag -from commit import Commit -from tree import Tree +from actor import Actor +from refs import * +from objects import * + class Repo(object): - """ - Represents a git repository and allows you to query references, - gather commit information, generate diffs, create and clone repositories query - the log. - """ - DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - - def __init__(self, path=None): - """ - Create a new Repo instance - - ``path`` - is the path to either the root git directory or the bare git repo - - Examples:: - - repo = Repo("/Users/mtrier/Development/git-python") - repo = Repo("/Users/mtrier/Development/git-python.git") - - Raises - InvalidGitRepositoryError or NoSuchPathError - - Returns - ``git.Repo`` - """ - - epath = os.path.abspath(os.path.expanduser(path or os.getcwd())) - - if not os.path.exists(epath): - raise NoSuchPathError(epath) - - self.path = None - curpath = epath - while curpath: - if is_git_dir(curpath): - self.bare = True - self.path = curpath - self.wd = curpath - break - gitpath = os.path.join(curpath, '.git') - if is_git_dir(gitpath): - self.bare = False - self.path = gitpath - self.wd = curpath - break - curpath, dummy = os.path.split(curpath) - if not dummy: - break - - if self.path is None: - raise InvalidGitRepositoryError(epath) - - self.git = Git(self.wd) - - # Description property - def _get_description(self): - filename = os.path.join(self.path, 'description') - return file(filename).read().rstrip() - - def _set_description(self, descr): - filename = os.path.join(self.path, 'description') - file(filename, 'w').write(descr+'\n') - - description = property(_get_description, _set_description, - doc="the project's description") - del _get_description - del _set_description - - @property - def heads(self): - """ - A list of ``Head`` objects representing the branch heads in - this repo - - Returns - ``git.Head[]`` - """ - return Head.find_all(self) - - # alias heads - branches = heads - - @property - def tags(self): - """ - A list of ``Tag`` objects that are available in this repo - - Returns - ``git.Tag[]`` - """ - return Tag.find_all(self) - - def commits(self, start='master', path='', max_count=10, skip=0): - """ - A list of Commit objects representing the history of a given ref/commit - - ``start`` - is the branch/commit name (default 'master') - - ``path`` - is an optional path to limit the returned commits to - Commits that do not contain that path will not be returned. - - ``max_count`` - is the maximum number of commits to return (default 10) + """ + Represents a git repository and allows you to query references, + gather commit information, generate diffs, create and clone repositories query + the log. + """ + DAEMON_EXPORT_FILE = 'git-daemon-export-ok' + + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + + def __init__(self, path=None): + """ + Create a new Repo instance + + ``path`` + is the path to either the root git directory or the bare git repo + + Examples:: + + repo = Repo("/Users/mtrier/Development/git-python") + repo = Repo("/Users/mtrier/Development/git-python.git") + + Raises + InvalidGitRepositoryError or NoSuchPathError + + Returns + ``git.Repo`` + """ + + epath = os.path.abspath(os.path.expanduser(path or os.getcwd())) + + if not os.path.exists(epath): + raise NoSuchPathError(epath) + + self.path = None + curpath = epath + while curpath: + if is_git_dir(curpath): + self.bare = True + self.path = curpath + self.wd = curpath + break + gitpath = os.path.join(curpath, '.git') + if is_git_dir(gitpath): + self.bare = False + self.path = gitpath + self.wd = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + + if self.path is None: + raise InvalidGitRepositoryError(epath) + + self.git = Git(self.wd) + + # Description property + def _get_description(self): + filename = os.path.join(self.path, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = os.path.join(self.path, 'description') + file(filename, 'w').write(descr+'\n') + + description = property(_get_description, _set_description, + doc="the project's description") + del _get_description + del _set_description + + @property + def heads(self): + """ + A list of ``Head`` objects representing the branch heads in + this repo + + Returns + ``git.Head[]`` + """ + return Head.list_items(self) + + # alias heads + branches = heads + + @property + def tags(self): + """ + A list of ``Tag`` objects that are available in this repo + + Returns + ``git.Tag[]`` + """ + return Tag.list_items(self) + + def blame(self, commit, file): + """ + The blame information for the given file at the given commit + + Returns + list: [git.Commit, list: [<line>]] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. The Commit objects will be given in order + of appearance. + """ + data = self.git.blame(commit, '--', file, p=True) + commits = {} + blames = [] + info = None + + for line in data.splitlines(False): + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + if self.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = self.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail <tom@mojombo.com> + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail <tom@mojombo.com> + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = time.gmtime(int(parts[-1])) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # <and rest> + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( self, id=sha, + author=Actor._from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = self.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames + + def commits(self, start='master', path='', max_count=None, skip=0): + """ + A list of Commit objects representing the history of a given ref/commit + + ``start`` + is the branch/commit name (default 'master') + + ``path`` + is an optional path to limit the returned commits to + Commits that do not contain that path will not be returned. + + ``max_count`` + is the maximum number of commits to return (default None) + + ``skip`` + is the number of commits to skip (default 0) which will effectively + move your commit-window by the given number. + + Returns + ``git.Commit[]`` + """ + options = {'max_count': max_count, + 'skip': skip} + + if max_count is None: + options.pop('max_count') + + return Commit.list_items(self, start, path, **options) + + def commits_between(self, frm, to): + """ + The Commits objects that are reachable via ``to`` but not via ``frm`` + Commits are returned in chronological order. + + ``from`` + is the branch/commit name of the younger item + + ``to`` + is the branch/commit name of the older item + + Returns + ``git.Commit[]`` + """ + return reversed(Commit.list_items(self, "%s..%s" % (frm, to))) + + def commits_since(self, start='master', path='', since='1970-01-01'): + """ + The Commits objects that are newer than the specified date. + Commits are returned in chronological order. + + ``start`` + is the branch/commit name (default 'master') + + ``path`` + is an optinal path to limit the returned commits to. + + + ``since`` + is a string represeting a date/time - ``skip`` - is the number of commits to skip (default 0) which will effectively - move your commit-window by the given number. + Returns + ``git.Commit[]`` + """ + options = {'since': since} - Returns - ``git.Commit[]`` - """ - options = {'max_count': max_count, - 'skip': skip} + return Commit.list_items(self, start, path, **options) - return Commit.find_all(self, start, path, **options) + def commit_count(self, start='master', path=''): + """ + The number of commits reachable by the given branch/commit - def commits_between(self, frm, to): - """ - The Commits objects that are reachable via ``to`` but not via ``frm`` - Commits are returned in chronological order. + ``start`` + is the branch/commit name (default 'master') - ``from`` - is the branch/commit name of the younger item + ``path`` + is an optional path + Commits that do not contain the path will not contribute to the count. - ``to`` - is the branch/commit name of the older item - - Returns - ``git.Commit[]`` - """ - return reversed(Commit.find_all(self, "%s..%s" % (frm, to))) - - def commits_since(self, start='master', path='', since='1970-01-01'): - """ - The Commits objects that are newer than the specified date. - Commits are returned in chronological order. + Returns + ``int`` + """ + return Commit.count(self, start, path) - ``start`` - is the branch/commit name (default 'master') + def commit(self, id=None, path = ''): + """ + The Commit object for the specified id - ``path`` - is an optinal path to limit the returned commits to. - + ``id`` + is the SHA1 identifier of the commit or a ref or a ref name + if None, it defaults to the active branch + - ``since`` - is a string represeting a date/time + ``path`` + is an optional path, if set the returned commit must contain the path. - Returns - ``git.Commit[]`` - """ - options = {'since': since} + Returns + ``git.Commit`` + """ + if id is None: + id = self.active_branch + options = {'max_count': 1} - return Commit.find_all(self, start, path, **options) - - def commit_count(self, start='master', path=''): - """ - The number of commits reachable by the given branch/commit - - ``start`` - is the branch/commit name (default 'master') + commits = Commit.list_items(self, id, path, **options) - ``path`` - is an optional path - Commits that do not contain the path will not contribute to the count. - - Returns - ``int`` - """ - return Commit.count(self, start, path) - - def commit(self, id, path = ''): - """ - The Commit object for the specified id - - ``id`` - is the SHA1 identifier of the commit - - ``path`` - is an optional path, if set the returned commit must contain the path. - - Returns - ``git.Commit`` - """ - options = {'max_count': 1} - - commits = Commit.find_all(self, id, path, **options) - - if not commits: - raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) - return commits[0] - - def commit_deltas_from(self, other_repo, ref='master', other_ref='master'): - """ - Returns a list of commits that is in ``other_repo`` but not in self - - Returns - git.Commit[] - """ - repo_refs = self.git.rev_list(ref, '--').strip().splitlines() - other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() + if not commits: + raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) + return commits[0] - diff_refs = list(set(other_repo_refs) - set(repo_refs)) - return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) + def commit_deltas_from(self, other_repo, ref='master', other_ref='master'): + """ + Returns a list of commits that is in ``other_repo`` but not in self - def tree(self, treeish='master'): - """ - The Tree object for the given treeish reference + Returns + git.Commit[] + """ + repo_refs = self.git.rev_list(ref, '--').strip().splitlines() + other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() - ``treeish`` - is the reference (default 'master') + diff_refs = list(set(other_repo_refs) - set(repo_refs)) + return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs) - Examples:: + def tree(self, treeish=None): + """ + The Tree object for the given treeish reference - repo.tree('master') + ``treeish`` + is a Ref instance defaulting to the active_branch if None. + Examples:: + + repo.tree(repo.heads[0]) + + Returns + ``git.Tree`` + + NOTE + A ref is requried here to assure you point to a commit or tag. Otherwise + it is not garantueed that you point to the root-level tree. + + If you need a non-root level tree, find it by iterating the root tree. + """ + if treeish is None: + treeish = self.active_branch + if not isinstance(treeish, Ref): + raise ValueError( "Treeish reference required, got %r" % treeish ) + + + # As we are directly reading object information, we must make sure + # we truly point to a tree object. We resolve the ref to a sha in all cases + # to assure the returned tree can be compared properly. Except for + # heads, ids should always be hexshas + hexsha, typename, size = self.git.get_object_header( treeish ) + if typename != "tree": + hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' ) + # END tree handling + treeish = hexsha + + # the root has an empty relative path and the default mode + return Tree(self, treeish, 0, '') + + + def diff(self, a, b, *paths): + """ + The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) + + ``a`` + is the base commit + ``b`` + is the other commit + + ``paths`` + is an optional list of file paths on which to restrict the diff + + Returns + ``str`` + """ + return self.git.diff(a, b, '--', *paths) - Returns - ``git.Tree`` - """ - return Tree(self, id=treeish) + def commit_diff(self, commit): + """ + The commit diff for the given commit + ``commit`` is the commit name/id - def blob(self, id): - """ - The Blob object for the given id - - ``id`` - is the SHA1 id of the blob - - Returns - ``git.Blob`` - """ - return Blob(self, id=id) - - def log(self, commit='master', path=None, **kwargs): - """ - The Commit for a treeish, and all commits leading to it. - - ``kwargs`` - keyword arguments specifying flags to be used in git-log command, - i.e.: max_count=1 to limit the amount of commits returned + Returns + ``git.Diff[]`` + """ + return Commit.diff(self, commit) - Returns - ``git.Commit[]`` - """ - options = {'pretty': 'raw'} - options.update(kwargs) - arg = [commit, '--'] - if path: - arg.append(path) - commits = self.git.log(*arg, **options) - return Commit.list_from_string(self, commits) + @classmethod + def init_bare(self, path, mkdir=True, **kwargs): + """ + Initialize a bare git repository at the given path - def diff(self, a, b, *paths): - """ - The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) - - ``a`` - is the base commit - ``b`` - is the other commit + ``path`` + is the full path to the repo (traditionally ends with /<name>.git) - ``paths`` - is an optional list of file paths on which to restrict the diff - - Returns - ``str`` - """ - return self.git.diff(a, b, '--', *paths) + ``mkdir`` + if specified will create the repository directory if it doesn't + already exists. Creates the directory with a mode=0755. - def commit_diff(self, commit): - """ - The commit diff for the given commit - ``commit`` is the commit name/id + ``kwargs`` + keyword arguments serving as additional options to the git init command - Returns - ``git.Diff[]`` - """ - return Commit.diff(self, commit) + Examples:: - @classmethod - def init_bare(self, path, mkdir=True, **kwargs): - """ - Initialize a bare git repository at the given path + git.Repo.init_bare('/var/git/myrepo.git') - ``path`` - is the full path to the repo (traditionally ends with /<name>.git) - - ``mkdir`` - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - - ``kwargs`` - keyword arguments serving as additional options to the git init command - - Examples:: - - git.Repo.init_bare('/var/git/myrepo.git') - - Returns - ``git.Repo`` (the newly created repo) - """ - - if mkdir and not os.path.exists(path): - os.makedirs(path, 0755) - - git = Git(path) - output = git.init('--bare', **kwargs) - return Repo(path) - create = init_bare - - def fork_bare(self, path, **kwargs): - """ - Fork a bare git repository from this repo - - ``path`` - is the full path of the new repo (traditionally ends with /<name>.git) - - ``kwargs`` - keyword arguments to be given to the git clone command - - Returns - ``git.Repo`` (the newly forked repo) - """ - options = {'bare': True} - options.update(kwargs) - self.git.clone(self.path, path, **options) - return Repo(path) - - def archive_tar(self, treeish='master', prefix=None): - """ - Archive the given treeish - - ``treeish`` - is the treeish name/id (default 'master') - - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - Examples:: - - >>> repo.archive_tar - <String containing tar archive> - - >>> repo.archive_tar('a87ff14') - <String containing tar archive for commit a87ff14> - - >>> repo.archive_tar('master', 'myproject/') - <String containing tar bytes archive, whose files are prefixed with 'myproject/'> - - Returns - str (containing bytes of tar archive) - """ - options = {} - if prefix: - options['prefix'] = prefix - return self.git.archive(treeish, **options) - - def archive_tar_gz(self, treeish='master', prefix=None): - """ - Archive and gzip the given treeish - - ``treeish`` - is the treeish name/id (default 'master') - - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - Examples:: - - >>> repo.archive_tar_gz - <String containing tar.gz archive> - - >>> repo.archive_tar_gz('a87ff14') - <String containing tar.gz archive for commit a87ff14> - - >>> repo.archive_tar_gz('master', 'myproject/') - <String containing tar.gz archive and prefixed with 'myproject/'> - - Returns - str (containing the bytes of tar.gz archive) - """ - kwargs = {} - if prefix: - kwargs['prefix'] = prefix - resultstr = self.git.archive(treeish, **kwargs) - sio = StringIO.StringIO() - gf = gzip.GzipFile(fileobj=sio, mode ='wb') - gf.write(resultstr) - gf.close() - return sio.getvalue() - - def _get_daemon_export(self): - filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) - return os.path.exists(filename) - - def _set_daemon_export(self, value): - filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) - fileexists = os.path.exists(filename) - if value and not fileexists: - touch(filename) - elif not value and fileexists: - os.unlink(filename) - - daemon_export = property(_get_daemon_export, _set_daemon_export, - doc="If True, git-daemon may export this repository") - del _get_daemon_export - del _set_daemon_export - - def _get_alternates(self): - """ - The list of alternates for this repo from which objects can be retrieved - - Returns - list of strings being pathnames of alternates - """ - alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates') - - if os.path.exists(alternates_path): - try: - f = open(alternates_path) - alts = f.read() - finally: - f.close() - return alts.strip().splitlines() - else: - return [] - - def _set_alternates(self, alts): - """ - Sets the alternates - - ``alts`` - is the array of string paths representing the alternates at which - git should look for objects, i.e. /home/user/repo/.git/objects + Returns + ``git.Repo`` (the newly created repo) + """ + + if mkdir and not os.path.exists(path): + os.makedirs(path, 0755) + + git = Git(path) + output = git.init('--bare', **kwargs) + return Repo(path) + create = init_bare + + def fork_bare(self, path, **kwargs): + """ + Fork a bare git repository from this repo + + ``path`` + is the full path of the new repo (traditionally ends with /<name>.git) + + ``kwargs`` + keyword arguments to be given to the git clone command + + Returns + ``git.Repo`` (the newly forked repo) + """ + options = {'bare': True} + options.update(kwargs) + self.git.clone(self.path, path, **options) + return Repo(path) + + def archive_tar(self, treeish='master', prefix=None): + """ + Archive the given treeish + + ``treeish`` + is the treeish name/id (default 'master') + + ``prefix`` + is the optional prefix to prepend to each filename in the archive + + Examples:: + + >>> repo.archive_tar + <String containing tar archive> + + >>> repo.archive_tar('a87ff14') + <String containing tar archive for commit a87ff14> + + >>> repo.archive_tar('master', 'myproject/') + <String containing tar bytes archive, whose files are prefixed with 'myproject/'> + + Returns + str (containing bytes of tar archive) + """ + options = {} + if prefix: + options['prefix'] = prefix + return self.git.archive(treeish, **options) + + def archive_tar_gz(self, treeish='master', prefix=None): + """ + Archive and gzip the given treeish + + ``treeish`` + is the treeish name/id (default 'master') + + ``prefix`` + is the optional prefix to prepend to each filename in the archive + + Examples:: + + >>> repo.archive_tar_gz + <String containing tar.gz archive> + + >>> repo.archive_tar_gz('a87ff14') + <String containing tar.gz archive for commit a87ff14> + + >>> repo.archive_tar_gz('master', 'myproject/') + <String containing tar.gz archive and prefixed with 'myproject/'> + + Returns + str (containing the bytes of tar.gz archive) + """ + kwargs = {} + if prefix: + kwargs['prefix'] = prefix + resultstr = self.git.archive(treeish, **kwargs) + sio = StringIO.StringIO() + gf = gzip.GzipFile(fileobj=sio, mode ='wb') + gf.write(resultstr) + gf.close() + return sio.getvalue() + + def _get_daemon_export(self): + filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) + return os.path.exists(filename) + + def _set_daemon_export(self, value): + filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) + fileexists = os.path.exists(filename) + if value and not fileexists: + touch(filename) + elif not value and fileexists: + os.unlink(filename) + + daemon_export = property(_get_daemon_export, _set_daemon_export, + doc="If True, git-daemon may export this repository") + del _get_daemon_export + del _set_daemon_export + + def _get_alternates(self): + """ + The list of alternates for this repo from which objects can be retrieved + + Returns + list of strings being pathnames of alternates + """ + alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates') + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return [] + + def _set_alternates(self, alts): + """ + Sets the alternates + + ``alts`` + is the array of string paths representing the alternates at which + git should look for objects, i.e. /home/user/repo/.git/objects Raises NoSuchPathError - Returns - None - """ - for alt in alts: - if not os.path.exists(alt): - raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt) - - if not alts: - os.remove(os.path.join(self.path, 'objects', 'info', 'alternates')) - else: - try: - f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w') - f.write("\n".join(alts)) - finally: - f.close() - - alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") - - @property - def is_dirty(self): - """ - Return the status of the index. - - Returns - ``True``, if the index has any uncommitted changes, - otherwise ``False`` + Returns + None + """ + for alt in alts: + if not os.path.exists(alt): + raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt) + + if not alts: + os.remove(os.path.join(self.path, 'objects', 'info', 'alternates')) + else: + try: + f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w') + f.write("\n".join(alts)) + finally: + f.close() + + alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") + + @property + def is_dirty(self): + """ + Return the status of the index. + + Returns + ``True``, if the index has any uncommitted changes, + otherwise ``False`` NOTE Working tree changes that have not been staged will not be detected ! - """ - if self.bare: - # Bare repositories with no associated working directory are - # always consired to be clean. - return False - - return len(self.git.diff('HEAD', '--').strip()) > 0 - - @property - def active_branch(self): - """ - The name of the currently active branch. - - Returns - str (the branch name) - """ - branch = self.git.symbolic_ref('HEAD').strip() - if branch.startswith('refs/heads/'): - branch = branch[len('refs/heads/'):] - - return branch - - def __repr__(self): - return '<git.Repo "%s">' % self.path + """ + if self.bare: + # Bare repositories with no associated working directory are + # always consired to be clean. + return False + + return len(self.git.diff('HEAD', '--').strip()) > 0 + + @property + def active_branch(self): + """ + The name of the currently active branch. + + Returns + Head to the active branch + """ + return Head( self, self.git.symbolic_ref('HEAD').strip() ) + + def __repr__(self): + return '<git.Repo "%s">' % self.path diff --git a/lib/git/stats.py b/lib/git/stats.py index 307e2f2f..bda4e539 100644 --- a/lib/git/stats.py +++ b/lib/git/stats.py @@ -5,55 +5,56 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php class Stats(object): - """ - Represents stat information as presented by git at the end of a merge. It is - created from the output of a diff operation. - - ``Example``:: - - c = Commit( sha1 ) - s = c.stats - s.total # full-stat-dict - s.files # dict( filepath : stat-dict ) - - ``stat-dict`` - - A dictionary with the following keys and values:: - - deletions = number of deleted lines as int - insertions = number of inserted lines as int - lines = total number of lines changed as int, or deletions + insertions - - ``full-stat-dict`` - - In addition to the items in the stat-dict, it features additional information:: - - files = number of changed files as int - - """ - def __init__(self, repo, total, files): - self.repo = repo - self.total = total - self.files = files + """ + Represents stat information as presented by git at the end of a merge. It is + created from the output of a diff operation. + + ``Example``:: + + c = Commit( sha1 ) + s = c.stats + s.total # full-stat-dict + s.files # dict( filepath : stat-dict ) + + ``stat-dict`` + + A dictionary with the following keys and values:: + + deletions = number of deleted lines as int + insertions = number of inserted lines as int + lines = total number of lines changed as int, or deletions + insertions + + ``full-stat-dict`` + + In addition to the items in the stat-dict, it features additional information:: + + files = number of changed files as int + + """ + __slots__ = ("total", "files") + + def __init__(self, total, files): + self.total = total + self.files = files - @classmethod - def list_from_string(cls, repo, text): - """ - Create a Stat object from output retrieved by git-diff. - - Returns - git.Stat - """ - hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}} - for line in text.splitlines(): - (raw_insertions, raw_deletions, filename) = line.split("\t") - insertions = raw_insertions != '-' and int(raw_insertions) or 0 - deletions = raw_deletions != '-' and int(raw_deletions) or 0 - hsh['total']['insertions'] += insertions - hsh['total']['deletions'] += deletions - hsh['total']['lines'] += insertions + deletions - hsh['total']['files'] += 1 - hsh['files'][filename.strip()] = {'insertions': insertions, - 'deletions': deletions, - 'lines': insertions + deletions} - return Stats(repo, hsh['total'], hsh['files']) + @classmethod + def _list_from_string(cls, repo, text): + """ + Create a Stat object from output retrieved by git-diff. + + Returns + git.Stat + """ + hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}} + for line in text.splitlines(): + (raw_insertions, raw_deletions, filename) = line.split("\t") + insertions = raw_insertions != '-' and int(raw_insertions) or 0 + deletions = raw_deletions != '-' and int(raw_deletions) or 0 + hsh['total']['insertions'] += insertions + hsh['total']['deletions'] += deletions + hsh['total']['lines'] += insertions + deletions + hsh['total']['files'] += 1 + hsh['files'][filename.strip()] = {'insertions': insertions, + 'deletions': deletions, + 'lines': insertions + deletions} + return Stats(hsh['total'], hsh['files']) diff --git a/lib/git/tag.py b/lib/git/tag.py deleted file mode 100644 index 8413ce73..00000000 --- a/lib/git/tag.py +++ /dev/null @@ -1,92 +0,0 @@ -# tag.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from commit import Commit - -class Tag(object): - def __init__(self, name, commit): - """ - Initialize a newly instantiated Tag - - ``name`` - is the name of the head - - ``commit`` - is the Commit that the head points to - """ - self.name = name - self.commit = commit - - @classmethod - def find_all(cls, repo, **kwargs): - """ - Find all Tags in the repository - - ``repo`` - is the Repo - - ``kwargs`` - Additional options given as keyword arguments, will be passed - to git-for-each-ref - - Returns - ``git.Tag[]`` - - List is sorted by committerdate - """ - options = {'sort': "committerdate", - 'format': "%(refname)%00%(objectname)"} - options.update(**kwargs) - - output = repo.git.for_each_ref("refs/tags", **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out tag information into an array of Tag objects - - ``repo`` - is the Repo - - ``text`` - is the text output from the git-for-each command - - Returns - git.Tag[] - """ - tags = [] - for line in text.splitlines(): - tags.append(cls.from_string(repo, line)) - return tags - - @classmethod - def from_string(cls, repo, line): - """ - Create a new Tag instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted tag information - - Format:: - - name: [a-zA-Z_/]+ - <null byte> - id: [0-9A-Fa-f]{40} - - Returns - git.Tag - """ - full_name, ids = line.split("\x00") - name = full_name.split("/")[-1] - commit = Commit(repo, id=ids) - return Tag(name, commit) - - def __repr__(self): - return '<git.Tag "%s">' % self.name diff --git a/lib/git/tree.py b/lib/git/tree.py deleted file mode 100644 index cfb0881c..00000000 --- a/lib/git/tree.py +++ /dev/null @@ -1,108 +0,0 @@ -# tree.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os -from lazy import LazyMixin -import blob - -class Tree(LazyMixin): - def __init__(self, repo, id, mode=None, name=None): - LazyMixin.__init__(self) - self.repo = repo - self.id = id - self.mode = mode - self.name = name - self._contents = None - - def __bake__(self): - # Ensure the treeish references directly a tree - treeish = self.id - if not treeish.endswith(':'): - treeish = treeish + ':' - - # Read the tree contents. - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content_from_string(self.repo, line) - if obj is not None: - self._contents[obj.name] = obj - - @staticmethod - def content_from_string(repo, text): - """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - - Returns - ``git.Blob`` or ``git.Tree`` - """ - try: - mode, typ, id, name = text.expandtabs(1).split(" ", 3) - except: - return None - - if typ == "tree": - return Tree(repo, id=id, mode=mode, name=name) - elif typ == "blob": - return blob.Blob(repo, id=id, mode=mode, name=name) - elif typ == "commit": - return None - else: - raise(TypeError, "Invalid type: %s" % typ) - - def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> - >>> Repo('/path/to/python-git').tree/'README.txt' - <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> - - Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found - """ - return self.get(file) - - @property - def basename(self): - os.path.basename(self.name) - - def __repr__(self): - return '<git.Tree "%s">' % self.id - - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. - def __getitem__(self, key): - return self._contents[key] - - def __iter__(self): - return iter(self._contents) - - def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() diff --git a/lib/git/utils.py b/lib/git/utils.py index 5d0ba8ca..f84c247d 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -7,20 +7,83 @@ import os def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') def touch(filename): - os.utime(filename) + os.utime(filename) def is_git_dir(d): - """ This is taken from the git setup.c:is_git_directory - function.""" - - if os.path.isdir(d) and \ - os.path.isdir(os.path.join(d, 'objects')) and \ - os.path.isdir(os.path.join(d, 'refs')): - headref = os.path.join(d, 'HEAD') - return os.path.isfile(headref) or \ - (os.path.islink(headref) and - os.readlink(headref).startswith('refs')) - return False + """ This is taken from the git setup.c:is_git_directory + function.""" + + if os.path.isdir(d) and \ + os.path.isdir(os.path.join(d, 'objects')) and \ + os.path.isdir(os.path.join(d, 'refs')): + headref = os.path.join(d, 'HEAD') + return os.path.isfile(headref) or \ + (os.path.islink(headref) and + os.readlink(headref).startswith('refs')) + return False + + +class LazyMixin(object): + """ + Base class providing an interface to lazily retrieve attribute values upon + first access. If slots are used, memory will only be reserved once the attribute + is actually accessed and retrieved the first time. All future accesses will + return the cached value as stored in the Instance's dict or slot. + """ + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. + """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + + +class Iterable(object): + """ + Defines an interface for iterable items which is to assure a uniform + way to retrieve and iterate items within the git repository + """ + __slots__ = tuple() + + @classmethod + def list_items(cls, repo, *args, **kwargs): + """ + Find all items of this type - subclasses can specify args and kwargs differently. + If no args are given, subclasses are obliged to return all items if no additional + arguments arg given. + + Note: Favor the iter_items method as it will + + Returns: + list(Item,...) list of item instances + """ + return list(cls.iter_items(repo, *args, **kwargs)) + + + @classmethod + def iter_items(cls, repo, *args, **kwargs): + """ + For more information about the arguments, see list_items + Return: + iterator yielding Items + """ + raise NotImplementedError("To be implemented by Subclass") + |