summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/git/__init__.py10
-rw-r--r--lib/git/actor.py70
-rw-r--r--lib/git/blob.py161
-rw-r--r--lib/git/cmd.py507
-rw-r--r--lib/git/commit.py296
-rw-r--r--lib/git/diff.py169
-rw-r--r--lib/git/errors.py34
-rw-r--r--lib/git/head.py118
-rw-r--r--lib/git/lazy.py32
-rw-r--r--lib/git/objects/__init__.py11
-rw-r--r--lib/git/objects/base.py151
-rw-r--r--lib/git/objects/blob.py36
-rw-r--r--lib/git/objects/commit.py299
-rw-r--r--lib/git/objects/tag.py70
-rw-r--r--lib/git/objects/tree.py242
-rw-r--r--lib/git/objects/utils.py36
-rw-r--r--lib/git/refs.py238
-rw-r--r--lib/git/repo.py1010
-rw-r--r--lib/git/stats.py103
-rw-r--r--lib/git/tag.py92
-rw-r--r--lib/git/tree.py108
-rw-r--r--lib/git/utils.py89
22 files changed, 2215 insertions, 1667 deletions
diff --git a/lib/git/__init__.py b/lib/git/__init__.py
index 28d14d0c..6f482128 100644
--- a/lib/git/__init__.py
+++ b/lib/git/__init__.py
@@ -9,19 +9,17 @@ import inspect
__version__ = 'git'
+from git.objects import *
+from git.refs import *
from git.actor import Actor
-from git.blob import Blob
-from git.commit import Commit
from git.diff import Diff
from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError
from git.cmd import Git
-from git.head import Head
from git.repo import Repo
from git.stats import Stats
-from git.tag import Tag
-from git.tree import Tree
from git.utils import dashify
from git.utils import touch
+
__all__ = [ name for name, obj in locals().items()
- if not (name.startswith('_') or inspect.ismodule(obj)) ]
+ if not (name.startswith('_') or inspect.ismodule(obj)) ]
diff --git a/lib/git/actor.py b/lib/git/actor.py
index bc1a4479..fe4a47e5 100644
--- a/lib/git/actor.py
+++ b/lib/git/actor.py
@@ -7,36 +7,40 @@
import re
class Actor(object):
- """Actors hold information about a person acting on the repository. They
- can be committers and authors or anything with a name and an email as
- mentioned in the git log entries."""
- def __init__(self, name, email):
- self.name = name
- self.email = email
-
- def __str__(self):
- return self.name
-
- def __repr__(self):
- return '<git.Actor "%s <%s>">' % (self.name, self.email)
-
- @classmethod
- def from_string(cls, string):
- """
- Create an Actor from a string.
-
- ``str``
- is the string, which is expected to be in regular git format
-
- Format
- John Doe <jdoe@example.com>
-
- Returns
- Actor
- """
- if re.search(r'<.+>', string):
- m = re.search(r'(.*) <(.+?)>', string)
- name, email = m.groups()
- return Actor(name, email)
- else:
- return Actor(string, None)
+ """Actors hold information about a person acting on the repository. They
+ can be committers and authors or anything with a name and an email as
+ mentioned in the git log entries."""
+ # precompiled regex
+ name_only_regex = re.compile( r'<.+>' )
+ name_email_regex = re.compile( r'(.*) <(.+?)>' )
+
+ def __init__(self, name, email):
+ self.name = name
+ self.email = email
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return '<git.Actor "%s <%s>">' % (self.name, self.email)
+
+ @classmethod
+ def _from_string(cls, string):
+ """
+ Create an Actor from a string.
+
+ ``str``
+ is the string, which is expected to be in regular git format
+
+ Format
+ John Doe <jdoe@example.com>
+
+ Returns
+ Actor
+ """
+ if cls.name_only_regex.search(string):
+ m = cls.name_email_regex.search(string)
+ name, email = m.groups()
+ return Actor(name, email)
+ else:
+ return Actor(string, None)
diff --git a/lib/git/blob.py b/lib/git/blob.py
deleted file mode 100644
index 82a41f73..00000000
--- a/lib/git/blob.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# blob.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import mimetypes
-import os
-import re
-import time
-from actor import Actor
-from commit import Commit
-
-class Blob(object):
- """A Blob encapsulates a git blob object"""
- DEFAULT_MIME_TYPE = "text/plain"
-
- def __init__(self, repo, id, mode=None, name=None):
- """
- Create an unbaked Blob containing just the specified attributes
-
- ``repo``
- is the Repo
-
- ``id``
- is the git object id
-
- ``mode``
- is the file mode
-
- ``name``
- is the file name
-
- Returns
- git.Blob
- """
- self.repo = repo
- self.id = id
- self.mode = mode
- self.name = name
-
- self._size = None
- self.data_stored = None
-
- @property
- def size(self):
- """
- The size of this blob in bytes
-
- Returns
- int
-
- NOTE
- The size will be cached after the first access
- """
- if self._size is None:
- self._size = int(self.repo.git.cat_file(self.id, s=True).rstrip())
- return self._size
-
- @property
- def data(self):
- """
- The binary contents of this blob.
-
- Returns
- str
-
- NOTE
- The data will be cached after the first access.
- """
- self.data_stored = self.data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True)
- return self.data_stored
-
- @property
- def mime_type(self):
- """
- The mime type of this file (based on the filename)
-
- Returns
- str
-
- NOTE
- Defaults to 'text/plain' in case the actual file type is unknown.
- """
- guesses = None
- if self.name:
- guesses = mimetypes.guess_type(self.name)
- return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
-
- @property
- def basename(self):
- """
- Returns
- The basename of the Blobs file name
- """
- return os.path.basename(self.name)
-
- @classmethod
- def blame(cls, repo, commit, file):
- """
- The blame information for the given file at the given commit
-
- Returns
- list: [git.Commit, list: [<line>]]
- A list of tuples associating a Commit object with a list of lines that
- changed within the given commit. The Commit objects will be given in order
- of appearance.
- """
- data = repo.git.blame(commit, '--', file, p=True)
- commits = {}
- blames = []
- info = None
-
- for line in data.splitlines():
- parts = re.split(r'\s+', line, 1)
- if re.search(r'^[0-9A-Fa-f]{40}$', parts[0]):
- if re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line):
- m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line)
- id, origin_line, final_line, group_lines = m.groups()
- info = {'id': id}
- blames.append([None, []])
- elif re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line):
- m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line)
- id, origin_line, final_line = m.groups()
- info = {'id': id}
- elif re.search(r'^(author|committer)', parts[0]):
- if re.search(r'^(.+)-mail$', parts[0]):
- m = re.search(r'^(.+)-mail$', parts[0])
- info["%s_email" % m.groups()[0]] = parts[-1]
- elif re.search(r'^(.+)-time$', parts[0]):
- m = re.search(r'^(.+)-time$', parts[0])
- info["%s_date" % m.groups()[0]] = time.gmtime(int(parts[-1]))
- elif re.search(r'^(author|committer)$', parts[0]):
- m = re.search(r'^(author|committer)$', parts[0])
- info[m.groups()[0]] = parts[-1]
- elif re.search(r'^filename', parts[0]):
- info['filename'] = parts[-1]
- elif re.search(r'^summary', parts[0]):
- info['summary'] = parts[-1]
- elif parts[0] == '':
- if info:
- c = commits.has_key(info['id']) and commits[info['id']]
- if not c:
- c = Commit(repo, id=info['id'],
- author=Actor.from_string(info['author'] + ' ' + info['author_email']),
- authored_date=info['author_date'],
- committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']),
- committed_date=info['committer_date'],
- message=info['summary'])
- commits[info['id']] = c
-
- m = re.search(r'^\t(.*)$', line)
- text, = m.groups()
- blames[-1][0] = c
- blames[-1][1].append( text )
- info = None
-
- return blames
-
- def __repr__(self):
- return '<git.Blob "%s">' % self.id
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index aef53350..2965eb8b 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -6,7 +6,6 @@
import os, sys
import subprocess
-import re
from utils import *
from errors import GitCommandError
@@ -14,208 +13,326 @@ from errors import GitCommandError
GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
- 'with_exceptions', 'with_raw_output')
+ 'with_exceptions', 'with_raw_output', 'as_process')
extra = {}
if sys.platform == 'win32':
- extra = {'shell': True}
+ extra = {'shell': True}
class Git(object):
- """
- The Git class manages communication with the Git binary.
-
+ """
+ The Git class manages communication with the Git binary.
+
It provides a convenient interface to calling the Git binary, such as in::
g = Git( git_dir )
- g.init() # calls 'git init' program
+ g.init() # calls 'git init' program
rval = g.ls_files() # calls 'git ls-files' program
``Debugging``
- Set the GIT_PYTHON_TRACE environment variable print each invocation
- of the command to stdout.
- Set its value to 'full' to see details about the returned values.
- """
- def __init__(self, git_dir=None):
- """
- Initialize this instance with:
-
- ``git_dir``
- Git directory we should work in. If None, we always work in the current
- directory as returned by os.getcwd()
- """
- super(Git, self).__init__()
- self.git_dir = git_dir
-
- def __getattr__(self, name):
- """
- A convenience method as it allows to call the command as if it was
- an object.
- Returns
- Callable object that will execute call _call_process with your arguments.
- """
- if name[:1] == '_':
- raise AttributeError(name)
- return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
-
- @property
- def get_dir(self):
- """
- Returns
- Git directory we are working on
- """
- return self.git_dir
-
- def execute(self, command,
- istream=None,
- with_keep_cwd=False,
- with_extended_output=False,
- with_exceptions=True,
- with_raw_output=False,
- ):
- """
- Handles executing the command on the shell and consumes and returns
- the returned information (stdout)
-
- ``command``
- The command argument list to execute.
- It should be a string, or a sequence of program arguments. The
- program to execute is the first item in the args sequence or string.
-
- ``istream``
- Standard input filehandle passed to subprocess.Popen.
-
- ``with_keep_cwd``
- Whether to use the current working directory from os.getcwd().
- GitPython uses get_work_tree() as its working directory by
- default and get_git_dir() for bare repositories.
-
- ``with_extended_output``
- Whether to return a (status, stdout, stderr) tuple.
-
- ``with_exceptions``
- Whether to raise an exception when git returns a non-zero status.
-
- ``with_raw_output``
- Whether to avoid stripping off trailing whitespace.
-
- Returns::
-
- str(output) # extended_output = False (Default)
- tuple(int(status), str(stdout), str(stderr)) # extended_output = True
-
- Raise
- GitCommandError
-
- NOTE
- If you add additional keyword arguments to the signature of this method,
- you must update the execute_kwargs tuple housed in this module.
- """
- if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
- print ' '.join(command)
-
- # Allow the user to have the command executed in their working dir.
- if with_keep_cwd or self.git_dir is None:
- cwd = os.getcwd()
- else:
- cwd=self.git_dir
-
- # Start the process
- proc = subprocess.Popen(command,
- cwd=cwd,
- stdin=istream,
- stderr=subprocess.PIPE,
- stdout=subprocess.PIPE,
- **extra
- )
-
- # Wait for the process to return
- try:
- stdout_value = proc.stdout.read()
- stderr_value = proc.stderr.read()
- status = proc.wait()
- finally:
- proc.stdout.close()
- proc.stderr.close()
-
- # Strip off trailing whitespace by default
- if not with_raw_output:
- stdout_value = stdout_value.rstrip()
- stderr_value = stderr_value.rstrip()
-
- if with_exceptions and status != 0:
- raise GitCommandError(command, status, stderr_value)
-
- if GIT_PYTHON_TRACE == 'full':
- if stderr_value:
- print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value)
- elif stdout_value:
- print "%s -> %d: '%s'" % (command, status, stdout_value)
- else:
- print "%s -> %d" % (command, status)
-
- # Allow access to the command's status code
- if with_extended_output:
- return (status, stdout_value, stderr_value)
- else:
- return stdout_value
-
- def transform_kwargs(self, **kwargs):
- """
- Transforms Python style kwargs into git command line options.
- """
- args = []
- for k, v in kwargs.items():
- if len(k) == 1:
- if v is True:
- args.append("-%s" % k)
- elif type(v) is not bool:
- args.append("-%s%s" % (k, v))
- else:
- if v is True:
- args.append("--%s" % dashify(k))
- elif type(v) is not bool:
- args.append("--%s=%s" % (dashify(k), v))
- return args
-
- def _call_process(self, method, *args, **kwargs):
- """
- Run the given git command with the specified arguments and return
- the result as a String
-
- ``method``
- is the command. Contained "_" characters will be converted to dashes,
- such as in 'ls_files' to call 'ls-files'.
-
- ``args``
- is the list of arguments
-
- ``kwargs``
- is a dict of keyword arguments.
- This function accepts the same optional keyword arguments
- as execute().
-
- Examples::
- git.rev_list('master', max_count=10, header=True)
-
- Returns
- Same as execute()
- """
-
- # Handle optional arguments prior to calling transform_kwargs
- # otherwise these'll end up in args, which is bad.
- _kwargs = {}
- for kwarg in execute_kwargs:
- try:
- _kwargs[kwarg] = kwargs.pop(kwarg)
- except KeyError:
- pass
-
- # Prepare the argument list
- opt_args = self.transform_kwargs(**kwargs)
- ext_args = map(str, args)
- args = opt_args + ext_args
-
- call = ["git", dashify(method)]
- call.extend(args)
-
- return self.execute(call, **_kwargs)
+ Set the GIT_PYTHON_TRACE environment variable print each invocation
+ of the command to stdout.
+ Set its value to 'full' to see details about the returned values.
+ """
+ class AutoInterrupt(object):
+ """
+ Kill/Interrupt the stored process instance once this instance goes out of scope. It is
+ used to prevent processes piling up in case iterators stop reading.
+ Besides all attributes are wired through to the contained process object
+ """
+ __slots__= "proc"
+
+ def __init__(self, proc ):
+ self.proc = proc
+
+ def __del__(self):
+ # did the process finish already so we have a return code ?
+ if self.proc.poll() is not None:
+ return
+
+ # try to kill it
+ try:
+ os.kill(self.proc.pid, 2) # interrupt signal
+ except AttributeError:
+ # try windows
+ subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid))
+ # END exception handling
+
+ def __getattr__(self, attr):
+ return getattr(self.proc, attr)
+
+
+ def __init__(self, git_dir=None):
+ """
+ Initialize this instance with:
+
+ ``git_dir``
+ Git directory we should work in. If None, we always work in the current
+ directory as returned by os.getcwd()
+ """
+ super(Git, self).__init__()
+ self.git_dir = git_dir
+
+ # cached command slots
+ self.cat_file_header = None
+ self.cat_file_all = None
+
+ def __getattr__(self, name):
+ """
+ A convenience method as it allows to call the command as if it was
+ an object.
+ Returns
+ Callable object that will execute call _call_process with your arguments.
+ """
+ if name[:1] == '_':
+ raise AttributeError(name)
+ return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
+
+ @property
+ def get_dir(self):
+ """
+ Returns
+ Git directory we are working on
+ """
+ return self.git_dir
+
+ def execute(self, command,
+ istream=None,
+ with_keep_cwd=False,
+ with_extended_output=False,
+ with_exceptions=True,
+ with_raw_output=False,
+ as_process=False
+ ):
+ """
+ Handles executing the command on the shell and consumes and returns
+ the returned information (stdout)
+
+ ``command``
+ The command argument list to execute.
+ It should be a string, or a sequence of program arguments. The
+ program to execute is the first item in the args sequence or string.
+
+ ``istream``
+ Standard input filehandle passed to subprocess.Popen.
+
+ ``with_keep_cwd``
+ Whether to use the current working directory from os.getcwd().
+ GitPython uses get_work_tree() as its working directory by
+ default and get_git_dir() for bare repositories.
+
+ ``with_extended_output``
+ Whether to return a (status, stdout, stderr) tuple.
+
+ ``with_exceptions``
+ Whether to raise an exception when git returns a non-zero status.
+
+ ``with_raw_output``
+ Whether to avoid stripping off trailing whitespace.
+
+ ``as_process``
+ Whether to return the created process instance directly from which
+ streams can be read on demand. This will render with_extended_output,
+ with_exceptions and with_raw_output ineffective - the caller will have
+ to deal with the details himself.
+ It is important to note that the process will be placed into an AutoInterrupt
+ wrapper that will interrupt the process once it goes out of scope. If you
+ use the command in iterators, you should pass the whole process instance
+ instead of a single stream.
+
+ Returns::
+
+ str(output) # extended_output = False (Default)
+ tuple(int(status), str(stdout), str(stderr)) # extended_output = True
+
+ Raise
+ GitCommandError
+
+ NOTE
+ If you add additional keyword arguments to the signature of this method,
+ you must update the execute_kwargs tuple housed in this module.
+ """
+ if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
+ print ' '.join(command)
+
+ # Allow the user to have the command executed in their working dir.
+ if with_keep_cwd or self.git_dir is None:
+ cwd = os.getcwd()
+ else:
+ cwd=self.git_dir
+
+ # Start the process
+ proc = subprocess.Popen(command,
+ cwd=cwd,
+ stdin=istream,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ **extra
+ )
+
+ if as_process:
+ return self.AutoInterrupt(proc)
+
+ # Wait for the process to return
+ status = 0
+ try:
+ stdout_value = proc.stdout.read()
+ stderr_value = proc.stderr.read()
+ status = proc.wait()
+ finally:
+ proc.stdout.close()
+ proc.stderr.close()
+
+ # Strip off trailing whitespace by default
+ if not with_raw_output:
+ stdout_value = stdout_value.rstrip()
+ stderr_value = stderr_value.rstrip()
+
+ if with_exceptions and status != 0:
+ raise GitCommandError(command, status, stderr_value)
+
+ if GIT_PYTHON_TRACE == 'full':
+ if stderr_value:
+ print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value)
+ elif stdout_value:
+ print "%s -> %d: '%s'" % (command, status, stdout_value)
+ else:
+ print "%s -> %d" % (command, status)
+
+ # Allow access to the command's status code
+ if with_extended_output:
+ return (status, stdout_value, stderr_value)
+ else:
+ return stdout_value
+
+ def transform_kwargs(self, **kwargs):
+ """
+ Transforms Python style kwargs into git command line options.
+ """
+ args = []
+ for k, v in kwargs.items():
+ if len(k) == 1:
+ if v is True:
+ args.append("-%s" % k)
+ elif type(v) is not bool:
+ args.append("-%s%s" % (k, v))
+ else:
+ if v is True:
+ args.append("--%s" % dashify(k))
+ elif type(v) is not bool:
+ args.append("--%s=%s" % (dashify(k), v))
+ return args
+
+ def _call_process(self, method, *args, **kwargs):
+ """
+ Run the given git command with the specified arguments and return
+ the result as a String
+
+ ``method``
+ is the command. Contained "_" characters will be converted to dashes,
+ such as in 'ls_files' to call 'ls-files'.
+
+ ``args``
+ is the list of arguments
+
+ ``kwargs``
+ is a dict of keyword arguments.
+ This function accepts the same optional keyword arguments
+ as execute().
+
+ Examples::
+ git.rev_list('master', max_count=10, header=True)
+
+ Returns
+ Same as execute()
+ """
+
+ # Handle optional arguments prior to calling transform_kwargs
+ # otherwise these'll end up in args, which is bad.
+ _kwargs = {}
+ for kwarg in execute_kwargs:
+ try:
+ _kwargs[kwarg] = kwargs.pop(kwarg)
+ except KeyError:
+ pass
+
+ # Prepare the argument list
+ opt_args = self.transform_kwargs(**kwargs)
+ ext_args = map(str, args)
+ args = opt_args + ext_args
+
+ call = ["git", dashify(method)]
+ call.extend(args)
+
+ return self.execute(call, **_kwargs)
+
+ def _parse_object_header(self, header_line):
+ """
+ ``header_line``
+ <hex_sha> type_string size_as_int
+
+ Returns
+ (hex_sha, type_string, size_as_int)
+
+ Raises
+ ValueError if the header contains indication for an error due to incorrect
+ input sha
+ """
+ tokens = header_line.split()
+ if len(tokens) != 3:
+ raise ValueError( "SHA named %s could not be resolved" % tokens[0] )
+
+ return (tokens[0], tokens[1], int(tokens[2]))
+
+ def __prepare_ref(self, ref):
+ # required for command to separate refs on stdin
+ refstr = str(ref) # could be ref-object
+ if refstr.endswith("\n"):
+ return refstr
+ return refstr + "\n"
+
+ def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs):
+ cur_val = getattr(self, attr_name)
+ if cur_val is not None:
+ return cur_val
+
+ options = { "istream" : subprocess.PIPE, "as_process" : True }
+ options.update( kwargs )
+
+ cmd = self._call_process( cmd_name, *args, **options )
+ setattr(self, attr_name, cmd )
+ return cmd
+
+ def __get_object_header(self, cmd, ref):
+ cmd.stdin.write(self.__prepare_ref(ref))
+ cmd.stdin.flush()
+ return self._parse_object_header(cmd.stdout.readline())
+
+ def get_object_header(self, ref):
+ """
+ Use this method to quickly examine the type and size of the object behind
+ the given ref.
+
+ NOTE
+ The method will only suffer from the costs of command invocation
+ once and reuses the command in subsequent calls.
+
+ Return:
+ (hexsha, type_string, size_as_int)
+ """
+ cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
+ return self.__get_object_header(cmd, ref)
+
+ def get_object_data(self, ref):
+ """
+ As get_object_header, but returns object data as well
+
+ Return:
+ (hexsha, type_string, size_as_int,data_string)
+ """
+ cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True)
+ hexsha, typename, size = self.__get_object_header(cmd, ref)
+ data = cmd.stdout.read(size)
+ cmd.stdout.read(1) # finishing newlines
+
+ return (hexsha, typename, size, data)
diff --git a/lib/git/commit.py b/lib/git/commit.py
deleted file mode 100644
index edfe47ca..00000000
--- a/lib/git/commit.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# commit.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import re
-import time
-
-from actor import Actor
-from lazy import LazyMixin
-from tree import Tree
-import diff
-import stats
-
-class Commit(LazyMixin):
- """
- Wraps a git Commit object.
-
- This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary.
- """
- def __init__(self, repo, id, tree=None, author=None, authored_date=None,
- committer=None, committed_date=None, message=None, parents=None):
- """
- Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set if id names a valid sha.
-
- The parameter documentation indicates the type of the argument after a colon ':'.
-
- ``id``
- is the sha id of the commit
-
- ``parents`` : list( Commit, ... )
- is a list of commit ids
-
- ``tree`` : Tree
- is the corresponding tree id
-
- ``author`` : Actor
- is the author string ( will be implicitly converted into an Actor object )
-
- ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst )
- is the authored DateTime
-
- ``committer`` : Actor
- is the committer string
-
- ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst)
- is the committed DateTime
-
- ``message`` : string
- is the commit message
-
- Returns
- git.Commit
- """
- LazyMixin.__init__(self)
-
- self.repo = repo
- self.id = id
- self.parents = None
- self.tree = None
- self.author = author
- self.authored_date = authored_date
- self.committer = committer
- self.committed_date = committed_date
- self.message = message
-
- if self.id:
- if parents is not None:
- self.parents = [Commit(repo, p) for p in parents]
- if tree is not None:
- self.tree = Tree(repo, id=tree)
-
- def __eq__(self, other):
- return self.id == other.id
-
- def __ne__(self, other):
- return self.id != other.id
-
- def __bake__(self):
- """
- Called by LazyMixin superclass when the first uninitialized member needs
- to be set as it is queried.
- """
- temp = Commit.find_all(self.repo, self.id, max_count=1)[0]
- self.parents = temp.parents
- self.tree = temp.tree
- self.author = temp.author
- self.authored_date = temp.authored_date
- self.committer = temp.committer
- self.committed_date = temp.committed_date
- self.message = temp.message
-
- @property
- def id_abbrev(self):
- """
- Returns
- First 7 bytes of the commit's sha id as an abbreviation of the full string.
- """
- return self.id[0:7]
-
- @property
- def summary(self):
- """
- Returns
- First line of the commit message.
- """
- return self.message.split('\n', 1)[0]
-
- @classmethod
- def count(cls, repo, ref, path=''):
- """
- Count the number of commits reachable from this ref
-
- ``repo``
- is the Repo
-
- ``ref``
- is the ref from which to begin (SHA1 or name)
-
- ``path``
- is an optinal path
-
- Returns
- int
- """
- return len(repo.git.rev_list(ref, '--', path).strip().splitlines())
-
- @classmethod
- def find_all(cls, repo, ref, path='', **kwargs):
- """
- Find all commits matching the given criteria.
-
- ``repo``
- is the Repo
-
- ``ref``
- is the ref from which to begin (SHA1 or name)
-
- ``path``
- is an optinal path, if set only Commits that include the path
- will be considered
-
- ``kwargs``
- optional keyword arguments to git where
- ``max_count`` is the maximum number of commits to fetch
- ``skip`` is the number of commits to skip
-
- Returns
- git.Commit[]
- """
- options = {'pretty': 'raw'}
- options.update(kwargs)
-
- output = repo.git.rev_list(ref, '--', path, **options)
- return cls.list_from_string(repo, output)
-
- @classmethod
- def list_from_string(cls, repo, text):
- """
- Parse out commit information into a list of Commit objects
-
- ``repo``
- is the Repo
-
- ``text``
- is the text output from the git-rev-list command (raw format)
-
- Returns
- git.Commit[]
- """
- lines = [l for l in text.splitlines() if l.strip('\r\n')]
-
- commits = []
-
- while lines:
- id = lines.pop(0).split()[1]
- tree = lines.pop(0).split()[1]
-
- parents = []
- while lines and lines[0].startswith('parent'):
- parents.append(lines.pop(0).split()[-1])
- author, authored_date = cls.actor(lines.pop(0))
- committer, committed_date = cls.actor(lines.pop(0))
-
- messages = []
- while lines and lines[0].startswith(' '):
- messages.append(lines.pop(0).strip())
-
- message = '\n'.join(messages)
-
- commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
- committer=committer, committed_date=committed_date, message=message))
-
- return commits
-
- @classmethod
- def diff(cls, repo, a, b=None, paths=None):
- """
- Creates diffs between a tree and the index or between two trees:
-
- ``repo``
- is the Repo
-
- ``a``
- is a named commit
-
- ``b``
- is an optional named commit. Passing a list assumes you
- wish to omit the second named commit and limit the diff to the
- given paths.
-
- ``paths``
- is a list of paths to limit the diff to.
-
- Returns
- git.Diff[]::
-
- between tree and the index if only a is given
- between two trees if a and b are given and are commits
- """
- paths = paths or []
-
- if isinstance(b, list):
- paths = b
- b = None
-
- if paths:
- paths.insert(0, "--")
-
- if b:
- paths.insert(0, b)
- paths.insert(0, a)
- text = repo.git.diff('-M', full_index=True, *paths)
- return diff.Diff.list_from_string(repo, text)
-
- @property
- def diffs(self):
- """
- Returns
- git.Diff[]
- Diffs between this commit and its first parent or all changes if this
- commit is the first commit and has no parent.
- """
- if not self.parents:
- d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw')
- if re.search(r'diff --git a', d):
- if not re.search(r'^diff --git a', d):
- p = re.compile(r'.+?(diff --git a)', re.MULTILINE | re.DOTALL)
- d = p.sub(r'diff --git a', d, 1)
- else:
- d = ''
- return diff.Diff.list_from_string(self.repo, d)
- else:
- return self.diff(self.repo, self.parents[0].id, self.id)
-
- @property
- def stats(self):
- """
- Create a git stat from changes between this commit and its first parent
- or from all changes done if this is the very first commit.
-
- Return
- git.Stats
- """
- if not self.parents:
- text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True)
- text2 = ""
- for line in text.splitlines()[1:]:
- (insertions, deletions, filename) = line.split("\t")
- text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
- text = text2
- else:
- text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True)
- return stats.Stats.list_from_string(self.repo, text)
-
- def __str__(self):
- """ Convert commit to string which is SHA1 """
- return self.id
-
- def __repr__(self):
- return '<git.Commit "%s">' % self.id
-
- @classmethod
- def actor(cls, line):
- """
- Parse out the actor (author or committer) info
-
- Returns
- [Actor, gmtime(acted at time)]
- """
- m = re.search(r'^.+? (.*) (\d+) .*$', line)
- actor, epoch = m.groups()
- return [Actor.from_string(actor), time.gmtime(int(epoch))]
diff --git a/lib/git/diff.py b/lib/git/diff.py
index 44f55602..0db83b4f 100644
--- a/lib/git/diff.py
+++ b/lib/git/diff.py
@@ -5,94 +5,101 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import re
-import blob
+import objects.blob as blob
class Diff(object):
- """
- A Diff contains diff information between two commits.
-
- It contains two sides a and b of the diff, members are prefixed with
- "a" and "b" respectively to inidcate that.
-
- Diffs keep information about the changed blob objects, the file mode, renames,
- deletions and new files.
-
- There are a few cases where None has to be expected as member variable value:
-
- ``New File``::
-
- a_mode is None
- a_blob is None
-
- ``Deleted File``::
-
- b_mode is None
- b_blob is NOne
- """
+ """
+ A Diff contains diff information between two commits.
+
+ It contains two sides a and b of the diff, members are prefixed with
+ "a" and "b" respectively to indicate that.
+
+ Diffs keep information about the changed blob objects, the file mode, renames,
+ deletions and new files.
+
+ There are a few cases where None has to be expected as member variable value:
+
+ ``New File``::
+
+ a_mode is None
+ a_blob is None
+
+ ``Deleted File``::
+
+ b_mode is None
+ b_blob is None
+ """
+
+ # precompiled regex
+ re_header = re.compile(r"""
+ #^diff[ ]--git
+ [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
+ (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
+ ^rename[ ]from[ ](?P<rename_from>\S+)\n
+ ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
+ (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
+ ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
+ (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
+ (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
+ (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
+ \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
+ """, re.VERBOSE | re.MULTILINE)
+ re_is_null_hexsha = re.compile( r'^0{40}$' )
+ __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file",
+ "rename_from", "rename_to", "renamed", "diff")
- def __init__(self, repo, a_path, b_path, a_blob, b_blob, a_mode,
- b_mode, new_file, deleted_file, rename_from,
- rename_to, diff):
- self.repo = repo
+ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
+ b_mode, new_file, deleted_file, rename_from,
+ rename_to, diff):
+ if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id):
+ self.a_blob = None
+ else:
+ self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path)
+ if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id):
+ self.b_blob = None
+ else:
+ self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path)
- if not a_blob or re.search(r'^0{40}$', a_blob):
- self.a_blob = None
- else:
- self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, name=a_path)
- if not b_blob or re.search(r'^0{40}$', b_blob):
- self.b_blob = None
- else:
- self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, name=b_path)
+ self.a_mode = a_mode
+ self.b_mode = b_mode
+ if self.a_mode:
+ self.a_mode = blob.Blob._mode_str_to_int( self.a_mode )
+ if self.b_mode:
+ self.b_mode = blob.Blob._mode_str_to_int( self.b_mode )
+ self.new_file = new_file
+ self.deleted_file = deleted_file
+ self.rename_from = rename_from
+ self.rename_to = rename_to
+ self.renamed = rename_from != rename_to
+ self.diff = diff
- self.a_mode = a_mode
- self.b_mode = b_mode
- self.new_file = new_file
- self.deleted_file = deleted_file
- self.rename_from = rename_from
- self.rename_to = rename_to
- self.renamed = rename_from != rename_to
- self.diff = diff
+ @classmethod
+ def _list_from_string(cls, repo, text):
+ """
+ Create a new diff object from the given text
+ ``repo``
+ is the repository we are operating on - it is required
+
+ ``text``
+ result of 'git diff' between two commits or one commit and the index
+
+ Returns
+ git.Diff[]
+ """
+ diffs = []
- @classmethod
- def list_from_string(cls, repo, text):
- """
- Create a new diff object from the given text
- ``repo``
- is the repository we are operating on - it is required
-
- ``text``
- result of 'git diff' between two commits or one commit and the index
-
- Returns
- git.Diff[]
- """
- diffs = []
+ diff_header = cls.re_header.match
+ for diff in ('\n' + text).split('\ndiff --git')[1:]:
+ header = diff_header(diff)
- diff_header = re.compile(r"""
- #^diff[ ]--git
- [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
- (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
- ^rename[ ]from[ ](?P<rename_from>\S+)\n
- ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
- (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
- ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
- (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
- (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
- (?:^index[ ](?P<a_blob>[0-9A-Fa-f]+)
- \.\.(?P<b_blob>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
- """, re.VERBOSE | re.MULTILINE).match
+ a_path, b_path, similarity_index, rename_from, rename_to, \
+ old_mode, new_mode, new_file_mode, deleted_file_mode, \
+ a_blob_id, b_blob_id, b_mode = header.groups()
+ new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
- for diff in ('\n' + text).split('\ndiff --git')[1:]:
- header = diff_header(diff)
+ diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,
+ old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
+ new_file, deleted_file, rename_from, rename_to, diff[header.end():]))
- a_path, b_path, similarity_index, rename_from, rename_to, \
- old_mode, new_mode, new_file_mode, deleted_file_mode, \
- a_blob, b_blob, b_mode = header.groups()
- new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
-
- diffs.append(Diff(repo, a_path, b_path, a_blob, b_blob,
- old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
- new_file, deleted_file, rename_from, rename_to, diff[header.end():]))
-
- return diffs
+ return diffs
diff --git a/lib/git/errors.py b/lib/git/errors.py
index 2632d5f3..e9a637c0 100644
--- a/lib/git/errors.py
+++ b/lib/git/errors.py
@@ -8,25 +8,25 @@ Module containing all exceptions thrown througout the git package,
"""
class InvalidGitRepositoryError(Exception):
- """
- Thrown if the given repository appears to have an invalid format.
- """
+ """
+ Thrown if the given repository appears to have an invalid format.
+ """
-class NoSuchPathError(Exception):
- """
- Thrown if a path could not be access by the system.
- """
+class NoSuchPathError(OSError):
+ """
+ Thrown if a path could not be accessed by the system.
+ """
class GitCommandError(Exception):
- """
- Thrown if execution of the git command fails with non-zero status code.
- """
- def __init__(self, command, status, stderr=None):
- self.stderr = stderr
- self.status = status
- self.command = command
+ """
+ Thrown if execution of the git command fails with non-zero status code.
+ """
+ def __init__(self, command, status, stderr=None):
+ self.stderr = stderr
+ self.status = status
+ self.command = command
- def __str__(self):
- return repr("%s returned exit status %d" %
- (str(self.command), self.status))
+ def __str__(self):
+ return repr("%s returned exit status %d" %
+ (str(self.command), self.status))
diff --git a/lib/git/head.py b/lib/git/head.py
deleted file mode 100644
index 639cee40..00000000
--- a/lib/git/head.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# head.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import commit
-
-class Head(object):
- """
- A Head is a named reference to a Commit. Every Head instance contains a name
- and a Commit object.
-
- Examples::
-
- >>> repo = Repo("/path/to/repo")
- >>> head = repo.heads[0]
-
- >>> head.name
- 'master'
-
- >>> head.commit
- <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455">
-
- >>> head.commit.id
- '1c09f116cbc2cb4100fb6935bb162daa4723f455'
- """
-
- def __init__(self, name, commit):
- """
- Initialize a newly instanced Head
-
- `name`
- is the name of the head
-
- `commit`
- is the Commit object that the head points to
- """
- self.name = name
- self.commit = commit
-
- @classmethod
- def find_all(cls, repo, **kwargs):
- """
- Find all Heads in the repository
-
- `repo`
- is the Repo
-
- `kwargs`
- Additional options given as keyword arguments, will be passed
- to git-for-each-ref
-
- Returns
- git.Head[]
-
- List is sorted by committerdate
- """
-
- options = {'sort': "committerdate",
- 'format': "%(refname)%00%(objectname)"}
- options.update(kwargs)
-
- output = repo.git.for_each_ref("refs/heads", **options)
- return cls.list_from_string(repo, output)
-
- @classmethod
- def list_from_string(cls, repo, text):
- """
- Parse out head information into a list of head objects
-
- ``repo``
- is the Repo
- ``text``
- is the text output from the git-for-each-ref command
-
- Returns
- git.Head[]
- """
- heads = []
-
- for line in text.splitlines():
- heads.append(cls.from_string(repo, line))
-
- return heads
-
- @classmethod
- def from_string(cls, repo, line):
- """
- Create a new Head instance from the given string.
-
- ``repo``
- is the Repo
-
- ``line``
- is the formatted head information
-
- Format::
-
- name: [a-zA-Z_/]+
- <null byte>
- id: [0-9A-Fa-f]{40}
-
- Returns
- git.Head
- """
- full_name, ids = line.split("\x00")
-
- if full_name.startswith('refs/heads/'):
- name = full_name[len('refs/heads/'):]
- else:
- name = full_name
-
- c = commit.Commit(repo, id=ids)
- return Head(name, c)
-
- def __repr__(self):
- return '<git.Head "%s">' % self.name
diff --git a/lib/git/lazy.py b/lib/git/lazy.py
deleted file mode 100644
index 5e470181..00000000
--- a/lib/git/lazy.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# lazy.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-class LazyMixin(object):
- lazy_properties = []
-
- def __init__(self):
- self.__baked__ = False
-
- def __getattribute__(self, attr):
- val = object.__getattribute__(self, attr)
- if val is not None:
- return val
- else:
- self.__prebake__()
- return object.__getattribute__(self, attr)
-
- def __bake__(self):
- """ This method should be overridden in the derived class. """
- raise NotImplementedError(" '__bake__' method has not been implemented.")
-
- def __prebake__(self):
- if self.__baked__:
- return
- self.__bake__()
- self.__baked__ = True
-
- def __bake_it__(self):
- self.__baked__ = True
diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py
new file mode 100644
index 00000000..39e650b7
--- /dev/null
+++ b/lib/git/objects/__init__.py
@@ -0,0 +1,11 @@
+"""
+Import all submodules main classes into the package space
+"""
+import inspect
+from tag import *
+from blob import *
+from tree import *
+from commit import *
+
+__all__ = [ name for name, obj in locals().items()
+ if not (name.startswith('_') or inspect.ismodule(obj)) ] \ No newline at end of file
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
new file mode 100644
index 00000000..07538ada
--- /dev/null
+++ b/lib/git/objects/base.py
@@ -0,0 +1,151 @@
+# base.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+import os
+from git.utils import LazyMixin
+
+_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r" # placeholders: object id, python type, git type
+
+class Object(LazyMixin):
+ """
+ Implements an Object which may be Blobs, Trees, Commits and Tags
+ """
+ TYPES = ("blob", "tree", "commit", "tag")
+ __slots__ = ("repo", "id", "size", "data" )
+ type = None # to be set by subclass
+
+ def __init__(self, repo, id):
+ """
+ Initialize an object by identifying it by its id. All keyword arguments
+ will be set on demand if None.
+
+ ``repo``
+ repository this object is located in
+
+ ``id``
+ SHA1 or ref suitable for git-rev-parse
+ """
+ super(Object,self).__init__()
+ self.repo = repo
+ self.id = id
+
+ def _set_self_from_args_(self, args_dict):
+ """
+ Initialize attributes on self from the given dict that was retrieved
+ from locals() in the calling method.
+
+ Will only set an attribute on self if the corresponding value in args_dict
+ is not None
+ """
+ for attr, val in args_dict.items():
+ if attr != "self" and val is not None:
+ setattr( self, attr, val )
+ # END set all non-None attributes
+
+ def _set_cache_(self, attr):
+ """
+ Lazily retrieve ``size`` or ``data`` from git and verify the object type; other attributes go to the superclass
+ """
+ if attr == "size":
+ hexsha, typename, self.size = self.repo.git.get_object_header(self.id)
+ assert typename == self.type, _assertion_msg_format % (self.id, self.type, typename) # message order: id, python type, git type
+ elif attr == "data":
+ hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id)
+ assert typename == self.type, _assertion_msg_format % (self.id, self.type, typename) # message order: id, python type, git type
+ else:
+ super(Object,self)._set_cache_(attr)
+
+ def __eq__(self, other):
+ """
+ Returns
+ True if the objects have the same SHA1
+ """
+ return self.id == other.id
+
+ def __ne__(self, other):
+ """
+ Returns
+ True if the objects do not have the same SHA1
+ """
+ return self.id != other.id
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our id allowing objects to be used in dicts and sets
+ """
+ return hash(self.id)
+
+ def __str__(self):
+ """
+ Returns
+ string of our SHA1 as understood by all git commands
+ """
+ return self.id
+
+ def __repr__(self):
+ """
+ Returns
+ string with pythonic representation of our object
+ """
+ return '<git.%s "%s">' % (self.__class__.__name__, self.id)
+
+
+class IndexObject(Object):
+ """
+ Base for all objects that can be part of the index file , namely Tree, Blob and
+ SubModule objects
+ """
+ __slots__ = ("path", "mode")
+
+ def __init__(self, repo, id, mode=None, path=None):
+ """
+ Initialize a newly instanced IndexObject
+ ``repo``
+ is the Repo we are located in
+
+ ``id`` : string
+ is the git object id as hex sha
+
+ ``mode`` : int or string
+ is the file mode as int, use the stat module to evaluate the information; a mode string is converted to int
+
+ ``path`` : str
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+
+ NOTE
+ Path may not be set if the index object has been created directly as it cannot
+ be retrieved without knowing the parent tree.
+ """
+ super(IndexObject, self).__init__(repo, id)
+ self._set_self_from_args_(locals())
+ if isinstance(mode, basestring):
+ self.mode = self._mode_str_to_int(mode)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved lateron ( not without searching for them )
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+
+ @classmethod
+ def _mode_str_to_int(cls, modestr):
+ """
+ ``modestr``
+ string like 755 or 644 or 100644 - only the last 3 chars will be used
+
+ Returns
+ Integer mode compatible with the mode ids of the
+ stat module regarding the rwx permissions for user, group and other
+ """
+ mode = 0
+ for iteration,char in enumerate(reversed(modestr[-3:])):
+ mode += int(char) << iteration*3
+ # END for each char
+ return mode
+
+
diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py
new file mode 100644
index 00000000..88ca73d6
--- /dev/null
+++ b/lib/git/objects/blob.py
@@ -0,0 +1,36 @@
+# blob.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import mimetypes
+import base
+
+class Blob(base.IndexObject):
+ """A Blob encapsulates a git blob object"""
+ DEFAULT_MIME_TYPE = "text/plain"
+ type = "blob"
+
+ __slots__ = tuple()
+
+
+ @property
+ def mime_type(self):
+ """
+ The mime type of this file (based on the filename)
+
+ Returns
+ str
+
+ NOTE
+ Defaults to 'text/plain' in case the actual file type is unknown.
+ """
+ guesses = None
+ if self.path:
+ guesses = mimetypes.guess_type(self.path)
+ return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
+
+
+ def __repr__(self):
+ return '<git.Blob "%s">' % self.id
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
new file mode 100644
index 00000000..101014ab
--- /dev/null
+++ b/lib/git/objects/commit.py
@@ -0,0 +1,299 @@
+# commit.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import re
+import time
+from git.utils import Iterable
+from git.actor import Actor
+import git.diff as diff
+import git.stats as stats
+from tree import Tree
+import base
+
+class Commit(base.Object, Iterable):
+ """
+ Wraps a git Commit object.
+
+ This class will act lazily on some of its attributes and will query the
+ value on demand only if it involves calling the git binary.
+ """
+ # precompiled regex
+ re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$')
+
+ # object configuration
+ type = "commit"
+ __slots__ = ("tree", "author", "authored_date", "committer", "committed_date",
+ "message", "parents")
+
+ def __init__(self, repo, id, tree=None, author=None, authored_date=None,
+ committer=None, committed_date=None, message=None, parents=None):
+ """
+ Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set if id names a valid sha.
+
+ The parameter documentation indicates the type of the argument after a colon ':'.
+
+ ``id``
+ is the sha id of the commit or a ref
+
+ ``parents`` : tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits
+
+ ``tree`` : Tree
+ is the corresponding tree id or an actual Tree
+
+ ``author`` : Actor
+ is the author string ( will be implicitly converted into an Actor object )
+
+ ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst )
+ is the authored DateTime
+
+ ``committer`` : Actor
+ is the committer string
+
+ ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst)
+ is the committed DateTime
+
+ ``message`` : string
+ is the commit message
+
+ Returns
+ git.Commit
+ """
+ super(Commit,self).__init__(repo, id)
+ self._set_self_from_args_(locals())
+
+ if parents is not None:
+ self.parents = tuple( self.__class__(repo, p) for p in parents )
+ # END for each parent to convert
+
+ if self.id and tree is not None:
+ self.tree = Tree(repo, id=tree, path='')
+ # END id to tree conversion
+
+ def _set_cache_(self, attr):
+ """
+ Called by LazyMixin superclass when the given uninitialized member needs
+ to be set.
+ We set all values at once.
+ """
+ if attr in Commit.__slots__:
+ # prepare our data lines to match rev-list
+ data_lines = self.data.splitlines()
+ data_lines.insert(0, "commit %s" % self.id)
+ temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next()
+ self.parents = temp.parents
+ self.tree = temp.tree
+ self.author = temp.author
+ self.authored_date = temp.authored_date
+ self.committer = temp.committer
+ self.committed_date = temp.committed_date
+ self.message = temp.message
+ else:
+ super(Commit, self)._set_cache_(attr)
+
+ @property
+ def summary(self):
+ """
+ Returns
+ First line of the commit message.
+ """
+ return self.message.split('\n', 1)[0]
+
+ @classmethod
+ def count(cls, repo, ref, path=''):
+ """
+ Count the number of commits reachable from this ref
+
+ ``repo``
+ is the Repo
+
+ ``ref``
+ is the ref from which to begin (SHA1 or name)
+
+ ``path``
+ is an optional path, passed to git-rev-list after '--'
+
+ Returns
+ int
+ """
+ return len(repo.git.rev_list(ref, '--', path).strip().splitlines())
+
+ @classmethod
+ def iter_items(cls, repo, ref, path='', **kwargs):
+ """
+ Find all commits matching the given criteria.
+
+ ``repo``
+ is the Repo
+
+ ``ref``
+ is the ref from which to begin (SHA1, Head or name)
+
+ ``path``
+ is an optional path, if set only Commits that include the path
+ will be considered
+
+ ``kwargs``
+ optional keyword arguments to git-rev-list where
+ ``max_count`` is the maximum number of commits to fetch
+ ``skip`` is the number of commits to skip
+
+ Returns
+ iterator yielding Commit items
+ """
+ options = {'pretty': 'raw', 'as_process' : True }
+ options.update(kwargs)
+
+ # the test system might confront us with string values -
+ proc = repo.git.rev_list(ref, '--', path, **options)
+ return cls._iter_from_process_or_stream(repo, proc)
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+ """
+ Parse out commit information into a list of Commit objects
+
+ ``repo``
+ is the Repo
+
+ ``proc``
+ git-rev-list process instance (raw format)
+
+ Returns
+ iterator returning Commit objects
+ """
+ stream = proc_or_stream
+ if not hasattr(stream,'next'):
+ stream = proc_or_stream.stdout
+
+ for line in stream:
+ id = line.split()[1]
+ assert line.split()[0] == "commit"
+ tree = stream.next().split()[1]
+
+ parents = []
+ next_line = None
+ for parent_line in stream:
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ parents.append(parent_line.split()[-1])
+ # END for each parent line
+
+ author, authored_date = cls._actor(next_line)
+ committer, committed_date = cls._actor(stream.next())
+
+ # empty line
+ stream.next()
+
+ message_lines = []
+ next_line = None
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ break
+ # END abort message reading
+ message_lines.append(msg_line.strip())
+ # END while there are message lines
+ message = '\n'.join(message_lines)
+
+ yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
+ committer=committer, committed_date=committed_date, message=message)
+ # END for each line in stream
+
+ @classmethod
+ def diff(cls, repo, a, b=None, paths=None):
+ """
+ Creates diffs between a tree and the index or between two trees:
+
+ ``repo``
+ is the Repo
+
+ ``a``
+ is a named commit
+
+ ``b``
+ is an optional named commit. Passing a list assumes you
+ wish to omit the second named commit and limit the diff to the
+ given paths.
+
+ ``paths``
+ is a list of paths to limit the diff to; the list is copied, never modified.
+
+ Returns
+ git.Diff[]::
+
+ between tree and the index if only a is given
+ between two trees if a and b are given and are commits
+ """
+ paths = list(paths or []) # copy: the inserts below must not alter the caller's list
+
+ if isinstance(b, list):
+ paths = list(b) # copy for the same reason
+ b = None
+
+ if paths:
+ paths.insert(0, "--")
+
+ if b:
+ paths.insert(0, b)
+ paths.insert(0, a)
+ text = repo.git.diff('-M', full_index=True, *paths)
+ return diff.Diff._list_from_string(repo, text)
+
+ @property
+ def diffs(self):
+ """
+ Returns
+ git.Diff[]
+ Diffs between this commit and its first parent or all changes if this
+ commit is the first commit and has no parent.
+ """
+ if not self.parents:
+ d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw')
+ return diff.Diff._list_from_string(self.repo, d)
+ else:
+ return self.diff(self.repo, self.parents[0].id, self.id)
+
+ @property
+ def stats(self):
+ """
+ Create a git stat from changes between this commit and its first parent
+ or from all changes done if this is the very first commit.
+
+ Return
+ git.Stats
+ """
+ if not self.parents:
+ text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True)
+ text2 = ""
+ for line in text.splitlines()[1:]:
+ (insertions, deletions, filename) = line.split("\t")
+ text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
+ text = text2
+ else:
+ text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True)
+ return stats.Stats._list_from_string(self.repo, text)
+
+ def __str__(self):
+ """ Convert commit to string which is SHA1 """
+ return self.id
+
+ def __repr__(self):
+ return '<git.Commit "%s">' % self.id
+
+ @classmethod
+ def _actor(cls, line):
+ """
+ Parse out the actor info from a line shaped like '<keyword> <actor> <epoch> <rest>' (see re_actor_epoch)
+
+ Returns
+ (Actor, gmtime(acted at time)) as a tuple
+ """
+ m = cls.re_actor_epoch.search(line)
+ actor, epoch = m.groups()
+ return (Actor._from_string(actor), time.gmtime(int(epoch)))
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
new file mode 100644
index 00000000..ecf6349d
--- /dev/null
+++ b/lib/git/objects/tag.py
@@ -0,0 +1,70 @@
+# tag.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""
+Module containing all object based types.
+"""
+import base
+import commit
+from utils import get_object_type_by_name
+
+class TagObject(base.Object):
+ """
+ Non-Lightweight tag carrying additional information about an object we are pointing
+ to.
+ """
+ type = "tag"
+ __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" )
+
+ def __init__(self, repo, id, object=None, tag=None,
+ tagger=None, tagged_date=None, message=None):
+ """
+ Initialize a tag object with additional data
+
+ ``repo``
+ repository this object is located in
+
+ ``id``
+ SHA1 or ref suitable for git-rev-parse
+
+ ``object``
+ Object instance of object we are pointing to
+
+ ``tag``
+ name of this tag
+
+ ``tagger``
+ Actor identifying the tagger
+
+ ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst)
+ is the DateTime of the tag creation
+ """
+ super(TagObject, self).__init__(repo, id )
+ self._set_self_from_args_(locals())
+
+ def _set_cache_(self, attr):
+ """
+ Cache all our attributes at once by parsing the raw tag object data; other attributes go to the superclass
+ """
+ if attr in TagObject.__slots__:
+ lines = self.data.splitlines()
+
+ obj, hexsha = lines[0].split(" ") # object <hexsha>
+ type_token, type_name = lines[1].split(" ") # type <type_name>
+ self.object = get_object_type_by_name(type_name)(self.repo, hexsha)
+
+ self.tag = lines[2][4:] # tag <tag name>
+
+ tagger_info = lines[3] # tagger <actor> <date> - keep the keyword, Commit._actor's regex skips the first token itself
+ self.tagger, self.tagged_date = commit.Commit._actor(tagger_info)
+
+ # line 4 empty - check git source to figure out purpose
+ self.message = "\n".join(lines[5:])
+ # END check our attributes
+ else:
+ super(TagObject, self)._set_cache_(attr)
+
+
+
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
new file mode 100644
index 00000000..abfa9622
--- /dev/null
+++ b/lib/git/objects/tree.py
@@ -0,0 +1,242 @@
+# tree.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import os
+import blob
+import base
+import binascii
+
+def sha_to_hex(sha):
+ """Takes a 20 byte binary sha string and returns its 40 character hex representation"""
+ hexsha = binascii.hexlify(sha)
+ assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % len(hexsha)
+ return hexsha
+
+class Tree(base.IndexObject):
+ """
+ Trees represent an ordered list of Blobs and other Trees. Hence a tree can be
+ accessed like a list.
+
+ Trees will cache their contents after first retrieval to improve efficiency.
+
+ ``Tree as a list``::
+
+ Access a specific blob using the
+ tree['filename'] notation.
+
+ You may as well access by index
+ blob = tree[0]
+
+
+ """
+
+ type = "tree"
+ __slots__ = "_cache"
+
+ # using ascii codes for comparison
+ ascii_commit_id = (0x31 << 4) + 0x36
+ ascii_blob_id = (0x31 << 4) + 0x30
+ ascii_tree_id = (0x34 << 4) + 0x30
+
+
+ def __init__(self, repo, id, mode=0, path=None):
+ super(Tree, self).__init__(repo, id, mode, path)
+
+ def _set_cache_(self, attr):
+ if attr == "_cache":
+ # Set the data when we need it
+ self._cache = self._get_tree_cache()
+ else:
+ super(Tree, self)._set_cache_(attr)
+
+ def _get_tree_cache(self):
+ """
+ Return
+ list(object_instance, ...)
+
+ NOTE
+ None entries yielded by _iter_from_data are left out of the list
+ """
+ out = list()
+ for obj in self._iter_from_data():
+ if obj is not None:
+ out.append(obj)
+ # END if object was handled
+ # END for each object parsed from the tree data
+ return out
+
+
+ def _iter_from_data(self):
+ """
+ Reads the binary non-pretty printed representation of a tree and converts
+ it into Blob or Tree objects. Raises TypeError on an unknown entry type.
+
+ Note: This method was inspired by the parse_tree method in dulwich.
+
+ Returns
+ iterator yielding Blob or Tree instances, and None for commit entries
+ """
+ ord_zero = ord('0')
+ data = self.data
+ len_data = len(data)
+ i = 0
+ while i < len_data:
+ mode = 0
+ mode_boundary = i + 6 # NOTE(review): not read anywhere in this method
+
+ # keep it ascii - we compare against the respective values
+ type_id = (ord(data[i])<<4) + ord(data[i+1])
+ i += 2
+
+ while data[i] != ' ':
+ # move existing mode integer up one level being 3 bits
+ # and add the actual ordinal value of the character
+ mode = (mode << 3) + (ord(data[i]) - ord_zero)
+ i += 1
+ # END while reading mode
+
+ # byte is space now, skip it
+ i += 1
+
+ # parse name, it is NULL separated
+
+ ns = i
+ while data[i] != '\0':
+ i += 1
+ # END while not reached NULL
+ name = data[ns:i]
+
+ # byte is NULL, get next 20
+ i += 1
+ sha = data[i:i+20]
+ i = i + 20
+
+ hexsha = sha_to_hex(sha)
+ if type_id == self.ascii_blob_id:
+ yield blob.Blob(self.repo, hexsha, mode, name)
+ elif type_id == self.ascii_tree_id:
+ yield Tree(self.repo, hexsha, mode, name)
+ elif type_id == self.ascii_commit_id:
+ # todo: commit entries are not handled yet, None is yielded instead
+ yield None
+ else:
+ raise TypeError( "Unknown type found in tree data: %i" % type_id )
+ # END for each entry in data stream
+
+
+ def __div__(self, file):
+ """
+ Find the named object in this tree's contents
+
+ Examples::
+
+ >>> Repo('/path/to/python-git').tree/'lib'
+ <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e">
+ >>> Repo('/path/to/python-git').tree/'README.txt'
+ <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
+
+ Returns
+ ``git.Blob`` or ``git.Tree``
+
+ Raise
+ KeyError if given file or tree does not exist in tree
+ """
+ return self[file]
+
+
+ def __repr__(self):
+ return '<git.Tree "%s">' % self.id
+
+ @classmethod
+ def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ):
+ # Generator yielding the items of tree and, depth permitting, of its subtrees; items rejected by predicate are skipped together with their subtrees
+ for obj in tree:
+ # adjust path to be complete - NOTE(review): rewrites obj.path in place; if obj is cached by its tree a repeated traversal may join the prefix again - confirm
+ obj.path = os.path.join(tree.path, obj.path)
+ if not predicate(obj):
+ continue
+ yield obj
+ if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ):
+ for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ):
+ yield recursive_obj
+ # END for each recursive object
+ # END if we may enter recursion
+ # END for each object
+
+ def traverse(self, max_depth=-1, predicate = lambda i: True):
+ """
+ Returns
+ Iterator to traverse the tree recursively up to the given level.
+ The iterator returns Blob and Tree objects
+
+ ``max_depth``
+
+ if -1, the whole tree will be traversed
+ if 0, only the first level will be traversed which is the same as
+ the default non-recursive iterator
+
+ ``predicate``
+
+ If predicate(item) returns True, item will be returned by iterator
+ """
+ return self._iter_recursive( self.repo, self, 0, max_depth, predicate )
+
+ @property
+ def trees(self):
+ """
+ Returns
+ list(Tree, ...) list of trees directly below this tree
+ """
+ return [ i for i in self if i.type == "tree" ]
+
+ @property
+ def blobs(self):
+ """
+ Returns
+ list(Blob, ...) list of blobs directly below this tree
+ """
+ return [ i for i in self if i.type == "blob" ]
+
+
+ # List protocol
+ def __getslice__(self,i,j):
+ return self._cache[i:j]
+
+ def __iter__(self):
+ return iter(self._cache)
+
+ def __len__(self):
+ return len(self._cache)
+
+ def __getitem__(self,item):
+ if isinstance(item, int):
+ return self._cache[item]
+
+ if isinstance(item, basestring):
+ # compatibility: look up a cached entry by its path
+ for obj in self._cache:
+ if obj.path == item:
+ return obj
+ # END for each obj
+ raise KeyError( "Blob or Tree named %s not found" % item )
+ # END index is basestring
+
+ raise TypeError( "Invalid index type: %r" % item )
+
+
+ def __contains__(self,item):
+ if isinstance(item, base.IndexObject):
+ return item in self._cache
+
+ # compatibility: anything else is compared against the paths of the cached entries
+ for obj in self._cache:
+ if item == obj.path:
+ return True
+ # END for each item
+ return False
+
+ def __reversed__(self):
+ return reversed(self._cache)
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
new file mode 100644
index 00000000..15c1d114
--- /dev/null
+++ b/lib/git/objects/utils.py
@@ -0,0 +1,36 @@
+# utils.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""
+Module for general utility functions
+"""
+import commit, tag, blob, tree
+
+def get_object_type_by_name(object_type_name):
+ """
+ Returns
+ type suitable to handle the given object type name.
+ Use the type to create new instances.
+
+ ``object_type_name``
+ Member of TYPES
+
+ Raises
+ ValueError: In case object_type_name is unknown
+ """
+ if object_type_name == "commit":
+ import commit
+ return commit.Commit
+ elif object_type_name == "tag":
+ import tag
+ return tag.TagObject
+ elif object_type_name == "blob":
+ import blob
+ return blob.Blob
+ elif object_type_name == "tree":
+ import tree
+ return tree.Tree
+ else:
+ raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
diff --git a/lib/git/refs.py b/lib/git/refs.py
new file mode 100644
index 00000000..3c9eb817
--- /dev/null
+++ b/lib/git/refs.py
@@ -0,0 +1,238 @@
+# refs.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""
+Module containing all ref based objects
+"""
+from objects.base import Object
+from objects.utils import get_object_type_by_name
+from utils import LazyMixin, Iterable
+
+class Ref(LazyMixin, Iterable):
+ """
+ Represents a named reference to any object
+ """
+ __slots__ = ("repo", "path")
+
+ def __init__(self, repo, path, object = None):
+ """
+ Initialize this instance
+ ``repo``
+ Our parent repository
+
+ ``path``
+ Path relative to the .git/ directory pointing to the ref in question, i.e.
+ refs/heads/master
+
+ ``object``
+ Object instance, will be retrieved on demand if None
+ """
+ self.repo = repo
+ self.path = path
+ if object is not None:
+ self.object = object
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return '<git.%s "%s">' % (self.__class__.__name__, self.path)
+
+ def __eq__(self, other):
+ return self.path == other.path and self.object == other.object
+
+ def __ne__(self, other):
+ return not ( self == other )
+
+ def __hash__(self):
+ return hash(self.path)
+
+ @property
+ def name(self):
+ """
+ Returns
+ (shortest) Name of this reference - it may contain path components
+ """
+ # first two path tokens can be removed as they are
+ # refs/heads or refs/tags or refs/remotes
+ tokens = self.path.split('/')
+ if len(tokens) < 3:
+ return self.path # could be refs/HEAD
+
+ return '/'.join(tokens[2:])
+
+ @property
+ def object(self):
+ """
+ Returns
+ The object our ref currently refers to. Refs can be cached, they will
+ always point to the actual object as it gets re-created on each query
+ """
+ # have to be dynamic here as we may be a tag which can point to anything
+ hexsha, typename, size = self.repo.git.get_object_header(self.path)
+ return get_object_type_by_name(typename)(self.repo, hexsha)
+
+ @classmethod
+ def iter_items(cls, repo, common_path = "refs", **kwargs):
+ """
+ Find all refs in the repository
+
+ ``repo``
+ is the Repo
+
+ ``common_path``
+ Optional keyword argument to the path which is to be shared by all
+ returned Ref objects
+
+ ``kwargs``
+ Additional options given as keyword arguments, will be passed
+ to git-for-each-ref
+
+ Returns
+ git.Ref[]
+
+ List is sorted by committerdate
+ The returned objects are compatible to the Ref base, but represent the
+ actual type, such as Head or Tag
+ """
+
+ options = {'sort': "committerdate",
+ 'format': "%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"}
+
+ options.update(kwargs)
+
+ output = repo.git.for_each_ref(common_path, **options)
+ return cls._iter_from_stream(repo, iter(output.splitlines()))
+
+ @classmethod
+ def _iter_from_stream(cls, repo, stream):
+ """ Parse out ref information into a list of Ref compatible objects
+ Returns git.Ref[] list of Ref objects """
+ heads = []
+
+ for line in stream:
+ heads.append(cls._from_string(repo, line))
+
+ return heads
+
+ @classmethod
+ def _from_string(cls, repo, line):
+ """ Create a new Ref instance from the given string.
+ Format
+ name: [a-zA-Z_/]+
+ <null byte>
+ id: [0-9A-Fa-f]{40}
+ Returns git.Ref (an instance of cls) """
+ full_path, hexsha, type_name, object_size = line.split("\x00")
+
+ # No, we keep the object dynamic by allowing it to be retrieved by
+ # our path on demand - due to persistent commands it is fast.
+ # This reduces the risk that the object does not match
+ # the changed ref anymore in case it changes in the meanwhile
+ return cls(repo, full_path)
+
+ # obj = get_object_type_by_name(type_name)(repo, hexsha)
+ # obj.size = object_size
+ # return cls(repo, full_path, obj)
+
+
+class Head(Ref):
+ """
+ A Head is a named reference to a Commit. Every Head instance contains a name
+ and a Commit object.
+
+ Examples::
+
+ >>> repo = Repo("/path/to/repo")
+ >>> head = repo.heads[0]
+
+ >>> head.name
+ 'master'
+
+ >>> head.commit
+ <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455">
+
+ >>> head.commit.id
+ '1c09f116cbc2cb4100fb6935bb162daa4723f455'
+ """
+
+ @property
+ def commit(self):
+ """
+ Returns
+ Commit object the head points to
+ """
+ return self.object
+
+ @classmethod
+ def iter_items(cls, repo, common_path = "refs/heads", **kwargs):
+ """
+ Returns
+ Iterator yielding Head items
+
+ For more documentation, please refer to git.refs.Ref.iter_items
+ """
+ return super(Head,cls).iter_items(repo, common_path, **kwargs)
+
+ def __repr__(self):
+ return '<git.Head "%s">' % self.name
+
+
+
+class TagRef(Ref):
+ """
+ Class representing a lightweight tag reference which either points to a commit
+ or to a tag object. In the latter case additional information, like the signature
+ or the tag-creator, is available.
+
+ This tag object will always point to a commit object, but may carry additional
+ information in a tag object::
+
+ tagref = TagRef.list_items(repo)[0]
+ print tagref.commit.message
+ if tagref.tag is not None:
+ print tagref.tag.message
+ """
+
+ __slots__ = tuple()
+
+ @property
+ def commit(self):
+ """
+ Returns
+ Commit object the tag ref points to
+ """
+ if self.object.type == "commit":
+ return self.object
+ elif self.object.type == "tag":
+ # it is a tag object which carries the commit as an object - we can point to anything
+ return self.object.object
+ else:
+ raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self )
+
+ @property
+ def tag(self):
+ """
+ Returns
+ Tag object this tag ref points to or None in case
+ we are a light weight tag
+ """
+ if self.object.type == "tag":
+ return self.object
+ return None
+
+ @classmethod
+ def iter_items(cls, repo, common_path = "refs/tags", **kwargs):
+ """
+ Returns
+ Iterator yielding TagRef items
+
+ For more documentation, please refer to git.refs.Ref.iter_items
+ """
+ return super(TagRef,cls).iter_items(repo, common_path, **kwargs)
+
+
+# provide an alias
+Tag = TagRef
diff --git a/lib/git/repo.py b/lib/git/repo.py
index 1c4b4095..c74c7e8d 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -8,506 +8,588 @@ import os
import re
import gzip
import StringIO
+import time
+
from errors import InvalidGitRepositoryError, NoSuchPathError
from utils import touch, is_git_dir
from cmd import Git
-from head import Head
-from blob import Blob
-from tag import Tag
-from commit import Commit
-from tree import Tree
+from actor import Actor
+from refs import *
+from objects import *
+
class Repo(object):
- """
- Represents a git repository and allows you to query references,
- gather commit information, generate diffs, create and clone repositories query
- the log.
- """
- DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
-
- def __init__(self, path=None):
- """
- Create a new Repo instance
-
- ``path``
- is the path to either the root git directory or the bare git repo
-
- Examples::
-
- repo = Repo("/Users/mtrier/Development/git-python")
- repo = Repo("/Users/mtrier/Development/git-python.git")
-
- Raises
- InvalidGitRepositoryError or NoSuchPathError
-
- Returns
- ``git.Repo``
- """
-
- epath = os.path.abspath(os.path.expanduser(path or os.getcwd()))
-
- if not os.path.exists(epath):
- raise NoSuchPathError(epath)
-
- self.path = None
- curpath = epath
- while curpath:
- if is_git_dir(curpath):
- self.bare = True
- self.path = curpath
- self.wd = curpath
- break
- gitpath = os.path.join(curpath, '.git')
- if is_git_dir(gitpath):
- self.bare = False
- self.path = gitpath
- self.wd = curpath
- break
- curpath, dummy = os.path.split(curpath)
- if not dummy:
- break
-
- if self.path is None:
- raise InvalidGitRepositoryError(epath)
-
- self.git = Git(self.wd)
-
- # Description property
- def _get_description(self):
- filename = os.path.join(self.path, 'description')
- return file(filename).read().rstrip()
-
- def _set_description(self, descr):
- filename = os.path.join(self.path, 'description')
- file(filename, 'w').write(descr+'\n')
-
- description = property(_get_description, _set_description,
- doc="the project's description")
- del _get_description
- del _set_description
-
- @property
- def heads(self):
- """
- A list of ``Head`` objects representing the branch heads in
- this repo
-
- Returns
- ``git.Head[]``
- """
- return Head.find_all(self)
-
- # alias heads
- branches = heads
-
- @property
- def tags(self):
- """
- A list of ``Tag`` objects that are available in this repo
-
- Returns
- ``git.Tag[]``
- """
- return Tag.find_all(self)
-
- def commits(self, start='master', path='', max_count=10, skip=0):
- """
- A list of Commit objects representing the history of a given ref/commit
-
- ``start``
- is the branch/commit name (default 'master')
-
- ``path``
- is an optional path to limit the returned commits to
- Commits that do not contain that path will not be returned.
-
- ``max_count``
- is the maximum number of commits to return (default 10)
+ """
+ Represents a git repository and allows you to query references,
+ gather commit information, generate diffs, create and clone repositories query
+ the log.
+ """
+ DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
+
+ # precompiled regex
+ re_whitespace = re.compile(r'\s+')
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_author_committer_start = re.compile(r'^(author|committer)')
+ re_tab_full_line = re.compile(r'^\t(.*)$')
+
+ def __init__(self, path=None):
+ """
+ Create a new Repo instance
+
+ ``path``
+ is the path to either the root git directory or the bare git repo
+
+ Examples::
+
+ repo = Repo("/Users/mtrier/Development/git-python")
+ repo = Repo("/Users/mtrier/Development/git-python.git")
+
+ Raises
+ InvalidGitRepositoryError or NoSuchPathError
+
+ Returns
+ ``git.Repo``
+ """
+
+ epath = os.path.abspath(os.path.expanduser(path or os.getcwd()))
+
+ if not os.path.exists(epath):
+ raise NoSuchPathError(epath)
+
+ self.path = None
+ curpath = epath
+ while curpath:
+ if is_git_dir(curpath):
+ self.bare = True
+ self.path = curpath
+ self.wd = curpath
+ break
+ gitpath = os.path.join(curpath, '.git')
+ if is_git_dir(gitpath):
+ self.bare = False
+ self.path = gitpath
+ self.wd = curpath
+ break
+ curpath, dummy = os.path.split(curpath)
+ if not dummy:
+ break
+
+ if self.path is None:
+ raise InvalidGitRepositoryError(epath)
+
+ self.git = Git(self.wd)
+
+ # Description property
+ def _get_description(self):
+ filename = os.path.join(self.path, 'description')
+ return file(filename).read().rstrip()
+
+ def _set_description(self, descr):
+ filename = os.path.join(self.path, 'description')
+ file(filename, 'w').write(descr+'\n')
+
+ description = property(_get_description, _set_description,
+ doc="the project's description")
+ del _get_description
+ del _set_description
+
+ @property
+ def heads(self):
+ """
+ A list of ``Head`` objects representing the branch heads in
+ this repo
+
+ Returns
+ ``git.Head[]``
+ """
+ return Head.list_items(self)
+
+ # alias heads
+ branches = heads
+
+ @property
+ def tags(self):
+ """
+ A list of ``Tag`` objects that are available in this repo
+
+ Returns
+ ``git.Tag[]``
+ """
+ return Tag.list_items(self)
+
+ def blame(self, commit, file):
+ """
+ The blame information for the given file at the given commit
+
+ Returns
+ list: [git.Commit, list: [<line>]]
+ A list of tuples associating a Commit object with a list of lines that
+ changed within the given commit. The Commit objects will be given in order
+ of appearance.
+ """
+ data = self.git.blame(commit, '--', file, p=True)
+ commits = {}
+ blames = []
+ info = None
+
+ for line in data.splitlines(False):
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ if self.re_hexsha_only.search(firstpart):
+ # handles
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+ digits = parts[-1].split(" ")
+ if len(digits) == 3:
+ info = {'id': firstpart}
+ blames.append([None, []])
+ # END blame data initialization
+ else:
+ m = self.re_author_committer_start.search(firstpart)
+ if m:
+ # handles:
+ # author Tom Preston-Werner
+ # author-mail <tom@mojombo.com>
+ # author-time 1192271832
+ # author-tz -0700
+ # committer Tom Preston-Werner
+ # committer-mail <tom@mojombo.com>
+ # committer-time 1192271832
+ # committer-tz -0700 - IGNORED BY US
+ role = m.group(0)
+ if firstpart.endswith('-mail'):
+ info["%s_email" % role] = parts[-1]
+ elif firstpart.endswith('-time'):
+ info["%s_date" % role] = time.gmtime(int(parts[-1]))
+ elif role == firstpart:
+ info[role] = parts[-1]
+ # END distinguish mail,time,name
+ else:
+ # handle
+ # filename lib/grit.rb
+ # summary add Blob
+ # <and rest>
+ if firstpart.startswith('filename'):
+ info['filename'] = parts[-1]
+ elif firstpart.startswith('summary'):
+ info['summary'] = parts[-1]
+ elif firstpart == '':
+ if info:
+ sha = info['id']
+ c = commits.get(sha)
+ if c is None:
+ c = Commit( self, id=sha,
+ author=Actor._from_string(info['author'] + ' ' + info['author_email']),
+ authored_date=info['author_date'],
+ committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
+ committed_date=info['committer_date'],
+ message=info['summary'])
+ commits[sha] = c
+ # END if commit objects needs initial creation
+ m = self.re_tab_full_line.search(line)
+ text, = m.groups()
+ blames[-1][0] = c
+ blames[-1][1].append( text )
+ info = None
+ # END if we collected commit info
+ # END distinguish filename,summary,rest
+ # END distinguish author|committer vs filename,summary,rest
+ # END distinguish hexsha vs other information
+ return blames
+
+ def commits(self, start='master', path='', max_count=None, skip=0):
+ """
+ A list of Commit objects representing the history of a given ref/commit
+
+ ``start``
+ is the branch/commit name (default 'master')
+
+ ``path``
+ is an optional path to limit the returned commits to
+ Commits that do not contain that path will not be returned.
+
+ ``max_count``
+ is the maximum number of commits to return (default None)
+
+ ``skip``
+ is the number of commits to skip (default 0) which will effectively
+ move your commit-window by the given number.
+
+ Returns
+ ``git.Commit[]``
+ """
+ options = {'max_count': max_count,
+ 'skip': skip}
+
+ if max_count is None:
+ options.pop('max_count')
+
+ return Commit.list_items(self, start, path, **options)
+
+ def commits_between(self, frm, to):
+ """
+ The Commits objects that are reachable via ``to`` but not via ``frm``
+ Commits are returned in chronological order.
+
+ ``frm``
+ is the branch/commit name of the younger item
+
+ ``to``
+ is the branch/commit name of the older item
+
+ Returns
+ ``git.Commit[]``
+ """
+ return reversed(Commit.list_items(self, "%s..%s" % (frm, to)))
+
+ def commits_since(self, start='master', path='', since='1970-01-01'):
+ """
+ The Commits objects that are newer than the specified date.
+ Commits are returned in chronological order.
+
+ ``start``
+ is the branch/commit name (default 'master')
+
+ ``path``
+ is an optional path to limit the returned commits to.
+
+
+ ``since``
+ is a string representing a date/time
- ``skip``
- is the number of commits to skip (default 0) which will effectively
- move your commit-window by the given number.
+ Returns
+ ``git.Commit[]``
+ """
+ options = {'since': since}
- Returns
- ``git.Commit[]``
- """
- options = {'max_count': max_count,
- 'skip': skip}
+ return Commit.list_items(self, start, path, **options)
- return Commit.find_all(self, start, path, **options)
+ def commit_count(self, start='master', path=''):
+ """
+ The number of commits reachable by the given branch/commit
- def commits_between(self, frm, to):
- """
- The Commits objects that are reachable via ``to`` but not via ``frm``
- Commits are returned in chronological order.
+ ``start``
+ is the branch/commit name (default 'master')
- ``from``
- is the branch/commit name of the younger item
+ ``path``
+ is an optional path
+ Commits that do not contain the path will not contribute to the count.
- ``to``
- is the branch/commit name of the older item
-
- Returns
- ``git.Commit[]``
- """
- return reversed(Commit.find_all(self, "%s..%s" % (frm, to)))
-
- def commits_since(self, start='master', path='', since='1970-01-01'):
- """
- The Commits objects that are newer than the specified date.
- Commits are returned in chronological order.
+ Returns
+ ``int``
+ """
+ return Commit.count(self, start, path)
- ``start``
- is the branch/commit name (default 'master')
+ def commit(self, id=None, path = ''):
+ """
+ The Commit object for the specified id
- ``path``
- is an optinal path to limit the returned commits to.
-
+ ``id``
+ is the SHA1 identifier of the commit or a ref or a ref name
+ if None, it defaults to the active branch
+
- ``since``
- is a string represeting a date/time
+ ``path``
+ is an optional path, if set the returned commit must contain the path.
- Returns
- ``git.Commit[]``
- """
- options = {'since': since}
+ Returns
+ ``git.Commit``
+ """
+ if id is None:
+ id = self.active_branch
+ options = {'max_count': 1}
- return Commit.find_all(self, start, path, **options)
-
- def commit_count(self, start='master', path=''):
- """
- The number of commits reachable by the given branch/commit
-
- ``start``
- is the branch/commit name (default 'master')
+ commits = Commit.list_items(self, id, path, **options)
- ``path``
- is an optional path
- Commits that do not contain the path will not contribute to the count.
-
- Returns
- ``int``
- """
- return Commit.count(self, start, path)
-
- def commit(self, id, path = ''):
- """
- The Commit object for the specified id
-
- ``id``
- is the SHA1 identifier of the commit
-
- ``path``
- is an optional path, if set the returned commit must contain the path.
-
- Returns
- ``git.Commit``
- """
- options = {'max_count': 1}
-
- commits = Commit.find_all(self, id, path, **options)
-
- if not commits:
- raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path )
- return commits[0]
-
- def commit_deltas_from(self, other_repo, ref='master', other_ref='master'):
- """
- Returns a list of commits that is in ``other_repo`` but not in self
-
- Returns
- git.Commit[]
- """
- repo_refs = self.git.rev_list(ref, '--').strip().splitlines()
- other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines()
+ if not commits:
+ raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path )
+ return commits[0]
- diff_refs = list(set(other_repo_refs) - set(repo_refs))
- return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs)
+ def commit_deltas_from(self, other_repo, ref='master', other_ref='master'):
+ """
+ Returns a list of commits that is in ``other_repo`` but not in self
- def tree(self, treeish='master'):
- """
- The Tree object for the given treeish reference
+ Returns
+ git.Commit[]
+ """
+ repo_refs = self.git.rev_list(ref, '--').strip().splitlines()
+ other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines()
- ``treeish``
- is the reference (default 'master')
+ diff_refs = list(set(other_repo_refs) - set(repo_refs))
+ return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs)
- Examples::
+ def tree(self, treeish=None):
+ """
+ The Tree object for the given treeish reference
- repo.tree('master')
+ ``treeish``
+ is a Ref instance defaulting to the active_branch if None.
+ Examples::
+
+ repo.tree(repo.heads[0])
+
+ Returns
+ ``git.Tree``
+
+ NOTE
+ A ref is required here to assure you point to a commit or tag. Otherwise
+ it is not guaranteed that you point to the root-level tree.
+
+ If you need a non-root level tree, find it by iterating the root tree.
+ """
+ if treeish is None:
+ treeish = self.active_branch
+ if not isinstance(treeish, Ref):
+ raise ValueError( "Treeish reference required, got %r" % treeish )
+
+
+ # As we are directly reading object information, we must make sure
+ # we truly point to a tree object. We resolve the ref to a sha in all cases
+ # to assure the returned tree can be compared properly. Except for
+ # heads, ids should always be hexshas
+ hexsha, typename, size = self.git.get_object_header( treeish )
+ if typename != "tree":
+ hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' )
+ # END tree handling
+ treeish = hexsha
+
+ # the root has an empty relative path and the default mode
+ return Tree(self, treeish, 0, '')
+
+
+ def diff(self, a, b, *paths):
+ """
+ The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s)
+
+ ``a``
+ is the base commit
+ ``b``
+ is the other commit
+
+ ``paths``
+ is an optional list of file paths on which to restrict the diff
+
+ Returns
+ ``str``
+ """
+ return self.git.diff(a, b, '--', *paths)
- Returns
- ``git.Tree``
- """
- return Tree(self, id=treeish)
+ def commit_diff(self, commit):
+ """
+ The commit diff for the given commit
+ ``commit`` is the commit name/id
- def blob(self, id):
- """
- The Blob object for the given id
-
- ``id``
- is the SHA1 id of the blob
-
- Returns
- ``git.Blob``
- """
- return Blob(self, id=id)
-
- def log(self, commit='master', path=None, **kwargs):
- """
- The Commit for a treeish, and all commits leading to it.
-
- ``kwargs``
- keyword arguments specifying flags to be used in git-log command,
- i.e.: max_count=1 to limit the amount of commits returned
+ Returns
+ ``git.Diff[]``
+ """
+ return Commit.diff(self, commit)
- Returns
- ``git.Commit[]``
- """
- options = {'pretty': 'raw'}
- options.update(kwargs)
- arg = [commit, '--']
- if path:
- arg.append(path)
- commits = self.git.log(*arg, **options)
- return Commit.list_from_string(self, commits)
+ @classmethod
+ def init_bare(self, path, mkdir=True, **kwargs):
+ """
+ Initialize a bare git repository at the given path
- def diff(self, a, b, *paths):
- """
- The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s)
-
- ``a``
- is the base commit
- ``b``
- is the other commit
+ ``path``
+ is the full path to the repo (traditionally ends with /<name>.git)
- ``paths``
- is an optional list of file paths on which to restrict the diff
-
- Returns
- ``str``
- """
- return self.git.diff(a, b, '--', *paths)
+ ``mkdir``
+ if specified will create the repository directory if it doesn't
+ already exist. Creates the directory with a mode=0755.
- def commit_diff(self, commit):
- """
- The commit diff for the given commit
- ``commit`` is the commit name/id
+ ``kwargs``
+ keyword arguments serving as additional options to the git init command
- Returns
- ``git.Diff[]``
- """
- return Commit.diff(self, commit)
+ Examples::
- @classmethod
- def init_bare(self, path, mkdir=True, **kwargs):
- """
- Initialize a bare git repository at the given path
+ git.Repo.init_bare('/var/git/myrepo.git')
- ``path``
- is the full path to the repo (traditionally ends with /<name>.git)
-
- ``mkdir``
- if specified will create the repository directory if it doesn't
- already exists. Creates the directory with a mode=0755.
-
- ``kwargs``
- keyword arguments serving as additional options to the git init command
-
- Examples::
-
- git.Repo.init_bare('/var/git/myrepo.git')
-
- Returns
- ``git.Repo`` (the newly created repo)
- """
-
- if mkdir and not os.path.exists(path):
- os.makedirs(path, 0755)
-
- git = Git(path)
- output = git.init('--bare', **kwargs)
- return Repo(path)
- create = init_bare
-
- def fork_bare(self, path, **kwargs):
- """
- Fork a bare git repository from this repo
-
- ``path``
- is the full path of the new repo (traditionally ends with /<name>.git)
-
- ``kwargs``
- keyword arguments to be given to the git clone command
-
- Returns
- ``git.Repo`` (the newly forked repo)
- """
- options = {'bare': True}
- options.update(kwargs)
- self.git.clone(self.path, path, **options)
- return Repo(path)
-
- def archive_tar(self, treeish='master', prefix=None):
- """
- Archive the given treeish
-
- ``treeish``
- is the treeish name/id (default 'master')
-
- ``prefix``
- is the optional prefix to prepend to each filename in the archive
-
- Examples::
-
- >>> repo.archive_tar
- <String containing tar archive>
-
- >>> repo.archive_tar('a87ff14')
- <String containing tar archive for commit a87ff14>
-
- >>> repo.archive_tar('master', 'myproject/')
- <String containing tar bytes archive, whose files are prefixed with 'myproject/'>
-
- Returns
- str (containing bytes of tar archive)
- """
- options = {}
- if prefix:
- options['prefix'] = prefix
- return self.git.archive(treeish, **options)
-
- def archive_tar_gz(self, treeish='master', prefix=None):
- """
- Archive and gzip the given treeish
-
- ``treeish``
- is the treeish name/id (default 'master')
-
- ``prefix``
- is the optional prefix to prepend to each filename in the archive
-
- Examples::
-
- >>> repo.archive_tar_gz
- <String containing tar.gz archive>
-
- >>> repo.archive_tar_gz('a87ff14')
- <String containing tar.gz archive for commit a87ff14>
-
- >>> repo.archive_tar_gz('master', 'myproject/')
- <String containing tar.gz archive and prefixed with 'myproject/'>
-
- Returns
- str (containing the bytes of tar.gz archive)
- """
- kwargs = {}
- if prefix:
- kwargs['prefix'] = prefix
- resultstr = self.git.archive(treeish, **kwargs)
- sio = StringIO.StringIO()
- gf = gzip.GzipFile(fileobj=sio, mode ='wb')
- gf.write(resultstr)
- gf.close()
- return sio.getvalue()
-
- def _get_daemon_export(self):
- filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
- return os.path.exists(filename)
-
- def _set_daemon_export(self, value):
- filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
- fileexists = os.path.exists(filename)
- if value and not fileexists:
- touch(filename)
- elif not value and fileexists:
- os.unlink(filename)
-
- daemon_export = property(_get_daemon_export, _set_daemon_export,
- doc="If True, git-daemon may export this repository")
- del _get_daemon_export
- del _set_daemon_export
-
- def _get_alternates(self):
- """
- The list of alternates for this repo from which objects can be retrieved
-
- Returns
- list of strings being pathnames of alternates
- """
- alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates')
-
- if os.path.exists(alternates_path):
- try:
- f = open(alternates_path)
- alts = f.read()
- finally:
- f.close()
- return alts.strip().splitlines()
- else:
- return []
-
- def _set_alternates(self, alts):
- """
- Sets the alternates
-
- ``alts``
- is the array of string paths representing the alternates at which
- git should look for objects, i.e. /home/user/repo/.git/objects
+ Returns
+ ``git.Repo`` (the newly created repo)
+ """
+
+ if mkdir and not os.path.exists(path):
+ os.makedirs(path, 0755)
+
+ git = Git(path)
+ output = git.init('--bare', **kwargs)
+ return Repo(path)
+ create = init_bare
+
+ def fork_bare(self, path, **kwargs):
+ """
+ Fork a bare git repository from this repo
+
+ ``path``
+ is the full path of the new repo (traditionally ends with /<name>.git)
+
+ ``kwargs``
+ keyword arguments to be given to the git clone command
+
+ Returns
+ ``git.Repo`` (the newly forked repo)
+ """
+ options = {'bare': True}
+ options.update(kwargs)
+ self.git.clone(self.path, path, **options)
+ return Repo(path)
+
+ def archive_tar(self, treeish='master', prefix=None):
+ """
+ Archive the given treeish
+
+ ``treeish``
+ is the treeish name/id (default 'master')
+
+ ``prefix``
+ is the optional prefix to prepend to each filename in the archive
+
+ Examples::
+
+ >>> repo.archive_tar
+ <String containing tar archive>
+
+ >>> repo.archive_tar('a87ff14')
+ <String containing tar archive for commit a87ff14>
+
+ >>> repo.archive_tar('master', 'myproject/')
+ <String containing tar bytes archive, whose files are prefixed with 'myproject/'>
+
+ Returns
+ str (containing bytes of tar archive)
+ """
+ options = {}
+ if prefix:
+ options['prefix'] = prefix
+ return self.git.archive(treeish, **options)
+
+ def archive_tar_gz(self, treeish='master', prefix=None):
+ """
+ Archive and gzip the given treeish
+
+ ``treeish``
+ is the treeish name/id (default 'master')
+
+ ``prefix``
+ is the optional prefix to prepend to each filename in the archive
+
+ Examples::
+
+ >>> repo.archive_tar_gz
+ <String containing tar.gz archive>
+
+ >>> repo.archive_tar_gz('a87ff14')
+ <String containing tar.gz archive for commit a87ff14>
+
+ >>> repo.archive_tar_gz('master', 'myproject/')
+ <String containing tar.gz archive and prefixed with 'myproject/'>
+
+ Returns
+ str (containing the bytes of tar.gz archive)
+ """
+ kwargs = {}
+ if prefix:
+ kwargs['prefix'] = prefix
+ resultstr = self.git.archive(treeish, **kwargs)
+ sio = StringIO.StringIO()
+ gf = gzip.GzipFile(fileobj=sio, mode ='wb')
+ gf.write(resultstr)
+ gf.close()
+ return sio.getvalue()
+
+ def _get_daemon_export(self):
+ filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
+ return os.path.exists(filename)
+
+ def _set_daemon_export(self, value):
+ filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
+ fileexists = os.path.exists(filename)
+ if value and not fileexists:
+ touch(filename)
+ elif not value and fileexists:
+ os.unlink(filename)
+
+ daemon_export = property(_get_daemon_export, _set_daemon_export,
+ doc="If True, git-daemon may export this repository")
+ del _get_daemon_export
+ del _set_daemon_export
+
+ def _get_alternates(self):
+ """
+ The list of alternates for this repo from which objects can be retrieved
+
+ Returns
+ list of strings being pathnames of alternates
+ """
+ alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates')
+
+ if os.path.exists(alternates_path):
+ try:
+ f = open(alternates_path)
+ alts = f.read()
+ finally:
+ f.close()
+ return alts.strip().splitlines()
+ else:
+ return []
+
+ def _set_alternates(self, alts):
+ """
+ Sets the alternates
+
+ ``alts``
+ is the array of string paths representing the alternates at which
+ git should look for objects, i.e. /home/user/repo/.git/objects
Raises
NoSuchPathError
- Returns
- None
- """
- for alt in alts:
- if not os.path.exists(alt):
- raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt)
-
- if not alts:
- os.remove(os.path.join(self.path, 'objects', 'info', 'alternates'))
- else:
- try:
- f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w')
- f.write("\n".join(alts))
- finally:
- f.close()
-
- alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates")
-
- @property
- def is_dirty(self):
- """
- Return the status of the index.
-
- Returns
- ``True``, if the index has any uncommitted changes,
- otherwise ``False``
+ Returns
+ None
+ """
+ for alt in alts:
+ if not os.path.exists(alt):
+ raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt)
+
+ if not alts:
+ os.remove(os.path.join(self.path, 'objects', 'info', 'alternates'))
+ else:
+ try:
+ f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w')
+ f.write("\n".join(alts))
+ finally:
+ f.close()
+
+ alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates")
+
+ @property
+ def is_dirty(self):
+ """
+ Return the status of the index.
+
+ Returns
+ ``True``, if the index has any uncommitted changes,
+ otherwise ``False``
NOTE
Working tree changes that have not been staged will not be detected !
- """
- if self.bare:
- # Bare repositories with no associated working directory are
- # always consired to be clean.
- return False
-
- return len(self.git.diff('HEAD', '--').strip()) > 0
-
- @property
- def active_branch(self):
- """
- The name of the currently active branch.
-
- Returns
- str (the branch name)
- """
- branch = self.git.symbolic_ref('HEAD').strip()
- if branch.startswith('refs/heads/'):
- branch = branch[len('refs/heads/'):]
-
- return branch
-
- def __repr__(self):
- return '<git.Repo "%s">' % self.path
+ """
+ if self.bare:
+ # Bare repositories with no associated working directory are
+ # always considered to be clean.
+ return False
+
+ return len(self.git.diff('HEAD', '--').strip()) > 0
+
+ @property
+ def active_branch(self):
+ """
+ The Head of the currently active branch.
+
+ Returns
+ Head to the active branch
+ """
+ return Head( self, self.git.symbolic_ref('HEAD').strip() )
+
+ def __repr__(self):
+ return '<git.Repo "%s">' % self.path
diff --git a/lib/git/stats.py b/lib/git/stats.py
index 307e2f2f..bda4e539 100644
--- a/lib/git/stats.py
+++ b/lib/git/stats.py
@@ -5,55 +5,56 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
class Stats(object):
- """
- Represents stat information as presented by git at the end of a merge. It is
- created from the output of a diff operation.
-
- ``Example``::
-
- c = Commit( sha1 )
- s = c.stats
- s.total # full-stat-dict
- s.files # dict( filepath : stat-dict )
-
- ``stat-dict``
-
- A dictionary with the following keys and values::
-
- deletions = number of deleted lines as int
- insertions = number of inserted lines as int
- lines = total number of lines changed as int, or deletions + insertions
-
- ``full-stat-dict``
-
- In addition to the items in the stat-dict, it features additional information::
-
- files = number of changed files as int
-
- """
- def __init__(self, repo, total, files):
- self.repo = repo
- self.total = total
- self.files = files
+ """
+ Represents stat information as presented by git at the end of a merge. It is
+ created from the output of a diff operation.
+
+ ``Example``::
+
+ c = Commit( sha1 )
+ s = c.stats
+ s.total # full-stat-dict
+ s.files # dict( filepath : stat-dict )
+
+ ``stat-dict``
+
+ A dictionary with the following keys and values::
+
+ deletions = number of deleted lines as int
+ insertions = number of inserted lines as int
+ lines = total number of lines changed as int, or deletions + insertions
+
+ ``full-stat-dict``
+
+ In addition to the items in the stat-dict, it features additional information::
+
+ files = number of changed files as int
+
+ """
+ __slots__ = ("total", "files")
+
+ def __init__(self, total, files):
+ self.total = total
+ self.files = files
- @classmethod
- def list_from_string(cls, repo, text):
- """
- Create a Stat object from output retrieved by git-diff.
-
- Returns
- git.Stat
- """
- hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}}
- for line in text.splitlines():
- (raw_insertions, raw_deletions, filename) = line.split("\t")
- insertions = raw_insertions != '-' and int(raw_insertions) or 0
- deletions = raw_deletions != '-' and int(raw_deletions) or 0
- hsh['total']['insertions'] += insertions
- hsh['total']['deletions'] += deletions
- hsh['total']['lines'] += insertions + deletions
- hsh['total']['files'] += 1
- hsh['files'][filename.strip()] = {'insertions': insertions,
- 'deletions': deletions,
- 'lines': insertions + deletions}
- return Stats(repo, hsh['total'], hsh['files'])
+ @classmethod
+ def _list_from_string(cls, repo, text):
+ """
+ Create a Stats object from output retrieved by git-diff.
+
+ Returns
+ git.Stats
+ """
+ hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}}
+ for line in text.splitlines():
+ (raw_insertions, raw_deletions, filename) = line.split("\t")
+ insertions = raw_insertions != '-' and int(raw_insertions) or 0
+ deletions = raw_deletions != '-' and int(raw_deletions) or 0
+ hsh['total']['insertions'] += insertions
+ hsh['total']['deletions'] += deletions
+ hsh['total']['lines'] += insertions + deletions
+ hsh['total']['files'] += 1
+ hsh['files'][filename.strip()] = {'insertions': insertions,
+ 'deletions': deletions,
+ 'lines': insertions + deletions}
+ return Stats(hsh['total'], hsh['files'])
diff --git a/lib/git/tag.py b/lib/git/tag.py
deleted file mode 100644
index 8413ce73..00000000
--- a/lib/git/tag.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# tag.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from commit import Commit
-
-class Tag(object):
- def __init__(self, name, commit):
- """
- Initialize a newly instantiated Tag
-
- ``name``
- is the name of the head
-
- ``commit``
- is the Commit that the head points to
- """
- self.name = name
- self.commit = commit
-
- @classmethod
- def find_all(cls, repo, **kwargs):
- """
- Find all Tags in the repository
-
- ``repo``
- is the Repo
-
- ``kwargs``
- Additional options given as keyword arguments, will be passed
- to git-for-each-ref
-
- Returns
- ``git.Tag[]``
-
- List is sorted by committerdate
- """
- options = {'sort': "committerdate",
- 'format': "%(refname)%00%(objectname)"}
- options.update(**kwargs)
-
- output = repo.git.for_each_ref("refs/tags", **options)
- return cls.list_from_string(repo, output)
-
- @classmethod
- def list_from_string(cls, repo, text):
- """
- Parse out tag information into an array of Tag objects
-
- ``repo``
- is the Repo
-
- ``text``
- is the text output from the git-for-each command
-
- Returns
- git.Tag[]
- """
- tags = []
- for line in text.splitlines():
- tags.append(cls.from_string(repo, line))
- return tags
-
- @classmethod
- def from_string(cls, repo, line):
- """
- Create a new Tag instance from the given string.
-
- ``repo``
- is the Repo
-
- ``line``
- is the formatted tag information
-
- Format::
-
- name: [a-zA-Z_/]+
- <null byte>
- id: [0-9A-Fa-f]{40}
-
- Returns
- git.Tag
- """
- full_name, ids = line.split("\x00")
- name = full_name.split("/")[-1]
- commit = Commit(repo, id=ids)
- return Tag(name, commit)
-
- def __repr__(self):
- return '<git.Tag "%s">' % self.name
diff --git a/lib/git/tree.py b/lib/git/tree.py
deleted file mode 100644
index cfb0881c..00000000
--- a/lib/git/tree.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# tree.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
-from lazy import LazyMixin
-import blob
-
-class Tree(LazyMixin):
- def __init__(self, repo, id, mode=None, name=None):
- LazyMixin.__init__(self)
- self.repo = repo
- self.id = id
- self.mode = mode
- self.name = name
- self._contents = None
-
- def __bake__(self):
- # Ensure the treeish references directly a tree
- treeish = self.id
- if not treeish.endswith(':'):
- treeish = treeish + ':'
-
- # Read the tree contents.
- self._contents = {}
- for line in self.repo.git.ls_tree(self.id).splitlines():
- obj = self.content_from_string(self.repo, line)
- if obj is not None:
- self._contents[obj.name] = obj
-
- @staticmethod
- def content_from_string(repo, text):
- """
- Parse a content item and create the appropriate object
-
- ``repo``
- is the Repo
-
- ``text``
- is the single line containing the items data in `git ls-tree` format
-
- Returns
- ``git.Blob`` or ``git.Tree``
- """
- try:
- mode, typ, id, name = text.expandtabs(1).split(" ", 3)
- except:
- return None
-
- if typ == "tree":
- return Tree(repo, id=id, mode=mode, name=name)
- elif typ == "blob":
- return blob.Blob(repo, id=id, mode=mode, name=name)
- elif typ == "commit":
- return None
- else:
- raise(TypeError, "Invalid type: %s" % typ)
-
- def __div__(self, file):
- """
- Find the named object in this tree's contents
-
- Examples::
-
- >>> Repo('/path/to/python-git').tree/'lib'
- <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e">
- >>> Repo('/path/to/python-git').tree/'README.txt'
- <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
-
- Returns
- ``git.Blob`` or ``git.Tree`` or ``None`` if not found
- """
- return self.get(file)
-
- @property
- def basename(self):
- os.path.basename(self.name)
-
- def __repr__(self):
- return '<git.Tree "%s">' % self.id
-
- # Implement the basics of the dict protocol:
- # directories/trees can be seen as object dicts.
- def __getitem__(self, key):
- return self._contents[key]
-
- def __iter__(self):
- return iter(self._contents)
-
- def __len__(self):
- return len(self._contents)
-
- def __contains__(self, key):
- return key in self._contents
-
- def get(self, key):
- return self._contents.get(key)
-
- def items(self):
- return self._contents.items()
-
- def keys(self):
- return self._contents.keys()
-
- def values(self):
- return self._contents.values()
diff --git a/lib/git/utils.py b/lib/git/utils.py
index 5d0ba8ca..f84c247d 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -7,20 +7,83 @@
import os
def dashify(string):
- return string.replace('_', '-')
+ return string.replace('_', '-')
def touch(filename):
- os.utime(filename)
+ os.utime(filename)
def is_git_dir(d):
- """ This is taken from the git setup.c:is_git_directory
- function."""
-
- if os.path.isdir(d) and \
- os.path.isdir(os.path.join(d, 'objects')) and \
- os.path.isdir(os.path.join(d, 'refs')):
- headref = os.path.join(d, 'HEAD')
- return os.path.isfile(headref) or \
- (os.path.islink(headref) and
- os.readlink(headref).startswith('refs'))
- return False
+ """ This is taken from the git setup.c:is_git_directory
+ function."""
+
+ if os.path.isdir(d) and \
+ os.path.isdir(os.path.join(d, 'objects')) and \
+ os.path.isdir(os.path.join(d, 'refs')):
+ headref = os.path.join(d, 'HEAD')
+ return os.path.isfile(headref) or \
+ (os.path.islink(headref) and
+ os.readlink(headref).startswith('refs'))
+ return False
+
+
+class LazyMixin(object):
+ """
+ Base class providing an interface to lazily retrieve attribute values upon
+ first access. If slots are used, memory will only be reserved once the attribute
+ is actually accessed and retrieved the first time. All future accesses will
+ return the cached value as stored in the Instance's dict or slot.
+ """
+ __slots__ = tuple()
+
+ def __getattr__(self, attr):
+ """
+ Whenever an attribute is requested that we do not know, we allow it
+ to be created and set. Next time the same attribute is requested, it is simply
+ returned from our dict/slots.
+ """
+ self._set_cache_(attr)
+ # will raise in case the cache was not created
+ return object.__getattribute__(self, attr)
+
+ def _set_cache_(self, attr):
+ """ This method should be overridden in the derived class.
+ It should check whether the attribute named by attr can be created
+ and cached. Do nothing if you do not know the attribute or call your subclass
+
+ The derived class may create as many additional attributes as it deems
+ necessary in case a git command returns more information than represented
+ in the single attribute."""
+ pass
+
+
+class Iterable(object):
+ """
+ Defines an interface for iterable items which is to assure a uniform
+ way to retrieve and iterate items within the git repository
+ """
+ __slots__ = tuple()
+
+ @classmethod
+ def list_items(cls, repo, *args, **kwargs):
+ """
+ Find all items of this type - subclasses can specify args and kwargs differently.
+ If no args are given, subclasses are obliged to return all items if no additional
+ arguments are given.
+
+ Note: Favor the iter_items method as it will yield items one by one instead of building the full list first
+
+ Returns:
+ list(Item,...) list of item instances
+ """
+ return list(cls.iter_items(repo, *args, **kwargs))
+
+
+ @classmethod
+ def iter_items(cls, repo, *args, **kwargs):
+ """
+ For more information about the arguments, see list_items
+ Return:
+ iterator yielding Items
+ """
+ raise NotImplementedError("To be implemented by Subclass")
+