author     Sebastian Thiel <byronimo@gmail.com>  2010-06-02 12:30:33 +0200
committer  Sebastian Thiel <byronimo@gmail.com>  2010-06-02 12:51:05 +0200
commit     8c1a87d11df666d308d14e4ae7ee0e9d614296b6 (patch)
tree       87481ab28367db496886a3801bda37227c10f7ed /lib/git
parent     df0892351a394d768489b5647d47b73c24d3ef5f (diff)
download   gitpython-8c1a87d11df666d308d14e4ae7ee0e9d614296b6.tar.gz
commit: refactored existing code to decode commits from streams - performance is slightly better
git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used
test.helpers: removed unnecessary code
Diffstat (limited to 'lib/git')
-rw-r--r--  lib/git/cmd.py             901
-rw-r--r--  lib/git/objects/base.py    418
-rw-r--r--  lib/git/objects/commit.py  139
-rw-r--r--  lib/git/objects/tree.py      2
-rw-r--r--  lib/git/objects/utils.py    17
5 files changed, 797 insertions, 680 deletions
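
The headline change is a streaming accessor next to the existing buffered one. A minimal usage sketch (not part of the commit), assuming the package is importable as ``git``, a placeholder repository path, and ``HEAD`` as an example ref::

    from git.cmd import Git

    g = Git("/path/to/repository")                     # placeholder working directory
    # whole object buffered in memory, as before
    hexsha, typename, size, data = g.get_object_data("HEAD")
    # new: same header information, but the content arrives as a sized, file-like stream
    hexsha, typename, size, stream = g.stream_object_data("HEAD")
    while True:
        chunk = stream.read(8192)                      # large objects can be consumed incrementally
        if not chunk:
            break
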
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index ef2fdf4e..cef4ea60 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -13,414 +13,505 @@ from errors import GitCommandError
GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
- 'with_exceptions', 'as_process',
- 'output_stream' )
+ 'with_exceptions', 'as_process',
+ 'output_stream' )
def dashify(string):
- return string.replace('_', '-')
+ return string.replace('_', '-')
class Git(object):
- """
- The Git class manages communication with the Git binary.
-
- It provides a convenient interface to calling the Git binary, such as in::
-
- g = Git( git_dir )
- g.init() # calls 'git init' program
- rval = g.ls_files() # calls 'git ls-files' program
-
- ``Debugging``
- Set the GIT_PYTHON_TRACE environment variable print each invocation
- of the command to stdout.
- Set its value to 'full' to see details about the returned values.
- """
- __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
-
- class AutoInterrupt(object):
- """
- Kill/Interrupt the stored process instance once this instance goes out of scope. It is
- used to prevent processes piling up in case iterators stop reading.
- Besides all attributes are wired through to the contained process object.
-
- The wait method was overridden to perform automatic status code checking
- and possibly raise.
- """
- __slots__= ("proc", "args")
-
- def __init__(self, proc, args ):
- self.proc = proc
- self.args = args
-
- def __del__(self):
- # did the process finish already so we have a return code ?
- if self.proc.poll() is not None:
- return
-
- # can be that nothing really exists anymore ...
- if os is None:
- return
-
- # try to kill it
- try:
- os.kill(self.proc.pid, 2) # interrupt signal
- except AttributeError:
- # try windows
- # for some reason, providing None for stdout/stderr still prints something. This is why
- # we simply use the shell and redirect to nul. Its slower than CreateProcess, question
- # is whether we really want to see all these messages. Its annoying no matter what.
- subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True)
- # END exception handling
-
- def __getattr__(self, attr):
- return getattr(self.proc, attr)
-
- def wait(self):
- """
- Wait for the process and return its status code.
-
- Raise
- GitCommandError if the return status is not 0
- """
- status = self.proc.wait()
- if status != 0:
- raise GitCommandError(self.args, status, self.proc.stderr.read())
- # END status handling
- return status
-
-
-
- def __init__(self, working_dir=None):
- """
- Initialize this instance with:
-
- ``working_dir``
- Git directory we should work in. If None, we always work in the current
- directory as returned by os.getcwd().
- It is meant to be the working tree directory if available, or the
- .git directory in case of bare repositories.
- """
- super(Git, self).__init__()
- self._working_dir = working_dir
-
- # cached command slots
- self.cat_file_header = None
- self.cat_file_all = None
-
- def __getattr__(self, name):
- """
- A convenience method as it allows to call the command as if it was
- an object.
- Returns
- Callable object that will execute call _call_process with your arguments.
- """
- if name[:1] == '_':
- raise AttributeError(name)
- return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
-
- @property
- def working_dir(self):
- """
- Returns
- Git directory we are working on
- """
- return self._working_dir
-
- def execute(self, command,
- istream=None,
- with_keep_cwd=False,
- with_extended_output=False,
- with_exceptions=True,
- as_process=False,
- output_stream=None,
- **subprocess_kwargs
- ):
- """
- Handles executing the command on the shell and consumes and returns
- the returned information (stdout)
-
- ``command``
- The command argument list to execute.
- It should be a string, or a sequence of program arguments. The
- program to execute is the first item in the args sequence or string.
-
- ``istream``
- Standard input filehandle passed to subprocess.Popen.
-
- ``with_keep_cwd``
- Whether to use the current working directory from os.getcwd().
- The cmd otherwise uses its own working_dir that it has been initialized
- with if possible.
-
- ``with_extended_output``
- Whether to return a (status, stdout, stderr) tuple.
-
- ``with_exceptions``
- Whether to raise an exception when git returns a non-zero status.
-
- ``as_process``
- Whether to return the created process instance directly from which
- streams can be read on demand. This will render with_extended_output and
- with_exceptions ineffective - the caller will have
- to deal with the details himself.
- It is important to note that the process will be placed into an AutoInterrupt
- wrapper that will interrupt the process once it goes out of scope. If you
- use the command in iterators, you should pass the whole process instance
- instead of a single stream.
-
- ``output_stream``
- If set to a file-like object, data produced by the git command will be
- output to the given stream directly.
- This feature only has any effect if as_process is False. Processes will
- always be created with a pipe due to issues with subprocess.
- This merely is a workaround as data will be copied from the
- output pipe to the given output stream directly.
-
- ``**subprocess_kwargs``
- Keyword arguments to be passed to subprocess.Popen. Please note that
- some of the valid kwargs are already set by this method, the ones you
- specify may not be the same ones.
-
- Returns::
-
- str(output) # extended_output = False (Default)
- tuple(int(status), str(stdout), str(stderr)) # extended_output = True
-
- if ouput_stream is True, the stdout value will be your output stream:
- output_stream # extended_output = False
- tuple(int(status), output_stream, str(stderr))# extended_output = True
-
- Raise
- GitCommandError
-
- NOTE
- If you add additional keyword arguments to the signature of this method,
- you must update the execute_kwargs tuple housed in this module.
- """
- if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
- print ' '.join(command)
-
- # Allow the user to have the command executed in their working dir.
- if with_keep_cwd or self._working_dir is None:
- cwd = os.getcwd()
- else:
- cwd=self._working_dir
-
- # Start the process
- proc = subprocess.Popen(command,
- cwd=cwd,
- stdin=istream,
- stderr=subprocess.PIPE,
- stdout=subprocess.PIPE,
- close_fds=(os.name=='posix'),# unsupported on linux
- **subprocess_kwargs
- )
- if as_process:
- return self.AutoInterrupt(proc, command)
-
- # Wait for the process to return
- status = 0
- stdout_value = ''
- stderr_value = ''
- try:
- if output_stream is None:
- stdout_value = proc.stdout.read().rstrip() # strip trailing "\n"
- else:
- max_chunk_size = 1024*64
- while True:
- chunk = proc.stdout.read(max_chunk_size)
- output_stream.write(chunk)
- if len(chunk) < max_chunk_size:
- break
- # END reading output stream
- stdout_value = output_stream
- # END stdout handling
- stderr_value = proc.stderr.read().rstrip() # strip trailing "\n"
-
- # waiting here should do nothing as we have finished stream reading
- status = proc.wait()
- finally:
- proc.stdout.close()
- proc.stderr.close()
-
- if with_exceptions and status != 0:
- raise GitCommandError(command, status, stderr_value)
-
- if GIT_PYTHON_TRACE == 'full':
- if stderr_value:
- print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value)
- elif stdout_value:
- print "%s -> %d: '%s'" % (command, status, stdout_value)
- else:
- print "%s -> %d" % (command, status)
-
- # Allow access to the command's status code
- if with_extended_output:
- return (status, stdout_value, stderr_value)
- else:
- return stdout_value
-
- def transform_kwargs(self, **kwargs):
- """
- Transforms Python style kwargs into git command line options.
- """
- args = []
- for k, v in kwargs.items():
- if len(k) == 1:
- if v is True:
- args.append("-%s" % k)
- elif type(v) is not bool:
- args.append("-%s%s" % (k, v))
- else:
- if v is True:
- args.append("--%s" % dashify(k))
- elif type(v) is not bool:
- args.append("--%s=%s" % (dashify(k), v))
- return args
-
- @classmethod
- def __unpack_args(cls, arg_list):
- if not isinstance(arg_list, (list,tuple)):
- return [ str(arg_list) ]
-
- outlist = list()
- for arg in arg_list:
- if isinstance(arg_list, (list, tuple)):
- outlist.extend(cls.__unpack_args( arg ))
- # END recursion
- else:
- outlist.append(str(arg))
- # END for each arg
- return outlist
-
- def _call_process(self, method, *args, **kwargs):
- """
- Run the given git command with the specified arguments and return
- the result as a String
-
- ``method``
- is the command. Contained "_" characters will be converted to dashes,
- such as in 'ls_files' to call 'ls-files'.
-
- ``args``
- is the list of arguments. If None is included, it will be pruned.
- This allows your commands to call git more conveniently as None
- is realized as non-existent
-
- ``kwargs``
- is a dict of keyword arguments.
- This function accepts the same optional keyword arguments
- as execute().
-
- Examples::
- git.rev_list('master', max_count=10, header=True)
-
- Returns
- Same as execute()
- """
-
- # Handle optional arguments prior to calling transform_kwargs
- # otherwise these'll end up in args, which is bad.
- _kwargs = {}
- for kwarg in execute_kwargs:
- try:
- _kwargs[kwarg] = kwargs.pop(kwarg)
- except KeyError:
- pass
-
- # Prepare the argument list
- opt_args = self.transform_kwargs(**kwargs)
-
- ext_args = self.__unpack_args([a for a in args if a is not None])
- args = opt_args + ext_args
-
- call = ["git", dashify(method)]
- call.extend(args)
-
- return self.execute(call, **_kwargs)
-
- def _parse_object_header(self, header_line):
- """
- ``header_line``
- <hex_sha> type_string size_as_int
-
- Returns
- (hex_sha, type_string, size_as_int)
-
- Raises
- ValueError if the header contains indication for an error due to incorrect
- input sha
- """
- tokens = header_line.split()
- if len(tokens) != 3:
- raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) )
- if len(tokens[0]) != 40:
- raise ValueError("Failed to parse header: %r" % header_line)
- return (tokens[0], tokens[1], int(tokens[2]))
-
- def __prepare_ref(self, ref):
- # required for command to separate refs on stdin
- refstr = str(ref) # could be ref-object
- if refstr.endswith("\n"):
- return refstr
- return refstr + "\n"
-
- def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs):
- cur_val = getattr(self, attr_name)
- if cur_val is not None:
- return cur_val
-
- options = { "istream" : subprocess.PIPE, "as_process" : True }
- options.update( kwargs )
-
- cmd = self._call_process( cmd_name, *args, **options )
- setattr(self, attr_name, cmd )
- return cmd
-
- def __get_object_header(self, cmd, ref):
- cmd.stdin.write(self.__prepare_ref(ref))
- cmd.stdin.flush()
- return self._parse_object_header(cmd.stdout.readline())
-
- def get_object_header(self, ref):
- """
- Use this method to quickly examine the type and size of the object behind
- the given ref.
-
- NOTE
- The method will only suffer from the costs of command invocation
- once and reuses the command in subsequent calls.
-
- Return:
- (hexsha, type_string, size_as_int)
- """
- cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
- return self.__get_object_header(cmd, ref)
-
- def get_object_data(self, ref):
- """
- As get_object_header, but returns object data as well
-
- Return:
- (hexsha, type_string, size_as_int,data_string)
- """
- cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True)
- hexsha, typename, size = self.__get_object_header(cmd, ref)
- data = cmd.stdout.read(size)
- cmd.stdout.read(1) # finishing newlines
-
- return (hexsha, typename, size, data)
-
- def clear_cache(self):
- """
- Clear all kinds of internal caches to release resources.
-
- Currently persistent commands will be interrupted.
-
- Returns
- self
- """
- self.cat_file_all = None
- self.cat_file_header = None
- return self
+ """
+ The Git class manages communication with the Git binary.
+
+ It provides a convenient interface to calling the Git binary, such as in::
+
+ g = Git( git_dir )
+ g.init() # calls 'git init' program
+ rval = g.ls_files() # calls 'git ls-files' program
+
+ ``Debugging``
+ Set the GIT_PYTHON_TRACE environment variable print each invocation
+ of the command to stdout.
+ Set its value to 'full' to see details about the returned values.
+ """
+ __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
+
+ class AutoInterrupt(object):
+ """
+ Kill/Interrupt the stored process instance once this instance goes out of scope. It is
+ used to prevent processes piling up in case iterators stop reading.
+ Besides all attributes are wired through to the contained process object.
+
+ The wait method was overridden to perform automatic status code checking
+ and possibly raise.
+ """
+ __slots__= ("proc", "args")
+
+ def __init__(self, proc, args ):
+ self.proc = proc
+ self.args = args
+
+ def __del__(self):
+ # did the process finish already so we have a return code ?
+ if self.proc.poll() is not None:
+ return
+
+ # can be that nothing really exists anymore ...
+ if os is None:
+ return
+
+ # try to kill it
+ try:
+ os.kill(self.proc.pid, 2) # interrupt signal
+ except AttributeError:
+ # try windows
+ # for some reason, providing None for stdout/stderr still prints something. This is why
+ # we simply use the shell and redirect to nul. Its slower than CreateProcess, question
+ # is whether we really want to see all these messages. Its annoying no matter what.
+ subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True)
+ # END exception handling
+
+ def __getattr__(self, attr):
+ return getattr(self.proc, attr)
+
+ def wait(self):
+ """
+ Wait for the process and return its status code.
+
+ Raise
+ GitCommandError if the return status is not 0
+ """
+ status = self.proc.wait()
+ if status != 0:
+ raise GitCommandError(self.args, status, self.proc.stderr.read())
+ # END status handling
+ return status
+ # END auto interrupt
+
+ class CatFileContentStream(object):
+ """Object representing a sized read-only stream returning the contents of
+ an object.
+ It behaves like a stream, but counts the data read and simulates an empty
+ stream once our sized content region is empty.
+ If not all data is read to the end of the objects's lifetime, we read the
+ rest to assure the underlying stream continues to work"""
+
+ __slots__ = ('_stream', '_nbr', '_size')
+
+ def __init__(self, size, stream):
+ self._stream = stream
+ self._size = size
+ self._nbr = 0 # num bytes read
+
+ def read(self, size=-1):
+ bytes_left = self._size - self._nbr
+ if bytes_left == 0:
+ return ''
+ if size > -1:
+ # assure we don't try to read past our limit
+ size = min(bytes_left, size)
+ else:
+ # they try to read all, make sure its not more than what remains
+ size = bytes_left
+ # END check early depletion
+ data = self._stream.read(size)
+ self._nbr += len(data)
+
+ # check for depletion, read our final byte to make the stream usable by others
+ if self._size - self._nbr == 0:
+ self._stream.read(1) # final newline
+ # END finish reading
+
+ return data
+
+ def readline(self, size=-1):
+ if self._nbr == self._size:
+ return ''
+
+ if size > -1:
+ size = min(self._size - self._nbr, size)
+
+ data = self._stream.readline(size)
+ self._nbr += len(data)
+
+ # handle final byte
+ # we inline everything, it must be fast !
+ if self._size - self._nbr == 0:
+ self._stream.read(1)
+ # END finish reading
+
+ return data
+
+ def readlines(self, size=-1):
+ if self._nbr == self._size:
+ return list()
+
+ # leave all additional logic to our readline method, we just check the size
+ out = list()
+ nbr = 0
+ while True:
+ line = self.readline()
+ if not line:
+ break
+ out.append(line)
+ if size > -1:
+ nbr += len(line)
+ if nbr > size:
+ break
+ # END handle size constraint
+ # END readline loop
+ return out
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ line = self.readline()
+ if not line:
+ raise StopIteration
+ return line
+
+ def __del__(self):
+ bytes_left = self._size - self._nbr
+ if bytes_left:
+ # seek and discard
+ self._stream.seek(bytes_left + 1, os.SEEK_CUR) # includes terminating newline
+ # END handle incomplete read
+
+
+ def __init__(self, working_dir=None):
+ """
+ Initialize this instance with:
+
+ ``working_dir``
+ Git directory we should work in. If None, we always work in the current
+ directory as returned by os.getcwd().
+ It is meant to be the working tree directory if available, or the
+ .git directory in case of bare repositories.
+ """
+ super(Git, self).__init__()
+ self._working_dir = working_dir
+
+ # cached command slots
+ self.cat_file_header = None
+ self.cat_file_all = None
+
+ def __getattr__(self, name):
+ """
+ A convenience method as it allows to call the command as if it was
+ an object.
+ Returns
+ Callable object that will execute call _call_process with your arguments.
+ """
+ if name[:1] == '_':
+ raise AttributeError(name)
+ return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
+
+ @property
+ def working_dir(self):
+ """
+ Returns
+ Git directory we are working on
+ """
+ return self._working_dir
+
+ def execute(self, command,
+ istream=None,
+ with_keep_cwd=False,
+ with_extended_output=False,
+ with_exceptions=True,
+ as_process=False,
+ output_stream=None,
+ **subprocess_kwargs
+ ):
+ """
+ Handles executing the command on the shell and consumes and returns
+ the returned information (stdout)
+
+ ``command``
+ The command argument list to execute.
+ It should be a string, or a sequence of program arguments. The
+ program to execute is the first item in the args sequence or string.
+
+ ``istream``
+ Standard input filehandle passed to subprocess.Popen.
+
+ ``with_keep_cwd``
+ Whether to use the current working directory from os.getcwd().
+ The cmd otherwise uses its own working_dir that it has been initialized
+ with if possible.
+
+ ``with_extended_output``
+ Whether to return a (status, stdout, stderr) tuple.
+
+ ``with_exceptions``
+ Whether to raise an exception when git returns a non-zero status.
+
+ ``as_process``
+ Whether to return the created process instance directly from which
+ streams can be read on demand. This will render with_extended_output and
+ with_exceptions ineffective - the caller will have
+ to deal with the details himself.
+ It is important to note that the process will be placed into an AutoInterrupt
+ wrapper that will interrupt the process once it goes out of scope. If you
+ use the command in iterators, you should pass the whole process instance
+ instead of a single stream.
+
+ ``output_stream``
+ If set to a file-like object, data produced by the git command will be
+ output to the given stream directly.
+ This feature only has any effect if as_process is False. Processes will
+ always be created with a pipe due to issues with subprocess.
+ This merely is a workaround as data will be copied from the
+ output pipe to the given output stream directly.
+
+ ``**subprocess_kwargs``
+ Keyword arguments to be passed to subprocess.Popen. Please note that
+ some of the valid kwargs are already set by this method, the ones you
+ specify may not be the same ones.
+
+ Returns::
+
+ str(output) # extended_output = False (Default)
+ tuple(int(status), str(stdout), str(stderr)) # extended_output = True
+
+ if ouput_stream is True, the stdout value will be your output stream:
+ output_stream # extended_output = False
+ tuple(int(status), output_stream, str(stderr))# extended_output = True
+
+ Raise
+ GitCommandError
+
+ NOTE
+ If you add additional keyword arguments to the signature of this method,
+ you must update the execute_kwargs tuple housed in this module.
+ """
+ if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
+ print ' '.join(command)
+
+ # Allow the user to have the command executed in their working dir.
+ if with_keep_cwd or self._working_dir is None:
+ cwd = os.getcwd()
+ else:
+ cwd=self._working_dir
+
+ # Start the process
+ proc = subprocess.Popen(command,
+ cwd=cwd,
+ stdin=istream,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ close_fds=(os.name=='posix'),# unsupported on linux
+ **subprocess_kwargs
+ )
+ if as_process:
+ return self.AutoInterrupt(proc, command)
+
+ # Wait for the process to return
+ status = 0
+ stdout_value = ''
+ stderr_value = ''
+ try:
+ if output_stream is None:
+ stdout_value = proc.stdout.read().rstrip() # strip trailing "\n"
+ else:
+ max_chunk_size = 1024*64
+ while True:
+ chunk = proc.stdout.read(max_chunk_size)
+ output_stream.write(chunk)
+ if len(chunk) < max_chunk_size:
+ break
+ # END reading output stream
+ stdout_value = output_stream
+ # END stdout handling
+ stderr_value = proc.stderr.read().rstrip() # strip trailing "\n"
+
+ # waiting here should do nothing as we have finished stream reading
+ status = proc.wait()
+ finally:
+ proc.stdout.close()
+ proc.stderr.close()
+
+ if with_exceptions and status != 0:
+ raise GitCommandError(command, status, stderr_value)
+
+ if GIT_PYTHON_TRACE == 'full':
+ if stderr_value:
+ print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value)
+ elif stdout_value:
+ print "%s -> %d: '%s'" % (command, status, stdout_value)
+ else:
+ print "%s -> %d" % (command, status)
+
+ # Allow access to the command's status code
+ if with_extended_output:
+ return (status, stdout_value, stderr_value)
+ else:
+ return stdout_value
+
+ def transform_kwargs(self, **kwargs):
+ """
+ Transforms Python style kwargs into git command line options.
+ """
+ args = []
+ for k, v in kwargs.items():
+ if len(k) == 1:
+ if v is True:
+ args.append("-%s" % k)
+ elif type(v) is not bool:
+ args.append("-%s%s" % (k, v))
+ else:
+ if v is True:
+ args.append("--%s" % dashify(k))
+ elif type(v) is not bool:
+ args.append("--%s=%s" % (dashify(k), v))
+ return args
+
+ @classmethod
+ def __unpack_args(cls, arg_list):
+ if not isinstance(arg_list, (list,tuple)):
+ return [ str(arg_list) ]
+
+ outlist = list()
+ for arg in arg_list:
+ if isinstance(arg_list, (list, tuple)):
+ outlist.extend(cls.__unpack_args( arg ))
+ # END recursion
+ else:
+ outlist.append(str(arg))
+ # END for each arg
+ return outlist
+
+ def _call_process(self, method, *args, **kwargs):
+ """
+ Run the given git command with the specified arguments and return
+ the result as a String
+
+ ``method``
+ is the command. Contained "_" characters will be converted to dashes,
+ such as in 'ls_files' to call 'ls-files'.
+
+ ``args``
+ is the list of arguments. If None is included, it will be pruned.
+ This allows your commands to call git more conveniently as None
+ is realized as non-existent
+
+ ``kwargs``
+ is a dict of keyword arguments.
+ This function accepts the same optional keyword arguments
+ as execute().
+
+ Examples::
+ git.rev_list('master', max_count=10, header=True)
+
+ Returns
+ Same as execute()
+ """
+
+ # Handle optional arguments prior to calling transform_kwargs
+ # otherwise these'll end up in args, which is bad.
+ _kwargs = {}
+ for kwarg in execute_kwargs:
+ try:
+ _kwargs[kwarg] = kwargs.pop(kwarg)
+ except KeyError:
+ pass
+
+ # Prepare the argument list
+ opt_args = self.transform_kwargs(**kwargs)
+
+ ext_args = self.__unpack_args([a for a in args if a is not None])
+ args = opt_args + ext_args
+
+ call = ["git", dashify(method)]
+ call.extend(args)
+
+ return self.execute(call, **_kwargs)
+
+ def _parse_object_header(self, header_line):
+ """
+ ``header_line``
+ <hex_sha> type_string size_as_int
+
+ Returns
+ (hex_sha, type_string, size_as_int)
+
+ Raises
+ ValueError if the header contains indication for an error due to incorrect
+ input sha
+ """
+ tokens = header_line.split()
+ if len(tokens) != 3:
+ raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) )
+ if len(tokens[0]) != 40:
+ raise ValueError("Failed to parse header: %r" % header_line)
+ return (tokens[0], tokens[1], int(tokens[2]))
+
+ def __prepare_ref(self, ref):
+ # required for command to separate refs on stdin
+ refstr = str(ref) # could be ref-object
+ if refstr.endswith("\n"):
+ return refstr
+ return refstr + "\n"
+
+ def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs):
+ cur_val = getattr(self, attr_name)
+ if cur_val is not None:
+ return cur_val
+
+ options = { "istream" : subprocess.PIPE, "as_process" : True }
+ options.update( kwargs )
+
+ cmd = self._call_process( cmd_name, *args, **options )
+ setattr(self, attr_name, cmd )
+ return cmd
+
+ def __get_object_header(self, cmd, ref):
+ cmd.stdin.write(self.__prepare_ref(ref))
+ cmd.stdin.flush()
+ return self._parse_object_header(cmd.stdout.readline())
+
+ def get_object_header(self, ref):
+ """ Use this method to quickly examine the type and size of the object behind
+ the given ref.
+
+ :note: The method will only suffer from the costs of command invocation
+ once and reuses the command in subsequent calls.
+
+ :return: (hexsha, type_string, size_as_int) """
+ cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
+ return self.__get_object_header(cmd, ref)
+
+ def get_object_data(self, ref):
+ """ As get_object_header, but returns object data as well
+ :return: (hexsha, type_string, size_as_int,data_string)
+ :note: not threadsafe
+ """
+ hexsha, typename, size, stream = self.stream_object_data(ref)
+ data = stream.read(size)
+ del(stream)
+ return (hexsha, typename, size, data)
+
+ def stream_object_data(self, ref):
+ """As get_object_header, but returns the data as a stream
+ :return: (hexsha, type_string, size_as_int, stream)
+ :note: This method is not threadsafe, you need one independent Command instance
+ per thread to be safe !"""
+ cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True)
+ hexsha, typename, size = self.__get_object_header(cmd, ref)
+ return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout))
+
+ def clear_cache(self):
+ """
+ Clear all kinds of internal caches to release resources.
+
+ Currently persistent commands will be interrupted.
+
+ Returns
+ self
+ """
+ self.cat_file_all = None
+ self.cat_file_header = None
+ return self
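
The ``CatFileContentStream`` added above counts the bytes it hands out and reports end-of-file at the object boundary, which keeps the persistent ``cat-file --batch`` process usable for the next query. A hedged sketch of that behaviour, again with a placeholder path and ``HEAD`` as the example ref::

    from git.cmd import Git

    g = Git("/path/to/repository")          # placeholder working directory
    hexsha, typename, size, stream = g.stream_object_data("HEAD")
    for line in stream:                     # readline()/next() stop at the object boundary
        pass
    assert stream.read() == ''              # a depleted stream behaves like an empty file
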
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index bb15192d..f7043199 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -6,223 +6,223 @@
import os
from git.utils import LazyMixin, join_path_native
import utils
-
+
_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
class Object(LazyMixin):
- """
- Implements an Object which may be Blobs, Trees, Commits and Tags
-
- This Object also serves as a constructor for instances of the correct type::
-
- inst = Object.new(repo,id)
- inst.sha # objects sha in hex
- inst.size # objects uncompressed data size
- inst.data # byte string containing the whole data of the object
- """
- NULL_HEX_SHA = '0'*40
- TYPES = ("blob", "tree", "commit", "tag")
- __slots__ = ("repo", "sha", "size", "data" )
- type = None # to be set by subclass
-
- def __init__(self, repo, id):
- """
- Initialize an object by identifying it by its id. All keyword arguments
- will be set on demand if None.
-
- ``repo``
- repository this object is located in
-
- ``id``
- SHA1 or ref suitable for git-rev-parse
- """
- super(Object,self).__init__()
- self.repo = repo
- self.sha = id
+ """
+ Implements an Object which may be Blobs, Trees, Commits and Tags
+
+ This Object also serves as a constructor for instances of the correct type::
+
+ inst = Object.new(repo,id)
+ inst.sha # objects sha in hex
+ inst.size # objects uncompressed data size
+ inst.data # byte string containing the whole data of the object
+ """
+ NULL_HEX_SHA = '0'*40
+ TYPES = ("blob", "tree", "commit", "tag")
+ __slots__ = ("repo", "sha", "size", "data" )
+ type = None # to be set by subclass
+
+ def __init__(self, repo, id):
+ """
+ Initialize an object by identifying it by its id. All keyword arguments
+ will be set on demand if None.
+
+ ``repo``
+ repository this object is located in
+
+ ``id``
+ SHA1 or ref suitable for git-rev-parse
+ """
+ super(Object,self).__init__()
+ self.repo = repo
+ self.sha = id
- @classmethod
- def new(cls, repo, id):
- """
- Return
- New Object instance of a type appropriate to the object type behind
- id. The id of the newly created object will be a hexsha even though
- the input id may have been a Reference or Rev-Spec
-
- Note
- This cannot be a __new__ method as it would always call __init__
- with the input id which is not necessarily a hexsha.
- """
- hexsha, typename, size = repo.git.get_object_header(id)
- obj_type = utils.get_object_type_by_name(typename)
- inst = obj_type(repo, hexsha)
- inst.size = size
- return inst
-
- def _set_self_from_args_(self, args_dict):
- """
- Initialize attributes on self from the given dict that was retrieved
- from locals() in the calling method.
-
- Will only set an attribute on self if the corresponding value in args_dict
- is not None
- """
- for attr, val in args_dict.items():
- if attr != "self" and val is not None:
- setattr( self, attr, val )
- # END set all non-None attributes
-
- def _set_cache_(self, attr):
- """
- Retrieve object information
- """
- if attr == "size":
- hexsha, typename, self.size = self.repo.git.get_object_header(self.sha)
- assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
- elif attr == "data":
- hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha)
- assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
- else:
- super(Object,self)._set_cache_(attr)
-
- def __eq__(self, other):
- """
- Returns
- True if the objects have the same SHA1
- """
- return self.sha == other.sha
-
- def __ne__(self, other):
- """
- Returns
- True if the objects do not have the same SHA1
- """
- return self.sha != other.sha
-
- def __hash__(self):
- """
- Returns
- Hash of our id allowing objects to be used in dicts and sets
- """
- return hash(self.sha)
-
- def __str__(self):
- """
- Returns
- string of our SHA1 as understood by all git commands
- """
- return self.sha
-
- def __repr__(self):
- """
- Returns
- string with pythonic representation of our object
- """
- return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
+ @classmethod
+ def new(cls, repo, id):
+ """
+ Return
+ New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a hexsha even though
+ the input id may have been a Reference or Rev-Spec
+
+ Note
+ This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a hexsha.
+ """
+ hexsha, typename, size = repo.git.get_object_header(id)
+ obj_type = utils.get_object_type_by_name(typename)
+ inst = obj_type(repo, hexsha)
+ inst.size = size
+ return inst
+
+ def _set_self_from_args_(self, args_dict):
+ """
+ Initialize attributes on self from the given dict that was retrieved
+ from locals() in the calling method.
+
+ Will only set an attribute on self if the corresponding value in args_dict
+ is not None
+ """
+ for attr, val in args_dict.items():
+ if attr != "self" and val is not None:
+ setattr( self, attr, val )
+ # END set all non-None attributes
+
+ def _set_cache_(self, attr):
+ """
+ Retrieve object information
+ """
+ if attr == "size":
+ hexsha, typename, self.size = self.repo.git.get_object_header(self.sha)
+ assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
+ elif attr == "data":
+ hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha)
+ assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
+
+ def __eq__(self, other):
+ """
+ Returns
+ True if the objects have the same SHA1
+ """
+ return self.sha == other.sha
+
+ def __ne__(self, other):
+ """
+ Returns
+ True if the objects do not have the same SHA1
+ """
+ return self.sha != other.sha
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our id allowing objects to be used in dicts and sets
+ """
+ return hash(self.sha)
+
+ def __str__(self):
+ """
+ Returns
+ string of our SHA1 as understood by all git commands
+ """
+ return self.sha
+
+ def __repr__(self):
+ """
+ Returns
+ string with pythonic representation of our object
+ """
+ return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
- @property
- def data_stream(self):
- """
- Returns
- File Object compatible stream to the uncompressed raw data of the object
- """
- proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
- return utils.ProcessStreamAdapter(proc, "stdout")
-
- def stream_data(self, ostream):
- """
- Writes our data directly to the given output stream
-
- ``ostream``
- File object compatible stream object.
-
- Returns
- self
- """
- self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
- return self
+ @property
+ def data_stream(self):
+ """
+ Returns
+ File Object compatible stream to the uncompressed raw data of the object
+ """
+ proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
+ return utils.ProcessStreamAdapter(proc, "stdout")
+ def stream_data(self, ostream):
+ """
+ Writes our data directly to the given output stream
+
+ ``ostream``
+ File object compatible stream object.
+
+ Returns
+ self
+ """
+ self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
+ return self
+
class IndexObject(Object):
- """
- Base for all objects that can be part of the index file , namely Tree, Blob and
- SubModule objects
- """
- __slots__ = ("path", "mode")
-
- def __init__(self, repo, sha, mode=None, path=None):
- """
- Initialize a newly instanced IndexObject
- ``repo``
- is the Repo we are located in
+ """
+ Base for all objects that can be part of the index file , namely Tree, Blob and
+ SubModule objects
+ """
+ __slots__ = ("path", "mode")
+
+ def __init__(self, repo, sha, mode=None, path=None):
+ """
+ Initialize a newly instanced IndexObject
+ ``repo``
+ is the Repo we are located in
- ``sha`` : string
- is the git object id as hex sha
+ ``sha`` : string
+ is the git object id as hex sha
- ``mode`` : int
- is the file mode as int, use the stat module to evaluate the infomration
+ ``mode`` : int
+ is the file mode as int, use the stat module to evaluate the infomration
- ``path`` : str
- is the path to the file in the file system, relative to the git repository root, i.e.
- file.ext or folder/other.ext
-
- NOTE
- Path may not be set of the index object has been created directly as it cannot
- be retrieved without knowing the parent tree.
- """
- super(IndexObject, self).__init__(repo, sha)
- self._set_self_from_args_(locals())
- if isinstance(mode, basestring):
- self.mode = self._mode_str_to_int(mode)
-
- def __hash__(self):
- """
- Returns
- Hash of our path as index items are uniquely identifyable by path, not
- by their data !
- """
- return hash(self.path)
-
- def _set_cache_(self, attr):
- if attr in IndexObject.__slots__:
- # they cannot be retrieved lateron ( not without searching for them )
- raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
- else:
- super(IndexObject, self)._set_cache_(attr)
-
- @classmethod
- def _mode_str_to_int(cls, modestr):
- """
- ``modestr``
- string like 755 or 644 or 100644 - only the last 6 chars will be used
-
- Returns
- String identifying a mode compatible to the mode methods ids of the
- stat module regarding the rwx permissions for user, group and other,
- special flags and file system flags, i.e. whether it is a symlink
- for example.
- """
- mode = 0
- for iteration,char in enumerate(reversed(modestr[-6:])):
- mode += int(char) << iteration*3
- # END for each char
- return mode
-
- @property
- def name(self):
- """
- Returns
- Name portion of the path, effectively being the basename
- """
- return os.path.basename(self.path)
-
- @property
- def abspath(self):
- """
- Returns
- Absolute path to this index object in the file system ( as opposed to the
- .path field which is a path relative to the git repository ).
-
- The returned path will be native to the system and contains '\' on windows.
- """
- return join_path_native(self.repo.working_tree_dir, self.path)
-
+ ``path`` : str
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+
+ NOTE
+ Path may not be set of the index object has been created directly as it cannot
+ be retrieved without knowing the parent tree.
+ """
+ super(IndexObject, self).__init__(repo, sha)
+ self._set_self_from_args_(locals())
+ if isinstance(mode, basestring):
+ self.mode = self._mode_str_to_int(mode)
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our path as index items are uniquely identifyable by path, not
+ by their data !
+ """
+ return hash(self.path)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved lateron ( not without searching for them )
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+
+ @classmethod
+ def _mode_str_to_int(cls, modestr):
+ """
+ ``modestr``
+ string like 755 or 644 or 100644 - only the last 6 chars will be used
+
+ Returns
+ String identifying a mode compatible to the mode methods ids of the
+ stat module regarding the rwx permissions for user, group and other,
+ special flags and file system flags, i.e. whether it is a symlink
+ for example.
+ """
+ mode = 0
+ for iteration,char in enumerate(reversed(modestr[-6:])):
+ mode += int(char) << iteration*3
+ # END for each char
+ return mode
+
+ @property
+ def name(self):
+ """
+ Returns
+ Name portion of the path, effectively being the basename
+ """
+ return os.path.basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ Returns
+ Absolute path to this index object in the file system ( as opposed to the
+ .path field which is a path relative to the git repository ).
+
+ The returned path will be native to the system and contains '\' on windows.
+ """
+ return join_path_native(self.repo.working_tree_dir, self.path)
+
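
``Object.size`` and ``Object.data`` above are still filled lazily through the command methods from cmd.py. A short sketch of the access pattern, assuming ``Repo`` is importable from the top-level ``git`` package and that the module path matches the diff header; all paths are placeholders::

    from git import Repo                        # assumed top-level import
    from git.objects.base import Object

    repo = Repo("/path/to/repository")          # placeholder repository path
    obj = Object.new(repo, "HEAD")              # picks the subclass matching the object type
    data = obj.data                             # filled on first access via get_object_data()
    out = open("/tmp/object.raw", "wb")         # placeholder destination file
    obj.stream_data(out)                        # copied through execute()'s output_stream kwarg
    out.close()
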
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 87eed49b..948e9a54 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -9,12 +9,14 @@ import git.diff as diff
import git.stats as stats
from git.actor import Actor
from tree import Tree
+from cStringIO import StringIO
import base
import utils
import time
import os
-class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
+
+class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
"""
Wraps a git Commit object.
@@ -91,7 +93,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
self._set_self_from_args_(locals())
if parents is not None:
- self.parents = tuple( self.__class__(repo, p) for p in parents )
+ cls = type(self)
+ self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls))
# END for each parent to convert
if self.sha and tree is not None:
@@ -109,20 +112,9 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
We set all values at once.
"""
if attr in Commit.__slots__:
- # prepare our data lines to match rev-list
- data_lines = self.data.splitlines()
- data_lines.insert(0, "commit %s" % self.sha)
- temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
- self.parents = temp.parents
- self.tree = temp.tree
- self.author = temp.author
- self.authored_date = temp.authored_date
- self.author_tz_offset = temp.author_tz_offset
- self.committer = temp.committer
- self.committed_date = temp.committed_date
- self.committer_tz_offset = temp.committer_tz_offset
- self.message = temp.message
- self.encoding = temp.encoding
+ # read the data in a chunk, its faster - then provide a file wrapper
+ hexsha, typename, size, data = self.repo.git.get_object_data(self)
+ self._deserialize(StringIO(data))
else:
super(Commit, self)._set_cache_(attr)
@@ -260,59 +252,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
iterator returning Commit objects
"""
stream = proc_or_stream
- if not hasattr(stream,'next'):
+ if not hasattr(stream,'readline'):
stream = proc_or_stream.stdout
- for line in stream:
- commit_tokens = line.split()
+ while True:
+ line = stream.readline()
+ if not line:
+ break
+ commit_tokens = line.split()
id = commit_tokens[1]
assert commit_tokens[0] == "commit"
- tree = stream.next().split()[1]
-
- parents = []
- next_line = None
- for parent_line in stream:
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- parents.append(parent_line.split()[-1])
- # END for each parent line
-
- author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
- committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
-
- # empty line
- encoding = stream.next()
- encoding.strip()
- if encoding:
- encoding = encoding[encoding.find(' ')+1:]
- # END parse encoding
-
- message_lines = list()
- if from_rev_list:
- for msg_line in stream:
- if not msg_line.startswith(' '):
- # and forget about this empty marker
- break
- # END abort message reading
- # strip leading 4 spaces
- message_lines.append(msg_line[4:])
- # END while there are message lines
- else:
- # a stream from our data simply gives us the plain message
- for msg_line in stream:
- message_lines.append(msg_line)
- # END message parsing
- message = '\n'.join(message_lines)
-
-
- yield Commit(repo, id, tree,
- author, authored_date, author_tz_offset,
- committer, committed_date, committer_tz_offset,
- message, tuple(parents),
- encoding or cls.default_encoding)
+ yield Commit(repo, id)._deserialize(stream, from_rev_list)
# END for each line in stream
@@ -393,7 +344,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
# assume utf8 encoding
enc_section, enc_option = cls.conf_encoding.split('.')
- conf_encoding = cr.get_value(enc_section, enc_option, default_encoding)
+ conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
author = Actor(author_name, author_email)
committer = Actor(committer_name, committer_email)
@@ -429,3 +380,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
def __repr__(self):
return '<git.Commit "%s">' % self.sha
+ #{ Serializable Implementation
+
+ def _serialize(self, stream):
+ # for now, this is very inefficient and in fact shouldn't be used like this
+ return super(Commit, self)._serialize(stream)
+
+ def _deserialize(self, stream, from_rev_list=False):
+ """:param from_rev_list: if true, the stream format is coming from the rev-list command
+ Otherwise it is assumed to be a plain data stream from our object"""
+ self.tree = Tree(self.repo, stream.readline().split()[1], 0, '')
+
+ self.parents = list()
+ next_line = None
+ while True:
+ parent_line = stream.readline()
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ self.parents.append(type(self)(self.repo, parent_line.split()[-1]))
+ # END for each parent line
+ self.parents = tuple(self.parents)
+
+ self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline())
+
+
+ # empty line
+ self.encoding = self.default_encoding
+ enc = stream.readline()
+ enc.strip()
+ if enc:
+ self.encoding = enc[enc.find(' ')+1:]
+ # END parse encoding
+
+ message_lines = list()
+ if from_rev_list:
+ while True:
+ msg_line = stream.readline()
+ if not msg_line.startswith(' '):
+ # and forget about this empty marker
+ # cut the last newline to get rid of the artificial newline added
+ # by rev-list command. Lets hope its just linux style \n
+ message_lines[-1] = message_lines[-1][:-1]
+ break
+ # END abort message reading
+ # strip leading 4 spaces
+ message_lines.append(msg_line[4:])
+ # END while there are message lines
+ self.message = ''.join(message_lines)
+ else:
+ # a stream from our data simply gives us the plain message
+ # The end of our message stream is marked with a newline that we strip
+ self.message = stream.read()[:-1]
+ # END message parsing
+ return self
+
+ #} END serializable implementation
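
In the plain (non rev-list) case, ``_deserialize`` consumes the raw commit object as ``git cat-file`` emits it. A hedged sketch with placeholder author, committer and message data (the shas are reused from this page, the import paths are assumed from the diff headers)::

    from cStringIO import StringIO
    from git import Repo                        # assumed top-level import
    from git.objects.commit import Commit

    repo = Repo("/path/to/repository")          # placeholder repository path
    raw = ("tree 87481ab28367db496886a3801bda37227c10f7ed\n"
           "parent df0892351a394d768489b5647d47b73c24d3ef5f\n"
           "author A U Thor <author@example.com> 1275474633 +0200\n"
           "committer C O Mitter <committer@example.com> 1275475865 +0200\n"
           "\n"
           "decode commits from streams\n")
    commit = Commit(repo, "8c1a87d11df666d308d14e4ae7ee0e9d614296b6")
    commit._deserialize(StringIO(raw))
    assert commit.message == "decode commits from streams"
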
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index a9e60981..285d3b5b 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable):
visit_once = False, ignore_self=1 ):
"""For documentation, see utils.Traversable.traverse
- Trees are set to visist_once = False to gain more performance in the traversal"""
+ Trees are set to visit_once = False to gain more performance in the traversal"""
return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
# List protocol
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 7060e293..6d378a72 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -280,3 +280,20 @@ class Traversable(object):
addToStack( stack, item, branch_first, nd )
# END for each item on work stack
+
+
+class Serializable(object):
+ """Defines methods to serialize and deserialize objects from and into a data stream"""
+
+ def _serialize(self, stream):
+ """Serialize the data of this object into the given data stream
+ :note: a serialized object would ``_deserialize`` into the same objet
+ :param stream: a file-like object
+ :return: self"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def _deserialize(self, stream):
+ """Deserialize all information regarding this object from the stream
+ :param stream: a file-like object
+ :return: self"""
+ raise NotImplementedError("To be implemented in subclass")
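
``Commit`` above is the first implementor of this interface. As a plainer illustration of the intended round-trip contract, a hypothetical subclass whose whole state is a single text blob (class name and import path are illustrative only)::

    from cStringIO import StringIO
    from git.objects.utils import Serializable   # assumed module path

    class Note(Serializable):
        """Toy object whose entire state is one text blob."""
        def __init__(self, text=''):
            self.text = text

        def _serialize(self, stream):
            stream.write(self.text)              # write our state to the stream
            return self

        def _deserialize(self, stream):
            self.text = stream.read()            # restore our state from the stream
            return self

    stream = StringIO()
    Note("hello")._serialize(stream)
    stream.seek(0)
    assert Note()._deserialize(stream).text == "hello"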