summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--TODO12
-rw-r--r--lib/git/cmd.py1
-rw-r--r--lib/git/objects/base.py12
-rw-r--r--lib/git/objects/utils.py18
-rw-r--r--test/git/test_base.py5
5 files changed, 48 insertions, 0 deletions
diff --git a/TODO b/TODO
index 03179467..f1b49a0c 100644
--- a/TODO
+++ b/TODO
@@ -15,6 +15,18 @@ General
* References should be parsed 'manually' to get around command invocation, but
be sure to be able to read packed refs.
+Object
+------
+* DataStream method should read the data itself. This would be easy once you have
+ the actual loose object, but will be hard if it is in a pack. In a distant future,
+ we might be able to do that or at least implement direct object reading for loose
+ objects ( to save a command call ). Currently object information comes from
+ persistent commands anyway, so the penalty is not that high. The data_stream
+ though is not based on persistent commands.
+ It would be good to improve things there as cat-file keeps all the data in a buffer
+ before it writes it. Hence it does not write to a stream directly, which can be
+ bad if files are large, say 1GB :).
+
Config
------
* Expand .get* methods of GitConfigParser to support default value. If it is not None,
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index 485a1553..836b599d 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -144,6 +144,7 @@ class Git(object):
wrapper that will interrupt the process once it goes out of scope. If you
use the command in iterators, you should pass the whole process instance
instead of a single stream.
+
``output_stream``
If set to a file-like object, data produced by the git command will be
output to the given stream directly.
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index ab1da7b0..dd67a3c7 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -16,6 +16,9 @@ class Object(LazyMixin):
This Object also serves as a constructor for instances of the correct type::
inst = Object.new(repo,id)
+ inst.id # object's sha in hex
+ inst.size # object's uncompressed data size
+ inst.data # byte string containing the whole data of the object
"""
TYPES = ("blob", "tree", "commit", "tag")
__slots__ = ("repo", "id", "size", "data" )
@@ -115,6 +118,15 @@ class Object(LazyMixin):
"""
return '<git.%s "%s">' % (self.__class__.__name__, self.id)
+ @property
+ def data_stream(self):
+ """
+ Returns
+ File Object compatible stream to the uncompressed raw data of the object
+ """
+ proc = self.repo.git.cat_file(self.type, self.id, as_process=True)
+ return utils.ProcessStreamAdapter(proc, "stdout")
+
class IndexObject(Object):
"""
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 367ed2b7..7bb4e8e2 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -52,3 +52,21 @@ def parse_actor_and_date(line):
m = _re_actor_epoch.search(line)
actor, epoch = m.groups()
return (Actor._from_string(actor), int(epoch))
+
+
+
+class ProcessStreamAdapter(object):
+ """
+ Class wiring all calls to the contained Process instance.
+
+ Use this type to hide the underlying process to provide access only to a specified
+ stream. The process is usually wrapped into an AutoInterrupt class to kill
+ it if the instance goes out of scope.
+ """
+ __slots__ = ("_proc", "_stream")
+ def __init__(self, process, stream_name):
+ self._proc = process
+ self._stream = getattr(process, stream_name)
+
+ def __getattr__(self, attr):
+ return getattr(self._stream, attr)
diff --git a/test/git/test_base.py b/test/git/test_base.py
index 71576048..4ad98d7f 100644
--- a/test/git/test_base.py
+++ b/test/git/test_base.py
@@ -48,6 +48,11 @@ class TestBase(object):
assert not item.path.startswith("/") # must be relative
assert isinstance(item.mode, int)
# END index object check
+
+ # read from stream
+ data_stream = item.data_stream
+ data = data_stream.read()
+ assert data
# END for each object type to create
# each has a unique sha