diff options
-rw-r--r-- | TODO | 12 | ||||
-rw-r--r-- | lib/git/cmd.py | 1 | ||||
-rw-r--r-- | lib/git/objects/base.py | 12 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 18 | ||||
-rw-r--r-- | test/git/test_base.py | 5 |
5 files changed, 48 insertions, 0 deletions
@@ -15,6 +15,18 @@ General * References should be parsed 'manually' to get around command invocation, but be sure to be able to read packed refs. +Object +------ +* DataStream method should read the data itself. This would be easy once you have + the actual loose object, but will be hard if it is in a pack. In a distant future, + we might be able to do that or at least implement direct object reading for loose + objects ( to save a command call ). Currently object information comes from + persistent commands anyway, so the penalty is not that high. The data_stream + though is not based on persistent commands. + It would be good to improve things there as cat-file keeps all the data in a buffer + before it writes it. Hence it does not write to a stream directly, which can be + bad if files are large, say 1GB :). + Config ------ * Expand .get* methods of GitConfigParser to support default value. If it is not None, diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 485a1553..836b599d 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -144,6 +144,7 @@ class Git(object): wrapper that will interrupt the process once it goes out of scope. If you use the command in iterators, you should pass the whole process instance instead of a single stream. + ``output_stream`` If set to a file-like object, data produced by the git command will be output to the given stream directly. 
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index ab1da7b0..dd67a3c7 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -16,6 +16,9 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: inst = Object.new(repo,id) + inst.id # object's sha in hex + inst.size # object's uncompressed data size + inst.data # byte string containing the whole data of the object """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) @@ -115,6 +118,15 @@ class Object(LazyMixin): """ return '<git.%s "%s">' % (self.__class__.__name__, self.id) + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.id, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + class IndexObject(Object): """ diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 367ed2b7..7bb4e8e2 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -52,3 +52,21 @@ def parse_actor_and_date(line): m = _re_actor_epoch.search(line) actor, epoch = m.groups() return (Actor._from_string(actor), int(epoch)) + + + +class ProcessStreamAdapter(object): + """ + Class wiring all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. 
+ """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) diff --git a/test/git/test_base.py b/test/git/test_base.py index 71576048..4ad98d7f 100644 --- a/test/git/test_base.py +++ b/test/git/test_base.py @@ -48,6 +48,11 @@ class TestBase(object): assert not item.path.startswith("/") # must be relative assert isinstance(item.mode, int) # END index object check + + # read from stream + data_stream = item.data_stream + data = data_stream.read() + assert data # END for each object type to create # each has a unique sha |