diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2011-06-06 20:29:03 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2011-06-06 20:32:38 +0200 |
commit | a5497c432fe8ab1415d633d5d4b68f00a2807c26 (patch) | |
tree | bcc4bb31df4581d979fd881fbd7c5966e2cec726 /git/db | |
parent | ce79835556c195ed6e638a33280f729537dcee54 (diff) | |
download | gitpython-a5497c432fe8ab1415d633d5d4b68f00a2807c26.tar.gz |
Streams returned by the git cmd db now contain all of their data right away, as the object is read fully into memory when the stream is created. This can cause several copies of the data to exist in memory, and makes the cmd implementation a bad choice if big files are involved
Diffstat (limited to 'git/db')
-rw-r--r-- | git/db/cmd/base.py | 13 | ||||
-rw-r--r-- | git/db/complex.py | 5 |
2 files changed, 14 insertions, 4 deletions
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index b3354b0a..735e71df 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -31,6 +31,7 @@ from git.refs import ( TagReference ) from git.objects.commit import Commit +from cStringIO import StringIO import re import os import sys @@ -305,9 +306,15 @@ class CmdObjectDBRMixin(object): return OInfo(hex_to_bin(hexsha), typename, size) def stream(self, sha): - """For now, all lookup is done by git itself""" - hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) - return OStream(hex_to_bin(hexsha), typename, size, stream) + """For now, all lookup is done by git itself + :note: As we don't know when the stream is actually read (and if it is + stored for later use) we read the data rigth away and cache it. + This has HUGE performance implication, both for memory as for + reading/deserializing objects, but we have no other choice in order + to make the database behaviour consistent with other implementations !""" + + hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha)) + return OStream(hex_to_bin(hexsha), typename, size, StringIO(data)) def partial_to_complete_sha_hex(self, partial_hexsha): """:return: Full binary 20 byte sha from the given partial hexsha diff --git a/git/db/complex.py b/git/db/complex.py index 71a39c45..31b047a0 100644 --- a/git/db/complex.py +++ b/git/db/complex.py @@ -8,7 +8,10 @@ __all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityG class CmdGitDB(CmdPartialGitDB, PurePartialGitDB): """A database which uses primarily the git command implementation, but falls back - to pure python where it is more feasible""" + to pure python where it is more feasible + :note: To assure consistent behaviour across implementations, when calling the + ``stream()`` method a cache is created. 
This makes this implementation a bad + choice when reading big files as these are streamed from memory in all cases.""" class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB): """A database which fills in its missing implementation using the pure python |