summaryrefslogtreecommitdiff
path: root/git/db
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2011-06-06 20:29:03 +0200
committerSebastian Thiel <byronimo@gmail.com>2011-06-06 20:32:38 +0200
commita5497c432fe8ab1415d633d5d4b68f00a2807c26 (patch)
treebcc4bb31df4581d979fd881fbd7c5966e2cec726 /git/db
parentce79835556c195ed6e638a33280f729537dcee54 (diff)
downloadgitpython-a5497c432fe8ab1415d633d5d4b68f00a2807c26.tar.gz
Streams returned by the git cmd db now contain all of their data right away. This can cause several copies of the data to exist, and makes the cmd implementation a bad choice when big files are involved
Diffstat (limited to 'git/db')
-rw-r--r--git/db/cmd/base.py13
-rw-r--r--git/db/complex.py5
2 files changed, 14 insertions, 4 deletions
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py
index b3354b0a..735e71df 100644
--- a/git/db/cmd/base.py
+++ b/git/db/cmd/base.py
@@ -31,6 +31,7 @@ from git.refs import (
TagReference
)
from git.objects.commit import Commit
+from cStringIO import StringIO
import re
import os
import sys
@@ -305,9 +306,15 @@ class CmdObjectDBRMixin(object):
return OInfo(hex_to_bin(hexsha), typename, size)
def stream(self, sha):
- """For now, all lookup is done by git itself"""
- hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
- return OStream(hex_to_bin(hexsha), typename, size, stream)
+ """For now, all lookup is done by git itself
+ :note: As we don't know when the stream is actually read (and if it is
+ stored for later use) we read the data right away and cache it.
+ This has HUGE performance implications, both for memory and for
+ reading/deserializing objects, but we have no other choice in order
+ to make the database behaviour consistent with other implementations !"""
+
+ hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
+ return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
def partial_to_complete_sha_hex(self, partial_hexsha):
""":return: Full binary 20 byte sha from the given partial hexsha
diff --git a/git/db/complex.py b/git/db/complex.py
index 71a39c45..31b047a0 100644
--- a/git/db/complex.py
+++ b/git/db/complex.py
@@ -8,7 +8,10 @@ __all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityG
class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
"""A database which uses primarily the git command implementation, but falls back
- to pure python where it is more feasible"""
+ to pure python where it is more feasible
+ :note: To assure consistent behaviour across implementations, when calling the
+ ``stream()`` method a cache is created. This makes this implementation a bad
+ choice when reading big files as these are streamed from memory in all cases."""
class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
"""A database which fills in its missing implementation using the pure python