summaryrefslogtreecommitdiff
path: root/lib/git/objects
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2010-06-03 23:20:34 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-03 23:20:34 +0200
commit: 1e2b46138ba58033738a24dadccc265748fce2ca (patch)
tree: 0f2a625a371c16cc95e53e024e007d8b89d87c92 /lib/git/objects
parent: 4b4a514e51fbc7dc6ddcb27c188159d57b5d1fa9 (diff)
download: gitpython-1e2b46138ba58033738a24dadccc265748fce2ca.tar.gz
commit.create_from_tree now uses a pure Python implementation. Fixed message parsing, which truncated newlines although that was illegitimate: it's up to the reader to truncate these, and nowhere in the git code could I find anyone adding newlines to commits when they are written.
Added performance tests for serialization; it does about 5k commits per second when writing to tmpfs.
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- lib/git/objects/base.py 4
-rw-r--r-- lib/git/objects/commit.py 42
-rw-r--r-- lib/git/objects/utils.py 25
3 files changed, 49 insertions, 22 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 64a5678e..f7043199 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -125,8 +125,8 @@ class Object(LazyMixin):
Returns
File Object compatible stream to the uncompressed raw data of the object
"""
- sha, type, size, stream = self.repo.git.stream_object_data(self.sha)
- return stream
+ proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
+ return utils.ProcessStreamAdapter(proc, "stdout")
def stream_data(self, ostream):
"""
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 98aca360..d56ce306 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -91,15 +91,6 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
"""
super(Commit,self).__init__(repo, sha)
self._set_self_from_args_(locals())
-
- if parents is not None:
- cls = type(self)
- self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls))
- # END for each parent to convert
-
- if self.sha and tree is not None:
- self.tree = Tree(repo, tree, path='')
- # END id to tree conversion
@classmethod
def _get_intermediate_items(cls, commit):
@@ -350,7 +341,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
committer, committer_time, committer_offset,
message, parent_commits, conf_encoding)
- # serialize !
+ stream = StringIO()
+ new_commit._serialize(stream)
+ streamlen = stream.tell()
+ stream.seek(0)
+
+ new_commit.sha = repo.odb.to_object(cls.type, streamlen, stream, sha_as_hex=True)
if head:
try:
@@ -377,8 +373,28 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
#{ Serializable Implementation
def _serialize(self, stream):
- # for now, this is very inefficient and in fact shouldn't be used like this
- return super(Commit, self)._serialize(stream)
+ write = stream.write
+ write("tree %s\n" % self.tree)
+ for p in self.parents:
+ write("parent %s\n" % p)
+
+ a = self.author
+ c = self.committer
+ fmt = "%s %s <%s> %s %s\n"
+ write(fmt % ("author", a.name, a.email,
+ self.authored_date,
+ utils.altz_to_utctz_str(self.author_tz_offset)))
+
+ write(fmt % ("committer", c.name, c.email,
+ self.committed_date,
+ utils.altz_to_utctz_str(self.committer_tz_offset)))
+
+ if self.encoding != self.default_encoding:
+ write("encoding %s\n" % self.encoding)
+
+ write("\n")
+ write(self.message)
+ return self
def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
@@ -416,7 +432,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
- self.message = stream.read()[:-1]
+ self.message = stream.read()
return self
#} END serializable implementation
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 6d378a72..c93f2091 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -16,7 +16,8 @@ import time
import os
__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date',
- 'ProcessStreamAdapter', 'Traversable')
+ 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
+ 'verify_utctz')
def get_object_type_by_name(object_type_name):
"""
@@ -57,14 +58,24 @@ def get_user_id():
return "%s@%s" % (username, platform.node())
-def _utc_tz_to_altz(utctz):
+def utctz_to_altz(utctz):
"""we convert utctz to the timezone in seconds, it is the format time.altzone
returns. Git stores it as UTC timezon which has the opposite sign as well,
which explains the -1 * ( that was made explicit here )
:param utctz: git utc timezone string, i.e. +0200"""
return -1 * int(float(utctz)/100*3600)
+
+def altz_to_utctz_str(altz):
+ """As above, but inverses the operation, returning a string that can be used
+ in commit objects"""
+ utci = -1 * int((altz / 3600)*100)
+ utcs = str(abs(utci))
+ utcs = "0"*(4-len(utcs)) + utcs
+ prefix = (utci < 0 and '-') or '+'
+ return prefix + utcs
+
-def _verify_utctz(offset):
+def verify_utctz(offset):
""":raise ValueError: if offset is incorrect
:return: offset"""
fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
@@ -97,11 +108,11 @@ def parse_date(string_date):
if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
timestamp, offset = string_date.split()
timestamp = int(timestamp)
- return timestamp, _utc_tz_to_altz(_verify_utctz(offset))
+ return timestamp, utctz_to_altz(verify_utctz(offset))
else:
offset = "+0000" # local time by default
if string_date[-5] in '-+':
- offset = _verify_utctz(string_date[-5:])
+ offset = verify_utctz(string_date[-5:])
string_date = string_date[:-6] # skip space as well
# END split timezone info
@@ -139,7 +150,7 @@ def parse_date(string_date):
fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
- return int(time.mktime(fstruct)), _utc_tz_to_altz(offset)
+ return int(time.mktime(fstruct)), utctz_to_altz(offset)
except ValueError:
continue
# END exception handling
@@ -167,7 +178,7 @@ def parse_actor_and_date(line):
"""
m = _re_actor_epoch.search(line)
actor, epoch, offset = m.groups()
- return (Actor._from_string(actor), int(epoch), _utc_tz_to_altz(offset))
+ return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))