From a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2011 20:17:00 +0200 Subject: Updated objects to use the ones defined in gitdb as basis. Only the submodule implementation is left in git-python as it requires some advanced features. No tests where run yet --- git/objects/commit.py | 267 +++----------------------------------------------- 1 file changed, 13 insertions(+), 254 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index fd4187b0..d932ab1a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,142 +3,28 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from git.util import ( - Actor, - Iterable, - Stats, - ) +from git.util import RepoAliasMixin +from gitdb.object.commit import Commit as GitDB_Commit from git.diff import Diffable -from tree import Tree +from gitdb.util import ( + Iterable, + Actor + ) + from gitdb import IStream from cStringIO import StringIO -import base -from gitdb.util import ( - hex_to_bin - ) -from util import ( - Traversable, - Serializable, - parse_date, - altz_to_utctz_str, - parse_actor_and_date - ) -from time import ( - time, - altzone - ) +from util import parse_date +from time import altzone + import os -import sys __all__ = ('Commit', ) -class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): - """Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary.""" - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_date = "GIT_AUTHOR_DATE" - env_committer_date = "GIT_COMMITTER_DATE" - - # CONFIGURATION KEYS - conf_encoding = 'i18n.commitencoding' - - # INVARIANTS - default_encoding = "UTF-8" +class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): + """Provides additional git-command based functionality to the default gitdb commit object""" + __slots__ = tuple() - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents", "encoding") - _id_attribute_ = "binsha" - - def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, - message=None, parents=None, encoding=None): - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - :param tree: Tree - Tree object - :param author: Actor - is the author string ( will be implicitly converted into an Actor object ) - :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string - :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param message: string - is the commit message - :param encoding: string - encoding of the message, defaults to UTF-8 - :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit - - :note: Timezone information is in the same format and in the same sign - as what time.altzone returns. The sign is inverted compared to git's - UTC timezone.""" - super(Commit,self).__init__(repo, binsha) - if tree is not None: - assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - if tree is not None: - self.tree = tree - if author is not None: - self.author = author - if authored_date is not None: - self.authored_date = authored_date - if author_tz_offset is not None: - self.author_tz_offset = author_tz_offset - if committer is not None: - self.committer = committer - if committed_date is not None: - self.committed_date = committed_date - if committer_tz_offset is not None: - self.committer_tz_offset = committer_tz_offset - if message is not None: - self.message = message - if parents is not None: - self.parents = parents - if encoding is not None: - self.encoding = encoding - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - if attr in Commit.__slots__: - # read the data in a chunk, its faster - then provide a file wrapper - binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) - else: - super(Commit, self)._set_cache_(attr) - # END handle attrs - - @property - def summary(self): - """:return: First line of the commit message""" - return self.message.split('\n', 1)[0] - def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -225,33 +111,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) return Stats._list_from_string(self.repo, text) - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database - - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" - stream = proc_or_stream - if not hasattr(stream,'readline'): - stream = proc_or_stream.stdout - - readline = stream.readline - while True: - line = readline() - if not line: - break - hexsha = line.strip() - if len(hexsha) > 40: - # split additional information, as returned by bisect for instance - hexsha, rest = line.split(None, 1) - # END handle extra info - - assert len(hexsha) == 40, "Invalid line: %s" % hexsha - yield Commit(repo, hex_to_bin(hexsha)) - # END for each line in stream - @classmethod def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): @@ -361,105 +220,5 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # END advance head handling return new_commit - - #{ Serializable Implementation - - def _serialize(self, stream): - write = stream.write - write("tree %s\n" % self.tree) - for p in self.parents: - write("parent %s\n" % p) - - a = self.author - aname = a.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - - c = self.committer - fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) - - # encode committer - aname = c.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - write(fmt % ("committer", aname, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) - - if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) - - write("\n") - - # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): - write(self.message.encode(self.encoding)) - else: - write(self.message) - # END handle encoding - return self - - def _deserialize(self, stream): - """:param from_rev_list: if true, the stream format is coming from the rev-list command - Otherwise it is assumed to be a plain data stream from our object""" - readline = stream.readline - self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') - - self.parents = list() - next_line = None - while True: - parent_line = readline() - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) - # END for each parent line - self.parents = tuple(self.parents) - - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) - - - # now we can have the encoding line, or an empty line followed by the optional - # message. - self.encoding = self.default_encoding - # read encoding or empty line to separate message - enc = readline() - enc = enc.strip() - if enc: - self.encoding = enc[enc.find(' ')+1:] - # now comes the message separator - readline() - # END handle encoding - - # decode the authors name - try: - self.author.name = self.author.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) - # END handle author's encoding - - # decode committer name - try: - self.committer.name = self.committer.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) - # END handle author's encoding - - # a stream from our data simply gives us the plain message - # The end of our message stream is marked with a newline that we strip - self.message = stream.read() - try: - self.message = self.message.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) - # END exception handling - return self #} END serializable implementation -- cgit v1.2.1 From 4177eefd7bdaea96a529b00ba9cf751924ede202 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 May 2011 19:43:22 +0200 Subject: Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work --- git/objects/commit.py | 259 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 253 insertions(+), 6 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index d932ab1a..30dcaa0a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,28 +3,68 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import RepoAliasMixin -from gitdb.object.commit import Commit as GitDB_Commit -from git.diff import Diffable +import base + +from gitdb.typ import ObjectType +from tree import Tree +from cStringIO import StringIO + from gitdb.util import ( + hex_to_bin, + Actor, + RepoAliasMixin, Iterable, Actor ) -from gitdb import IStream +from util import ( + Traversable, + Serializable, + altz_to_utctz_str, + parse_actor_and_date + ) +from git.diff import Diffable +from gitdb.base import IStream from cStringIO import StringIO from util import parse_date from time import altzone import os +import sys __all__ = ('Commit', ) -class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): - """Provides additional git-command based functionality to the default gitdb commit object""" +class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): + """Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary.""" __slots__ = tuple() + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_date = "GIT_AUTHOR_DATE" + env_committer_date = "GIT_COMMITTER_DATE" + + # CONFIGURATION KEYS + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = ObjectType.commit + type_id = ObjectType.commit_id + + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "binsha" + + def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -221,4 +261,211 @@ class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): return new_commit + def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone.""" + super(Commit,self).__init__(odb, binsha) + if tree is not None: + assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + binsha, typename, self.size, stream = self.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) + else: + super(Commit, self)._set_cache_(attr) + # END handle attrs + + @property + def summary(self): + """:return: First line of the commit message""" + return self.message.split('\n', 1)[0] + + @classmethod + def _iter_from_process_or_stream(cls, odb, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield cls(odb, hex_to_bin(hexsha)) + # END for each line in stream + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))) + + # encode committer + aname = c.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + write(fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % self.encoding) + + write("\n") + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1]))) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) + # END handle author's encoding + + # decode committer name + try: + self.committer.name = self.committer.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) + # END handle author's encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) + # END exception handling + return self + #} END serializable implementation + -- cgit v1.2.1 From acf5e6ea64a2f24117f1d419c208ed1c38c43690 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 15:03:14 +0200 Subject: replaced all gitdb strings with git --- git/objects/commit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 30dcaa0a..4ca5877e 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -5,11 +5,11 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import base -from gitdb.typ import ObjectType +from git.typ import ObjectType from tree import Tree from cStringIO import StringIO -from gitdb.util import ( +from git.util import ( hex_to_bin, Actor, RepoAliasMixin, @@ -24,7 +24,7 @@ from util import ( parse_actor_and_date ) from git.diff import Diffable -from gitdb.base import IStream +from git.base import IStream from cStringIO import StringIO from util import parse_date -- cgit v1.2.1 From 7ae36c3e019a5cc16924d1b6007774bfb625036f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 18:53:59 +0200 Subject: Started to fix imports - tests still have no chance to work as database changed drastically. Now the actual work begins --- git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 4ca5877e..45a821a1 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -35,7 +35,7 @@ import sys __all__ = ('Commit', ) -class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): +class Commit(Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): """Wraps a git Commit object. This class will act lazily on some of its attributes and will query the -- cgit v1.2.1 From 7fab60c596cdd2588f9c7b2b4eb9f93f8736b915 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 20:10:47 +0200 Subject: Fixed all of the object tests, except for the submodule handling which needs more work as the amount of submodules changed in fact. Maybe I should just generate a test repository with gitpython as submodule to get the recursion depth required to satisfy the test --- git/objects/commit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 45a821a1..c201780c 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -14,7 +14,8 @@ from git.util import ( Actor, RepoAliasMixin, Iterable, - Actor + Actor, + Stats ) from util import ( -- cgit v1.2.1 From 6f960586feccff8c1f2c717765eb0a5e8b9cd6f3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 21:14:22 +0200 Subject: Fixed remaining tests as good as possible. remote/fetch/pull and submodule tests need some more work. Also, the tests need to be reorganized and move closer to their actual location within gitpython. Hence the refs tests go to git.test.refs, etc --- git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index c201780c..c32bbf1a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -29,7 +29,7 @@ from git.base import IStream from cStringIO import StringIO from util import parse_date -from time import altzone +from time import altzone, time import os import sys -- cgit v1.2.1