diff options
-rw-r--r-- | lib/git/objects/commit.py | 98 | ||||
-rw-r--r-- | test/fixtures/rev_list | 27 | ||||
-rw-r--r-- | test/git/performance/test_commit.py | 8 | ||||
-rw-r--r-- | test/git/test_commit.py | 310 | ||||
-rw-r--r-- | test/git/test_repo.py | 39 |
5 files changed, 217 insertions, 265 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 948e9a54..98aca360 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -106,13 +106,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri return commit.parents def _set_cache_(self, attr): - """ - Called by LazyMixin superclass when the given uninitialized member needs + """ Called by LazyMixin superclass when the given uninitialized member needs to be set. - We set all values at once. - """ + We set all values at once. """ if attr in Commit.__slots__: # read the data in a chunk, its faster - then provide a file wrapper + # Could use self.data, but lets try to get it with less calls hexsha, typename, size, data = self.repo.git.get_object_data(self) self._deserialize(StringIO(data)) else: @@ -181,16 +180,16 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri Returns iterator yielding Commit items """ - options = {'pretty': 'raw', 'as_process' : True } - options.update(kwargs) - + if 'pretty' in kwargs: + raise ValueError("--pretty cannot be used as parsing expects single sha's only") + # END handle pretty args = list() if paths: args.extend(('--', paths)) # END if paths - proc = repo.git.rev_list(rev, args, **options) - return cls._iter_from_process_or_stream(repo, proc, True) + proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) + return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths='', **kwargs): """ @@ -235,35 +234,30 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri return stats.Stats._list_from_string(self.repo, text) @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``proc`` - git-rev-list process instance (raw format) + def _iter_from_process_or_stream(cls, repo, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database - ``from_rev_list`` - If True, the stream was created by rev-list in which case we parse - the message differently - Returns - iterator returning Commit objects - """ + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" stream = proc_or_stream if not hasattr(stream,'readline'): stream = proc_or_stream.stdout + readline = stream.readline while True: - line = stream.readline() + line = readline() if not line: break - commit_tokens = line.split() - id = commit_tokens[1] - assert commit_tokens[0] == "commit" + sha = line.strip() + if len(sha) > 40: + # split additional information, as returned by bisect for instance + sha, rest = line.split(None, 1) + # END handle extra info - yield Commit(repo, id)._deserialize(stream, from_rev_list) + assert len(sha) == 40, "Invalid line: %s" % sha + yield Commit(repo, sha) # END for each line in stream @@ -386,15 +380,16 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri # for now, this is very inefficient and in fact shouldn't be used like this return super(Commit, self)._serialize(stream) - def _deserialize(self, stream, from_rev_list=False): + def _deserialize(self, stream): """:param from_rev_list: if true, the stream format is coming from the rev-list command Otherwise it is assumed to be a plain data stream from our object""" - self.tree = Tree(self.repo, stream.readline().split()[1], 0, '') + readline = stream.readline + self.tree = Tree(self.repo, readline().split()[1], 0, '') self.parents = list() next_line = None while True: - parent_line = stream.readline() + parent_line = readline() if not parent_line.startswith('parent'): next_line = parent_line break @@ -404,37 +399,24 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri self.parents = tuple(self.parents) self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline()) + self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline()) - # empty line + # now we can have the encoding line, or an empty line followed by the optional + # message. self.encoding = self.default_encoding - enc = stream.readline() - enc.strip() + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() if enc: self.encoding = enc[enc.find(' ')+1:] - # END parse encoding - - message_lines = list() - if from_rev_list: - while True: - msg_line = stream.readline() - if not msg_line.startswith(' '): - # and forget about this empty marker - # cut the last newline to get rid of the artificial newline added - # by rev-list command. Lets hope its just linux style \n - message_lines[-1] = message_lines[-1][:-1] - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - self.message = ''.join(message_lines) - else: - # a stream from our data simply gives us the plain message - # The end of our message stream is marked with a newline that we strip - self.message = stream.read()[:-1] - # END message parsing + # now comes the message separator + readline() + # END handle encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read()[:-1] return self #} END serializable implementation diff --git a/test/fixtures/rev_list b/test/fixtures/rev_list index 95a1ebff..1a576118 100644 --- a/test/fixtures/rev_list +++ b/test/fixtures/rev_list @@ -1,24 +1,3 @@ -commit 4c8124ffcf4039d292442eeccabdeca5af5c5017 -tree 672eca9b7f9e09c22dcb128c283e8c3c8d7697a4 -parent 634396b2f541a9f2d58b00be1a07f0c358b999b3 -author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 -committer Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - implement Grit#heads - -commit 634396b2f541a9f2d58b00be1a07f0c358b999b3 -tree b35b4bf642d667fdd613eebcfe4e17efd420fb8a -author Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700 -committer Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700 - - initial grit setup - -commit ab25fd8483882c3bda8a458ad2965d2248654335 -tree c20b5ec543bde1e43a931449b196052c06ed8acc -parent 6e64c55896aabb9a7d8e9f8f296f426d21a78c2c -parent 7f874954efb9ba35210445be456c74e037ba6af2 -author Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700 -committer Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700 - - Merge branch 'site' - Some other stuff +4c8124ffcf4039d292442eeccabdeca5af5c5017 +634396b2f541a9f2d58b00be1a07f0c358b999b3 +ab25fd8483882c3bda8a458ad2965d2248654335 diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index c1f8ce59..b4a9d868 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -4,12 +4,12 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from test.testlib import * +from lib import * from git import * from time import time import sys -class TestPerformance(TestBase): +class TestPerformance(TestBigRepoReadOnly): # ref with about 100 commits in its history ref_100 = '0.1.6' @@ -48,7 +48,7 @@ class TestPerformance(TestBase): # bound to cat-file parsing performance nc = 0 st = time() - for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): + for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -59,7 +59,7 @@ class TestPerformance(TestBase): # bound to stream parsing performance nc = 0 st = time() - for c in Commit.iter_items(self.rorepo, self.ref_100): + for c in Commit.iter_items(self.gitrepo, self.head_sha_2k): nc += 1 self._query_commit_info(c) # END for each traversed commit diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 28b407ac..ad7a0082 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -9,169 +9,165 @@ from git import * class TestCommit(TestBase): - def test_bake(self): + def test_bake(self): - commit = Commit(self.rorepo, **{'sha': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'}) - commit.author # bake + commit = Commit(self.rorepo, '2454ae89983a4496a445ce347d7a41c0bb0ea7ae') + commit.author # bake - assert_equal("Sebastian Thiel", commit.author.name) - assert_equal("byronimo@gmail.com", commit.author.email) - assert commit.author == commit.committer - assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) - assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module" + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) + assert commit.author == commit.committer + assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) + assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) + assert commit.message == "Added missing information to docstrings of commit and stats module" - def test_stats(self): - commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') - stats = commit.stats - - def check_entries(d): - assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): - assert key in d - # END assertion helper - assert stats.files - assert stats.total - - check_entries(stats.total) - assert "files" in stats.total - - for filepath, d in stats.files.items(): - check_entries(d) - # END for each stated file - - # assure data is parsed properly - michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") - assert commit.author == michael - assert commit.committer == michael - assert commit.authored_date == 1210193388 - assert commit.committed_date == 1210193388 - assert commit.author_tz_offset == 14400, commit.author_tz_offset - assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project" - - def test_traversal(self): - start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") - first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") - p0 = start.parents[0] - p1 = start.parents[1] - p00 = p0.parents[0] - p10 = p1.parents[0] - - # basic branch first, depth first - dfirst = start.traverse(branch_first=False) - bfirst = start.traverse(branch_first=True) - assert dfirst.next() == p0 - assert dfirst.next() == p00 - - assert bfirst.next() == p0 - assert bfirst.next() == p1 - assert bfirst.next() == p00 - assert bfirst.next() == p10 - - # at some point, both iterations should stop - assert list(bfirst)[-1] == first - stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) - l = list(stoptraverse) - assert len(l[0]) == 2 - - # ignore self - assert start.traverse(ignore_self=False).next() == start - - # depth - assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 - - # prune - assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 - - # predicate - assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 - - # traversal should stop when the beginning is reached - self.failUnlessRaises(StopIteration, first.traverse().next) - - # parents of the first commit should be empty ( as the only parent has a null - # sha ) - assert len(first.parents) == 0 - - def test_iteration(self): - # we can iterate commits - all_commits = Commit.list_items(self.rorepo, self.rorepo.head) - assert all_commits - assert all_commits == list(self.rorepo.iter_commits()) - - # this includes merge commits - mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') - assert mcomit in all_commits - - # we can limit the result to paths - ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) - assert ltd_commits and len(ltd_commits) < len(all_commits) - - # show commits of multiple paths, resulting in a union of commits - less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) - assert len(ltd_commits) < len(less_ltd_commits) - - - @patch_object(Git, '_call_process') - def test_rev_list_bisect_all(self, git): - """ - 'git rev-list --bisect-all' returns additional information - in the commit header. This test ensures that we properly parse it. - """ + def test_stats(self): + commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') + stats = commit.stats + + def check_entries(d): + assert isinstance(d, dict) + for key in ("insertions", "deletions", "lines"): + assert key in d + # END assertion helper + assert stats.files + assert stats.total + + check_entries(stats.total) + assert "files" in stats.total + + for filepath, d in stats.files.items(): + check_entries(d) + # END for each stated file + + # assure data is parsed properly + michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") + assert commit.author == michael + assert commit.committer == michael + assert commit.authored_date == 1210193388 + assert commit.committed_date == 1210193388 + assert commit.author_tz_offset == 14400, commit.author_tz_offset + assert commit.committer_tz_offset == 14400, commit.committer_tz_offset + assert commit.message == "initial project" + + def test_traversal(self): + start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") + first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") + p0 = start.parents[0] + p1 = start.parents[1] + p00 = p0.parents[0] + p10 = p1.parents[0] + + # basic branch first, depth first + dfirst = start.traverse(branch_first=False) + bfirst = start.traverse(branch_first=True) + assert dfirst.next() == p0 + assert dfirst.next() == p00 + + assert bfirst.next() == p0 + assert bfirst.next() == p1 + assert bfirst.next() == p00 + assert bfirst.next() == p10 + + # at some point, both iterations should stop + assert list(bfirst)[-1] == first + stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) + l = list(stoptraverse) + assert len(l[0]) == 2 + + # ignore self + assert start.traverse(ignore_self=False).next() == start + + # depth + assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 + + # prune + assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 + + # predicate + assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 + + # traversal should stop when the beginning is reached + self.failUnlessRaises(StopIteration, first.traverse().next) + + # parents of the first commit should be empty ( as the only parent has a null + # sha ) + assert len(first.parents) == 0 + + def test_iteration(self): + # we can iterate commits + all_commits = Commit.list_items(self.rorepo, self.rorepo.head) + assert all_commits + assert all_commits == list(self.rorepo.iter_commits()) + + # this includes merge commits + mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') + assert mcomit in all_commits + + # we can limit the result to paths + ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) + assert ltd_commits and len(ltd_commits) < len(all_commits) + + # show commits of multiple paths, resulting in a union of commits + less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) + assert len(ltd_commits) < len(less_ltd_commits) + + def test_iter_items(self): + # pretty not allowed + self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw") + + def test_rev_list_bisect_all(self): + """ + 'git rev-list --bisect-all' returns additional information + in the commit header. This test ensures that we properly parse it. + """ + revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', + first_parent=True, + bisect_all=True) - git.return_value = fixture('rev_list_bisect_all') + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) + expected_ids = ( + '7156cece3c49544abb6bf7a0c218eb36646fad6d', + '1f66cfbbce58b4b552b041707a12d437cc5f400a', + '33ebe7acec14b25c5f84f35a664803fcab2f7781', + '933d23bf95a5bd1624fbcdf328d904e1fa173474' + ) + for sha1, commit in zip(expected_ids, commits): + assert_equal(sha1, commit.sha) - revs = self.rorepo.git.rev_list('HEAD', - pretty='raw', - first_parent=True, - bisect_all=True) - assert_true(git.called) + def test_count(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 + + def test_list(self): + assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) - commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True) - expected_ids = ( - 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', - '33ebe7acec14b25c5f84f35a664803fcab2f7781', - 'a6604a00a652e754cb8b6b0b9f194f839fc38d7c', - '8df638c22c75ddc9a43ecdde90c0c9939f5009e7', - 'c231551328faa864848bde6ff8127f59c9566e90', - ) - for sha1, commit in zip(expected_ids, commits): - assert_equal(sha1, commit.sha) + def test_str(self): + commit = Commit(self.rorepo, 'abc') + assert_equal ("abc", str(commit)) - def test_count(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 - - def test_list(self): - assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) + def test_repr(self): + commit = Commit(self.rorepo, 'abc') + assert_equal('<git.Commit "abc">', repr(commit)) - def test_str(self): - commit = Commit(self.rorepo, 'abc') - assert_equal ("abc", str(commit)) - - def test_repr(self): - commit = Commit(self.rorepo, 'abc') - assert_equal('<git.Commit "abc">', repr(commit)) - - def test_equality(self): - commit1 = Commit(self.rorepo, 'abc') - commit2 = Commit(self.rorepo, 'abc') - commit3 = Commit(self.rorepo, 'zyx') - assert_equal(commit1, commit2) - assert_not_equal(commit2, commit3) - - def test_iter_parents(self): - # should return all but ourselves, even if skip is defined - c = self.rorepo.commit('0.1.5') - for skip in (0, 1): - piter = c.iter_parents(skip=skip) - first_parent = piter.next() - assert first_parent != c - assert first_parent == c.parents[0] - # END for each - - def test_base(self): - name_rev = self.rorepo.head.commit.name_rev - assert isinstance(name_rev, basestring) - + def test_equality(self): + commit1 = Commit(self.rorepo, 'abc') + commit2 = Commit(self.rorepo, 'abc') + commit3 = Commit(self.rorepo, 'zyx') + assert_equal(commit1, commit2) + assert_not_equal(commit2, commit3) + + def test_iter_parents(self): + # should return all but ourselves, even if skip is defined + c = self.rorepo.commit('0.1.5') + for skip in (0, 1): + piter = c.iter_parents(skip=skip) + first_parent = piter.next() + assert first_parent != c + assert first_parent == c.parents[0] + # END for each + + def test_base(self): + name_rev = self.rorepo.head.commit.name_rev + assert isinstance(name_rev, basestring) + diff --git a/test/git/test_repo.py b/test/git/test_repo.py index 9316245b..ddf2b3e1 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -55,32 +55,27 @@ class TestRepo(TestBase): # try from invalid revision that does not exist self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world') - @patch_object(Git, '_call_process') - def test_commits(self, git): - git.return_value = StringProcessAdapter(fixture('rev_list')) - - commits = list(self.rorepo.iter_commits('master', max_count=10)) + def test_commits(self): + mc = 10 + commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) + assert len(commits) == mc c = commits[0] - assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha) - assert_equal(["634396b2f541a9f2d58b00be1a07f0c358b999b3"], [p.sha for p in c.parents]) - assert_equal("672eca9b7f9e09c22dcb128c283e8c3c8d7697a4", c.tree.sha) - assert_equal("Tom Preston-Werner", c.author.name) - assert_equal("tom@mojombo.com", c.author.email) - assert_equal(1191999972, c.authored_date) - assert_equal("Tom Preston-Werner", c.committer.name) - assert_equal("tom@mojombo.com", c.committer.email) - assert_equal(1191999972, c.committed_date) - assert_equal("implement Grit#heads", c.message) + assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.sha) + assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.sha for p in c.parents]) + assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.sha) + assert_equal("Michael Trier", c.author.name) + assert_equal("mtrier@gmail.com", c.author.email) + assert_equal(1232829715, c.authored_date) + assert_equal(5*3600, c.author_tz_offset) + assert_equal("Michael Trier", c.committer.name) + assert_equal("mtrier@gmail.com", c.committer.email) + assert_equal(1232829715, c.committed_date) + assert_equal(5*3600, c.committer_tz_offset) + assert_equal("Bumped version 0.1.6", c.message) c = commits[1] - assert_equal(tuple(), c.parents) - - c = commits[2] - assert_equal(["6e64c55896aabb9a7d8e9f8f296f426d21a78c2c", "7f874954efb9ba35210445be456c74e037ba6af2"], map(lambda p: p.sha, c.parents)) - assert_equal("Merge branch 'site'", c.summary) - - assert_true(git.called) + assert isinstance(c.parents, tuple) def test_trees(self): mc = 30 |