From 1faf84f8eb760b003ad2be81432443bf443b82e6 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 15:26:23 +0200 Subject: Fix bug in diff parser output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The diff --patch parser was missing some edge case where Git would encode non-ASCII chars in path names as octals, but these weren't decoded properly. \360\237\222\251.txt Decoded via utf-8, that will return: 💩.txt --- git/diff.py | 17 +++++++++++++++-- git/test/fixtures/diff_patch_unsafe_paths | 7 +++++++ git/test/test_diff.py | 13 +++++++------ 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'git') diff --git a/git/diff.py b/git/diff.py index 44a65017..9073767e 100644 --- a/git/diff.py +++ b/git/diff.py @@ -15,12 +15,23 @@ from git.compat import ( PY3 ) - __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE') # Special object to compare against the empty tree in diffs NULL_TREE = object() +_octal_byte_re = re.compile(b'\\\\([0-9]{3})') + + +def _octal_repl(matchobj): + value = matchobj.group(1) + value = int(value, 8) + if PY3: + value = bytes(bytearray((value,))) + else: + value = chr(value) + return value + def decode_path(path, has_ab_prefix=True): if path == b'/dev/null': @@ -32,6 +43,8 @@ def decode_path(path, has_ab_prefix=True): .replace(b'\\"', b'"') .replace(b'\\\\', b'\\')) + path = _octal_byte_re.sub(_octal_repl, path) + if has_ab_prefix: assert path.startswith(b'a/') or path.startswith(b'b/') path = path[2:] @@ -337,7 +350,7 @@ class Diff(object): :note: This property is deprecated, please use ``renamed_file`` instead. 
""" return self.renamed_file - + @property def renamed_file(self): """:returns: True if the blob of our diff has been renamed diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths index 14375f79..9ee6b834 100644 --- a/git/test/fixtures/diff_patch_unsafe_paths +++ b/git/test/fixtures/diff_patch_unsafe_paths @@ -61,6 +61,13 @@ index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94 +++ "b/path/¯\\_(ツ)_|¯" @@ -0,0 +1 @@ +dummy content +diff --git "a/path/\360\237\222\251.txt" "b/path/\360\237\222\251.txt" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/\360\237\222\251.txt" +@@ -0,0 +1 @@ ++dummy content diff --git a/a/with spaces b/b/with some spaces similarity index 100% rename from a/with spaces diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 1d7a4fda..8966351a 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -161,16 +161,17 @@ class TestDiff(TestBase): self.assertEqual(res[6].b_path, u'path/with spaces') self.assertEqual(res[7].b_path, u'path/with-question-mark?') self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯') + self.assertEqual(res[9].b_path, u'path/💩.txt') # The "Moves" # NOTE: The path prefixes a/ and b/ here are legit! We're actually # verifying that it's not "a/a/" that shows up, see the fixture data. - self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit! - self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit! - self.assertEqual(res[10].a_path, u'a/ending in a space ') - self.assertEqual(res[10].b_path, u'b/ending with space ') - self.assertEqual(res[11].a_path, u'a/"with-quotes"') - self.assertEqual(res[11].b_path, u'b/"with even more quotes"') + self.assertEqual(res[10].a_path, u'a/with spaces') # NOTE: path a/ here legit! + self.assertEqual(res[10].b_path, u'b/with some spaces') # NOTE: path b/ here legit! 
+ self.assertEqual(res[11].a_path, u'a/ending in a space ') + self.assertEqual(res[11].b_path, u'b/ending with space ') + self.assertEqual(res[12].a_path, u'a/"with-quotes"') + self.assertEqual(res[12].b_path, u'b/"with even more quotes"') def test_diff_patch_format(self): # test all of the 'old' format diffs for completness - it should at least -- cgit v1.2.1 From 0235f910916b49a38aaf1fcbaa6cfbef32c567a6 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 15:59:46 +0200 Subject: Skip test that always fails on Travis CI --- git/test/test_docs.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'git') diff --git a/git/test/test_docs.py b/git/test/test_docs.py index 7b3b7474..27470748 100644 --- a/git/test/test_docs.py +++ b/git/test/test_docs.py @@ -7,11 +7,12 @@ import os from git.test.lib import TestBase -from gitdb.test.lib import with_rw_directory +from gitdb.test.lib import skip_on_travis_ci, with_rw_directory class Tutorials(TestBase): + @skip_on_travis_ci @with_rw_directory def test_init_repo_object(self, rw_dir): # [1-test_init_repo_object] @@ -165,7 +166,7 @@ class Tutorials(TestBase): for sm in cloned_repo.submodules: assert not sm.remove().exists() # after removal, the sm doesn't exist anymore sm = cloned_repo.create_submodule('mysubrepo', 'path/to/subrepo', url=bare_repo.git_dir, branch='master') - + # .gitmodules was written and added to the index, which is now being committed cloned_repo.index.commit("Added submodule") assert sm.exists() and sm.module_exists() # this submodule is defintely available @@ -395,7 +396,7 @@ class Tutorials(TestBase): hcommit.diff() # diff tree against index hcommit.diff('HEAD~1') # diff tree against previous tree hcommit.diff(None) # diff tree against working tree - + index = repo.index index.diff() # diff index against itself yielding empty diff index.diff(None) # diff index against working copy @@ -446,7 +447,7 @@ class Tutorials(TestBase): sm = sms[0] assert sm.name == 
'gitdb' # git-python has gitdb as single submodule ... assert sm.children()[0].name == 'smmap' # ... which has smmap as single submodule - + # The module is the repository referenced by the submodule assert sm.module_exists() # the module is available, which doesn't have to be the case. assert sm.module().working_tree_dir.endswith('gitdb') @@ -458,7 +459,7 @@ class Tutorials(TestBase): assert sm.config_reader().get_value('path') == sm.path assert len(sm.children()) == 1 # query the submodule hierarchy # ![1-test_submodules] - + @with_rw_directory def test_add_file_and_commit(self, rw_dir): import git -- cgit v1.2.1 From 0eafe201905d85be767c24106eb1ab12efd3ee22 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 16:20:22 +0200 Subject: Add test case as example of Git commit with invalid data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a real commit from the microjs.com open source project, see https://github.com/madrobby/microjs.com/commit/7e8457c17850d0991763941213dcb403d80f39f8, which is declared to be encoded in UTF-8, but contains invalid bytes. This makes GitPython choke on it while decoding. Rather than choking, this should instead accept the error and replace the invalid bytes by the � (U+FFFD) char. 
--- git/test/fixtures/commit_invalid_data | 6 ++++++ git/test/test_commit.py | 7 +++++++ 2 files changed, 13 insertions(+) create mode 100644 git/test/fixtures/commit_invalid_data (limited to 'git') diff --git a/git/test/fixtures/commit_invalid_data b/git/test/fixtures/commit_invalid_data new file mode 100644 index 00000000..d112bf2d --- /dev/null +++ b/git/test/fixtures/commit_invalid_data @@ -0,0 +1,6 @@ +tree 9f1a495d7d9692d24f5caedaa89f5c2c32d59368 +parent 492ace2ffce0e426ebeb55e364e987bcf024dd3b +author E.Azer KoÃoÃoÃoculu 1306710073 +0300 +committer E.Azer KoÃoÃoÃoculu 1306710073 +0300 + +add environjs diff --git a/git/test/test_commit.py b/git/test/test_commit.py index 23b7154a..ea8cd9af 100644 --- a/git/test/test_commit.py +++ b/git/test/test_commit.py @@ -306,6 +306,13 @@ class TestCommit(TestBase): # it appears cmt.author.__repr__() + def test_invalid_commit(self): + cmt = self.rorepo.commit() + cmt._deserialize(open(fixture_path('commit_invalid_data'), 'rb')) + + assert cmt.author.name == u'E.Azer Ko�o�o�oculu', cmt.author.name + assert cmt.author.email == 'azer@kodfabrik.com', cmt.author.email + def test_gpgsig(self): cmt = self.rorepo.commit() cmt._deserialize(open(fixture_path('commit_with_gpgsig'), 'rb')) -- cgit v1.2.1 From 79c99c0f66c8f3c8d13258376c82125a23b1b5c8 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 16:26:43 +0200 Subject: Ignore invalid data when decoding commit objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, GitPython chokes on this while decoding. Rather than choking, instead accept the error and replace the invalid bytes by the � (\x80) char. 
--- git/objects/commit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'git') diff --git a/git/objects/commit.py b/git/objects/commit.py index dc722f97..58a8912f 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -501,14 +501,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): try: self.author, self.authored_date, self.author_tz_offset = \ - parse_actor_and_date(author_line.decode(self.encoding)) + parse_actor_and_date(author_line.decode(self.encoding, errors='replace')) except UnicodeDecodeError: log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) try: self.committer, self.committed_date, self.committer_tz_offset = \ - parse_actor_and_date(committer_line.decode(self.encoding)) + parse_actor_and_date(committer_line.decode(self.encoding, errors='replace')) except UnicodeDecodeError: log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) @@ -518,7 +518,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: - self.message = self.message.decode(self.encoding) + self.message = self.message.decode(self.encoding, errors='replace') except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling -- cgit v1.2.1 From 25844b80c56890abc79423a7a727a129b2b9db85 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Mon, 30 May 2016 21:20:47 +0200 Subject: Fix regex This catches the case where the matched line contains "(" or ")" characters. 
--- git/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git') diff --git a/git/remote.py b/git/remote.py index 30e32ae3..42753977 100644 --- a/git/remote.py +++ b/git/remote.py @@ -204,7 +204,7 @@ class FetchInfo(object): NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ FAST_FORWARD, ERROR = [1 << x for x in range(8)] - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.$@]+\]?)\s+(.+) -> ([/\w_\+\.\-$@#]+)( \(.*\)?$)?") + re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.$@]+\]?)\s+(.+) -> ([/\w_\+\.\-$@#()]+)( \(.*\)?$)?") _flag_map = {'!': ERROR, '+': FORCED_UPDATE, -- cgit v1.2.1 From 2219f13eb6e18bdd498b709e074ff9c7e8cb3511 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 1 Jun 2016 09:12:04 +0200 Subject: fix(test): do not skip test on travis Please exclude the particular assertion instead. Related to https://github.com/gitpython-developers/GitPython/commit/a3f24f64a20d1e09917288f67fd21969f4444acd#commitcomment-17691581 --- git/ext/gitdb | 2 +- git/test/test_docs.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'git') diff --git a/git/ext/gitdb b/git/ext/gitdb index d1996e04..2389b752 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit d1996e04dbf4841b853b60c1365f0f5fd28d170c +Subproject commit 2389b75280efb1a63e6ea578eae7f897fd4beb1b diff --git a/git/test/test_docs.py b/git/test/test_docs.py index 27470748..bc961230 100644 --- a/git/test/test_docs.py +++ b/git/test/test_docs.py @@ -11,8 +11,6 @@ from gitdb.test.lib import skip_on_travis_ci, with_rw_directory class Tutorials(TestBase): - - @skip_on_travis_ci @with_rw_directory def test_init_repo_object(self, rw_dir): # [1-test_init_repo_object] -- cgit v1.2.1 From 55969cb6034d5b416946cdb8aaf7223b1c3cbea6 Mon Sep 17 00:00:00 2001 From: Andreas Maier Date: Wed, 1 Jun 2016 10:02:44 +0200 Subject: Fixed 'TypeError: decode() takes no keyword arguments' on Python 2.6 --- git/objects/commit.py | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) (limited to 'git') diff --git a/git/objects/commit.py b/git/objects/commit.py index 58a8912f..9e434c92 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -501,14 +501,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): try: self.author, self.authored_date, self.author_tz_offset = \ - parse_actor_and_date(author_line.decode(self.encoding, errors='replace')) + parse_actor_and_date(author_line.decode(self.encoding, 'replace')) except UnicodeDecodeError: log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) try: self.committer, self.committed_date, self.committer_tz_offset = \ - parse_actor_and_date(committer_line.decode(self.encoding, errors='replace')) + parse_actor_and_date(committer_line.decode(self.encoding, 'replace')) except UnicodeDecodeError: log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) @@ -518,7 +518,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: - self.message = self.message.decode(self.encoding, errors='replace') + self.message = self.message.decode(self.encoding, 'replace') except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling -- cgit v1.2.1 From 85e78ca3d9decf8807508b41dbe5335ffb6050a7 Mon Sep 17 00:00:00 2001 From: David Danier Date: Wed, 1 Jun 2016 18:01:34 +0200 Subject: Make sure os is not even partly destroyed --- git/cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git') diff --git a/git/cmd.py b/git/cmd.py index c29e3485..a8afc144 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -287,7 +287,7 @@ class Git(LazyMixin): return # can be that nothing really exists anymore ... 
- if os is None: + if os is None or os.kill is None: return # try to kill it -- cgit v1.2.1 From fde89f2a65c2503e5aaf44628e05079504e559a0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 2 Jun 2016 06:42:45 +0200 Subject: fix(test): remove unused import --- git/ext/gitdb | 2 +- git/test/test_docs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'git') diff --git a/git/ext/gitdb b/git/ext/gitdb index 2389b752..d1996e04 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 2389b75280efb1a63e6ea578eae7f897fd4beb1b +Subproject commit d1996e04dbf4841b853b60c1365f0f5fd28d170c diff --git a/git/test/test_docs.py b/git/test/test_docs.py index bc961230..8dc08559 100644 --- a/git/test/test_docs.py +++ b/git/test/test_docs.py @@ -7,7 +7,7 @@ import os from git.test.lib import TestBase -from gitdb.test.lib import skip_on_travis_ci, with_rw_directory +from gitdb.test.lib import with_rw_directory class Tutorials(TestBase): -- cgit v1.2.1