diff options
author | Félix Mattrat <mattr.felix@gmail.com> | 2016-04-15 12:06:53 +0200 |
---|---|---|
committer | Félix Mattrat <mattr.felix@gmail.com> | 2016-04-15 12:06:53 +0200 |
commit | 9adaa031887b5511508fd2ee2b1f60680a788a90 (patch) | |
tree | f58c9bf5ef8e23f1f784226662d30482754fcf81 | |
parent | ee90b2a6afeebe24da17f5545eda7f0676867cf4 (diff) | |
download | python-fastimport-git-9adaa031887b5511508fd2ee2b1f60680a788a90.tar.gz |
Sorted out string/unicode problems, updated the unicode test, and renamed calls to the assertEquals method (a deprecated alias in py3) to assertEqual.
-rw-r--r-- | fastimport/commands.py | 29 | ||||
-rw-r--r-- | fastimport/parser.py | 29 | ||||
-rw-r--r-- | fastimport/processor.py | 5 | ||||
-rw-r--r-- | fastimport/tests/test_commands.py | 28 | ||||
-rw-r--r-- | fastimport/tests/test_dates.py | 8 | ||||
-rw-r--r-- | fastimport/tests/test_filter_processor.py | 2 | ||||
-rw-r--r-- | fastimport/tests/test_parser.py | 10 |
7 files changed, 75 insertions, 36 deletions
diff --git a/fastimport/commands.py b/fastimport/commands.py index 575c304..e4f2bf3 100644 --- a/fastimport/commands.py +++ b/fastimport/commands.py @@ -19,13 +19,19 @@ These objects are used by the parser to represent the content of a fast-import stream. """ from __future__ import division + from past.utils import old_div from past.builtins import basestring +from future.utils import PY2 + from builtins import object +from builtins import str as _text +import sys import stat + # There is a bug in git 1.5.4.3 and older by which unquoting a string consumes # one extra character. Set this variable to True to work-around it. It only # happens when renaming a file whose name contains spaces and/or quotes, and @@ -175,7 +181,9 @@ class CommitCommand(ImportCommand): if use_features and self.more_authors: for author in self.more_authors: author_section += "\nauthor %s" % format_who_when(author) + committer = "committer %s" % format_who_when(self.committer) + if self.message is None: msg_section = "" else: @@ -421,7 +429,6 @@ def check_path(path): if path is None or path == '' or path[0] == "/": raise ValueError("illegal path '%s'" % path) if not isinstance(path, basestring): - import ipdb;ipdb.set_trace() raise TypeError("illegale type for path '%r'" % path) return path @@ -452,24 +459,36 @@ def format_who_when(fields): offset_minutes = old_div(offset, 60) - offset_hours * 60 offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes) name = fields[0] + if name == '': sep = '' else: sep = ' ' - if isinstance(name, basestring): + + if isinstance(name, basestring) and PY2: name = name.encode('utf8') + email = fields[1] - if isinstance(email, basestring): + + if isinstance(email, basestring) and PY2: email = email.encode('utf8') + result = "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str) + return result def format_property(name, value): """Format the name and value (both unicode) of a property as a string.""" - utf8_name = name.encode('utf8') + 
utf8_name = name + + if PY2: + utf8_name = name.encode('utf8') + if value is not None: - utf8_value = value.encode('utf8') + utf8_value = value + if PY2: + utf8_name = name.encode('utf8') result = "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value) else: result = "property %s" % (utf8_name,) diff --git a/fastimport/parser.py b/fastimport/parser.py index f44c963..1d8cbe6 100644 --- a/fastimport/parser.py +++ b/fastimport/parser.py @@ -158,17 +158,17 @@ The grammar is: not_lf ::= # Any byte that is not ASCII newline (LF); """ from __future__ import print_function -from __future__ import unicode_literals from future import standard_library standard_library.install_aliases() from builtins import map from builtins import object -from builtins import str +from builtins import str as _text import collections import re import sys +import codecs from fastimport import ( commands, @@ -574,7 +574,7 @@ class ImportParser(LineBasedParser): if still_to_read > 0: read_bytes = self.read_bytes(still_to_read) value += "\n" + read_bytes[:still_to_read - 1] - value = value.decode('utf8') + value = _text(value) return (name, value) def _path(self, s): @@ -621,11 +621,26 @@ class ImportParser(LineBasedParser): self.abort(errors.BadFormat, 'filemodify', 'mode', s) +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE +) + def _unquote_c_string(s): """replace C-style escape sequences (\n, \", etc.) 
with real chars.""" - # HACK: Python strings are close enough - #s = str(s) - #import ipdb;ipdb.set_trace() - return s.decode('string_escape', 'replace') + + # doing a s.encode('utf-8').decode('unicode_escape') can return an + # incorrect output with unicode string (both in py2 and py3) the safest way + # is to match the escape sequences and decoding them alone. + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + Authorship = collections.namedtuple('Authorship', 'name email timestamp timezone') diff --git a/fastimport/processor.py b/fastimport/processor.py index 3b601f5..c2b9374 100644 --- a/fastimport/processor.py +++ b/fastimport/processor.py @@ -30,12 +30,13 @@ See git-fast-import.1 for the meaning of each command and the processors package for examples. """ from __future__ import absolute_import + from builtins import object import sys import time -from . import errors +from fastimport import errors class ImportProcessor(object): @@ -151,7 +152,7 @@ class ImportProcessor(object): class CommitHandler(object): """Base class for commit handling. - + Subclasses should override the pre_*, post_* and *_handler methods as appropriate. 
""" diff --git a/fastimport/tests/test_commands.py b/fastimport/tests/test_commands.py index 2139f8c..1037c22 100644 --- a/fastimport/tests/test_commands.py +++ b/fastimport/tests/test_commands.py @@ -16,6 +16,7 @@ """Test how Commands are displayed""" from future import standard_library standard_library.install_aliases() +from future.utils import PY2 from builtins import map from unittest import TestCase @@ -61,21 +62,24 @@ class TestCommitDisplay(TestCase): def test_commit_unicode_committer(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam' - name_utf8 = name.encode('utf8') + + commit_utf8 = ( + u"commit refs/heads/master\n" + u"mark :bbb\n" + u"committer %s <test@example.com> 1234567890 -0600\n" + u"data 12\n" + u"release v1.0\n" + u"from :aaa" % (name,) + ) + + if PY2: + commit_utf8 = commit_utf8.encode('utf8') + committer = (name, 'test@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, "release v1.0", ":aaa", None, None) - try: - self.assertEqual( - "commit refs/heads/master\n" - "mark :bbb\n" - "committer %s <test@example.com> 1234567890 -0600\n" - "data 12\n" - "release v1.0\n" - "from :aaa" % (name_utf8,), - repr(c)) - except UnicodeEncodeError: - import ipdb;ipdb.set_trace() + + self.assertEqual(commit_utf8, repr(c)) def test_commit_no_mark(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) diff --git a/fastimport/tests/test_dates.py b/fastimport/tests/test_dates.py index f893da9..5209540 100644 --- a/fastimport/tests/test_dates.py +++ b/fastimport/tests/test_dates.py @@ -24,11 +24,11 @@ from fastimport import ( class ParseTzTests(TestCase): def test_parse_tz_utc(self): - self.assertEquals(0, dates.parse_tz("+0000")) - self.assertEquals(0, dates.parse_tz("-0000")) + self.assertEqual(0, dates.parse_tz("+0000")) + self.assertEqual(0, dates.parse_tz("-0000")) def test_parse_tz_cet(self): - 
self.assertEquals(3600, dates.parse_tz("+0100")) + self.assertEqual(3600, dates.parse_tz("+0100")) def test_parse_tz_odd(self): - self.assertEquals(1864800, dates.parse_tz("+51800")) + self.assertEqual(1864800, dates.parse_tz("+51800")) diff --git a/fastimport/tests/test_filter_processor.py b/fastimport/tests/test_filter_processor.py index 153c05f..742d15a 100644 --- a/fastimport/tests/test_filter_processor.py +++ b/fastimport/tests/test_filter_processor.py @@ -117,7 +117,7 @@ class TestCaseWithFiltering(TestCase): p = parser.ImportParser(s) proc.process(p.iter_commands) out = outf.getvalue() - self.assertEquals(expected, out) + self.assertEqual(expected, out) class TestNoFiltering(TestCaseWithFiltering): diff --git a/fastimport/tests/test_parser.py b/fastimport/tests/test_parser.py index 73bfeb0..8204346 100644 --- a/fastimport/tests/test_parser.py +++ b/fastimport/tests/test_parser.py @@ -304,7 +304,7 @@ class TestStringParsing(unittest.TestCase): def test_unquote(self): s = r'hello \"sweet\" wo\\r\tld' - self.assertEquals(r'hello "sweet" wo\r' + "\tld", + self.assertEqual(r'hello "sweet" wo\r' + "\tld", parser._unquote_c_string(s)) @@ -330,9 +330,9 @@ class TestTagParsing(unittest.TestCase): u"data 11\n" u"create v1.0")) cmds = list(p.iter_commands()) - self.assertEquals(1, len(cmds)) + self.assertEqual(1, len(cmds)) self.assertTrue(isinstance(cmds[0], commands.TagCommand)) - self.assertEquals(cmds[0].tagger, + self.assertEqual(cmds[0].tagger, ('Joe Wong', 'joe@example.com', 1234567890.0, -21600)) def test_tagger_no_email_strict(self): @@ -352,6 +352,6 @@ class TestTagParsing(unittest.TestCase): u"data 11\n" u"create v1.0"), strict=False) cmds = list(p.iter_commands()) - self.assertEquals(1, len(cmds)) + self.assertEqual(1, len(cmds)) self.assertTrue(isinstance(cmds[0], commands.TagCommand)) - self.assertEquals(cmds[0].tagger[:2], ('Joe Wong', None)) + self.assertEqual(cmds[0].tagger[:2], ('Joe Wong', None)) |