summary | refs | log | tree | commit | diff
path: root/fastimport
diff options
context:
space:
mode:
author: Félix Mattrat <mattr.felix@gmail.com> 2016-04-15 12:06:53 +0200
committer: Félix Mattrat <mattr.felix@gmail.com> 2016-04-15 12:06:53 +0200
commit: 9adaa031887b5511508fd2ee2b1f60680a788a90 (patch)
tree: f58c9bf5ef8e23f1f784226662d30482754fcf81 /fastimport
parent: ee90b2a6afeebe24da17f5545eda7f0676867cf4 (diff)
download: python-fastimport-git-9adaa031887b5511508fd2ee2b1f60680a788a90.tar.gz
sorted out string/unicode problems, updated the unicode test, renamed assertEquals method to assertEqual (deprecated in py3)
Diffstat (limited to 'fastimport')
-rw-r--r-- fastimport/commands.py 29
-rw-r--r-- fastimport/parser.py 29
-rw-r--r-- fastimport/processor.py 5
-rw-r--r-- fastimport/tests/test_commands.py 28
-rw-r--r-- fastimport/tests/test_dates.py 8
-rw-r--r-- fastimport/tests/test_filter_processor.py 2
-rw-r--r-- fastimport/tests/test_parser.py 10
7 files changed, 75 insertions, 36 deletions
diff --git a/fastimport/commands.py b/fastimport/commands.py
index 575c304..e4f2bf3 100644
--- a/fastimport/commands.py
+++ b/fastimport/commands.py
@@ -19,13 +19,19 @@ These objects are used by the parser to represent the content of
a fast-import stream.
"""
from __future__ import division
+
from past.utils import old_div
from past.builtins import basestring
+from future.utils import PY2
+
from builtins import object
+from builtins import str as _text
+import sys
import stat
+
# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
# one extra character. Set this variable to True to work-around it. It only
# happens when renaming a file whose name contains spaces and/or quotes, and
@@ -175,7 +181,9 @@ class CommitCommand(ImportCommand):
if use_features and self.more_authors:
for author in self.more_authors:
author_section += "\nauthor %s" % format_who_when(author)
+
committer = "committer %s" % format_who_when(self.committer)
+
if self.message is None:
msg_section = ""
else:
@@ -421,7 +429,6 @@ def check_path(path):
if path is None or path == '' or path[0] == "/":
raise ValueError("illegal path '%s'" % path)
if not isinstance(path, basestring):
- import ipdb;ipdb.set_trace()
raise TypeError("illegale type for path '%r'" % path)
return path
@@ -452,24 +459,36 @@ def format_who_when(fields):
offset_minutes = old_div(offset, 60) - offset_hours * 60
offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes)
name = fields[0]
+
if name == '':
sep = ''
else:
sep = ' '
- if isinstance(name, basestring):
+
+ if isinstance(name, basestring) and PY2:
name = name.encode('utf8')
+
email = fields[1]
- if isinstance(email, basestring):
+
+ if isinstance(email, basestring) and PY2:
email = email.encode('utf8')
+
result = "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str)
+
return result
def format_property(name, value):
"""Format the name and value (both unicode) of a property as a string."""
- utf8_name = name.encode('utf8')
+ utf8_name = name
+
+ if PY2:
+ utf8_name = name.encode('utf8')
+
if value is not None:
- utf8_value = value.encode('utf8')
+ utf8_value = value
+ if PY2:
+ utf8_name = name.encode('utf8')
result = "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value)
else:
result = "property %s" % (utf8_name,)
diff --git a/fastimport/parser.py b/fastimport/parser.py
index f44c963..1d8cbe6 100644
--- a/fastimport/parser.py
+++ b/fastimport/parser.py
@@ -158,17 +158,17 @@ The grammar is:
not_lf ::= # Any byte that is not ASCII newline (LF);
"""
from __future__ import print_function
-from __future__ import unicode_literals
from future import standard_library
standard_library.install_aliases()
from builtins import map
from builtins import object
-from builtins import str
+from builtins import str as _text
import collections
import re
import sys
+import codecs
from fastimport import (
commands,
@@ -574,7 +574,7 @@ class ImportParser(LineBasedParser):
if still_to_read > 0:
read_bytes = self.read_bytes(still_to_read)
value += "\n" + read_bytes[:still_to_read - 1]
- value = value.decode('utf8')
+ value = _text(value)
return (name, value)
def _path(self, s):
@@ -621,11 +621,26 @@ class ImportParser(LineBasedParser):
self.abort(errors.BadFormat, 'filemodify', 'mode', s)
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'"abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE
+)
+
def _unquote_c_string(s):
"""replace C-style escape sequences (\n, \", etc.) with real chars."""
- # HACK: Python strings are close enough
- #s = str(s)
- #import ipdb;ipdb.set_trace()
- return s.decode('string_escape', 'replace')
+
+ # doing a s.encode('utf-8').decode('unicode_escape') can return an
+ # incorrect output with unicode string (both in py2 and py3) the safest way
+ # is to match the escape sequences and decoding them alone.
+ def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode-escape')
+
+ return ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+
Authorship = collections.namedtuple('Authorship', 'name email timestamp timezone')
diff --git a/fastimport/processor.py b/fastimport/processor.py
index 3b601f5..c2b9374 100644
--- a/fastimport/processor.py
+++ b/fastimport/processor.py
@@ -30,12 +30,13 @@ See git-fast-import.1 for the meaning of each command and the
processors package for examples.
"""
from __future__ import absolute_import
+
from builtins import object
import sys
import time
-from . import errors
+from fastimport import errors
class ImportProcessor(object):
@@ -151,7 +152,7 @@ class ImportProcessor(object):
class CommitHandler(object):
"""Base class for commit handling.
-
+
Subclasses should override the pre_*, post_* and *_handler
methods as appropriate.
"""
diff --git a/fastimport/tests/test_commands.py b/fastimport/tests/test_commands.py
index 2139f8c..1037c22 100644
--- a/fastimport/tests/test_commands.py
+++ b/fastimport/tests/test_commands.py
@@ -16,6 +16,7 @@
"""Test how Commands are displayed"""
from future import standard_library
standard_library.install_aliases()
+from future.utils import PY2
from builtins import map
from unittest import TestCase
@@ -61,21 +62,24 @@ class TestCommitDisplay(TestCase):
def test_commit_unicode_committer(self):
# user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam'
- name_utf8 = name.encode('utf8')
+
+ commit_utf8 = (
+ u"commit refs/heads/master\n"
+ u"mark :bbb\n"
+ u"committer %s <test@example.com> 1234567890 -0600\n"
+ u"data 12\n"
+ u"release v1.0\n"
+ u"from :aaa" % (name,)
+ )
+
+ if PY2:
+ commit_utf8 = commit_utf8.encode('utf8')
+
committer = (name, 'test@example.com', 1234567890, -6 * 3600)
c = commands.CommitCommand("refs/heads/master", "bbb", None, committer,
"release v1.0", ":aaa", None, None)
- try:
- self.assertEqual(
- "commit refs/heads/master\n"
- "mark :bbb\n"
- "committer %s <test@example.com> 1234567890 -0600\n"
- "data 12\n"
- "release v1.0\n"
- "from :aaa" % (name_utf8,),
- repr(c))
- except UnicodeEncodeError:
- import ipdb;ipdb.set_trace()
+
+ self.assertEqual(commit_utf8, repr(c))
def test_commit_no_mark(self):
# user tuple is (name, email, secs-since-epoch, secs-offset-from-utc)
diff --git a/fastimport/tests/test_dates.py b/fastimport/tests/test_dates.py
index f893da9..5209540 100644
--- a/fastimport/tests/test_dates.py
+++ b/fastimport/tests/test_dates.py
@@ -24,11 +24,11 @@ from fastimport import (
class ParseTzTests(TestCase):
def test_parse_tz_utc(self):
- self.assertEquals(0, dates.parse_tz("+0000"))
- self.assertEquals(0, dates.parse_tz("-0000"))
+ self.assertEqual(0, dates.parse_tz("+0000"))
+ self.assertEqual(0, dates.parse_tz("-0000"))
def test_parse_tz_cet(self):
- self.assertEquals(3600, dates.parse_tz("+0100"))
+ self.assertEqual(3600, dates.parse_tz("+0100"))
def test_parse_tz_odd(self):
- self.assertEquals(1864800, dates.parse_tz("+51800"))
+ self.assertEqual(1864800, dates.parse_tz("+51800"))
diff --git a/fastimport/tests/test_filter_processor.py b/fastimport/tests/test_filter_processor.py
index 153c05f..742d15a 100644
--- a/fastimport/tests/test_filter_processor.py
+++ b/fastimport/tests/test_filter_processor.py
@@ -117,7 +117,7 @@ class TestCaseWithFiltering(TestCase):
p = parser.ImportParser(s)
proc.process(p.iter_commands)
out = outf.getvalue()
- self.assertEquals(expected, out)
+ self.assertEqual(expected, out)
class TestNoFiltering(TestCaseWithFiltering):
diff --git a/fastimport/tests/test_parser.py b/fastimport/tests/test_parser.py
index 73bfeb0..8204346 100644
--- a/fastimport/tests/test_parser.py
+++ b/fastimport/tests/test_parser.py
@@ -304,7 +304,7 @@ class TestStringParsing(unittest.TestCase):
def test_unquote(self):
s = r'hello \"sweet\" wo\\r\tld'
- self.assertEquals(r'hello "sweet" wo\r' + "\tld",
+ self.assertEqual(r'hello "sweet" wo\r' + "\tld",
parser._unquote_c_string(s))
@@ -330,9 +330,9 @@ class TestTagParsing(unittest.TestCase):
u"data 11\n"
u"create v1.0"))
cmds = list(p.iter_commands())
- self.assertEquals(1, len(cmds))
+ self.assertEqual(1, len(cmds))
self.assertTrue(isinstance(cmds[0], commands.TagCommand))
- self.assertEquals(cmds[0].tagger,
+ self.assertEqual(cmds[0].tagger,
('Joe Wong', 'joe@example.com', 1234567890.0, -21600))
def test_tagger_no_email_strict(self):
@@ -352,6 +352,6 @@ class TestTagParsing(unittest.TestCase):
u"data 11\n"
u"create v1.0"), strict=False)
cmds = list(p.iter_commands())
- self.assertEquals(1, len(cmds))
+ self.assertEqual(1, len(cmds))
self.assertTrue(isinstance(cmds[0], commands.TagCommand))
- self.assertEquals(cmds[0].tagger[:2], ('Joe Wong', None))
+ self.assertEqual(cmds[0].tagger[:2], ('Joe Wong', None))