diff options
author | Jelmer Vernooij <jelmer@samba.org> | 2010-12-12 04:52:43 +0100 |
---|---|---|
committer | Jelmer Vernooij <jelmer@samba.org> | 2010-12-12 04:52:43 +0100 |
commit | cb7b46ec679b67e5d16233a97d4afbe34d999644 (patch) | |
tree | 2f1e4032714a4469dafbea820b3d72549c0755ae | |
parent | 4ea14a0417497be92293cf56c9e8b01cd6d9a23d (diff) | |
download | python-fastimport-cb7b46ec679b67e5d16233a97d4afbe34d999644.tar.gz |
Avoid attempting to utf-8 decode/encode committer and author information.
-rw-r--r-- | fastimport/commands.py | 6 | ||||
-rw-r--r-- | fastimport/parser.py | 15 |
2 files changed, 4 insertions, 17 deletions
```diff
diff --git a/fastimport/commands.py b/fastimport/commands.py
index f3b9541..957928e 100644
--- a/fastimport/commands.py
+++ b/fastimport/commands.py
@@ -159,7 +159,7 @@ class CommitCommand(ImportCommand):
         if self.message is None:
             msg_section = ""
         else:
-            msg = self.message.encode('utf8')
+            msg = self.message
             msg_section = "\ndata %d\n%s" % (len(msg), msg)
         if self.from_ is None:
             from_line = ""
@@ -278,7 +278,7 @@ class TagCommand(ImportCommand):
         if self.message is None:
             msg_section = ""
         else:
-            msg = self.message.encode('utf8')
+            msg = self.message
             msg_section = "\ndata %d\n%s" % (len(msg), msg)
         return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)
 
@@ -402,7 +402,7 @@ def format_path(p, quote_spaces=False):
     if quote:
         extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or ''
         p = '"%s"%s' % (p, extra)
-    return p.encode('utf8')
+    return p
 
 
 def format_who_when(fields):
diff --git a/fastimport/parser.py b/fastimport/parser.py
index e6573e8..befff2a 100644
--- a/fastimport/parser.py
+++ b/fastimport/parser.py
@@ -530,24 +530,11 @@ class ImportParser(LineBasedParser):
         name = match.group(1)
         if len(name) > 0:
             if name[-1] == " ":
-                try:
-                    name = name[:-1].decode('utf_8')
-                except UnicodeDecodeError:
-                    # The spec says names are *typically* utf8 encoded
-                    # but that isn't enforced by git-fast-export (at least)
-                    self.warning("%s name not in utf8 - replacing unknown "
-                        "characters" % (section,))
-                    name = name[:-1].decode('utf_8', 'replace')
+                name = name[:-1]
         email = match.group(2)
         # While it shouldn't happen, some datasets have email addresses
         # which contain unicode characters. See bug 338186. We sanitize
         # the data at this level just in case.
-        try:
-            email = email.decode('utf_8')
-        except UnicodeDecodeError:
-            self.warning("%s email not in utf8 - replacing unknown characters"
-                % (section,))
-            email = email.decode('utf_8', 'replace')
         if self.user_mapper:
             name, email = self.user_mapper.map_name_and_email(name, email)
         return (name, email, when[0], when[1])
```