summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jelmer Vernooij <jelmer@samba.org> 2010-12-12 04:52:43 +0100
committer: Jelmer Vernooij <jelmer@samba.org> 2010-12-12 04:52:43 +0100
commit: cb7b46ec679b67e5d16233a97d4afbe34d999644 (patch)
tree: 2f1e4032714a4469dafbea820b3d72549c0755ae
parent: 4ea14a0417497be92293cf56c9e8b01cd6d9a23d (diff)
download: python-fastimport-cb7b46ec679b67e5d16233a97d4afbe34d999644.tar.gz
Avoid attempting to utf-8 decode/encode committer and author information.
-rw-r--r-- fastimport/commands.py 6
-rw-r--r-- fastimport/parser.py 15
2 files changed, 4 insertions, 17 deletions
diff --git a/fastimport/commands.py b/fastimport/commands.py
index f3b9541..957928e 100644
--- a/fastimport/commands.py
+++ b/fastimport/commands.py
@@ -159,7 +159,7 @@ class CommitCommand(ImportCommand):
if self.message is None:
msg_section = ""
else:
- msg = self.message.encode('utf8')
+ msg = self.message
msg_section = "\ndata %d\n%s" % (len(msg), msg)
if self.from_ is None:
from_line = ""
@@ -278,7 +278,7 @@ class TagCommand(ImportCommand):
if self.message is None:
msg_section = ""
else:
- msg = self.message.encode('utf8')
+ msg = self.message
msg_section = "\ndata %d\n%s" % (len(msg), msg)
return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)
@@ -402,7 +402,7 @@ def format_path(p, quote_spaces=False):
if quote:
extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or ''
p = '"%s"%s' % (p, extra)
- return p.encode('utf8')
+ return p
def format_who_when(fields):
diff --git a/fastimport/parser.py b/fastimport/parser.py
index e6573e8..befff2a 100644
--- a/fastimport/parser.py
+++ b/fastimport/parser.py
@@ -530,24 +530,11 @@ class ImportParser(LineBasedParser):
name = match.group(1)
if len(name) > 0:
if name[-1] == " ":
- try:
- name = name[:-1].decode('utf_8')
- except UnicodeDecodeError:
- # The spec says names are *typically* utf8 encoded
- # but that isn't enforced by git-fast-export (at least)
- self.warning("%s name not in utf8 - replacing unknown "
- "characters" % (section,))
- name = name[:-1].decode('utf_8', 'replace')
+ name = name[:-1]
email = match.group(2)
# While it shouldn't happen, some datasets have email addresses
# which contain unicode characters. See bug 338186. We sanitize
# the data at this level just in case.
- try:
- email = email.decode('utf_8')
- except UnicodeDecodeError:
- self.warning("%s email not in utf8 - replacing unknown characters"
- % (section,))
- email = email.decode('utf_8', 'replace')
if self.user_mapper:
name, email = self.user_mapper.map_name_and_email(name, email)
return (name, email, when[0], when[1])