Avoid attempting to UTF-8 decode/encode committer and author information.

author: Jelmer Vernooij <jelmer@samba.org> 2010-12-12 04:52:43 +0100
committer: Jelmer Vernooij <jelmer@samba.org> 2010-12-12 04:52:43 +0100
commit: cb7b46ec679b67e5d16233a97d4afbe34d999644 (patch)
tree: 2f1e4032714a4469dafbea820b3d72549c0755ae
parent: 4ea14a0417497be92293cf56c9e8b01cd6d9a23d (diff)
download: python-fastimport-cb7b46ec679b67e5d16233a97d4afbe34d999644.tar.gz
2 files changed, 4 insertions, 17 deletions
diff --git a/fastimport/commands.py b/fastimport/commands.py
index f3b9541..957928e 100644
--- a/fastimport/commands.py
+++ b/fastimport/commands.py
@@ -159,7 +159,7 @@ class CommitCommand(ImportCommand):
         if self.message is None:
             msg_section = ""
         else:
-            msg = self.message.encode('utf8')
+            msg = self.message
             msg_section = "\ndata %d\n%s" % (len(msg), msg)
         if self.from_ is None:
             from_line = ""
@@ -278,7 +278,7 @@ class TagCommand(ImportCommand):
         if self.message is None:
             msg_section = ""
         else:
-            msg = self.message.encode('utf8')
+            msg = self.message
             msg_section = "\ndata %d\n%s" % (len(msg), msg)
         return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)
 
@@ -402,7 +402,7 @@ def format_path(p, quote_spaces=False):
     if quote:
         extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or ''
         p = '"%s"%s' % (p, extra)
-    return p.encode('utf8')
+    return p
 
 
 def format_who_when(fields):
diff --git a/fastimport/parser.py b/fastimport/parser.py
index e6573e8..befff2a 100644
--- a/fastimport/parser.py
+++ b/fastimport/parser.py
@@ -530,24 +530,11 @@ class ImportParser(LineBasedParser):
         name = match.group(1)
         if len(name) > 0:
             if name[-1] == " ":
-                try:
-                    name = name[:-1].decode('utf_8')
-                except UnicodeDecodeError:
-                    # The spec says names are *typically* utf8 encoded
-                    # but that isn't enforced by git-fast-export (at least)
-                    self.warning("%s name not in utf8 - replacing unknown "
-                        "characters" % (section,))
-                    name = name[:-1].decode('utf_8', 'replace')
+                name = name[:-1]
         email = match.group(2)
         # While it shouldn't happen, some datasets have email addresses
         # which contain unicode characters. See bug 338186. We sanitize
         # the data at this level just in case.
-        try:
-            email = email.decode('utf_8')
-        except UnicodeDecodeError:
-            self.warning("%s email not in utf8 - replacing unknown characters"
-                % (section,))
-            email = email.decode('utf_8', 'replace')
         if self.user_mapper:
             name, email = self.user_mapper.map_name_and_email(name, email)
         return (name, email, when[0], when[1])
author	Jelmer Vernooij <jelmer@samba.org>	2010-12-12 04:52:43 +0100
committer	Jelmer Vernooij <jelmer@samba.org>	2010-12-12 04:52:43 +0100
commit	cb7b46ec679b67e5d16233a97d4afbe34d999644 (patch)
tree	2f1e4032714a4469dafbea820b3d72549c0755ae
parent	4ea14a0417497be92293cf56c9e8b01cd6d9a23d (diff)
download	python-fastimport-cb7b46ec679b67e5d16233a97d4afbe34d999644.tar.gz