summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ian Clatworthy <ian.clatworthy@canonical.com> 2009-08-29 00:28:10 +1000
committer: Ian Clatworthy <ian.clatworthy@canonical.com> 2009-08-29 00:28:10 +1000
commit: 83d8431990c9d97e5c0ac299de7dc1853071f0be (patch)
tree: 4e42066ed22b2a0aabd7eae7f7b187e4d3355cbd
parent: ab4304ecba90bdc893cf49d137651de864be5baf (diff)
download: python-fastimport-83d8431990c9d97e5c0ac299de7dc1853071f0be.tar.gz
Fix unicode email address parsing
-rw-r--r-- parser.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/parser.py b/parser.py
index 70c80b1..6d42862 100644
--- a/parser.py
+++ b/parser.py
@@ -531,15 +531,19 @@ class ImportParser(LineBasedParser):
except UnicodeDecodeError:
# The spec says names are *typically* utf8 encoded
# but that isn't enforced by git-fast-export (at least)
- name = name[:-1]
+ self.warning("%s name not in utf8 - replacing unknown "
+ "characters" % (section,))
+ name = name[:-1].decode('utf_8', 'replace')
email = match.group(2)
# While it shouldn't happen, some datasets have email addresses
# which contain unicode characters. See bug 338186. We sanitize
# the data at this level just in case.
try:
- email = "%s" % (email,)
+ email = email.decode('utf_8')
except UnicodeDecodeError:
- email = "%s" % (email.decode('utf_8'),)
+ self.warning("%s email not in utf8 - replacing unknown characters"
+ % (section,))
+ email = email.decode('utf_8', 'replace')
return (name, email, when[0], when[1])
def _name_value(self, s):