diff options
author | Ian Clatworthy <ian.clatworthy@canonical.com> | 2009-08-29 00:28:10 +1000 |
---|---|---|
committer | Ian Clatworthy <ian.clatworthy@canonical.com> | 2009-08-29 00:28:10 +1000 |
commit | 83d8431990c9d97e5c0ac299de7dc1853071f0be (patch) | |
tree | 4e42066ed22b2a0aabd7eae7f7b187e4d3355cbd | |
parent | ab4304ecba90bdc893cf49d137651de864be5baf (diff) | |
download | python-fastimport-83d8431990c9d97e5c0ac299de7dc1853071f0be.tar.gz |
Fix unicode email address parsing
-rw-r--r-- | parser.py | 10 |
1 file changed, 7 insertions, 3 deletions
```diff
@@ -531,15 +531,19 @@ class ImportParser(LineBasedParser):
                 except UnicodeDecodeError:
                     # The spec says names are *typically* utf8 encoded
                     # but that isn't enforced by git-fast-export (at least)
-                    name = name[:-1]
+                    self.warning("%s name not in utf8 - replacing unknown "
+                        "characters" % (section,))
+                    name = name[:-1].decode('utf_8', 'replace')
         email = match.group(2)
         # While it shouldn't happen, some datasets have email addresses
         # which contain unicode characters. See bug 338186. We sanitize
         # the data at this level just in case.
         try:
-            email = "%s" % (email,)
+            email = email.decode('utf_8')
         except UnicodeDecodeError:
-            email = "%s" % (email.decode('utf_8'),)
+            self.warning("%s email not in utf8 - replacing unknown characters"
+                % (section,))
+            email = email.decode('utf_8', 'replace')
         return (name, email, when[0], when[1])
 
     def _name_value(self, s):
```