diff options
-rw-r--r-- | logilab/common/umessage.py | 11 | ||||
-rw-r--r-- | test/unittest_umessage.py | 12 |
2 files changed, 22 insertions, 1 deletion
diff --git a/logilab/common/umessage.py b/logilab/common/umessage.py index 664d3e0..744e96e 100644 --- a/logilab/common/umessage.py +++ b/logilab/common/umessage.py @@ -27,6 +27,8 @@ from email.header import decode_header from datetime import datetime +from six import text_type, binary_type + try: from mx.DateTime import DateTime except ImportError: @@ -40,7 +42,14 @@ def decode_QP(string): for decoded, charset in decode_header(string): if not charset : charset = 'iso-8859-15' - parts.append(decoded.decode(charset, 'replace')) + # python 3 sometimes returns str and sometimes bytes. + # the 'official' fix is to use the new 'policy' APIs + # https://bugs.python.org/issue24797 + # let's just handle this bug ourselves for now + if isinstance(decoded, binary_type): + decoded = decoded.decode(charset, 'replace') + assert isinstance(decoded, text_type) + parts.append(decoded) if sys.version_info < (3, 3): # decoding was non-RFC compliant wrt to whitespace handling diff --git a/test/unittest_umessage.py b/test/unittest_umessage.py index 531573c..940b308 100644 --- a/test/unittest_umessage.py +++ b/test/unittest_umessage.py @@ -60,6 +60,18 @@ class UMessageTC(TestCase): self.assertEqual(type(test), text_type) self.assertEqual(test, u'Raphaël DUPONT<raphael.dupont@societe.fr>') + def test_decode_QP_utf8(self): + test_line = '=?utf-8?q?o=C3=AEm?= <oim@logilab.fr>' + test = decode_QP(test_line) + self.assertEqual(type(test), text_type) + self.assertEqual(test, u'oîm <oim@logilab.fr>') + + def test_decode_QP_ascii(self): + test_line = 'test <test@logilab.fr>' + test = decode_QP(test_line) + self.assertEqual(type(test), text_type) + self.assertEqual(test, u'test <test@logilab.fr>') + if __name__ == '__main__': unittest_main() |