diff options
author | Rémi Cardona <remi.cardona@logilab.fr> | 2015-09-18 15:45:46 +0200 |
---|---|---|
committer | Rémi Cardona <remi.cardona@logilab.fr> | 2015-09-18 15:45:46 +0200 |
commit | cf213fc69c1c1e94b4abcc1ab26870464ba564f6 (patch) | |
tree | 80752fcb01408c41e4dbeab5236c98d7a56f545c | |
parent | d8a6c1835c579e9e8d10cea5dc965eb1c817abeb (diff) | |
download | logilab-common-cf213fc69c1c1e94b4abcc1ab26870464ba564f6.tar.gz |
[umessage] Work around bug in python 3's decode_header
We sometimes get unicode objects out of that function, on which
.decode() obviously fails. Upstream has apparently decided not to fix the
existing function, so let's handle it ourselves.
Tests are taken from CubicWeb. Related to #1716128.
-rw-r--r-- | logilab/common/umessage.py | 11 | ||||
-rw-r--r-- | test/unittest_umessage.py | 12 |
2 files changed, 22 insertions, 1 deletions
diff --git a/logilab/common/umessage.py b/logilab/common/umessage.py index 664d3e0..744e96e 100644 --- a/logilab/common/umessage.py +++ b/logilab/common/umessage.py @@ -27,6 +27,8 @@ from email.header import decode_header from datetime import datetime +from six import text_type, binary_type + try: from mx.DateTime import DateTime except ImportError: @@ -40,7 +42,14 @@ def decode_QP(string): for decoded, charset in decode_header(string): if not charset : charset = 'iso-8859-15' - parts.append(decoded.decode(charset, 'replace')) + # python 3 sometimes returns str and sometimes bytes. + # the 'official' fix is to use the new 'policy' APIs + # https://bugs.python.org/issue24797 + # let's just handle this bug ourselves for now + if isinstance(decoded, binary_type): + decoded = decoded.decode(charset, 'replace') + assert isinstance(decoded, text_type) + parts.append(decoded) if sys.version_info < (3, 3): # decoding was non-RFC compliant wrt to whitespace handling diff --git a/test/unittest_umessage.py b/test/unittest_umessage.py index 531573c..940b308 100644 --- a/test/unittest_umessage.py +++ b/test/unittest_umessage.py @@ -60,6 +60,18 @@ class UMessageTC(TestCase): self.assertEqual(type(test), text_type) self.assertEqual(test, u'Raphaël DUPONT<raphael.dupont@societe.fr>') + def test_decode_QP_utf8(self): + test_line = '=?utf-8?q?o=C3=AEm?= <oim@logilab.fr>' + test = decode_QP(test_line) + self.assertEqual(type(test), text_type) + self.assertEqual(test, u'oîm <oim@logilab.fr>') + + def test_decode_QP_ascii(self): + test_line = 'test <test@logilab.fr>' + test = decode_QP(test_line) + self.assertEqual(type(test), text_type) + self.assertEqual(test, u'test <test@logilab.fr>') + if __name__ == '__main__': unittest_main() |