summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- logilab/common/umessage.py 11
-rw-r--r-- test/unittest_umessage.py 12
2 files changed, 22 insertions, 1 deletion
diff --git a/logilab/common/umessage.py b/logilab/common/umessage.py
index 664d3e0..744e96e 100644
--- a/logilab/common/umessage.py
+++ b/logilab/common/umessage.py
@@ -27,6 +27,8 @@ from email.header import decode_header
from datetime import datetime
+from six import text_type, binary_type
+
try:
from mx.DateTime import DateTime
except ImportError:
@@ -40,7 +42,14 @@ def decode_QP(string):
for decoded, charset in decode_header(string):
if not charset :
charset = 'iso-8859-15'
- parts.append(decoded.decode(charset, 'replace'))
+ # python 3 sometimes returns str and sometimes bytes.
+ # the 'official' fix is to use the new 'policy' APIs
+ # https://bugs.python.org/issue24797
+ # let's just handle this bug ourselves for now
+ if isinstance(decoded, binary_type):
+ decoded = decoded.decode(charset, 'replace')
+ assert isinstance(decoded, text_type)
+ parts.append(decoded)
if sys.version_info < (3, 3):
# decoding was non-RFC compliant wrt to whitespace handling
diff --git a/test/unittest_umessage.py b/test/unittest_umessage.py
index 531573c..940b308 100644
--- a/test/unittest_umessage.py
+++ b/test/unittest_umessage.py
@@ -60,6 +60,18 @@ class UMessageTC(TestCase):
self.assertEqual(type(test), text_type)
self.assertEqual(test, u'Raphaël DUPONT<raphael.dupont@societe.fr>')
+ def test_decode_QP_utf8(self):
+ test_line = '=?utf-8?q?o=C3=AEm?= <oim@logilab.fr>'
+ test = decode_QP(test_line)
+ self.assertEqual(type(test), text_type)
+ self.assertEqual(test, u'oîm <oim@logilab.fr>')
+
+ def test_decode_QP_ascii(self):
+ test_line = 'test <test@logilab.fr>'
+ test = decode_QP(test_line)
+ self.assertEqual(type(test), text_type)
+ self.assertEqual(test, u'test <test@logilab.fr>')
+
if __name__ == '__main__':
unittest_main()