summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rémi Cardona <remi.cardona@logilab.fr> 2015-09-18 15:45:46 +0200
committer Rémi Cardona <remi.cardona@logilab.fr> 2015-09-18 15:45:46 +0200
commit cf213fc69c1c1e94b4abcc1ab26870464ba564f6 (patch)
tree 80752fcb01408c41e4dbeab5236c98d7a56f545c
parent d8a6c1835c579e9e8d10cea5dc965eb1c817abeb (diff)
download logilab-common-cf213fc69c1c1e94b4abcc1ab26870464ba564f6.tar.gz
[umessage] Work around bug in python 3's decode_header
We sometimes get unicode objects out of that function, on which .decode() obviously fails. Upstream has apparently decided not to fix the existing function, so let's handle it ourselves. Tests are taken from CubicWeb. Related to #1716128.
-rw-r--r-- logilab/common/umessage.py 11
-rw-r--r-- test/unittest_umessage.py 12
2 files changed, 22 insertions, 1 deletions
diff --git a/logilab/common/umessage.py b/logilab/common/umessage.py
index 664d3e0..744e96e 100644
--- a/logilab/common/umessage.py
+++ b/logilab/common/umessage.py
@@ -27,6 +27,8 @@ from email.header import decode_header
from datetime import datetime
+from six import text_type, binary_type
+
try:
from mx.DateTime import DateTime
except ImportError:
@@ -40,7 +42,14 @@ def decode_QP(string):
for decoded, charset in decode_header(string):
if not charset :
charset = 'iso-8859-15'
- parts.append(decoded.decode(charset, 'replace'))
+ # python 3 sometimes returns str and sometimes bytes.
+ # the 'official' fix is to use the new 'policy' APIs
+ # https://bugs.python.org/issue24797
+ # let's just handle this bug ourselves for now
+ if isinstance(decoded, binary_type):
+ decoded = decoded.decode(charset, 'replace')
+ assert isinstance(decoded, text_type)
+ parts.append(decoded)
if sys.version_info < (3, 3):
# decoding was non-RFC compliant wrt to whitespace handling
diff --git a/test/unittest_umessage.py b/test/unittest_umessage.py
index 531573c..940b308 100644
--- a/test/unittest_umessage.py
+++ b/test/unittest_umessage.py
@@ -60,6 +60,18 @@ class UMessageTC(TestCase):
self.assertEqual(type(test), text_type)
self.assertEqual(test, u'Raphaël DUPONT<raphael.dupont@societe.fr>')
+ def test_decode_QP_utf8(self):
+ test_line = '=?utf-8?q?o=C3=AEm?= <oim@logilab.fr>'
+ test = decode_QP(test_line)
+ self.assertEqual(type(test), text_type)
+ self.assertEqual(test, u'oîm <oim@logilab.fr>')
+
+ def test_decode_QP_ascii(self):
+ test_line = 'test <test@logilab.fr>'
+ test = decode_QP(test_line)
+ self.assertEqual(type(test), text_type)
+ self.assertEqual(test, u'test <test@logilab.fr>')
+
if __name__ == '__main__':
unittest_main()