diff options
author | Yann Voté <yann.vote@logilab.fr> | 2017-02-28 09:54:14 +0100 |
---|---|---|
committer | Yann Voté <yann.vote@logilab.fr> | 2017-02-28 09:54:14 +0100 |
commit | 5ce7119fb687fb992acd244091fa00a388bced80 (patch) | |
tree | e786957324e0933168f192c768cb25a0686ceee7 | |
parent | 0be3291908df2109831b377f4e2f48d47e9a2429 (diff) | |
download | logilab-common-5ce7119fb687fb992acd244091fa00a388bced80.tar.gz |
[textutils] Add two more manual mappings to convert unicode into ASCII
-rw-r--r-- | logilab/common/textutils.py | 2 | ||||
-rw-r--r-- | test/unittest_textutils.py | 2 |
2 files changed, 4 insertions, 0 deletions
```diff
diff --git a/logilab/common/textutils.py b/logilab/common/textutils.py
index 9046f97..356b1a8 100644
--- a/logilab/common/textutils.py
+++ b/logilab/common/textutils.py
@@ -70,6 +70,8 @@ MANUAL_UNICODE_MAP = {
     u'\xf8': u'o', # LATIN SMALL LETTER O WITH STROKE
     u'\xbb': u'"', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
     u'\xdf': u'ss', # LATIN SMALL LETTER SHARP S
+    u'\u2013': u'-', # HYPHEN
+    u'\u2019': u"'", # SIMPLE QUOTE
 }
 
 def unormalize(ustring, ignorenonascii=None, substitute=None):
diff --git a/test/unittest_textutils.py b/test/unittest_textutils.py
index 8deb4ee..330d49c 100644
--- a/test/unittest_textutils.py
+++ b/test/unittest_textutils.py
@@ -244,6 +244,8 @@ class UnormalizeTC(TestCase):
                 (u'ÀÈÙÉÏÎÔÊÇ', u'AEUEIIOEC'),
                 (u'\xa0', u' '), # NO-BREAK SPACE managed by NFKD decomposition
                 (u'\u0154', u'R'),
+                (u'Pointe d\u2019Yves', u"Pointe d'Yves"),
+                (u'Bordeaux\u2013Mérignac', u'Bordeaux-Merignac'),
                 ]
         for input, output in data:
             yield self.assertEqual, tu.unormalize(input), output
```