diff options
author | Sylvain <syt@logilab.fr> | 2007-09-13 13:11:39 +0200 |
---|---|---|
committer | Sylvain <syt@logilab.fr> | 2007-09-13 13:11:39 +0200 |
commit | 2d92497a6ad96d615df12f40a5452d399a4e7159 (patch) | |
tree | 1947cd71718e26e31d968624ef1fc67cf8a0bda0 /textutils.py | |
parent | 3f72c74f74b5cee16e7923627f5e79c5624cad58 (diff) | |
download | logilab-common-2d92497a6ad96d615df12f40a5452d399a4e7159.tar.gz |
backported unormalize from ginco
Diffstat (limited to 'textutils.py')
-rw-r--r-- | textutils.py | 16 |
1 files changed, 16 insertions, 0 deletions
```diff
diff --git a/textutils.py b/textutils.py
index ba0bffa..5af9ed7 100644
--- a/textutils.py
+++ b/textutils.py
@@ -48,8 +48,24 @@ unquote, colorize_ansi
 __docformat__ = "restructuredtext en"
 
 import re
+from unicodedata import normalize as _uninormalize
 from os import linesep
 
+def unormalize(ustring, killchars='', ignorenonascii=False):
+    """replace diacritical characters with their corresponding ascii characters
+    """
+    res = []
+    for letter in ustring[:]:
+        if ord(letter) >= 2**8:
+            if ignorenonascii:
+                continue
+            raise ValueError("can't deal with non-ascii based characters")
+        replacement = _uninormalize('NFD', letter)[0]
+        if replacement in killchars:
+            continue
+        res.append(replacement)
+    return u''.join(res)
+
 def unquote(string):
     """remove optional quotes (simple or double) from the string
 
```