diff options
author | Sylvain <syt@logilab.fr> | 2007-09-13 13:11:39 +0200 |
---|---|---|
committer | Sylvain <syt@logilab.fr> | 2007-09-13 13:11:39 +0200 |
commit | 2d92497a6ad96d615df12f40a5452d399a4e7159 (patch) | |
tree | 1947cd71718e26e31d968624ef1fc67cf8a0bda0 | |
parent | 3f72c74f74b5cee16e7923627f5e79c5624cad58 (diff) | |
download | logilab-common-2d92497a6ad96d615df12f40a5452d399a4e7159.tar.gz |
backported unormalize from ginco
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | textutils.py | 16 |
2 files changed, 25 insertions, 4 deletions
```diff
@@ -2,9 +2,14 @@ ChangeLog for logilab.common
 ============================
 
 --
-    * db: mark support_users and support_groups methods as obsolete in
-      favor of users_support and groups_support attributes, new
-      ilike_support property on dbms helpers
+    * db:
+
+      - mark support_users and support_groups methods as obsolete in
+        favor of users_support and groups_support attributes
+      - new ilike_support property on dbms helpers
+
+    * textutils: new unormalize function to normalize diacritical chars by
+      their ascii equivalent
     * modutils: new load_module_from_file shortcut function
     * clcommands: pop_args accept None as value for expected_size_after,
       meaning remaining args should not be checked
@@ -13,7 +18,7 @@ ChangeLog for logilab.common
     * new 'typechanged' action for configuration.read_old_config
 
 2007-05-14 -- 0.22.1
-    * important bug fix in bd.py
+    * important bug fix in db.py
     * added history in pytest debugger sessions
     * fix pytest coverage bug
     * fix textutils test
diff --git a/textutils.py b/textutils.py
index ba0bffa..5af9ed7 100644
--- a/textutils.py
+++ b/textutils.py
@@ -48,8 +48,24 @@ unquote, colorize_ansi
 __docformat__ = "restructuredtext en"
 
 import re
+from unicodedata import normalize as _uninormalize
 from os import linesep
 
+def unormalize(ustring, killchars='', ignorenonascii=False):
+    """replace diacritical characters with their corresponding ascii characters
+    """
+    res = []
+    for letter in ustring[:]:
+        if ord(letter) >= 2**8:
+            if ignorenonascii:
+                continue
+            raise ValueError("can't deal with non-ascii based characters")
+        replacement = _uninormalize('NFD', letter)[0]
+        if replacement in killchars:
+            continue
+        res.append(replacement)
+    return u''.join(res)
+
 def unquote(string):
     """remove optional quotes (simple or double) from the string
 
```