summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSylvain <syt@logilab.fr>2007-09-13 13:11:39 +0200
committerSylvain <syt@logilab.fr>2007-09-13 13:11:39 +0200
commit2d92497a6ad96d615df12f40a5452d399a4e7159 (patch)
tree1947cd71718e26e31d968624ef1fc67cf8a0bda0
parent3f72c74f74b5cee16e7923627f5e79c5624cad58 (diff)
downloadlogilab-common-2d92497a6ad96d615df12f40a5452d399a4e7159.tar.gz
backported unormalize from ginco
-rw-r--r--ChangeLog13
-rw-r--r--textutils.py16
2 files changed, 25 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 93cf883..e47eed8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,9 +2,14 @@ ChangeLog for logilab.common
============================
--
- * db: mark support_users and support_groups methods as obsolete in
- favor of users_support and groups_support attributes, new
- ilike_support property on dbms helpers
+ * db:
+
+ - mark support_users and support_groups methods as obsolete in
+ favor of users_support and groups_support attributes
+ - new ilike_support property on dbms helpers
+
+ * textutils: new unormalize function to replace diacritical chars with
+ their ascii equivalent
* modutils: new load_module_from_file shortcut function
* clcommands: pop_args accept None as value for expected_size_after,
meaning remaining args should not be checked
@@ -13,7 +18,7 @@ ChangeLog for logilab.common
* new 'typechanged' action for configuration.read_old_config
2007-05-14 -- 0.22.1
- * important bug fix in bd.py
+ * important bug fix in db.py
* added history in pytest debugger sessions
* fix pytest coverage bug
* fix textutils test
diff --git a/textutils.py b/textutils.py
index ba0bffa..5af9ed7 100644
--- a/textutils.py
+++ b/textutils.py
@@ -48,8 +48,24 @@ unquote, colorize_ansi
__docformat__ = "restructuredtext en"
import re
+from unicodedata import normalize as _uninormalize
from os import linesep
def unormalize(ustring, killchars='', ignorenonascii=False):
    """Replace diacritical characters with their corresponding base characters.

    Each character of `ustring` is decomposed using Unicode NFD
    normalization and only the first (base) character of the decomposition
    is kept, which drops combining accents: "e with acute accent" becomes
    a plain "e".

    Characters below code point 256 that have no canonical decomposition
    are kept unchanged.

    :param ustring: the unicode string to process
    :param killchars:
      characters to remove from the result; the membership test is made on
      the base character obtained after decomposition
    :param ignorenonascii:
      when true, characters whose base character has a code point of 256
      or more are silently dropped instead of raising an error

    :raise ValueError:
      if a character's base character has a code point of 256 or more and
      `ignorenonascii` is false

    :return: a new unicode string built from the retained base characters
    """
    res = []
    for letter in ustring:
        # Decompose *before* the range check: accented letters located
        # beyond Latin-1 (Czech, Polish, ...) still have a plain base
        # letter and must not be rejected because of their own code point.
        replacement = _uninormalize('NFD', letter)[0]
        if ord(replacement) >= 2**8:
            if ignorenonascii:
                continue
            raise ValueError("can't deal with non-ascii based characters")
        if replacement in killchars:
            continue
        res.append(replacement)
    return u''.join(res)
+
def unquote(string):
"""remove optional quotes (simple or double) from the string