backported unormalize from ginco

author: Sylvain <syt@logilab.fr> 2007-09-13 13:11:39 +0200
committer: Sylvain <syt@logilab.fr> 2007-09-13 13:11:39 +0200
commit: 2d92497a6ad96d615df12f40a5452d399a4e7159 (patch)
tree: 1947cd71718e26e31d968624ef1fc67cf8a0bda0
parent: 3f72c74f74b5cee16e7923627f5e79c5624cad58 (diff)
download: logilab-common-2d92497a6ad96d615df12f40a5452d399a4e7159.tar.gz
2 files changed, 25 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 93cf883..e47eed8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,9 +2,14 @@ ChangeLog for logilab.common
 ============================
 
   --
-    * db: mark support_users and support_groups methods as obsolete in
-      favor of users_support and groups_support attributes, new
-      ilike_support property on dbms helpers
+    * db:
+	
+      - mark support_users and support_groups methods as obsolete in
+        favor of users_support and groups_support attributes
+      - new ilike_support property on dbms helpers
+	
+    * textutils: new unormalize function to replace diacritical chars with
+      their ascii equivalent
     * modutils: new load_module_from_file shortcut function
     * clcommands: pop_args accept None as value for expected_size_after,
       meaning remaining args should not be checked
@@ -13,7 +18,7 @@ ChangeLog for logilab.common
     * new 'typechanged' action for configuration.read_old_config
 
 2007-05-14  --  0.22.1
-    * important bug fix in bd.py
+    * important bug fix in db.py
     * added history in pytest debugger sessions
     * fix pytest coverage bug
     * fix textutils test
diff --git a/textutils.py b/textutils.py
index ba0bffa..5af9ed7 100644
--- a/textutils.py
+++ b/textutils.py
@@ -48,8 +48,24 @@ unquote, colorize_ansi
 __docformat__ = "restructuredtext en"
 
 import re
+from unicodedata import normalize as _uninormalize
 from os import linesep
 
+def unormalize(ustring, killchars='', ignorenonascii=False):
+    """replace diacritical characters with their corresponding ascii characters
+    """
+    res = []
+    for letter in ustring[:]:
+        if ord(letter) >= 2**8:
+            if ignorenonascii:
+                continue
+            raise ValueError("can't deal with non-ascii based characters")
+        replacement = _uninormalize('NFD', letter)[0]
+        if replacement in killchars:
+            continue
+        res.append(replacement)
+    return u''.join(res)
+
 def unquote(string):
     """remove optional quotes (simple or double) from the string
author	Sylvain <syt@logilab.fr>	2007-09-13 13:11:39 +0200
committer	Sylvain <syt@logilab.fr>	2007-09-13 13:11:39 +0200
commit	2d92497a6ad96d615df12f40a5452d399a4e7159 (patch)
tree	1947cd71718e26e31d968624ef1fc67cf8a0bda0
parent	3f72c74f74b5cee16e7923627f5e79c5624cad58 (diff)
download	logilab-common-2d92497a6ad96d615df12f40a5452d399a4e7159.tar.gz