[textutils] use NFKD decomposition in unormalize()

The normal form KD (NFKD) will apply the compatibility decomposition, i.e. replace all compatibility characters with their equivalents.
author: Julien Jehannet <julien.jehannet@logilab.fr> 2010-09-23 14:52:49 +0200
committer: Julien Jehannet <julien.jehannet@logilab.fr> 2010-09-23 14:52:49 +0200
commit: 7e8c50863b305a6cb01f715d1f8f41042d919ccf (patch)
tree: 95a500796ddec9b7d5c74ed9213cc967f330881e /textutils.py
parent: a4d541d67062fe234be812c7ee83a8f48440916b (diff)
download: logilab-common-7e8c50863b305a6cb01f715d1f8f41042d919ccf.tar.gz
1 files changed, 9 insertions, 1 deletions
diff --git a/textutils.py b/textutils.py
index db69d3b..cf4deb2 100644
--- a/textutils.py
+++ b/textutils.py
@@ -73,6 +73,14 @@ MANUAL_UNICODE_MAP = {
 
 def unormalize(ustring, ignorenonascii=False):
     """replace diacritical characters with their corresponding ascii characters
+    
+    Convert the unicode string to its long normalized form (a unicode character
+    will be transformed into several characters) and keep the first one only.
+    The normal form KD (NFKD) will apply the compatibility decomposition, i.e.
+    replace all compatibility characters with their equivalents.
+    
+    :see: Another project about ASCII transliterations of Unicode text
+          http://pypi.python.org/pypi/Unidecode
     """
     res = []
     for letter in ustring[:]:
@@ -83,7 +91,7 @@ def unormalize(ustring, ignorenonascii=False):
                 if ignorenonascii:
                     continue
                 raise ValueError("can't deal with non-ascii based characters")
-            replacement = _uninormalize('NFD', letter)[0]
+            replacement = _uninormalize('NFKD', letter)[0]
         res.append(replacement)
     return u''.join(res)
author	Julien Jehannet <julien.jehannet@logilab.fr>	2010-09-23 14:52:49 +0200
committer	Julien Jehannet <julien.jehannet@logilab.fr>	2010-09-23 14:52:49 +0200
commit	7e8c50863b305a6cb01f715d1f8f41042d919ccf (patch)
tree	95a500796ddec9b7d5c74ed9213cc967f330881e /textutils.py
parent	a4d541d67062fe234be812c7ee83a8f48440916b (diff)
download	logilab-common-7e8c50863b305a6cb01f715d1f8f41042d919ccf.tar.gz