summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Stuart Rackham <srackham@methods.co.nz> 2011-02-10 08:41:25 +1300
committer Stuart Rackham <srackham@methods.co.nz> 2011-02-10 08:41:25 +1300
commit 1fa1196e26771824a814ebfefba3aaf2bb488401 (patch)
tree b72ad17d953c360521c697aa8e06256a2c6e9ada
parent b398d8c816f7e72d07eb6629f1a92ab066f62345 (diff)
download asciidoc-1fa1196e26771824a814ebfefba3aaf2bb488401.tar.gz
FIXED: Auto-generated section title ids are now unicode aware.
-rwxr-xr-x asciidoc.py 5
-rw-r--r-- tests/data/testcases.txt 1
2 files changed, 5 insertions, 1 deletions
diff --git a/asciidoc.py b/asciidoc.py
index 37ba984..e688f97 100755
--- a/asciidoc.py
+++ b/asciidoc.py
@@ -2113,7 +2113,10 @@ class Section:
NCNameStartChar ::= Letter | '_'
NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
"""
- base_ident = re.sub(r'[^a-zA-Z0-9]+', '_', title).strip('_').lower()
+ # Replace non-alpha numeric characters in title with underscores and
+ # convert to lower case.
+ base_ident = char_encode(re.sub(r'(?u)\W+', '_',
+ char_decode(title)).strip('_').lower())
# Prefix the ID name with idprefix attribute or underscore if not
# defined. Prefix ensures the ID does not clash with existing IDs.
idprefix = document.attributes.get('idprefix','_')
diff --git a/tests/data/testcases.txt b/tests/data/testcases.txt
index 1e34c18..0281736 100644
--- a/tests/data/testcases.txt
+++ b/tests/data/testcases.txt
@@ -660,4 +660,5 @@ Lorum ipsum...
这是一个测试
------------
Double-with character titles.
+<<_这是一个测试,link to auto-generated section ID>>.