diff options
author | Antoine <me@atelierhsn.com> | 2020-10-02 03:54:37 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-01 14:54:37 -0400 |
commit | 2e0962ef1412b0cd566331954aff82711ce93acf (patch) | |
tree | 45d6b44ffe98317d4b2b3960b71717d67e2e8e8a /markdown/extensions | |
parent | b701c34ebd7b2d0eb319517b9a275ddf0c89608d (diff) | |
download | python-markdown-2e0962ef1412b0cd566331954aff82711ce93acf.tar.gz |
Support unicode ids in toc (#970)
A second function, `slugify_unicode`, was added rather than changing the existing function, so as to maintain backward compatibility. While an `encoding` parameter was added to the `slugify` function, we can't expect existing third-party functions to accept a third parameter. Therefore, the two-parameter API was preserved with this change.
Diffstat (limited to 'markdown/extensions')
-rw-r--r-- | markdown/extensions/toc.py | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index b6cdc73..b2564c9 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -23,11 +23,16 @@ import unicodedata import xml.etree.ElementTree as etree -def slugify(value, separator): +def slugify(value, separator, encoding='ascii'): """ Slugify a string, to make it URL friendly. """ - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip().lower() - return re.sub(r'[%s\s]+' % separator, separator, value) + value = unicodedata.normalize('NFKD', value).encode(encoding, 'ignore') + value = re.sub(r'[^\w\s-]', '', value.decode(encoding)).strip().lower() + return re.sub(r'[{}\s]+'.format(separator), separator, value) + + +def slugify_unicode(value, separator): + """ Slugify a string, to make it URL friendly while preserving Unicode characters. """ + return slugify(value, separator, 'utf-8') IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') |