diff options
author | Yu-Jie Lin <livibetter@gmail.com> | 2016-12-25 14:02:32 +0800 |
---|---|---|
committer | Yu-Jie Lin <livibetter@gmail.com> | 2016-12-25 14:02:32 +0800 |
commit | bb08b484aed7edf121c219f903fdf07a47457c08 (patch) | |
tree | eaa633bce33aeb4859296d01c4f77f11a93f6a33 /smartypants.py | |
parent | bede08d8f6c3d28f7009501f5dafbbf30345ee39 (diff) | |
download | smartypants-bb08b484aed7edf121c219f903fdf07a47457c08.tar.gz |
add Attr.u (Unicode) and Attr.h (HTML named entities) output options (#6)
Diffstat (limited to 'smartypants.py')
-rwxr-xr-x | smartypants.py | 69 |
1 files changed, 49 insertions, 20 deletions
```diff
diff --git a/smartypants.py b/smartypants.py
index 975297b..7078547 100755
--- a/smartypants.py
+++ b/smartypants.py
@@ -92,13 +92,28 @@ class _Attr(object):
     regular quotes so SmartyPants can educate them.
     """
 
-    s = 1 << 8
+    u = 0 << 9 | 1 << 8
     """
-    Stupefy mode. Reverses the SmartyPants transformation process, turning
-    the HTML entities produced by SmartyPants into their ASCII equivalents.
-    E.g. ``&#8220;`` is turned into a simple double-quote ("), ``&#8212;`` is
-    turned into two dashes, etc.
+    Output Unicode characters instead of numeric character references, for
+    example, from ``&#8220;`` to left double quotation mark (``“``) (U+201C).
+
+    .. seealso:: :func:`convert_entities`
+    """
+    h = 1 << 9 | 0 << 8
+    """
+    Output HTML named entities instead of numeric character references, for
+    example, from ``&#8220;`` to ``&ldquo;``.
+
+    .. seealso:: :func:`convert_entities`
+    """
+    s = 1 << 9 | 1 << 8
     """
+    Output ASCII equivalents instead of numeric character references, for
+    example, from ``&#8212;`` to ``--``.
+
+    .. seealso:: :func:`convert_entities`
+    """
+    mask_o = u | h | s
 
     set0 = 0
     "suppress all transformations. (Do nothing.)"
@@ -183,7 +198,7 @@ def smartypants(text, attr=None):
     do_backticks = attr & Attr.mask_b
     do_dashes = attr & Attr.mask_d
     do_ellipses = attr & Attr.e
-    do_stupefy = attr & Attr.s
+    do_entities = attr & Attr.mask_o
     convert_quot = attr & Attr.w
 
     tokens = _tokenize(text)
@@ -267,8 +282,12 @@ def smartypants(text, attr=None):
                         # Normal case:
                         t = convert_quotes(t)
 
-                if do_stupefy:
-                    t = stupefy_entities(t)
+                if do_entities:
+                    mode = (0 if do_entities == Attr.u else
+                            1 if do_entities == Attr.h else
+                            2 if do_entities == Attr.s else
+                            3)  # would result in key error
+                    t = convert_entities(t, mode)
 
             prev_token_last_char = last_char
             result.append(t)
@@ -464,24 +483,34 @@ def convert_ellipses(text):
     return text
 
 
-def stupefy_entities(text):
+def convert_entities(text, mode):
     """
-    Convert SmartyPants HTML entities in *text* into their ASCII counterparts.
+    Convert numeric character references to, if *mode* is
+
+    - *0*: Unicode characters
+    - *1*: HTML named entities
+    - *2*: ASCII equivalents
 
-    >>> print(stupefy_entities('&#8220;Hello &#8212; world.&#8221;'))
+    >>> print(convert_entities('&#8216;', 0))
+    ‘
+    >>> print(convert_entities('&#8216;SmartyPants&#8217;', 1))
+    &lsquo;SmartyPants&rsquo;
+    >>> print(convert_entities('&#8220;Hello &#8212; world.&#8221;', 2))
     "Hello -- world."
     """
 
-    text = re.sub('&#8211;', '-', text)  # en-dash
-    text = re.sub('&#8212;', '--', text)  # em-dash
-
-    text = re.sub('&#8216;', "'", text)  # open single quote
-    text = re.sub('&#8217;', "'", text)  # close single quote
-
-    text = re.sub('&#8220;', '"', text)  # open double quote
-    text = re.sub('&#8221;', '"', text)  # close double quote
+    CTBL = {
+        '&#8211;': ('–', '&ndash;', '-'),
+        '&#8212;': ('—', '&mdash;', '--'),
+        '&#8216;': ('‘', '&lsquo;', "'"),
+        '&#8217;': ('’', '&rsquo;', "'"),
+        '&#8220;': ('“', '&ldquo;', '"'),
+        '&#8221;': ('”', '&rdquo;', '"'),
+        '&#8230;': ('…', '&hellip;', '...'),
+    }
 
-    text = re.sub('&#8230;', '...', text)  # ellipsis
+    for k, v in CTBL.items():
+        text = text.replace(k, v[mode])
 
     return text
 
```