diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2018-11-20 23:55:40 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2018-11-20 23:55:40 +0000 |
commit | b95bd7805b9b5f859410758ce3c1ef6177e1a8cc (patch) | |
tree | 8396f524b8b396755d716e4e3c6a18bfb5cdfbbf | |
parent | 68406f5b1612ed128449330a7ed0418bb2ae6787 (diff) | |
download | docutils-b95bd7805b9b5f859410758ce3c1ef6177e1a8cc.tar.gz |
smartquotes: Use single backslashes for escaping.
Let the smartquote transform take advantage of the
representation of escaping backslashes in the doctree
as null characters.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8234 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | docutils/HISTORY.txt | 1 | ||||
-rw-r--r-- | docutils/docs/user/smartquotes.txt | 43 | ||||
-rw-r--r-- | docutils/docutils/transforms/universal.py | 21 | ||||
-rw-r--r-- | docutils/test/test_transforms/test_smartquotes.py | 117 |
4 files changed, 144 insertions, 38 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt index b4820af63..17d110226 100644 --- a/docutils/HISTORY.txt +++ b/docutils/HISTORY.txt @@ -67,6 +67,7 @@ Changes Since 0.14 * docutils/utils/smartquotes.py: - Fix bug #332: use open quote after whitespace, ZWSP, and ZWNJ. + - Use single backslashes for escaping. * docutils/writers/html5_polyglot/ diff --git a/docutils/docs/user/smartquotes.txt b/docutils/docs/user/smartquotes.txt index 92bbafd2d..0cdfbe27b 100644 --- a/docutils/docs/user/smartquotes.txt +++ b/docutils/docs/user/smartquotes.txt @@ -5,8 +5,8 @@ Smart Quotes for Docutils :Author: Günter Milde, based on SmartyPants by John Gruber, Brad Choate, and Chad Miller :Contact: docutils-develop@lists.sourceforge.net -:Revision: $Revision$ -:Date: $Date$ +:Revision: $Revision: 8112 $ +:Date: $Date: 2017-06-14 16:20:20 +0200 (Mi, 14. Jun 2017) $ :License: Released under the terms of the `2-Clause BSD license`_ :Abstract: This document describes the Docutils `smartquotes` module. @@ -25,19 +25,19 @@ transformation on Text nodes that includes the following steps: - three consecutive dots (``...`` or ``. . .``) into an ellipsis entity. This means you can write, edit, and save your documents using plain old -ASCII---straight quotes, plain dashes, and plain dots---while Docutils +ASCII -- straight quotes, plain dashes, and plain dots -- while Docutils generates documents with typographical quotes, dashes, and ellipses. Advantages: * typing speed (especially when blind-typing), * the possibility to change the quoting style of the - complete document with just one configuration option, and -* restriction to 7-bit characters in the source. + complete document with just one configuration option, +* restriction to 7-bit ASCII characters in the source. However, there are `algorithmic shortcomings`_ for 2 reasons: -* Dual use of the "ASCII-apostrophe" (') as single quote and apostrophe, +* dual use of the "ASCII-apostrophe" (') as single quote and apostrophe, * languages that do not use whitespace around words. So, please consider also @@ -54,22 +54,25 @@ The `SmartQuotes` transform does not modify characters in literal text such as source code, maths, or literal blocks. If you need literal straight quotes (or plain hyphens and periods) in normal -text, you can backslash escape the characters to preserve -ASCII-punctuation. You need two backslashes as one backslash is removed by -the reStructuredText `escaping mechanism`_. - -======== ========= ======== ========= -Escape Character Escape Character -======== ========= ======== ========= -``\\`` \\ ``\\.`` \\. -``\\"`` \\" ``\\-`` \\- -``\\'`` \\' ``\\``` \\` -======== ========= ======== ========= +text, you can `backslash escape`_ the characters to preserve +ASCII-punctuation. + +.. class:: booktabs + +========= ========= == ======== ========== +Input Output Input Output +========= ========= == ======== ========== +``\\`` \\ ``\...`` \... +``\"`` \" ``\--`` \-- +``\'`` \' ``\``` \` +========= ========= == ======== ========== This is useful, for example, when you want to use straight quotes as -foot and inch marks: 6\\'2\\" tall; a 17\\" iMac. +foot and inch marks: + + 6\'2\" tall; a 17\" monitor. -.. _escaping mechanism: ../ref/rst/restructuredtext.html#escaping-mechanism +.. _backslash escape: ../ref/rst/restructuredtext.html#escaping-mechanism Localisation @@ -82,7 +85,7 @@ __ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table `SmartQuotes` inserts quotation marks depending on the language of the current block element and the value of the `"smart_quotes" setting`_.\ -[#x-altquot]_ +[#x-altquot]_ There is built-in support for the following languages:\ [#smartquotes-locales]_ :af: .. class:: language-af diff --git a/docutils/docutils/transforms/universal.py b/docutils/docutils/transforms/universal.py index 0e7f305e6..1b42d854e 100644 --- a/docutils/docutils/transforms/universal.py +++ b/docutils/docutils/transforms/universal.py @@ -222,9 +222,10 @@ class SmartQuotes(Transform): nodes_to_skip = (nodes.FixedTextElement, nodes.Special) """Do not apply "smartquotes" to instances of these block-level nodes.""" - literal_nodes = (nodes.image, nodes.literal, nodes.math, + literal_nodes = (nodes.FixedTextElement, nodes.Special, + nodes.image, nodes.literal, nodes.math, nodes.raw, nodes.problematic) - """Do not change quotes in instances of these inline nodes.""" + """Do apply smartquotes to instances of these inline nodes.""" smartquotes_action = 'qDe' """Setting to select smartquote transformations. @@ -240,14 +241,14 @@ class SmartQuotes(Transform): def get_tokens(self, txtnodes): # A generator that yields ``(texttype, nodetext)`` tuples for a list # of "Text" nodes (interface to ``smartquotes.educate_tokens()``). - - texttype = {True: 'literal', # "literal" text is not changed: - False: 'plain'} - for txtnode in txtnodes: - nodetype = texttype[isinstance(txtnode.parent, - self.literal_nodes)] - yield (nodetype, txtnode.astext()) - + for node in txtnodes: + if (isinstance(node.parent, self.literal_nodes) + or isinstance(node.parent.parent, self.literal_nodes)): + yield ('literal', unicode(node)) + else: + # SmartQuotes uses backslash escapes instead of null-escapes + txt = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', unicode(node)) + yield ('plain', txt) def apply(self): smart_quotes = self.document.settings.smart_quotes diff --git a/docutils/test/test_transforms/test_smartquotes.py b/docutils/test/test_transforms/test_smartquotes.py index 7e9613b78..2604a1e40 100644 --- a/docutils/test/test_transforms/test_smartquotes.py +++ b/docutils/test/test_transforms/test_smartquotes.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# $Id$ +# $Id: test_smartquotes.py 8190 2017-10-25 13:57:27Z milde $ # # :Copyright: © 2011 Günter Milde. # :Maintainer: docutils-develop@lists.sourceforge.net @@ -24,7 +24,8 @@ from docutils.parsers.rst import Parser def suite(): parser = Parser() - settings = {'smart_quotes': True} + settings = {'smart_quotes': True, + 'trim_footnote_ref_space': True} s = DocutilsTestSupport.TransformTestSuite( parser, suite_settings=settings) s.generateTests(totest) @@ -43,7 +44,7 @@ totest_de = {} totest_de_alt = {} totest_locales = {} -totest['transitions'] = ((SmartQuotes,), [ +totest['smartquotes'] = ((SmartQuotes,), [ ["""\ Test "smart quotes", 'secondary smart quotes', "'nested' smart" quotes @@ -56,7 +57,7 @@ u"""\ “‘nested’ smart” quotes – and —also long— dashes. """], -[r"""Escaped \\"ASCII quotes\\" and \\'secondary ASCII quotes\\'. +[r"""Escaped \"ASCII quotes\" and \'secondary ASCII quotes\'. """, u"""\ <document source="test data"> @@ -113,6 +114,7 @@ em space "a" 'a', NBSP "a" 'a', ZWSP\u200B"a" and\u200B'a', ZWNJ\u200C"a" and\u200C'a', +escaped space\ "a" and\ 'a', —"a",—'a' en dash–"a"–'a', @@ -131,6 +133,7 @@ u"""\ NBSP “a” ‘a’, ZWSP\u200B“a” and\u200B‘a’, ZWNJ\u200C“a” and\u200C‘a’, + escaped space“a” and‘a’, <paragraph> —“a”,—‘a’ en dash–“a”–‘a’, @@ -196,7 +199,7 @@ Do not convert context-character at inline-tag boundaries and links to "targets_". Inside *"emphasized"* or other `inline "roles"`: - (``"string"``), (``'string'``), *\\"betont\\"*, \\"*betont*". + (``"string"``), (``'string'``), *\"betont\"*, \"*betont*". Do not drop characters from intra-word inline markup like *re*\ ``Structured``\ *Text*. @@ -252,6 +255,104 @@ u"""\ Text . """], +[r""" +Docutils escape mechanism uses the backslash: + +\Remove \non-escaped \backslashes\: +\item \newline \tab \" \' \*. + +\ Remove-\ escaped-\ white\ space-\ +including-\ newlines. + +\\Keep\\escaped\\backslashes\\ +(but\\only\\one). + +\\ Keep \\ space\\ around \\ backslashes. + +Keep backslashes ``\in\ literal``, :math:`in \mathrm{math}`, +and :code:`in\ code`. + +Test around inline elements:\ [*]_ + +*emphasized*, H\ :sub:`2`\ O and :math:`x^2` + +*emphasized*, H\ :sub:`2`\ O and :math:`x^2` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. [*] and footnotes +""", +u"""\ +<document source="test data"> + <paragraph> + Docutils escape mechanism uses the backslash: + <paragraph> + Remove non-escaped backslashes: + item newline tab " \' *. + <paragraph> + Remove-escaped-whitespace-including-newlines. + <paragraph> + \\Keep\\escaped\\backslashes\\ + (but\\only\\one). + <paragraph> + \\ Keep \\ space\\ around \\ backslashes. + <paragraph> + Keep backslashes \n\ + <literal> + \\in\\ literal + , \n\ + <math> + in \\mathrm{math} + , + and \n\ + <literal classes="code"> + in\\ code + . + <paragraph> + Test around inline elements: + <footnote_reference auto="*" ids="id1"> + <paragraph> + <emphasis> + emphasized + , H + <subscript> + 2 + O and \n\ + <math> + x^2 + <section ids="emphasized-h2o-and-x-2" names="emphasized,\\ h2o\\ and\\ x^2"> + <title> + <emphasis> + emphasized + , H + <subscript> + 2 + O and \n\ + <math> + x^2 + <footnote auto="*" ids="id2"> + <paragraph> + and footnotes +"""], +[r""" +Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p +with backslash-escaped whitespace, including new\ +lines. +""", +"""\ +<document source="test data"> + <paragraph> + Character-level m + <emphasis> + a + <strong> + r + <literal> + k + <title_reference> + u + p + with backslash-escaped whitespace, including newlines. +"""], ["""\ .. class:: language-de @@ -287,7 +388,7 @@ u"""\ """], ]) -totest_de['transitions'] = ((SmartQuotes,), [ +totest_de['smartquotes'] = ((SmartQuotes,), [ ["""\ German "smart quotes" and 'secondary smart quotes'. @@ -304,7 +405,7 @@ u"""\ """], ]) -totest_de_alt['transitions'] = ((SmartQuotes,), [ +totest_de_alt['smartquotes'] = ((SmartQuotes,), [ ["""\ Alternative German "smart quotes" and 'secondary smart quotes'. @@ -333,7 +434,7 @@ u"""\ """], ]) -totest_locales['transitions'] = ((SmartQuotes,), [ +totest_locales['smartquotes'] = ((SmartQuotes,), [ ["""\ German "smart quotes" and 'secondary smart quotes'. |