summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2018-11-20 23:55:40 +0000
committermilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2018-11-20 23:55:40 +0000
commitb95bd7805b9b5f859410758ce3c1ef6177e1a8cc (patch)
tree8396f524b8b396755d716e4e3c6a18bfb5cdfbbf
parent68406f5b1612ed128449330a7ed0418bb2ae6787 (diff)
downloaddocutils-b95bd7805b9b5f859410758ce3c1ef6177e1a8cc.tar.gz
smartquotes: Use single backslashes for escaping.
Let the smartquote transform take advantage of the representation of escaping backslashes in the doctree as null characters. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8234 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r--docutils/HISTORY.txt1
-rw-r--r--docutils/docs/user/smartquotes.txt43
-rw-r--r--docutils/docutils/transforms/universal.py21
-rw-r--r--docutils/test/test_transforms/test_smartquotes.py117
4 files changed, 144 insertions, 38 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt
index b4820af63..17d110226 100644
--- a/docutils/HISTORY.txt
+++ b/docutils/HISTORY.txt
@@ -67,6 +67,7 @@ Changes Since 0.14
* docutils/utils/smartquotes.py:
- Fix bug #332: use open quote after whitespace, ZWSP, and ZWNJ.
+ - Use single backslashes for escaping.
* docutils/writers/html5_polyglot/
diff --git a/docutils/docs/user/smartquotes.txt b/docutils/docs/user/smartquotes.txt
index 92bbafd2d..0cdfbe27b 100644
--- a/docutils/docs/user/smartquotes.txt
+++ b/docutils/docs/user/smartquotes.txt
@@ -5,8 +5,8 @@ Smart Quotes for Docutils
:Author: Günter Milde,
based on SmartyPants by John Gruber, Brad Choate, and Chad Miller
:Contact: docutils-develop@lists.sourceforge.net
-:Revision: $Revision$
-:Date: $Date$
+:Revision: $Revision: 8112 $
+:Date: $Date: 2017-06-14 16:20:20 +0200 (Mi, 14. Jun 2017) $
:License: Released under the terms of the `2-Clause BSD license`_
:Abstract: This document describes the Docutils `smartquotes` module.
@@ -25,19 +25,19 @@ transformation on Text nodes that includes the following steps:
- three consecutive dots (``...`` or ``. . .``) into an ellipsis entity.
This means you can write, edit, and save your documents using plain old
-ASCII---straight quotes, plain dashes, and plain dots---while Docutils
+ASCII -- straight quotes, plain dashes, and plain dots -- while Docutils
generates documents with typographical quotes, dashes, and ellipses.
Advantages:
* typing speed (especially when blind-typing),
* the possibility to change the quoting style of the
- complete document with just one configuration option, and
-* restriction to 7-bit characters in the source.
+ complete document with just one configuration option,
+* restriction to 7-bit ASCII characters in the source.
However, there are `algorithmic shortcomings`_ for 2 reasons:
-* Dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
+* dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
* languages that do not use whitespace around words.
So, please consider also
@@ -54,22 +54,25 @@ The `SmartQuotes` transform does not modify characters in literal text
such as source code, maths, or literal blocks.
If you need literal straight quotes (or plain hyphens and periods) in normal
-text, you can backslash escape the characters to preserve
-ASCII-punctuation. You need two backslashes as one backslash is removed by
-the reStructuredText `escaping mechanism`_.
-
-======== ========= ======== =========
-Escape Character Escape Character
-======== ========= ======== =========
-``\\`` \\ ``\\.`` \\.
-``\\"`` \\" ``\\-`` \\-
-``\\'`` \\' ``\\``` \\`
-======== ========= ======== =========
+text, you can `backslash escape`_ the characters to preserve
+ASCII-punctuation.
+
+.. class:: booktabs
+
+========= ========= == ======== ==========
+Input Output Input Output
+========= ========= == ======== ==========
+``\\`` \\ ``\...`` \...
+``\"`` \" ``\--`` \--
+``\'`` \' ``\``` \`
+========= ========= == ======== ==========
This is useful, for example, when you want to use straight quotes as
-foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
+foot and inch marks:
+
+ 6\'2\" tall; a 17\" monitor.
-.. _escaping mechanism: ../ref/rst/restructuredtext.html#escaping-mechanism
+.. _backslash escape: ../ref/rst/restructuredtext.html#escaping-mechanism
Localisation
@@ -82,7 +85,7 @@ __ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table
`SmartQuotes` inserts quotation marks depending on the language of the
current block element and the value of the `"smart_quotes" setting`_.\
-[#x-altquot]_
+[#x-altquot]_
There is built-in support for the following languages:\ [#smartquotes-locales]_
:af: .. class:: language-af
diff --git a/docutils/docutils/transforms/universal.py b/docutils/docutils/transforms/universal.py
index 0e7f305e6..1b42d854e 100644
--- a/docutils/docutils/transforms/universal.py
+++ b/docutils/docutils/transforms/universal.py
@@ -222,9 +222,10 @@ class SmartQuotes(Transform):
nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
"""Do not apply "smartquotes" to instances of these block-level nodes."""
- literal_nodes = (nodes.image, nodes.literal, nodes.math,
+ literal_nodes = (nodes.FixedTextElement, nodes.Special,
+ nodes.image, nodes.literal, nodes.math,
nodes.raw, nodes.problematic)
- """Do not change quotes in instances of these inline nodes."""
+ """Do not apply smartquotes to instances of these inline nodes."""
smartquotes_action = 'qDe'
"""Setting to select smartquote transformations.
@@ -240,14 +241,14 @@ class SmartQuotes(Transform):
def get_tokens(self, txtnodes):
# A generator that yields ``(texttype, nodetext)`` tuples for a list
# of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
-
- texttype = {True: 'literal', # "literal" text is not changed:
- False: 'plain'}
- for txtnode in txtnodes:
- nodetype = texttype[isinstance(txtnode.parent,
- self.literal_nodes)]
- yield (nodetype, txtnode.astext())
-
+ for node in txtnodes:
+ if (isinstance(node.parent, self.literal_nodes)
+ or isinstance(node.parent.parent, self.literal_nodes)):
+ yield ('literal', unicode(node))
+ else:
+ # SmartQuotes uses backslash escapes instead of null-escapes
+ txt = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', unicode(node))
+ yield ('plain', txt)
def apply(self):
smart_quotes = self.document.settings.smart_quotes
diff --git a/docutils/test/test_transforms/test_smartquotes.py b/docutils/test/test_transforms/test_smartquotes.py
index 7e9613b78..2604a1e40 100644
--- a/docutils/test/test_transforms/test_smartquotes.py
+++ b/docutils/test/test_transforms/test_smartquotes.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# $Id$
+# $Id: test_smartquotes.py 8190 2017-10-25 13:57:27Z milde $
#
# :Copyright: © 2011 Günter Milde.
# :Maintainer: docutils-develop@lists.sourceforge.net
@@ -24,7 +24,8 @@ from docutils.parsers.rst import Parser
def suite():
parser = Parser()
- settings = {'smart_quotes': True}
+ settings = {'smart_quotes': True,
+ 'trim_footnote_ref_space': True}
s = DocutilsTestSupport.TransformTestSuite(
parser, suite_settings=settings)
s.generateTests(totest)
@@ -43,7 +44,7 @@ totest_de = {}
totest_de_alt = {}
totest_locales = {}
-totest['transitions'] = ((SmartQuotes,), [
+totest['smartquotes'] = ((SmartQuotes,), [
["""\
Test "smart quotes", 'secondary smart quotes',
"'nested' smart" quotes
@@ -56,7 +57,7 @@ u"""\
“‘nested’ smart” quotes
– and —also long— dashes.
"""],
-[r"""Escaped \\"ASCII quotes\\" and \\'secondary ASCII quotes\\'.
+[r"""Escaped \"ASCII quotes\" and \'secondary ASCII quotes\'.
""",
u"""\
<document source="test data">
@@ -113,6 +114,7 @@ em space "a" 'a',
NBSP "a" 'a',
ZWSP\u200B"a" and\u200B'a',
ZWNJ\u200C"a" and\u200C'a',
+escaped space\ "a" and\ 'a',
&mdash;"a",&mdash;'a'
en dash–"a"–'a',
@@ -131,6 +133,7 @@ u"""\
NBSP “a” ‘a’,
ZWSP\u200B“a” and\u200B‘a’,
ZWNJ\u200C“a” and\u200C‘a’,
+ escaped space“a” and‘a’,
<paragraph>
&mdash;“a”,&mdash;‘a’
en dash–“a”–‘a’,
@@ -196,7 +199,7 @@ Do not convert context-character at inline-tag boundaries
and links to "targets_".
Inside *"emphasized"* or other `inline "roles"`:
- (``"string"``), (``'string'``), *\\"betont\\"*, \\"*betont*".
+ (``"string"``), (``'string'``), *\"betont\"*, \"*betont*".
Do not drop characters from intra-word inline markup like
*re*\ ``Structured``\ *Text*.
@@ -252,6 +255,104 @@ u"""\
Text
.
"""],
+[r"""
+Docutils escape mechanism uses the backslash:
+
+\Remove \non-escaped \backslashes\:
+\item \newline \tab \" \' \*.
+
+\ Remove-\ escaped-\ white\ space-\
+including-\ newlines.
+
+\\Keep\\escaped\\backslashes\\
+(but\\only\\one).
+
+\\ Keep \\ space\\ around \\ backslashes.
+
+Keep backslashes ``\in\ literal``, :math:`in \mathrm{math}`,
+and :code:`in\ code`.
+
+Test around inline elements:\ [*]_
+
+*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
+
+*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. [*] and footnotes
+""",
+u"""\
+<document source="test data">
+ <paragraph>
+ Docutils escape mechanism uses the backslash:
+ <paragraph>
+ Remove non-escaped backslashes:
+ item newline tab " \' *.
+ <paragraph>
+ Remove-escaped-whitespace-including-newlines.
+ <paragraph>
+ \\Keep\\escaped\\backslashes\\
+ (but\\only\\one).
+ <paragraph>
+ \\ Keep \\ space\\ around \\ backslashes.
+ <paragraph>
+ Keep backslashes \n\
+ <literal>
+ \\in\\ literal
+ , \n\
+ <math>
+ in \\mathrm{math}
+ ,
+ and \n\
+ <literal classes="code">
+ in\\ code
+ .
+ <paragraph>
+ Test around inline elements:
+ <footnote_reference auto="*" ids="id1">
+ <paragraph>
+ <emphasis>
+ emphasized
+ , H
+ <subscript>
+ 2
+ O and \n\
+ <math>
+ x^2
+ <section ids="emphasized-h2o-and-x-2" names="emphasized,\\ h2o\\ and\\ x^2">
+ <title>
+ <emphasis>
+ emphasized
+ , H
+ <subscript>
+ 2
+ O and \n\
+ <math>
+ x^2
+ <footnote auto="*" ids="id2">
+ <paragraph>
+ and footnotes
+"""],
+[r"""
+Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
+with backslash-escaped whitespace, including new\
+lines.
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ Character-level m
+ <emphasis>
+ a
+ <strong>
+ r
+ <literal>
+ k
+ <title_reference>
+ u
+ p
+ with backslash-escaped whitespace, including newlines.
+"""],
["""\
.. class:: language-de
@@ -287,7 +388,7 @@ u"""\
"""],
])
-totest_de['transitions'] = ((SmartQuotes,), [
+totest_de['smartquotes'] = ((SmartQuotes,), [
["""\
German "smart quotes" and 'secondary smart quotes'.
@@ -304,7 +405,7 @@ u"""\
"""],
])
-totest_de_alt['transitions'] = ((SmartQuotes,), [
+totest_de_alt['smartquotes'] = ((SmartQuotes,), [
["""\
Alternative German "smart quotes" and 'secondary smart quotes'.
@@ -333,7 +434,7 @@ u"""\
"""],
])
-totest_locales['transitions'] = ((SmartQuotes,), [
+totest_locales['smartquotes'] = ((SmartQuotes,), [
["""\
German "smart quotes" and 'secondary smart quotes'.