summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-21 13:58:51 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-21 13:58:51 +0000
commit: 556aec900cdaccbec3fce287e7af69e82d80ea53 (patch)
tree: e57f39d78e6a9af01eb5fc0f63237f85abe48476
parent: b95bd7805b9b5f859410758ce3c1ef6177e1a8cc (diff)
download: docutils-556aec900cdaccbec3fce287e7af69e82d80ea53.tar.gz
Revert the fix for backslash escaping in transforms.
Still waiting for review. Reverts last three commits from a local "feature branch" unintentionally applied to trunk with `git svn`. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8235 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- docutils/HISTORY.txt | 1
-rw-r--r-- docutils/docs/user/smartquotes.txt | 43
-rw-r--r-- docutils/docutils/parsers/rst/roles.py | 12
-rw-r--r-- docutils/docutils/parsers/rst/states.py | 68
-rw-r--r-- docutils/docutils/transforms/universal.py | 21
-rw-r--r-- docutils/test/test_transforms/test_smartquotes.py | 117
6 files changed, 76 insertions, 186 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt
index 17d110226..b4820af63 100644
--- a/docutils/HISTORY.txt
+++ b/docutils/HISTORY.txt
@@ -67,7 +67,6 @@ Changes Since 0.14
* docutils/utils/smartquotes.py:
- Fix bug #332: use open quote after whitespace, ZWSP, and ZWNJ.
- - Use single backslashes for escaping.
* docutils/writers/html5_polyglot/
diff --git a/docutils/docs/user/smartquotes.txt b/docutils/docs/user/smartquotes.txt
index 0cdfbe27b..92bbafd2d 100644
--- a/docutils/docs/user/smartquotes.txt
+++ b/docutils/docs/user/smartquotes.txt
@@ -5,8 +5,8 @@ Smart Quotes for Docutils
:Author: Günter Milde,
based on SmartyPants by John Gruber, Brad Choate, and Chad Miller
:Contact: docutils-develop@lists.sourceforge.net
-:Revision: $Revision: 8112 $
-:Date: $Date: 2017-06-14 16:20:20 +0200 (Mi, 14. Jun 2017) $
+:Revision: $Revision$
+:Date: $Date$
:License: Released under the terms of the `2-Clause BSD license`_
:Abstract: This document describes the Docutils `smartquotes` module.
@@ -25,19 +25,19 @@ transformation on Text nodes that includes the following steps:
- three consecutive dots (``...`` or ``. . .``) into an ellipsis entity.
This means you can write, edit, and save your documents using plain old
-ASCII -- straight quotes, plain dashes, and plain dots -- while Docutils
+ASCII---straight quotes, plain dashes, and plain dots---while Docutils
generates documents with typographical quotes, dashes, and ellipses.
Advantages:
* typing speed (especially when blind-typing),
* the possibility to change the quoting style of the
- complete document with just one configuration option,
-* restriction to 7-bit ASCII characters in the source.
+ complete document with just one configuration option, and
+* restriction to 7-bit characters in the source.
However, there are `algorithmic shortcomings`_ for 2 reasons:
-* dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
+* Dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
* languages that do not use whitespace around words.
So, please consider also
@@ -54,25 +54,22 @@ The `SmartQuotes` transform does not modify characters in literal text
such as source code, maths, or literal blocks.
If you need literal straight quotes (or plain hyphens and periods) in normal
-text, you can `backslash escape`_ the characters to preserve
-ASCII-punctuation.
-
-.. class:: booktabs
-
-========= ========= == ======== ==========
-Input Output Input Output
-========= ========= == ======== ==========
-``\\`` \\ ``\...`` \...
-``\"`` \" ``\--`` \--
-``\'`` \' ``\``` \`
-========= ========= == ======== ==========
+text, you can backslash escape the characters to preserve
+ASCII-punctuation. You need two backslashes as one backslash is removed by
+the reStructuredText `escaping mechanism`_.
+
+======== ========= ======== =========
+Escape Character Escape Character
+======== ========= ======== =========
+``\\`` \\ ``\\.`` \\.
+``\\"`` \\" ``\\-`` \\-
+``\\'`` \\' ``\\``` \\`
+======== ========= ======== =========
This is useful, for example, when you want to use straight quotes as
-foot and inch marks:
-
- 6\'2\" tall; a 17\" monitor.
+foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
-.. _backslash escape: ../ref/rst/restructuredtext.html#escaping-mechanism
+.. _escaping mechanism: ../ref/rst/restructuredtext.html#escaping-mechanism
Localisation
@@ -85,7 +82,7 @@ __ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table
`SmartQuotes` inserts quotation marks depending on the language of the
current block element and the value of the `"smart_quotes" setting`_.\
-[#x-altquot]_
+[#x-altquot]_
There is built-in support for the following languages:\ [#smartquotes-locales]_
:af: .. class:: language-af
diff --git a/docutils/docutils/parsers/rst/roles.py b/docutils/docutils/parsers/rst/roles.py
index ef690c5fa..35227e6d1 100644
--- a/docutils/docutils/parsers/rst/roles.py
+++ b/docutils/docutils/parsers/rst/roles.py
@@ -195,7 +195,7 @@ class GenericRole:
def __call__(self, role, rawtext, text, lineno, inliner,
options={}, content=[]):
set_classes(options)
- return [self.node_class(rawtext, text, **options)], []
+ return [self.node_class(rawtext, utils.unescape(text), **options)], []
class CustomRole:
@@ -234,7 +234,7 @@ def generic_custom_role(role, rawtext, text, lineno, inliner,
# Once nested inline markup is implemented, this and other methods should
# recursively call inliner.nested_parse().
set_classes(options)
- return [nodes.inline(rawtext, text, **options)], []
+ return [nodes.inline(rawtext, utils.unescape(text), **options)], []
generic_custom_role.options = {'class': directives.class_option}
@@ -255,7 +255,7 @@ register_generic_role('title-reference', nodes.title_reference)
def pep_reference_role(role, rawtext, text, lineno, inliner,
options={}, content=[]):
try:
- pepnum = int(utils.unescape(text))
+ pepnum = int(text)
if pepnum < 0 or pepnum > 9999:
raise ValueError
except ValueError:
@@ -268,7 +268,7 @@ def pep_reference_role(role, rawtext, text, lineno, inliner,
ref = (inliner.document.settings.pep_base_url
+ inliner.document.settings.pep_file_url_template % pepnum)
set_classes(options)
- return [nodes.reference(rawtext, 'PEP ' + text, refuri=ref,
+ return [nodes.reference(rawtext, 'PEP ' + utils.unescape(text), refuri=ref,
**options)], []
register_canonical_role('pep-reference', pep_reference_role)
@@ -276,7 +276,7 @@ register_canonical_role('pep-reference', pep_reference_role)
def rfc_reference_role(role, rawtext, text, lineno, inliner,
options={}, content=[]):
try:
- rfcnum = int(utils.unescape(text))
+ rfcnum = int(text)
if rfcnum <= 0:
raise ValueError
except ValueError:
@@ -288,7 +288,7 @@ def rfc_reference_role(role, rawtext, text, lineno, inliner,
# Base URL mainly used by inliner.rfc_reference, so this is correct:
ref = inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum
set_classes(options)
- node = nodes.reference(rawtext, 'RFC ' + text, refuri=ref,
+ node = nodes.reference(rawtext, 'RFC ' + utils.unescape(text), refuri=ref,
**options)
return [node], []
diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py
index bfa07314c..c7ee06f66 100644
--- a/docutils/docutils/parsers/rst/states.py
+++ b/docutils/docutils/parsers/rst/states.py
@@ -713,20 +713,20 @@ class Inliner:
return (string[:matchend], [], string[matchend:], [], '')
endmatch = end_pattern.search(string[matchend:])
if endmatch and endmatch.start(1): # 1 or more chars
- text = endmatch.string[:endmatch.start(1)]
- if restore_backslashes:
- text = unescape(text, True)
+ _text = endmatch.string[:endmatch.start(1)]
+ text = unescape(_text, restore_backslashes)
textend = matchend + endmatch.end(1)
rawsource = unescape(string[matchstart:textend], True)
node = nodeclass(rawsource, text)
- node[0].rawsource = unescape(text, True)
+ node[0].rawsource = unescape(_text, True)
return (string[:matchstart], [node],
string[textend:], [], endmatch.group(1))
msg = self.reporter.warning(
'Inline %s start-string without end-string.'
% nodeclass.__name__, line=lineno)
text = unescape(string[matchstart:matchend], True)
- prb = self.problematic(text, text, msg)
+ rawsource = unescape(string[matchstart:matchend], True)
+ prb = self.problematic(text, rawsource, msg)
return string[:matchstart], [prb], string[matchend:], [msg], ''
def problematic(self, text, rawsource, message):
@@ -784,7 +784,7 @@ class Inliner:
prb = self.problematic(text, text, msg)
return string[:rolestart], [prb], string[textend:], [msg]
return self.phrase_ref(string[:matchstart], string[textend:],
- rawsource, escaped)
+ rawsource, escaped, unescape(escaped))
else:
rawsource = unescape(string[rolestart:textend], True)
nodelist, messages = self.interpreted(rawsource, escaped, role,
@@ -798,30 +798,26 @@ class Inliner:
prb = self.problematic(text, text, msg)
return string[:matchstart], [prb], string[matchend:], [msg]
- def phrase_ref(self, before, after, rawsource, escaped, text=None):
- # `text` is ignored (since 0.15dev)
+ def phrase_ref(self, before, after, rawsource, escaped, text):
match = self.patterns.embedded_link.search(escaped)
if match: # embedded <URI> or <alias_>
- text = escaped[:match.start(0)]
- unescaped = unescape(text)
- rawtext = unescape(text, True)
- aliastext = match.group(2)
- rawaliastext = unescape(aliastext, True)
+ text = unescape(escaped[:match.start(0)])
+ rawtext = unescape(escaped[:match.start(0)], True)
+ aliastext = unescape(match.group(2))
+ rawaliastext = unescape(match.group(2), True)
underscore_escaped = rawaliastext.endswith(r'\_')
if aliastext.endswith('_') and not (underscore_escaped
or self.patterns.uri.match(aliastext)):
aliastype = 'name'
- alias = normalize_name(unescape(aliastext[:-1]))
+ alias = normalize_name(aliastext[:-1])
target = nodes.target(match.group(1), refname=alias)
- target.indirect_reference_name = whitespace_normalize_name(
- unescape(aliastext[:-1]))
+ target.indirect_reference_name = aliastext[:-1]
else:
aliastype = 'uri'
- # remove unescaped whitespace
alias_parts = split_escaped_whitespace(match.group(2))
- alias = ' '.join(''.join(part.split())
+ alias = ' '.join(''.join(unescape(part).split())
for part in alias_parts)
- alias = self.adjust_uri(unescape(alias))
+ alias = self.adjust_uri(alias)
if alias.endswith(r'\_'):
alias = alias[:-2] + '_'
target = nodes.target(match.group(1), refuri=alias)
@@ -831,17 +827,14 @@ class Inliner:
% aliastext)
if not text:
text = alias
- unescaped = unescape(text)
rawtext = rawaliastext
else:
- text = escaped
- unescaped = unescape(text)
target = None
rawtext = unescape(escaped, True)
- refname = normalize_name(unescaped)
+ refname = normalize_name(text)
reference = nodes.reference(rawsource, text,
- name=whitespace_normalize_name(unescaped))
+ name=whitespace_normalize_name(text))
reference[0].rawsource = rawtext
node_list = [reference]
@@ -998,9 +991,11 @@ class Inliner:
else:
addscheme = ''
text = match.group('whole')
- refuri = addscheme + unescape(text)
- reference = nodes.reference(unescape(text, True), text,
- refuri=refuri)
+ unescaped = unescape(text)
+ rawsource = unescape(text, True)
+ reference = nodes.reference(rawsource, unescaped,
+ refuri=addscheme + unescaped)
+ reference[0].rawsource = rawsource
return [reference]
else: # not a valid scheme
raise MarkupMismatch
@@ -1008,25 +1003,27 @@ class Inliner:
def pep_reference(self, match, lineno):
text = match.group(0)
if text.startswith('pep-'):
- pepnum = int(unescape(match.group('pepnum1')))
+ pepnum = int(match.group('pepnum1'))
elif text.startswith('PEP'):
- pepnum = int(unescape(match.group('pepnum2')))
+ pepnum = int(match.group('pepnum2'))
else:
raise MarkupMismatch
ref = (self.document.settings.pep_base_url
+ self.document.settings.pep_file_url_template % pepnum)
- return [nodes.reference(unescape(text, True), text, refuri=ref)]
+ unescaped = unescape(text)
+ return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
rfc_url = 'rfc%d.html'
def rfc_reference(self, match, lineno):
text = match.group(0)
if text.startswith('RFC'):
- rfcnum = int(unescape(match.group('rfcnum')))
+ rfcnum = int(match.group('rfcnum'))
ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
else:
raise MarkupMismatch
- return [nodes.reference(unescape(text, True), text, refuri=ref)]
+ unescaped = unescape(text)
+ return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
def implicit_inline(self, text, lineno):
"""
@@ -1048,7 +1045,7 @@ class Inliner:
self.implicit_inline(text[match.end():], lineno))
except MarkupMismatch:
pass
- return [nodes.Text(text, unescape(text, True))]
+ return [nodes.Text(unescape(text), rawsource=unescape(text, True))]
dispatch = {'*': emphasis,
'**': strong,
@@ -2845,7 +2842,6 @@ class Text(RSTState):
self.nested_parse(indented, input_offset=line_offset, node=definition)
return itemnode, blank_finish
- #@ TODO ignore null-escaped delimiter
classifier_delimiter = re.compile(' +: +')
def term(self, lines, lineno):
@@ -2859,12 +2855,12 @@ class Text(RSTState):
for i in range(len(text_nodes)):
node = text_nodes[i]
if isinstance(node, nodes.Text):
- parts = self.classifier_delimiter.split(node)
+ parts = self.classifier_delimiter.split(node.rawsource)
if len(parts) == 1:
node_list[-1] += node
else:
text = parts[0].rstrip()
- textnode = nodes.Text(text, unescape(text, True))
+ textnode = nodes.Text(utils.unescape(text, True))
node_list[-1] += textnode
for part in parts[1:]:
classifier_node = nodes.classifier(
diff --git a/docutils/docutils/transforms/universal.py b/docutils/docutils/transforms/universal.py
index 1b42d854e..0e7f305e6 100644
--- a/docutils/docutils/transforms/universal.py
+++ b/docutils/docutils/transforms/universal.py
@@ -222,10 +222,9 @@ class SmartQuotes(Transform):
nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
"""Do not apply "smartquotes" to instances of these block-level nodes."""
- literal_nodes = (nodes.FixedTextElement, nodes.Special,
- nodes.image, nodes.literal, nodes.math,
+ literal_nodes = (nodes.image, nodes.literal, nodes.math,
nodes.raw, nodes.problematic)
- """Do apply smartquotes to instances of these inline nodes."""
+ """Do not change quotes in instances of these inline nodes."""
smartquotes_action = 'qDe'
"""Setting to select smartquote transformations.
@@ -241,14 +240,14 @@ class SmartQuotes(Transform):
def get_tokens(self, txtnodes):
# A generator that yields ``(texttype, nodetext)`` tuples for a list
# of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
- for node in txtnodes:
- if (isinstance(node.parent, self.literal_nodes)
- or isinstance(node.parent.parent, self.literal_nodes)):
- yield ('literal', unicode(node))
- else:
- # SmartQuotes uses backslash escapes instead of null-escapes
- txt = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', unicode(node))
- yield ('plain', txt)
+
+ texttype = {True: 'literal', # "literal" text is not changed:
+ False: 'plain'}
+ for txtnode in txtnodes:
+ nodetype = texttype[isinstance(txtnode.parent,
+ self.literal_nodes)]
+ yield (nodetype, txtnode.astext())
+
def apply(self):
smart_quotes = self.document.settings.smart_quotes
diff --git a/docutils/test/test_transforms/test_smartquotes.py b/docutils/test/test_transforms/test_smartquotes.py
index 2604a1e40..7e9613b78 100644
--- a/docutils/test/test_transforms/test_smartquotes.py
+++ b/docutils/test/test_transforms/test_smartquotes.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# $Id: test_smartquotes.py 8190 2017-10-25 13:57:27Z milde $
+# $Id$
#
# :Copyright: © 2011 Günter Milde.
# :Maintainer: docutils-develop@lists.sourceforge.net
@@ -24,8 +24,7 @@ from docutils.parsers.rst import Parser
def suite():
parser = Parser()
- settings = {'smart_quotes': True,
- 'trim_footnote_ref_space': True}
+ settings = {'smart_quotes': True}
s = DocutilsTestSupport.TransformTestSuite(
parser, suite_settings=settings)
s.generateTests(totest)
@@ -44,7 +43,7 @@ totest_de = {}
totest_de_alt = {}
totest_locales = {}
-totest['smartquotes'] = ((SmartQuotes,), [
+totest['transitions'] = ((SmartQuotes,), [
["""\
Test "smart quotes", 'secondary smart quotes',
"'nested' smart" quotes
@@ -57,7 +56,7 @@ u"""\
“‘nested’ smart” quotes
– and —also long— dashes.
"""],
-[r"""Escaped \"ASCII quotes\" and \'secondary ASCII quotes\'.
+[r"""Escaped \\"ASCII quotes\\" and \\'secondary ASCII quotes\\'.
""",
u"""\
<document source="test data">
@@ -114,7 +113,6 @@ em space "a" 'a',
NBSP "a" 'a',
ZWSP\u200B"a" and\u200B'a',
ZWNJ\u200C"a" and\u200C'a',
-escaped space\ "a" and\ 'a',
&mdash;"a",&mdash;'a'
en dash–"a"–'a',
@@ -133,7 +131,6 @@ u"""\
NBSP “a” ‘a’,
ZWSP\u200B“a” and\u200B‘a’,
ZWNJ\u200C“a” and\u200C‘a’,
- escaped space“a” and‘a’,
<paragraph>
&mdash;“a”,&mdash;‘a’
en dash–“a”–‘a’,
@@ -199,7 +196,7 @@ Do not convert context-character at inline-tag boundaries
and links to "targets_".
Inside *"emphasized"* or other `inline "roles"`:
- (``"string"``), (``'string'``), *\"betont\"*, \"*betont*".
+ (``"string"``), (``'string'``), *\\"betont\\"*, \\"*betont*".
Do not drop characters from intra-word inline markup like
*re*\ ``Structured``\ *Text*.
@@ -255,104 +252,6 @@ u"""\
Text
.
"""],
-[r"""
-Docutils escape mechanism uses the backslash:
-
-\Remove \non-escaped \backslashes\:
-\item \newline \tab \" \' \*.
-
-\ Remove-\ escaped-\ white\ space-\
-including-\ newlines.
-
-\\Keep\\escaped\\backslashes\\
-(but\\only\\one).
-
-\\ Keep \\ space\\ around \\ backslashes.
-
-Keep backslashes ``\in\ literal``, :math:`in \mathrm{math}`,
-and :code:`in\ code`.
-
-Test around inline elements:\ [*]_
-
-*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
-
-*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. [*] and footnotes
-""",
-u"""\
-<document source="test data">
- <paragraph>
- Docutils escape mechanism uses the backslash:
- <paragraph>
- Remove non-escaped backslashes:
- item newline tab " \' *.
- <paragraph>
- Remove-escaped-whitespace-including-newlines.
- <paragraph>
- \\Keep\\escaped\\backslashes\\
- (but\\only\\one).
- <paragraph>
- \\ Keep \\ space\\ around \\ backslashes.
- <paragraph>
- Keep backslashes \n\
- <literal>
- \\in\\ literal
- , \n\
- <math>
- in \\mathrm{math}
- ,
- and \n\
- <literal classes="code">
- in\\ code
- .
- <paragraph>
- Test around inline elements:
- <footnote_reference auto="*" ids="id1">
- <paragraph>
- <emphasis>
- emphasized
- , H
- <subscript>
- 2
- O and \n\
- <math>
- x^2
- <section ids="emphasized-h2o-and-x-2" names="emphasized,\\ h2o\\ and\\ x^2">
- <title>
- <emphasis>
- emphasized
- , H
- <subscript>
- 2
- O and \n\
- <math>
- x^2
- <footnote auto="*" ids="id2">
- <paragraph>
- and footnotes
-"""],
-[r"""
-Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
-with backslash-escaped whitespace, including new\
-lines.
-""",
-"""\
-<document source="test data">
- <paragraph>
- Character-level m
- <emphasis>
- a
- <strong>
- r
- <literal>
- k
- <title_reference>
- u
- p
- with backslash-escaped whitespace, including newlines.
-"""],
["""\
.. class:: language-de
@@ -388,7 +287,7 @@ u"""\
"""],
])
-totest_de['smartquotes'] = ((SmartQuotes,), [
+totest_de['transitions'] = ((SmartQuotes,), [
["""\
German "smart quotes" and 'secondary smart quotes'.
@@ -405,7 +304,7 @@ u"""\
"""],
])
-totest_de_alt['smartquotes'] = ((SmartQuotes,), [
+totest_de_alt['transitions'] = ((SmartQuotes,), [
["""\
Alternative German "smart quotes" and 'secondary smart quotes'.
@@ -434,7 +333,7 @@ u"""\
"""],
])
-totest_locales['smartquotes'] = ((SmartQuotes,), [
+totest_locales['transitions'] = ((SmartQuotes,), [
["""\
German "smart quotes" and 'secondary smart quotes'.