Revert the fix for backslash escaping in transforms.

Still waiting for review. This reverts the last three commits, which were unintentionally applied to trunk from a local "feature branch" with `git svn`.

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8235 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-21 13:58:51 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-21 13:58:51 +0000
commit: 556aec900cdaccbec3fce287e7af69e82d80ea53 (patch)
tree: e57f39d78e6a9af01eb5fc0f63237f85abe48476
parent: b95bd7805b9b5f859410758ce3c1ef6177e1a8cc (diff)
download: docutils-556aec900cdaccbec3fce287e7af69e82d80ea53.tar.gz
6 files changed, 76 insertions, 186 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt
index 17d110226..b4820af63 100644
--- a/docutils/HISTORY.txt
+++ b/docutils/HISTORY.txt
@@ -67,7 +67,6 @@ Changes Since 0.14
 * docutils/utils/smartquotes.py:
 
   - Fix bug #332: use open quote after whitespace, ZWSP, and ZWNJ.
-  - Use single backslashes for escaping.
 
 * docutils/writers/html5_polyglot/
 
diff --git a/docutils/docs/user/smartquotes.txt b/docutils/docs/user/smartquotes.txt
index 0cdfbe27b..92bbafd2d 100644
--- a/docutils/docs/user/smartquotes.txt
+++ b/docutils/docs/user/smartquotes.txt
@@ -5,8 +5,8 @@ Smart Quotes for Docutils
 :Author: Günter Milde,
          based on SmartyPants by John Gruber, Brad Choate, and Chad Miller
 :Contact: docutils-develop@lists.sourceforge.net
-:Revision: $Revision: 8112 $
-:Date: $Date: 2017-06-14 16:20:20 +0200 (Mi, 14. Jun 2017) $
+:Revision: $Revision$
+:Date: $Date$
 :License: Released under the terms of the `2-Clause BSD license`_
 :Abstract: This document describes the Docutils `smartquotes` module.
 
@@ -25,19 +25,19 @@ transformation on Text nodes that includes the following steps:
 - three consecutive dots (``...`` or ``. . .``) into an ellipsis entity.
 
 This means you can write, edit, and save your documents using plain old
-ASCII -- straight quotes, plain dashes, and plain dots -- while Docutils
+ASCII---straight quotes, plain dashes, and plain dots---while Docutils
 generates documents with typographical quotes, dashes, and ellipses.
 
 Advantages:
 
 * typing speed (especially when blind-typing),
 * the possibility to change the quoting style of the
-  complete document with just one configuration option,
-* restriction to 7-bit ASCII characters in the source.
+  complete document with just one configuration option, and
+* restriction to 7-bit characters in the source.
 
 However, there are `algorithmic shortcomings`_ for 2 reasons:
 
-* dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
+* Dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
 * languages that do not use whitespace around words.
 
 So, please consider also
@@ -54,25 +54,22 @@ The `SmartQuotes` transform does not modify characters in literal text
 such as source code, maths, or literal blocks.
 
 If you need literal straight quotes (or plain hyphens and periods) in normal
-text, you can `backslash escape`_ the characters to preserve
-ASCII-punctuation.
-
-.. class:: booktabs
-
-=========  ========= == ========  ==========
-Input      Output       Input     Output
-=========  ========= == ========  ==========
-``\\``     \\           ``\...``  \...
-``\"``     \"           ``\--``   \--
-``\'``     \'           ``\```    \`
-=========  ========= == ========  ==========
+text, you can backslash escape the characters to preserve
+ASCII-punctuation. You need two backslashes as one backslash is removed by
+the reStructuredText `escaping mechanism`_.
+
+========  =========  ========  =========
+Escape    Character  Escape    Character
+========  =========  ========  =========
+``\\``    \\         ``\\.``   \\.
+``\\"``   \\"        ``\\-``   \\-
+``\\'``   \\'        ``\\```   \\`
+========  =========  ========  =========
 
 This is useful, for example, when you want to use straight quotes as
-foot and inch marks:
-
-  6\'2\" tall; a 17\" monitor.
+foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
 
-.. _backslash escape: ../ref/rst/restructuredtext.html#escaping-mechanism
+.. _escaping mechanism: ../ref/rst/restructuredtext.html#escaping-mechanism
 
 
 Localisation
@@ -85,7 +82,7 @@ __ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table
 
 `SmartQuotes` inserts quotation marks depending on the language of the
 current block element and the value of the `"smart_quotes" setting`_.\
-[#x-altquot]_
+[#x-altquot]_ 
 There is built-in support for the following languages:\ [#smartquotes-locales]_
 
 :af: .. class:: language-af
diff --git a/docutils/docutils/parsers/rst/roles.py b/docutils/docutils/parsers/rst/roles.py
index ef690c5fa..35227e6d1 100644
--- a/docutils/docutils/parsers/rst/roles.py
+++ b/docutils/docutils/parsers/rst/roles.py
@@ -195,7 +195,7 @@ class GenericRole:
     def __call__(self, role, rawtext, text, lineno, inliner,
                  options={}, content=[]):
         set_classes(options)
-        return [self.node_class(rawtext, text, **options)], []
+        return [self.node_class(rawtext, utils.unescape(text), **options)], []
 
 
 class CustomRole:
@@ -234,7 +234,7 @@ def generic_custom_role(role, rawtext, text, lineno, inliner,
     # Once nested inline markup is implemented, this and other methods should
     # recursively call inliner.nested_parse().
     set_classes(options)
-    return [nodes.inline(rawtext, text, **options)], []
+    return [nodes.inline(rawtext, utils.unescape(text), **options)], []
 
 generic_custom_role.options = {'class': directives.class_option}
 
@@ -255,7 +255,7 @@ register_generic_role('title-reference', nodes.title_reference)
 def pep_reference_role(role, rawtext, text, lineno, inliner,
                        options={}, content=[]):
     try:
-        pepnum = int(utils.unescape(text))
+        pepnum = int(text)
         if pepnum < 0 or pepnum > 9999:
             raise ValueError
     except ValueError:
@@ -268,7 +268,7 @@ def pep_reference_role(role, rawtext, text, lineno, inliner,
     ref = (inliner.document.settings.pep_base_url
            + inliner.document.settings.pep_file_url_template % pepnum)
     set_classes(options)
-    return [nodes.reference(rawtext, 'PEP ' + text, refuri=ref,
+    return [nodes.reference(rawtext, 'PEP ' + utils.unescape(text), refuri=ref,
                             **options)], []
 
 register_canonical_role('pep-reference', pep_reference_role)
@@ -276,7 +276,7 @@ register_canonical_role('pep-reference', pep_reference_role)
 def rfc_reference_role(role, rawtext, text, lineno, inliner,
                        options={}, content=[]):
     try:
-        rfcnum = int(utils.unescape(text))
+        rfcnum = int(text)
         if rfcnum <= 0:
             raise ValueError
     except ValueError:
@@ -288,7 +288,7 @@ def rfc_reference_role(role, rawtext, text, lineno, inliner,
     # Base URL mainly used by inliner.rfc_reference, so this is correct:
     ref = inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum
     set_classes(options)
-    node = nodes.reference(rawtext, 'RFC ' + text, refuri=ref,
+    node = nodes.reference(rawtext, 'RFC ' + utils.unescape(text), refuri=ref,
                            **options)
     return [node], []
 
diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py
index bfa07314c..c7ee06f66 100644
--- a/docutils/docutils/parsers/rst/states.py
+++ b/docutils/docutils/parsers/rst/states.py
@@ -713,20 +713,20 @@ class Inliner:
             return (string[:matchend], [], string[matchend:], [], '')
         endmatch = end_pattern.search(string[matchend:])
         if endmatch and endmatch.start(1):  # 1 or more chars
-            text = endmatch.string[:endmatch.start(1)]
-            if restore_backslashes:
-                text = unescape(text, True)
+            _text = endmatch.string[:endmatch.start(1)]
+            text = unescape(_text, restore_backslashes)
             textend = matchend + endmatch.end(1)
             rawsource = unescape(string[matchstart:textend], True)
             node = nodeclass(rawsource, text)
-            node[0].rawsource = unescape(text, True)
+            node[0].rawsource = unescape(_text, True)
             return (string[:matchstart], [node],
                     string[textend:], [], endmatch.group(1))
         msg = self.reporter.warning(
               'Inline %s start-string without end-string.'
               % nodeclass.__name__, line=lineno)
         text = unescape(string[matchstart:matchend], True)
-        prb = self.problematic(text, text, msg)
+        rawsource = unescape(string[matchstart:matchend], True)
+        prb = self.problematic(text, rawsource, msg)
         return string[:matchstart], [prb], string[matchend:], [msg], ''
 
     def problematic(self, text, rawsource, message):
@@ -784,7 +784,7 @@ class Inliner:
                     prb = self.problematic(text, text, msg)
                     return string[:rolestart], [prb], string[textend:], [msg]
                 return self.phrase_ref(string[:matchstart], string[textend:],
-                                       rawsource, escaped)
+                                       rawsource, escaped, unescape(escaped))
             else:
                 rawsource = unescape(string[rolestart:textend], True)
                 nodelist, messages = self.interpreted(rawsource, escaped, role,
@@ -798,30 +798,26 @@ class Inliner:
         prb = self.problematic(text, text, msg)
         return string[:matchstart], [prb], string[matchend:], [msg]
 
-    def phrase_ref(self, before, after, rawsource, escaped, text=None):
-        # `text` is ignored (since 0.15dev)
+    def phrase_ref(self, before, after, rawsource, escaped, text):
         match = self.patterns.embedded_link.search(escaped)
         if match: # embedded <URI> or <alias_>
-            text = escaped[:match.start(0)]
-            unescaped = unescape(text)
-            rawtext = unescape(text, True)
-            aliastext = match.group(2)
-            rawaliastext = unescape(aliastext, True)
+            text = unescape(escaped[:match.start(0)])
+            rawtext = unescape(escaped[:match.start(0)], True)
+            aliastext = unescape(match.group(2))
+            rawaliastext = unescape(match.group(2), True)
             underscore_escaped = rawaliastext.endswith(r'\_')
             if aliastext.endswith('_') and not (underscore_escaped
                                         or self.patterns.uri.match(aliastext)):
                 aliastype = 'name'
-                alias = normalize_name(unescape(aliastext[:-1]))
+                alias = normalize_name(aliastext[:-1])
                 target = nodes.target(match.group(1), refname=alias)
-                target.indirect_reference_name = whitespace_normalize_name(
-                                                    unescape(aliastext[:-1]))
+                target.indirect_reference_name = aliastext[:-1]
             else:
                 aliastype = 'uri'
-                # remove unescaped whitespace
                 alias_parts = split_escaped_whitespace(match.group(2))
-                alias = ' '.join(''.join(part.split())
+                alias = ' '.join(''.join(unescape(part).split())
                                  for part in alias_parts)
-                alias = self.adjust_uri(unescape(alias))
+                alias = self.adjust_uri(alias)
                 if alias.endswith(r'\_'):
                     alias = alias[:-2] + '_'
                 target = nodes.target(match.group(1), refuri=alias)
@@ -831,17 +827,14 @@ class Inliner:
                                        % aliastext)
             if not text:
                 text = alias
-                unescaped = unescape(text)
                 rawtext = rawaliastext
         else:
-            text = escaped
-            unescaped = unescape(text)
             target = None
             rawtext = unescape(escaped, True)
 
-        refname = normalize_name(unescaped)
+        refname = normalize_name(text)
         reference = nodes.reference(rawsource, text,
-                                    name=whitespace_normalize_name(unescaped))
+                                    name=whitespace_normalize_name(text))
         reference[0].rawsource = rawtext
 
         node_list = [reference]
@@ -998,9 +991,11 @@ class Inliner:
             else:
                 addscheme = ''
             text = match.group('whole')
-            refuri = addscheme + unescape(text)
-            reference = nodes.reference(unescape(text, True), text,
-                                        refuri=refuri)
+            unescaped = unescape(text)
+            rawsource = unescape(text, True)
+            reference = nodes.reference(rawsource, unescaped,
+                                        refuri=addscheme + unescaped)
+            reference[0].rawsource = rawsource
             return [reference]
         else:                   # not a valid scheme
             raise MarkupMismatch
@@ -1008,25 +1003,27 @@ class Inliner:
     def pep_reference(self, match, lineno):
         text = match.group(0)
         if text.startswith('pep-'):
-            pepnum = int(unescape(match.group('pepnum1')))
+            pepnum = int(match.group('pepnum1'))
         elif text.startswith('PEP'):
-            pepnum = int(unescape(match.group('pepnum2')))
+            pepnum = int(match.group('pepnum2'))
         else:
             raise MarkupMismatch
         ref = (self.document.settings.pep_base_url
                + self.document.settings.pep_file_url_template % pepnum)
-        return [nodes.reference(unescape(text, True), text, refuri=ref)]
+        unescaped = unescape(text)
+        return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
 
     rfc_url = 'rfc%d.html'
 
     def rfc_reference(self, match, lineno):
         text = match.group(0)
         if text.startswith('RFC'):
-            rfcnum = int(unescape(match.group('rfcnum')))
+            rfcnum = int(match.group('rfcnum'))
             ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
         else:
             raise MarkupMismatch
-        return [nodes.reference(unescape(text, True), text, refuri=ref)]
+        unescaped = unescape(text)
+        return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
 
     def implicit_inline(self, text, lineno):
         """
@@ -1048,7 +1045,7 @@ class Inliner:
                             self.implicit_inline(text[match.end():], lineno))
                 except MarkupMismatch:
                     pass
-        return [nodes.Text(text, unescape(text, True))]
+        return [nodes.Text(unescape(text), rawsource=unescape(text, True))]
 
     dispatch = {'*': emphasis,
                 '**': strong,
@@ -2845,7 +2842,6 @@ class Text(RSTState):
         self.nested_parse(indented, input_offset=line_offset, node=definition)
         return itemnode, blank_finish
 
-    #@ TODO ignore null-escaped delimiter
     classifier_delimiter = re.compile(' +: +')
 
     def term(self, lines, lineno):
@@ -2859,12 +2855,12 @@ class Text(RSTState):
         for i in range(len(text_nodes)):
             node = text_nodes[i]
             if isinstance(node, nodes.Text):
-                parts = self.classifier_delimiter.split(node)
+                parts = self.classifier_delimiter.split(node.rawsource)
                 if len(parts) == 1:
                     node_list[-1] += node
                 else:
                     text = parts[0].rstrip()
-                    textnode = nodes.Text(text, unescape(text, True))
+                    textnode = nodes.Text(utils.unescape(text, True))
                     node_list[-1] += textnode
                     for part in parts[1:]:
                         classifier_node = nodes.classifier(
diff --git a/docutils/docutils/transforms/universal.py b/docutils/docutils/transforms/universal.py
index 1b42d854e..0e7f305e6 100644
--- a/docutils/docutils/transforms/universal.py
+++ b/docutils/docutils/transforms/universal.py
@@ -222,10 +222,9 @@ class SmartQuotes(Transform):
     nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
     """Do not apply "smartquotes" to instances of these block-level nodes."""
 
-    literal_nodes = (nodes.FixedTextElement, nodes.Special,
-                     nodes.image, nodes.literal, nodes.math,
+    literal_nodes = (nodes.image, nodes.literal, nodes.math,
                      nodes.raw, nodes.problematic)
-    """Do apply smartquotes to instances of these inline nodes."""
+    """Do not change quotes in instances of these inline nodes."""
 
     smartquotes_action = 'qDe'
     """Setting to select smartquote transformations.
@@ -241,14 +240,14 @@ class SmartQuotes(Transform):
     def get_tokens(self, txtnodes):
         # A generator that yields ``(texttype, nodetext)`` tuples for a list
         # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
-        for node in txtnodes:
-            if (isinstance(node.parent, self.literal_nodes)
-                or isinstance(node.parent.parent, self.literal_nodes)):
-                yield ('literal', unicode(node))
-            else: 
-                # SmartQuotes uses backslash escapes instead of null-escapes
-                txt = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', unicode(node))
-                yield ('plain', txt)
+
+        texttype = {True: 'literal', # "literal" text is not changed:
+                    False: 'plain'}
+        for txtnode in txtnodes:
+            nodetype = texttype[isinstance(txtnode.parent,
+                                           self.literal_nodes)]
+            yield (nodetype, txtnode.astext())
+
 
     def apply(self):
         smart_quotes = self.document.settings.smart_quotes
diff --git a/docutils/test/test_transforms/test_smartquotes.py b/docutils/test/test_transforms/test_smartquotes.py
index 2604a1e40..7e9613b78 100644
--- a/docutils/test/test_transforms/test_smartquotes.py
+++ b/docutils/test/test_transforms/test_smartquotes.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# $Id: test_smartquotes.py 8190 2017-10-25 13:57:27Z milde $
+# $Id$
 #
 # :Copyright: © 2011 Günter Milde.
 # :Maintainer: docutils-develop@lists.sourceforge.net
@@ -24,8 +24,7 @@ from docutils.parsers.rst import Parser
 
 def suite():
     parser = Parser()
-    settings = {'smart_quotes': True,
-                'trim_footnote_ref_space': True}
+    settings = {'smart_quotes': True}
     s = DocutilsTestSupport.TransformTestSuite(
         parser, suite_settings=settings)
     s.generateTests(totest)
@@ -44,7 +43,7 @@ totest_de = {}
 totest_de_alt = {}
 totest_locales = {}
 
-totest['smartquotes'] = ((SmartQuotes,), [
+totest['transitions'] = ((SmartQuotes,), [
 ["""\
 Test "smart quotes", 'secondary smart quotes',
 "'nested' smart" quotes
@@ -57,7 +56,7 @@ u"""\
         “‘nested’ smart” quotes
         – and —also long— dashes.
 """],
-[r"""Escaped \"ASCII quotes\" and \'secondary ASCII quotes\'.
+[r"""Escaped \\"ASCII quotes\\" and \\'secondary ASCII quotes\\'.
 """,
 u"""\
 <document source="test data">
@@ -114,7 +113,6 @@ em space "a" 'a',
 NBSP "a" 'a',
 ZWSP\u200B"a" and\u200B'a',
 ZWNJ\u200C"a" and\u200C'a',
-escaped space\ "a" and\ 'a',
 
 &mdash;"a",&mdash;'a'
 en dash–"a"–'a',
@@ -133,7 +131,6 @@ u"""\
         NBSP “a” ‘a’,
         ZWSP\u200B“a” and\u200B‘a’,
         ZWNJ\u200C“a” and\u200C‘a’,
-        escaped space“a” and‘a’,
     <paragraph>
         &mdash;“a”,&mdash;‘a’
         en dash–“a”–‘a’,
@@ -199,7 +196,7 @@ Do not convert context-character at inline-tag boundaries
   and links to "targets_".
 
   Inside *"emphasized"* or other `inline "roles"`:
-  (``"string"``), (``'string'``), *\"betont\"*, \"*betont*".
+  (``"string"``), (``'string'``), *\\"betont\\"*, \\"*betont*".
 
   Do not drop characters from intra-word inline markup like
   *re*\ ``Structured``\ *Text*.
@@ -255,104 +252,6 @@ u"""\
             Text
         .
 """],
-[r"""
-Docutils escape mechanism uses the backslash:
-
-\Remove \non-escaped \backslashes\:
-\item \newline \tab \" \' \*.
-
-\ Remove-\ escaped-\ white\ space-\
-including-\ newlines.
-
-\\Keep\\escaped\\backslashes\\
-(but\\only\\one).
-
-\\ Keep \\ space\\ around  \\ backslashes.
-
-Keep backslashes ``\in\ literal``, :math:`in \mathrm{math}`,
-and :code:`in\ code`.
-
-Test around inline elements:\ [*]_
-
-*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
-
-*emphasized*, H\ :sub:`2`\ O and :math:`x^2`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. [*] and footnotes
-""",
-u"""\
-<document source="test data">
-    <paragraph>
-        Docutils escape mechanism uses the backslash:
-    <paragraph>
-        Remove non-escaped backslashes:
-        item newline tab " \' *.
-    <paragraph>
-        Remove-escaped-whitespace-including-newlines.
-    <paragraph>
-        \\Keep\\escaped\\backslashes\\
-        (but\\only\\one).
-    <paragraph>
-        \\ Keep \\ space\\ around  \\ backslashes.
-    <paragraph>
-        Keep backslashes \n\
-        <literal>
-            \\in\\ literal
-        , \n\
-        <math>
-            in \\mathrm{math}
-        ,
-        and \n\
-        <literal classes="code">
-            in\\ code
-        .
-    <paragraph>
-        Test around inline elements:
-        <footnote_reference auto="*" ids="id1">
-    <paragraph>
-        <emphasis>
-            emphasized
-        , H
-        <subscript>
-            2
-        O and \n\
-        <math>
-            x^2
-    <section ids="emphasized-h2o-and-x-2" names="emphasized,\\ h2o\\ and\\ x^2">
-        <title>
-            <emphasis>
-                emphasized
-            , H
-            <subscript>
-                2
-            O and \n\
-            <math>
-                x^2
-        <footnote auto="*" ids="id2">
-            <paragraph>
-                and footnotes
-"""],
-[r"""
-Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
-with backslash-escaped whitespace, including new\
-lines.
-""",
-"""\
-<document source="test data">
-    <paragraph>
-        Character-level m
-        <emphasis>
-            a
-        <strong>
-            r
-        <literal>
-            k
-        <title_reference>
-            u
-        p
-        with backslash-escaped whitespace, including newlines.
-"""],
 ["""\
 .. class:: language-de
 
@@ -388,7 +287,7 @@ u"""\
 """],
 ])
 
-totest_de['smartquotes'] = ((SmartQuotes,), [
+totest_de['transitions'] = ((SmartQuotes,), [
 ["""\
 German "smart quotes" and 'secondary smart quotes'.
 
@@ -405,7 +304,7 @@ u"""\
 """],
 ])
 
-totest_de_alt['smartquotes'] = ((SmartQuotes,), [
+totest_de_alt['transitions'] = ((SmartQuotes,), [
 ["""\
 Alternative German "smart quotes" and 'secondary smart quotes'.
 
@@ -434,7 +333,7 @@ u"""\
 """],
 ])
 
-totest_locales['smartquotes'] = ((SmartQuotes,), [
+totest_locales['transitions'] = ((SmartQuotes,), [
 ["""\
 German "smart quotes" and 'secondary smart quotes'.
author	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2018-11-21 13:58:51 +0000
committer	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2018-11-21 13:58:51 +0000
commit	556aec900cdaccbec3fce287e7af69e82d80ea53 (patch)
tree	e57f39d78e6a9af01eb5fc0f63237f85abe48476
parent	b95bd7805b9b5f859410758ce3c1ef6177e1a8cc (diff)
download	docutils-556aec900cdaccbec3fce287e7af69e82d80ea53.tar.gz