DocInfo transform must not use "rawsource" attribute for escaping.

Remove the implementation of escaping author-separators in bibliographic fields that relies on the "rawsource" attribute. This is not safe (rawsource is only for information and debugging purposes). A proper fix can be done with null-escaped text in the doctree. Cf. https://sourceforge.net/p/docutils/bugs/_discuss/thread/c8f86be6/74ed/attachment/null-escape-in-doctree2.patch

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8231 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-20 23:55:14 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-20 23:55:14 +0000
commit: e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe (patch)
tree: 7f0d2ea809af8b54d049ac9df351de48c58b75c0
parent: fb23890d93c8d7449e0de9b75c539b0cb1856871 (diff)
download: docutils-e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe.tar.gz
8 files changed, 64 insertions, 84 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt
index 0e2425fbd..b4820af63 100644
--- a/docutils/HISTORY.txt
+++ b/docutils/HISTORY.txt
@@ -59,8 +59,6 @@ Changes Since 0.14
 * docutils/transforms/frontmatter.py:
 
   - Add field name as class argument to generic docinfo fields unconditionally.
-  - Ignore backslash-escaped separators when extracting authors from a
-    paragraph.
 
 * docutils/transforms/references.py:
 
@@ -85,7 +83,6 @@ Changes Since 0.14
 * docutils/utils/__init__.py:
 
   - Deprecate `unique_combinations` (obsoleted by `itertools.combinations`).
-  - New function `unescape_rawsource`.
 
 
 Release 0.14 (2017-08-03)
diff --git a/docutils/RELEASE-NOTES.txt b/docutils/RELEASE-NOTES.txt
index 476c9afd5..95ab96772 100644
--- a/docutils/RELEASE-NOTES.txt
+++ b/docutils/RELEASE-NOTES.txt
@@ -39,6 +39,10 @@ Future changes
 
 .. _rst2html.py: docs/user/tools.html#rst2html-py
 
+* Allow escaping of author-separators in `bibliographic fields`__.
+
+  __ docs/ref/rst/restructuredtext.html#bibliographic-fields
+
 
 Release 0.15b.dev
 =================
@@ -58,10 +62,6 @@ Release 0.15b.dev
 
   - Fixed a bug with the "trim" options of the "unicode" directive.
 
-  - Allow escaping of author-separators in `bibliographic fields`__.
-  
-    __ docs/ref/rst/restructuredtext.html#bibliographic-fields
-
 
 Release 0.14 (2017-08-03)
 =========================
diff --git a/docutils/docutils/parsers/rst/directives/misc.py b/docutils/docutils/parsers/rst/directives/misc.py
index 3015c6cae..66840ef31 100644
--- a/docutils/docutils/parsers/rst/directives/misc.py
+++ b/docutils/docutils/parsers/rst/directives/misc.py
@@ -323,7 +323,7 @@ class Unicode(Directive):
             except ValueError, error:
                 raise self.error(u'Invalid character code: %s\n%s'
                     % (code, ErrorString(error)))
-            element += nodes.Text(utils.unescape_rawsource(decoded), decoded)
+            element += nodes.Text(utils.unescape(decoded), decoded)
         return element.children
 
 
diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py
index 1dbcb2aa8..c7ee06f66 100644
--- a/docutils/docutils/parsers/rst/states.py
+++ b/docutils/docutils/parsers/rst/states.py
@@ -2859,17 +2859,12 @@ class Text(RSTState):
                 if len(parts) == 1:
                     node_list[-1] += node
                 else:
-                    rawtext = parts[0].rstrip()
-                    textnode = nodes.Text(utils.unescape_rawsource(rawtext))
-                    textnode.rawsource = rawtext
+                    text = parts[0].rstrip()
+                    textnode = nodes.Text(utils.unescape(text, True))
                     node_list[-1] += textnode
                     for part in parts[1:]:
-                        classifier_node = nodes.classifier(part,
-                                            utils.unescape_rawsource(part))
-                        # might be a reference or similar in the next node
-                        # then classifier_node is empty
-                        if len(classifier_node) > 0:
-                            classifier_node[0].rawsource = part
+                        classifier_node = nodes.classifier(
+                                            unescape(part, True), part)
                         node_list.append(classifier_node)
             else:
                 node_list[-1] += node
diff --git a/docutils/docutils/transforms/frontmatter.py b/docutils/docutils/transforms/frontmatter.py
index 041632274..8f7a72aa2 100644
--- a/docutils/docutils/transforms/frontmatter.py
+++ b/docutils/docutils/transforms/frontmatter.py
@@ -506,21 +506,19 @@ class DocInfo(Transform):
     def authors_from_one_paragraph(self, field):
         """Return list of Text nodes for ";"- or ","-separated authornames."""
         # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``)
-        rawnames = (node.rawsource or node.astext
-                    for node in field[1].traverse(nodes.Text))
-        text = ''.join(rawnames)
+        text = ''.join(unicode(node)
+                       for node in field[1].traverse(nodes.Text))
         if not text:
             raise TransformError
         for authorsep in self.language.author_separators:
             # don't split at escaped `authorsep`:
-            pattern = r'(?<=\\\\)%s|(?<!\\)%s' % (authorsep, authorsep)
+            pattern = '(?<!\x00)%s' % authorsep
             authornames = re.split(pattern, text)
             if len(authornames) > 1:
                 break
-        authornames = ((utils.unescape_rawsource(rawname).strip(),
-                        rawname.strip()) for rawname in authornames)
-        authors = [[nodes.Text(author, rawname)]
-                   for (author, rawname) in authornames if author]
+        authornames = (name.strip() for name in authornames)
+        authors = [[nodes.Text(name, utils.unescape(name, True))]
+                   for name in authornames if name]
         return authors
 
     def authors_from_bullet_list(self, field):
diff --git a/docutils/docutils/utils/__init__.py b/docutils/docutils/utils/__init__.py
index 914148b2d..dee90ff6d 100644
--- a/docutils/docutils/utils/__init__.py
+++ b/docutils/docutils/utils/__init__.py
@@ -581,6 +581,7 @@ def unescape(text, restore_backslashes=False, respect_whitespace=False):
     Return a string with nulls removed or restored to backslashes.
     Backslash-escaped spaces are also removed.
     """
+    # `respect_whitespace` is ignored (since introduction 2016-12-16)
     if restore_backslashes:
         return text.replace('\x00', '\\')
     else:
@@ -588,13 +589,6 @@ def unescape(text, restore_backslashes=False, respect_whitespace=False):
             text = ''.join(text.split(sep))
         return text
 
-def unescape_rawsource(text):
-    """Remove escape-backslashes and escaped whitespace."""
-    # remove escaped whitespace or backslash at end of text
-    text = re.sub(r'(?<!\\)\\([ \n]|$)', r'', text)
-    # remove backslash-escapes
-    return re.sub(r'\\(.)', r'\1', text)
-
 def split_escaped_whitespace(text):
     """
     Split `text` on escaped whitespace (null+space or null+newline).
diff --git a/docutils/test/test_transforms/test_docinfo.py b/docutils/test/test_transforms/test_docinfo.py
index 2a03d364e..df49b04bb 100755
--- a/docutils/test/test_transforms/test_docinfo.py
+++ b/docutils/test/test_transforms/test_docinfo.py
@@ -230,54 +230,54 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
             <author>
                 One, Only
 """],
-[r""":Authors: Me\, Myself; **I**
-:Authors: Pac\;Man\\; Ms. Pac\Man; Pac\ Man, Jr.
-:Authors:
-    Here
-
-    The\re
-
-    *Every\ where*
-:Authors: - First\\
-          - Se\ cond
-          - Thir\d
-""",
-"""\
-<document source="test data">
-    <docinfo>
-        <authors>
-            <author>
-                Me, Myself
-            <author>
-                I
-        <authors>
-            <author>
-                Pac;Man\\
-            <author>
-                Ms. PacMan
-            <author>
-                PacMan, Jr.
-        <authors>
-            <author>
-                Here
-            <author>
-                There
-            <author>
-                <emphasis>
-                    Everywhere
-        <authors>
-            <author>
-                First\\
-            <author>
-                Second
-            <author>
-                Third
-"""],
+# [r""":Authors: Me\, Myself; **I**
+# :Authors: Pac\;Man\\; Ms. Pac\Man; Pac\ Man, Jr.
+# :Authors:
+#     Here
+# 
+#     The\re
+# 
+#     *Every\ where*
+# :Authors: - First\\
+#           - Se\ cond
+#           - Thir\d
+# """,
+# """\
+# <document source="test data">
+#     <docinfo>
+#         <authors>
+#             <author>
+#                 Me, Myself
+#             <author>
+#                 I
+#         <authors>
+#             <author>
+#                 Pac;Man\\
+#             <author>
+#                 Ms. PacMan
+#             <author>
+#                 PacMan, Jr.
+#         <authors>
+#             <author>
+#                 Here
+#             <author>
+#                 There
+#             <author>
+#                 <emphasis>
+#                     Everywhere
+#         <authors>
+#             <author>
+#                 First\\
+#             <author>
+#                 Second
+#             <author>
+#                 Third
+# """],
 ["""\
 :Authors:
 
-:Authors: 1. One
-          2. Two
+:Authors: A. Einstein
+          B. Shaw
 
 :Authors:
     -
@@ -307,13 +307,13 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
             <field_name>
                 Authors
             <field_body>
-                <enumerated_list enumtype="arabic" prefix="" suffix=".">
+                <enumerated_list enumtype="upperalpha" prefix="" suffix=".">
                     <list_item>
                         <paragraph>
-                            One
+                            Einstein
                     <list_item>
                         <paragraph>
-                            Two
+                            Shaw
                 <system_message level="2" line="3" source="test data" type="WARNING">
                     <paragraph>
                         Bibliographic field "Authors" incompatible with extraction: it must contain either a single paragraph (with authors separated by one of ";,"), multiple paragraphs (one per author), or a bullet list with one paragraph (one author) per item.
diff --git a/docutils/test/test_utils.py b/docutils/test/test_utils.py
index 41ff7c64e..4f95f5847 100755
--- a/docutils/test/test_utils.py
+++ b/docutils/test/test_utils.py
@@ -337,9 +337,5 @@ class HelperFunctionTests(unittest.TestCase):
         restored = utils.unescape(self.nulled, restore_backslashes=True)
         self.assertEqual(restored, self.escaped)
 
-    def test_unescape_rawsource(self):
-        unescaped = utils.unescape_rawsource(self.escaped)
-        self.assertEqual(unescaped, self.unescaped)
-
 if __name__ == '__main__':
     unittest.main()
author	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2018-11-20 23:55:14 +0000
committer	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2018-11-20 23:55:14 +0000
commit	e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe (patch)
tree	7f0d2ea809af8b54d049ac9df351de48c58b75c0
parent	fb23890d93c8d7449e0de9b75c539b0cb1856871 (diff)
download	docutils-e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe.tar.gz