summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-20 23:55:14 +0000
committer milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-20 23:55:14 +0000
commit e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe (patch)
tree 7f0d2ea809af8b54d049ac9df351de48c58b75c0
parent fb23890d93c8d7449e0de9b75c539b0cb1856871 (diff)
download docutils-e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe.tar.gz
DocInfo transform must not use "rawsource" attribute for escaping.
Remove implementation of escaping author-separators in bibliographic fields that relies on the "rawsource" attribute. This is not safe (rawsource is only for information and debugging purposes). A proper fix can be done with null-escaped text in the doctree. Cf. https://sourceforge.net/p/docutils/bugs/_discuss/thread/c8f86be6/74ed/attachment/null-escape-in-doctree2.patch
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8231 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- docutils/HISTORY.txt 3
-rw-r--r-- docutils/RELEASE-NOTES.txt 8
-rw-r--r-- docutils/docutils/parsers/rst/directives/misc.py 2
-rw-r--r-- docutils/docutils/parsers/rst/states.py 13
-rw-r--r-- docutils/docutils/transforms/frontmatter.py 14
-rw-r--r-- docutils/docutils/utils/__init__.py 8
-rwxr-xr-x docutils/test/test_transforms/test_docinfo.py 96
-rwxr-xr-x docutils/test/test_utils.py 4
8 files changed, 64 insertions, 84 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt
index 0e2425fbd..b4820af63 100644
--- a/docutils/HISTORY.txt
+++ b/docutils/HISTORY.txt
@@ -59,8 +59,6 @@ Changes Since 0.14
* docutils/transforms/frontmatter.py:
- Add field name as class argument to generic docinfo fields unconditionally.
- - Ignore backslash-escaped separators when extracting authors from a
- paragraph.
* docutils/transforms/references.py:
@@ -85,7 +83,6 @@ Changes Since 0.14
* docutils/utils/__init__.py:
- Deprecate `unique_combinations` (obsoleted by `itertools.combinations`).
- - New function `unescape_rawsource`.
Release 0.14 (2017-08-03)
diff --git a/docutils/RELEASE-NOTES.txt b/docutils/RELEASE-NOTES.txt
index 476c9afd5..95ab96772 100644
--- a/docutils/RELEASE-NOTES.txt
+++ b/docutils/RELEASE-NOTES.txt
@@ -39,6 +39,10 @@ Future changes
.. _rst2html.py: docs/user/tools.html#rst2html-py
+* Allow escaping of author-separators in `bibliographic fields`__.
+
+ __ docs/ref/rst/restructuredtext.html#bibliographic-fields
+
Release 0.15b.dev
=================
@@ -58,10 +62,6 @@ Release 0.15b.dev
- Fixed a bug with the "trim" options of the "unicode" directive.
- - Allow escaping of author-separators in `bibliographic fields`__.
-
- __ docs/ref/rst/restructuredtext.html#bibliographic-fields
-
Release 0.14 (2017-08-03)
=========================
diff --git a/docutils/docutils/parsers/rst/directives/misc.py b/docutils/docutils/parsers/rst/directives/misc.py
index 3015c6cae..66840ef31 100644
--- a/docutils/docutils/parsers/rst/directives/misc.py
+++ b/docutils/docutils/parsers/rst/directives/misc.py
@@ -323,7 +323,7 @@ class Unicode(Directive):
except ValueError, error:
raise self.error(u'Invalid character code: %s\n%s'
% (code, ErrorString(error)))
- element += nodes.Text(utils.unescape_rawsource(decoded), decoded)
+ element += nodes.Text(utils.unescape(decoded), decoded)
return element.children
diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py
index 1dbcb2aa8..c7ee06f66 100644
--- a/docutils/docutils/parsers/rst/states.py
+++ b/docutils/docutils/parsers/rst/states.py
@@ -2859,17 +2859,12 @@ class Text(RSTState):
if len(parts) == 1:
node_list[-1] += node
else:
- rawtext = parts[0].rstrip()
- textnode = nodes.Text(utils.unescape_rawsource(rawtext))
- textnode.rawsource = rawtext
+ text = parts[0].rstrip()
+ textnode = nodes.Text(utils.unescape(text, True))
node_list[-1] += textnode
for part in parts[1:]:
- classifier_node = nodes.classifier(part,
- utils.unescape_rawsource(part))
- # might be a reference or similar in the next node
- # then classifier_node is empty
- if len(classifier_node) > 0:
- classifier_node[0].rawsource = part
+ classifier_node = nodes.classifier(
+ unescape(part, True), part)
node_list.append(classifier_node)
else:
node_list[-1] += node
diff --git a/docutils/docutils/transforms/frontmatter.py b/docutils/docutils/transforms/frontmatter.py
index 041632274..8f7a72aa2 100644
--- a/docutils/docutils/transforms/frontmatter.py
+++ b/docutils/docutils/transforms/frontmatter.py
@@ -506,21 +506,19 @@ class DocInfo(Transform):
def authors_from_one_paragraph(self, field):
"""Return list of Text nodes for ";"- or ","-separated authornames."""
# @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``)
- rawnames = (node.rawsource or node.astext
- for node in field[1].traverse(nodes.Text))
- text = ''.join(rawnames)
+ text = ''.join(unicode(node)
+ for node in field[1].traverse(nodes.Text))
if not text:
raise TransformError
for authorsep in self.language.author_separators:
# don't split at escaped `authorsep`:
- pattern = r'(?<=\\\\)%s|(?<!\\)%s' % (authorsep, authorsep)
+ pattern = '(?<!\x00)%s' % authorsep
authornames = re.split(pattern, text)
if len(authornames) > 1:
break
- authornames = ((utils.unescape_rawsource(rawname).strip(),
- rawname.strip()) for rawname in authornames)
- authors = [[nodes.Text(author, rawname)]
- for (author, rawname) in authornames if author]
+ authornames = (name.strip() for name in authornames)
+ authors = [[nodes.Text(name, utils.unescape(name, True))]
+ for name in authornames if name]
return authors
def authors_from_bullet_list(self, field):
diff --git a/docutils/docutils/utils/__init__.py b/docutils/docutils/utils/__init__.py
index 914148b2d..dee90ff6d 100644
--- a/docutils/docutils/utils/__init__.py
+++ b/docutils/docutils/utils/__init__.py
@@ -581,6 +581,7 @@ def unescape(text, restore_backslashes=False, respect_whitespace=False):
Return a string with nulls removed or restored to backslashes.
Backslash-escaped spaces are also removed.
"""
+ # `respect_whitespace` is ignored (since introduction 2016-12-16)
if restore_backslashes:
return text.replace('\x00', '\\')
else:
@@ -588,13 +589,6 @@ def unescape(text, restore_backslashes=False, respect_whitespace=False):
text = ''.join(text.split(sep))
return text
-def unescape_rawsource(text):
- """Remove escape-backslashes and escaped whitespace."""
- # remove escaped whitespace or backslash at end of text
- text = re.sub(r'(?<!\\)\\([ \n]|$)', r'', text)
- # remove backslash-escapes
- return re.sub(r'\\(.)', r'\1', text)
-
def split_escaped_whitespace(text):
"""
Split `text` on escaped whitespace (null+space or null+newline).
diff --git a/docutils/test/test_transforms/test_docinfo.py b/docutils/test/test_transforms/test_docinfo.py
index 2a03d364e..df49b04bb 100755
--- a/docutils/test/test_transforms/test_docinfo.py
+++ b/docutils/test/test_transforms/test_docinfo.py
@@ -230,54 +230,54 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
<author>
One, Only
"""],
-[r""":Authors: Me\, Myself; **I**
-:Authors: Pac\;Man\\; Ms. Pac\Man; Pac\ Man, Jr.
-:Authors:
- Here
-
- The\re
-
- *Every\ where*
-:Authors: - First\\
- - Se\ cond
- - Thir\d
-""",
-"""\
-<document source="test data">
- <docinfo>
- <authors>
- <author>
- Me, Myself
- <author>
- I
- <authors>
- <author>
- Pac;Man\\
- <author>
- Ms. PacMan
- <author>
- PacMan, Jr.
- <authors>
- <author>
- Here
- <author>
- There
- <author>
- <emphasis>
- Everywhere
- <authors>
- <author>
- First\\
- <author>
- Second
- <author>
- Third
-"""],
+# [r""":Authors: Me\, Myself; **I**
+# :Authors: Pac\;Man\\; Ms. Pac\Man; Pac\ Man, Jr.
+# :Authors:
+# Here
+#
+# The\re
+#
+# *Every\ where*
+# :Authors: - First\\
+# - Se\ cond
+# - Thir\d
+# """,
+# """\
+# <document source="test data">
+# <docinfo>
+# <authors>
+# <author>
+# Me, Myself
+# <author>
+# I
+# <authors>
+# <author>
+# Pac;Man\\
+# <author>
+# Ms. PacMan
+# <author>
+# PacMan, Jr.
+# <authors>
+# <author>
+# Here
+# <author>
+# There
+# <author>
+# <emphasis>
+# Everywhere
+# <authors>
+# <author>
+# First\\
+# <author>
+# Second
+# <author>
+# Third
+# """],
["""\
:Authors:
-:Authors: 1. One
- 2. Two
+:Authors: A. Einstein
+ B. Shaw
:Authors:
-
@@ -307,13 +307,13 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
<field_name>
Authors
<field_body>
- <enumerated_list enumtype="arabic" prefix="" suffix=".">
+ <enumerated_list enumtype="upperalpha" prefix="" suffix=".">
<list_item>
<paragraph>
- One
+ Einstein
<list_item>
<paragraph>
- Two
+ Shaw
<system_message level="2" line="3" source="test data" type="WARNING">
<paragraph>
Bibliographic field "Authors" incompatible with extraction: it must contain either a single paragraph (with authors separated by one of ";,"), multiple paragraphs (one per author), or a bullet list with one paragraph (one author) per item.
diff --git a/docutils/test/test_utils.py b/docutils/test/test_utils.py
index 41ff7c64e..4f95f5847 100755
--- a/docutils/test/test_utils.py
+++ b/docutils/test/test_utils.py
@@ -337,9 +337,5 @@ class HelperFunctionTests(unittest.TestCase):
restored = utils.unescape(self.nulled, restore_backslashes=True)
self.assertEqual(restored, self.escaped)
- def test_unescape_rawsource(self):
- unescaped = utils.unescape_rawsource(self.escaped)
- self.assertEqual(unescaped, self.unescaped)
-
if __name__ == '__main__':
unittest.main()