diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2018-11-20 23:55:14 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2018-11-20 23:55:14 +0000 |
commit | e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe (patch) | |
tree | 7f0d2ea809af8b54d049ac9df351de48c58b75c0 | |
parent | fb23890d93c8d7449e0de9b75c539b0cb1856871 (diff) | |
download | docutils-e5e93069b47d7e8b67c2dfc06c8bdc75a3e375fe.tar.gz |
DocInfo transform must not use "rawsource" attribute for escaping.
Remove implementation of escaping author-separators in bibliographic fields
that relies on the "rawsource" attribute.
Relying on "rawsource" is not safe (the attribute is only for information and debugging purposes).
A proper fix can be done with null-escaped text in the doctree.
Cf. https://sourceforge.net/p/docutils/bugs/_discuss/thread/c8f86be6/74ed/attachment/null-escape-in-doctree2.patch
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8231 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | docutils/HISTORY.txt | 3 | ||||
-rw-r--r-- | docutils/RELEASE-NOTES.txt | 8 | ||||
-rw-r--r-- | docutils/docutils/parsers/rst/directives/misc.py | 2 | ||||
-rw-r--r-- | docutils/docutils/parsers/rst/states.py | 13 | ||||
-rw-r--r-- | docutils/docutils/transforms/frontmatter.py | 14 | ||||
-rw-r--r-- | docutils/docutils/utils/__init__.py | 8 | ||||
-rwxr-xr-x | docutils/test/test_transforms/test_docinfo.py | 96 | ||||
-rwxr-xr-x | docutils/test/test_utils.py | 4 |
8 files changed, 64 insertions, 84 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt index 0e2425fbd..b4820af63 100644 --- a/docutils/HISTORY.txt +++ b/docutils/HISTORY.txt @@ -59,8 +59,6 @@ Changes Since 0.14 * docutils/transforms/frontmatter.py: - Add field name as class argument to generic docinfo fields unconditionally. - - Ignore backslash-escaped separators when extracting authors from a - paragraph. * docutils/transforms/references.py: @@ -85,7 +83,6 @@ Changes Since 0.14 * docutils/utils/__init__.py: - Deprecate `unique_combinations` (obsoleted by `itertools.combinations`). - - New function `unescape_rawsource`. Release 0.14 (2017-08-03) diff --git a/docutils/RELEASE-NOTES.txt b/docutils/RELEASE-NOTES.txt index 476c9afd5..95ab96772 100644 --- a/docutils/RELEASE-NOTES.txt +++ b/docutils/RELEASE-NOTES.txt @@ -39,6 +39,10 @@ Future changes .. _rst2html.py: docs/user/tools.html#rst2html-py +* Allow escaping of author-separators in `bibliographic fields`__. + + __ docs/ref/rst/restructuredtext.html#bibliographic-fields + Release 0.15b.dev ================= @@ -58,10 +62,6 @@ Release 0.15b.dev - Fixed a bug with the "trim" options of the "unicode" directive. - - Allow escaping of author-separators in `bibliographic fields`__. 
- - __ docs/ref/rst/restructuredtext.html#bibliographic-fields - Release 0.14 (2017-08-03) ========================= diff --git a/docutils/docutils/parsers/rst/directives/misc.py b/docutils/docutils/parsers/rst/directives/misc.py index 3015c6cae..66840ef31 100644 --- a/docutils/docutils/parsers/rst/directives/misc.py +++ b/docutils/docutils/parsers/rst/directives/misc.py @@ -323,7 +323,7 @@ class Unicode(Directive): except ValueError, error: raise self.error(u'Invalid character code: %s\n%s' % (code, ErrorString(error))) - element += nodes.Text(utils.unescape_rawsource(decoded), decoded) + element += nodes.Text(utils.unescape(decoded), decoded) return element.children diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py index 1dbcb2aa8..c7ee06f66 100644 --- a/docutils/docutils/parsers/rst/states.py +++ b/docutils/docutils/parsers/rst/states.py @@ -2859,17 +2859,12 @@ class Text(RSTState): if len(parts) == 1: node_list[-1] += node else: - rawtext = parts[0].rstrip() - textnode = nodes.Text(utils.unescape_rawsource(rawtext)) - textnode.rawsource = rawtext + text = parts[0].rstrip() + textnode = nodes.Text(utils.unescape(text, True)) node_list[-1] += textnode for part in parts[1:]: - classifier_node = nodes.classifier(part, - utils.unescape_rawsource(part)) - # might be a reference or similar in the next node - # then classifier_node is empty - if len(classifier_node) > 0: - classifier_node[0].rawsource = part + classifier_node = nodes.classifier( + unescape(part, True), part) node_list.append(classifier_node) else: node_list[-1] += node diff --git a/docutils/docutils/transforms/frontmatter.py b/docutils/docutils/transforms/frontmatter.py index 041632274..8f7a72aa2 100644 --- a/docutils/docutils/transforms/frontmatter.py +++ b/docutils/docutils/transforms/frontmatter.py @@ -506,21 +506,19 @@ class DocInfo(Transform): def authors_from_one_paragraph(self, field): """Return list of Text nodes for ";"- or ","-separated 
authornames.""" # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``) - rawnames = (node.rawsource or node.astext - for node in field[1].traverse(nodes.Text)) - text = ''.join(rawnames) + text = ''.join(unicode(node) + for node in field[1].traverse(nodes.Text)) if not text: raise TransformError for authorsep in self.language.author_separators: # don't split at escaped `authorsep`: - pattern = r'(?<=\\\\)%s|(?<!\\)%s' % (authorsep, authorsep) + pattern = '(?<!\x00)%s' % authorsep authornames = re.split(pattern, text) if len(authornames) > 1: break - authornames = ((utils.unescape_rawsource(rawname).strip(), - rawname.strip()) for rawname in authornames) - authors = [[nodes.Text(author, rawname)] - for (author, rawname) in authornames if author] + authornames = (name.strip() for name in authornames) + authors = [[nodes.Text(name, utils.unescape(name, True))] + for name in authornames if name] return authors def authors_from_bullet_list(self, field): diff --git a/docutils/docutils/utils/__init__.py b/docutils/docutils/utils/__init__.py index 914148b2d..dee90ff6d 100644 --- a/docutils/docutils/utils/__init__.py +++ b/docutils/docutils/utils/__init__.py @@ -581,6 +581,7 @@ def unescape(text, restore_backslashes=False, respect_whitespace=False): Return a string with nulls removed or restored to backslashes. Backslash-escaped spaces are also removed. 
""" + # `respect_whitespace` is ignored (since introduction 2016-12-16) if restore_backslashes: return text.replace('\x00', '\\') else: @@ -588,13 +589,6 @@ def unescape(text, restore_backslashes=False, respect_whitespace=False): text = ''.join(text.split(sep)) return text -def unescape_rawsource(text): - """Remove escape-backslashes and escaped whitespace.""" - # remove escaped whitespace or backslash at end of text - text = re.sub(r'(?<!\\)\\([ \n]|$)', r'', text) - # remove backslash-escapes - return re.sub(r'\\(.)', r'\1', text) - def split_escaped_whitespace(text): """ Split `text` on escaped whitespace (null+space or null+newline). diff --git a/docutils/test/test_transforms/test_docinfo.py b/docutils/test/test_transforms/test_docinfo.py index 2a03d364e..df49b04bb 100755 --- a/docutils/test/test_transforms/test_docinfo.py +++ b/docutils/test/test_transforms/test_docinfo.py @@ -230,54 +230,54 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [ <author> One, Only """], -[r""":Authors: Me\, Myself; **I** -:Authors: Pac\;Man\\; Ms. Pac\Man; Pac\ Man, Jr. -:Authors: - Here - - The\re - - *Every\ where* -:Authors: - First\\ - - Se\ cond - - Thir\d -""", -"""\ -<document source="test data"> - <docinfo> - <authors> - <author> - Me, Myself - <author> - I - <authors> - <author> - Pac;Man\\ - <author> - Ms. PacMan - <author> - PacMan, Jr. - <authors> - <author> - Here - <author> - There - <author> - <emphasis> - Everywhere - <authors> - <author> - First\\ - <author> - Second - <author> - Third -"""], +# [r""":Authors: Me\, Myself; **I** +# :Authors: Pac\;Man\\; Ms. Pac\Man; Pac\ Man, Jr. +# :Authors: +# Here +# +# The\re +# +# *Every\ where* +# :Authors: - First\\ +# - Se\ cond +# - Thir\d +# """, +# """\ +# <document source="test data"> +# <docinfo> +# <authors> +# <author> +# Me, Myself +# <author> +# I +# <authors> +# <author> +# Pac;Man\\ +# <author> +# Ms. PacMan +# <author> +# PacMan, Jr. 
+# <authors> +# <author> +# Here +# <author> +# There +# <author> +# <emphasis> +# Everywhere +# <authors> +# <author> +# First\\ +# <author> +# Second +# <author> +# Third +# """], ["""\ :Authors: -:Authors: 1. One - 2. Two +:Authors: A. Einstein + B. Shaw :Authors: - @@ -307,13 +307,13 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [ <field_name> Authors <field_body> - <enumerated_list enumtype="arabic" prefix="" suffix="."> + <enumerated_list enumtype="upperalpha" prefix="" suffix="."> <list_item> <paragraph> - One + Einstein <list_item> <paragraph> - Two + Shaw <system_message level="2" line="3" source="test data" type="WARNING"> <paragraph> Bibliographic field "Authors" incompatible with extraction: it must contain either a single paragraph (with authors separated by one of ";,"), multiple paragraphs (one per author), or a bullet list with one paragraph (one author) per item. diff --git a/docutils/test/test_utils.py b/docutils/test/test_utils.py index 41ff7c64e..4f95f5847 100755 --- a/docutils/test/test_utils.py +++ b/docutils/test/test_utils.py @@ -337,9 +337,5 @@ class HelperFunctionTests(unittest.TestCase): restored = utils.unescape(self.nulled, restore_backslashes=True) self.assertEqual(restored, self.escaped) - def test_unescape_rawsource(self): - unescaped = utils.unescape_rawsource(self.escaped) - self.assertEqual(unescaped, self.unescaped) - if __name__ == '__main__': unittest.main() |