diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2023-01-17 15:40:43 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2023-01-17 15:40:43 +0000 |
commit | f88c02212ed484500c451fd30e58d000f777bfac (patch) | |
tree | 04828090c720163595be8528c7024d0c98194046 | |
parent | b1028c3e8389f51894f37829f63850e2aef07a6a (diff) | |
download | docutils-f88c02212ed484500c451fd30e58d000f777bfac.tar.gz |
More user-friendly DocInfo transform.
More detailed feedback when extracting data from a bibliographic field fails.
In bibliographic fields expecting a single paragraph (e.g. "author"),
restore enumerated lists originating in ambiguous markup like
"A. Einstein" (name with initial): in the given context, we know that this
should not be an enumeration, so fixing is better than throwing an error.
Fix some cases of "ersatz" Boolean values from pre-Python 2.3 times.
git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9322 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | docutils/docs/ref/rst/restructuredtext.txt | 17 | ||||
-rw-r--r-- | docutils/docutils/transforms/frontmatter.py | 50 | ||||
-rwxr-xr-x | docutils/test/test_transforms/test_docinfo.py | 12 |
3 files changed, 54 insertions, 25 deletions
diff --git a/docutils/docs/ref/rst/restructuredtext.txt b/docutils/docs/ref/rst/restructuredtext.txt index 254204d29..36edb1b70 100644 --- a/docutils/docs/ref/rst/restructuredtext.txt +++ b/docutils/docs/ref/rst/restructuredtext.txt @@ -764,11 +764,18 @@ one line. This text is parsed as an enumerated list item:: A. Einstein was a really smart dude. -If a single-line paragraph begins with text identical to an enumerator -("A.", "1.", "(b)", "I)", etc.), the first character will have to be -escaped in order to have the line parsed as an ordinary paragraph:: - - \A. Einstein was a really smart dude. +.. Caution:: + If a single-line paragraph begins with text identical to an enumerator + ("A.", "1.", "(b)", "I)", etc.), the first character will have to be + escaped in order to have the line parsed as an ordinary paragraph:: + + \A. Einstein was a really smart dude. + + Alternatively, you can escape the delimiter :: + + A\. Einstein was a really smart dude. + + or use a literal NO-BREAK SPACE after the initial. Examples of nested enumerated lists:: diff --git a/docutils/docutils/transforms/frontmatter.py b/docutils/docutils/transforms/frontmatter.py index 27bb6095e..2f4384f95 100644 --- a/docutils/docutils/transforms/frontmatter.py +++ b/docutils/docutils/transforms/frontmatter.py @@ -23,7 +23,7 @@ __docformat__ = 'reStructuredText' import re -from docutils import nodes, utils +from docutils import nodes, parsers, utils from docutils.transforms import TransformError, Transform @@ -451,24 +451,42 @@ class DocInfo(Transform): def check_empty_biblio_field(self, field, name): if len(field[-1]) < 1: field[-1] += self.document.reporter.warning( - 'Cannot extract empty bibliographic field "%s".' 
% name, + f'Cannot extract empty bibliographic field "{name}".', base_node=field) - return None - return 1 + return False + return True def check_compound_biblio_field(self, field, name): - if len(field[-1]) > 1: - field[-1] += self.document.reporter.warning( - 'Cannot extract compound bibliographic field "%s".' % name, - base_node=field) - return None - if not isinstance(field[-1][0], nodes.paragraph): - field[-1] += self.document.reporter.warning( - 'Cannot extract bibliographic field "%s" containing ' - 'anything other than a single paragraph.' % name, - base_node=field) - return None - return 1 + # Check that the `field` body contains a single paragraph + # (i.e. it must *not* be a compound element). + f_body = field[-1] + if len(f_body) == 1 and isinstance(f_body[0], nodes.paragraph): + return True + # Restore single author name with initial (E. Xampl) parsed as + # enumerated list + # https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#enumerated-lists + if (isinstance(f_body[0], nodes.enumerated_list) + and '\n' not in f_body.rawsource): + # parse into a dummy document and use created nodes + _document = utils.new_document('*DocInfo transform*', + field.document.settings) + parser = parsers.rst.Parser() + parser.parse('\\'+f_body.rawsource, _document) + if (len(_document.children) == 1 + and isinstance(_document.children[0], nodes.paragraph)): + f_body.children = _document.children + return True + # Check failed, add a warning + content = [f'<{e.tagname}>' for e in f_body.children] + if len(content) > 1: + content = '[' + ', '.join(content) + ']' + else: + content = 'a ' + content[0] + f_body += self.document.reporter.warning( + f'Bibliographic field "{name}"\nmust contain ' + f'a single <paragraph>, not {content}.', + base_node=field) + return False rcs_keyword_substitutions = [ (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+' diff --git a/docutils/test/test_transforms/test_docinfo.py 
b/docutils/test/test_transforms/test_docinfo.py index 557039879..400b7a68e 100755 --- a/docutils/test/test_transforms/test_docinfo.py +++ b/docutils/test/test_transforms/test_docinfo.py @@ -74,7 +74,7 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [ It is automatically moved to the end of the other bibliographic elements. -:Author: Me +:Author: E. *Xample* :Version: 1 :Date: 2001-08-11 :Parameter i: integer @@ -83,7 +83,9 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [ <document source="test data"> <docinfo> <author> - Me + E. \n\ + <emphasis> + Xample <version> 1 <date> @@ -178,7 +180,8 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [ must be a paragraph <system_message level="2" line="1" source="test data" type="WARNING"> <paragraph> - Cannot extract bibliographic field "Author" containing anything other than a single paragraph. + Bibliographic field "Author" + must contain a single <paragraph>, not a <bullet_list>. <status> a \n\ <emphasis> @@ -194,7 +197,8 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [ paragraph. <system_message level="2" line="3" source="test data" type="WARNING"> <paragraph> - Cannot extract compound bibliographic field "Date". + Bibliographic field "Date" + must contain a single <paragraph>, not [<paragraph>, <paragraph>]. <field classes="version"> <field_name> Version |