More user-friendly DocInfo transform.

More detailed feedback when extracting data from a bibliographic field fails.

In bibliographic fields expecting a single paragraph (e.g. "author"), restore enumerated lists originating in ambiguous markup like "A. Einstein" (name with initial): in the given context, we know that this should not be an enumeration, so fixing is better than throwing an error.

Fix some cases of "ersatz" Boolean values from pre-Python 2.3 times.

git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9322 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2023-01-17 15:40:43 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2023-01-17 15:40:43 +0000
commit: f88c02212ed484500c451fd30e58d000f777bfac (patch)
tree: 04828090c720163595be8528c7024d0c98194046
parent: b1028c3e8389f51894f37829f63850e2aef07a6a (diff)
download: docutils-f88c02212ed484500c451fd30e58d000f777bfac.tar.gz
3 files changed, 54 insertions, 25 deletions
diff --git a/docutils/docs/ref/rst/restructuredtext.txt b/docutils/docs/ref/rst/restructuredtext.txt
index 254204d29..36edb1b70 100644
--- a/docutils/docs/ref/rst/restructuredtext.txt
+++ b/docutils/docs/ref/rst/restructuredtext.txt
@@ -764,11 +764,18 @@ one line.  This text is parsed as an enumerated list item::
 
     A. Einstein was a really smart dude.
 
-If a single-line paragraph begins with text identical to an enumerator
-("A.", "1.", "(b)", "I)", etc.), the first character will have to be
-escaped in order to have the line parsed as an ordinary paragraph::
-
-    \A. Einstein was a really smart dude.
+.. Caution:: 
+    If a single-line paragraph begins with text identical to an enumerator
+    ("A.", "1.", "(b)", "I)", etc.), the first character will have to be
+    escaped in order to have the line parsed as an ordinary paragraph::
+    
+        \A. Einstein was a really smart dude.
+        
+    Alternatively, you can escape the delimiter ::
+    
+        A\. Einstein was a really smart dude.
+        
+    or use a literal NO-BREAK SPACE after the initial.    
 
 Examples of nested enumerated lists::
 
diff --git a/docutils/docutils/transforms/frontmatter.py b/docutils/docutils/transforms/frontmatter.py
index 27bb6095e..2f4384f95 100644
--- a/docutils/docutils/transforms/frontmatter.py
+++ b/docutils/docutils/transforms/frontmatter.py
@@ -23,7 +23,7 @@ __docformat__ = 'reStructuredText'
 
 import re
 
-from docutils import nodes, utils
+from docutils import nodes, parsers, utils
 from docutils.transforms import TransformError, Transform
 
 
@@ -451,24 +451,42 @@ class DocInfo(Transform):
     def check_empty_biblio_field(self, field, name):
         if len(field[-1]) < 1:
             field[-1] += self.document.reporter.warning(
-                  'Cannot extract empty bibliographic field "%s".' % name,
+                  f'Cannot extract empty bibliographic field "{name}".',
                   base_node=field)
-            return None
-        return 1
+            return False
+        return True
 
     def check_compound_biblio_field(self, field, name):
-        if len(field[-1]) > 1:
-            field[-1] += self.document.reporter.warning(
-                  'Cannot extract compound bibliographic field "%s".' % name,
-                  base_node=field)
-            return None
-        if not isinstance(field[-1][0], nodes.paragraph):
-            field[-1] += self.document.reporter.warning(
-                  'Cannot extract bibliographic field "%s" containing '
-                  'anything other than a single paragraph.' % name,
-                  base_node=field)
-            return None
-        return 1
+        # Check that the `field` body contains a single paragraph
+        # (i.e. it must *not* be a compound element).
+        f_body = field[-1]
+        if len(f_body) == 1 and isinstance(f_body[0], nodes.paragraph):
+            return True
+        # Restore single author name with initial (E. Xampl) parsed as
+        # enumerated list
+        # https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#enumerated-lists
+        if (isinstance(f_body[0], nodes.enumerated_list)
+            and '\n' not in f_body.rawsource):
+            # parse into a dummy document and use created nodes
+            _document = utils.new_document('*DocInfo transform*',
+                                           field.document.settings)
+            parser = parsers.rst.Parser()
+            parser.parse('\\'+f_body.rawsource, _document)
+            if (len(_document.children) == 1
+                and isinstance(_document.children[0], nodes.paragraph)):
+                f_body.children = _document.children
+                return True
+        # Check failed, add a warning
+        content = [f'<{e.tagname}>' for e in f_body.children]
+        if len(content) > 1:
+            content = '[' + ', '.join(content) + ']'
+        else:
+            content = 'a ' + content[0]
+        f_body += self.document.reporter.warning(
+                      f'Bibliographic field "{name}"\nmust contain '
+                      f'a single <paragraph>, not {content}.',
+                      base_node=field)
+        return False
 
     rcs_keyword_substitutions = [
           (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
diff --git a/docutils/test/test_transforms/test_docinfo.py b/docutils/test/test_transforms/test_docinfo.py
index 557039879..400b7a68e 100755
--- a/docutils/test/test_transforms/test_docinfo.py
+++ b/docutils/test/test_transforms/test_docinfo.py
@@ -74,7 +74,7 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
 
     It is automatically moved to the end of the other bibliographic elements.
 
-:Author: Me
+:Author: E. *Xample*
 :Version: 1
 :Date: 2001-08-11
 :Parameter i: integer
@@ -83,7 +83,9 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
 <document source="test data">
     <docinfo>
         <author>
-            Me
+            E. \n\
+            <emphasis>
+                Xample
         <version>
             1
         <date>
@@ -178,7 +180,8 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
                             must be a paragraph
                 <system_message level="2" line="1" source="test data" type="WARNING">
                     <paragraph>
-                        Cannot extract bibliographic field "Author" containing anything other than a single paragraph.
+                        Bibliographic field "Author"
+                        must contain a single <paragraph>, not a <bullet_list>.
         <status>
             a \n\
             <emphasis>
@@ -194,7 +197,8 @@ totest['bibliographic_field_lists'] = ((DocInfo,), [
                     paragraph.
                 <system_message level="2" line="3" source="test data" type="WARNING">
                     <paragraph>
-                        Cannot extract compound bibliographic field "Date".
+                        Bibliographic field "Date"
+                        must contain a single <paragraph>, not [<paragraph>, <paragraph>].
         <field classes="version">
             <field_name>
                 Version
author	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2023-01-17 15:40:43 +0000
committer	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2023-01-17 15:40:43 +0000
commit	f88c02212ed484500c451fd30e58d000f777bfac (patch)
tree	04828090c720163595be8528c7024d0c98194046
parent	b1028c3e8389f51894f37829f63850e2aef07a6a (diff)
download	docutils-f88c02212ed484500c451fd30e58d000f777bfac.tar.gz