Release 0.4: tagging released revisiondocutils-0.4

git-svn-id: http://svn.code.sf.net/p/docutils/code/tags/docutils-0.4@4268 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2006-01-09 20:44:25 +0000
committer: wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2006-01-09 20:44:25 +0000
commit: d77fdfef70e08114f57cbef5d91707df8717ea9f (patch)
tree: 49444e3486c0c333cb7b33dfa721296c08ee4ece /docutils/transforms/frontmatter.py
parent: 53cd16ca6ca5f638cbe5956988e88f9339e355cf (diff)
parent: 3993c4097756e9885bcfbd07cb1cc1e4e95e50e4 (diff)
download: docutils-0.4.tar.gz
1 files changed, 514 insertions, 0 deletions
diff --git a/docutils/transforms/frontmatter.py b/docutils/transforms/frontmatter.py
new file mode 100644
index 000000000..14a3aa8cc
--- /dev/null
+++ b/docutils/transforms/frontmatter.py
@@ -0,0 +1,514 @@
+# Authors: David Goodger, Ueli Schlaepfer
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Transforms related to the front matter of a document or a section
+(information found before the main text):
+
+- `DocTitle`: Used to transform a lone top level section's title to
+  the document title, promote a remaining lone top-level section's
+  title to the document subtitle, and determine the document's title
+  metadata (document['title']) based on the document title and/or the
+  "title" setting.
+
+- `SectionSubTitle`: Used to transform a lone subsection into a
+  subtitle.
+
+- `DocInfo`: Used to transform a bibliographic field list into docinfo
+  elements.
+"""
+
+__docformat__ = 'reStructuredText'
+
+import re
+from docutils import nodes, utils
+from docutils.transforms import TransformError, Transform
+
+
+class TitlePromoter(Transform):
+
+    """
+    Abstract base class for DocTitle and SectionSubTitle transforms.
+    """
+
+    def promote_title(self, node):
+        """
+        Transform the following tree::
+
+            <node>
+                <section>
+                    <title>
+                    ...
+
+        into ::
+
+            <node>
+                <title>
+                ...
+
+        `node` is normally a document.
+        """
+        # `node` must not have a title yet.
+        assert not (len(node) and isinstance(node[0], nodes.title))
+        section, index = self.candidate_index(node)
+        if index is None:
+            return None
+        # Transfer the section's attributes to the node:
+        node.attributes.update(section.attributes)
+        # setup_child is called automatically for all nodes.
+        node[:] = (section[:1]        # section title
+                   + node[:index]     # everything that was in the
+                                      # node before the section
+                   + section[1:])     # everything that was in the section
+        assert isinstance(node[0], nodes.title)
+        return 1
+
+    def promote_subtitle(self, node):
+        """
+        Transform the following node tree::
+
+            <node>
+                <title>
+                <section>
+                    <title>
+                    ...
+
+        into ::
+
+            <node>
+                <title>
+                <subtitle>
+                ...
+        """
+        subsection, index = self.candidate_index(node)
+        if index is None:
+            return None
+        subtitle = nodes.subtitle()
+        # Transfer the subsection's attributes to the new subtitle:
+        # This causes trouble with list attributes!  To do: Write a
+        # test case which catches direct access to the `attributes`
+        # dictionary and/or write a test case which shows problems in
+        # this particular case.
+        subtitle.attributes.update(subsection.attributes)
+        # We're losing the subtitle's attributes here!  To do: Write a
+        # test case which shows this behavior.
+        # Transfer the contents of the subsection's title to the
+        # subtitle:
+        subtitle[:] = subsection[0][:]
+        node[:] = (node[:1]       # title
+                   + [subtitle]
+                   # everything that was before the section:
+                   + node[1:index]
+                   # everything that was in the subsection:
+                   + subsection[1:])
+        return 1
+
+    def candidate_index(self, node):
+        """
+        Find and return the promotion candidate and its index.
+
+        Return (None, None) if no valid candidate was found.
+        """
+        index = node.first_child_not_matching_class(
+            nodes.PreBibliographic)
+        if index is None or len(node) > (index + 1) or \
+               not isinstance(node[index], nodes.section):
+            return None, None
+        else:
+            return node[index], index
+
+
+class DocTitle(TitlePromoter):
+
+    """
+    In reStructuredText_, there is no way to specify a document title
+    and subtitle explicitly. Instead, we can supply the document title
+    (and possibly the subtitle as well) implicitly, and use this
+    two-step transform to "raise" or "promote" the title(s) (and their
+    corresponding section contents) to the document level.
+
+    1. If the document contains a single top-level section as its
+       first non-comment element, the top-level section's title
+       becomes the document's title, and the top-level section's
+       contents become the document's immediate contents. The lone
+       top-level section header must be the first non-comment element
+       in the document.
+
+       For example, take this input text::
+
+           =================
+            Top-Level Title
+           =================
+
+           A paragraph.
+
+       Once parsed, it looks like this::
+
+           <document>
+               <section names="top-level title">
+                   <title>
+                       Top-Level Title
+                   <paragraph>
+                       A paragraph.
+
+       After running the DocTitle transform, we have::
+
+           <document names="top-level title">
+               <title>
+                   Top-Level Title
+               <paragraph>
+                   A paragraph.
+
+    2. If step 1 successfully determines the document title, we
+       continue by checking for a subtitle.
+
+       If the lone top-level section itself contains a single
+       second-level section as its first non-comment element, that
+       section's title is promoted to the document's subtitle, and
+       that section's contents become the document's immediate
+       contents. Given this input text::
+
+           =================
+            Top-Level Title
+           =================
+
+           Second-Level Title
+           ~~~~~~~~~~~~~~~~~~
+
+           A paragraph.
+
+       After parsing and running the Section Promotion transform, the
+       result is::
+
+           <document names="top-level title">
+               <title>
+                   Top-Level Title
+               <subtitle names="second-level title">
+                   Second-Level Title
+               <paragraph>
+                   A paragraph.
+
+       (Note that the implicit hyperlink target generated by the
+       "Second-Level Title" is preserved on the "subtitle" element
+       itself.)
+
+    Any comment elements occurring before the document title or
+    subtitle are accumulated and inserted as the first body elements
+    after the title(s).
+
+    This transform also sets the document's metadata title
+    (document['title']).
+
+    .. _reStructuredText: http://docutils.sf.net/rst.html
+    """
+
+    default_priority = 320
+
+    def set_metadata(self):
+        """
+        Set document['title'] metadata title from the following
+        sources, listed in order of priority:
+
+        * Existing document['title'] attribute.
+        * "title" setting.
+        * Document title node (as promoted by promote_title).
+        """
+        if not self.document.hasattr('title'):
+            if self.document.settings.title is not None:
+                self.document['title'] = self.document.settings.title
+            elif len(self.document) and isinstance(self.document[0], nodes.title):
+                self.document['title'] = self.document[0].astext()
+
+    def apply(self):
+        if getattr(self.document.settings, 'doctitle_xform', 1):
+            # promote_(sub)title defined in TitlePromoter base class.
+            if self.promote_title(self.document):
+                # If a title has been promoted, also try to promote a
+                # subtitle.
+                self.promote_subtitle(self.document)
+        # Set document['title'].
+        self.set_metadata()
+
+
+class SectionSubTitle(TitlePromoter):
+
+    """
+    This works like document subtitles, but for sections.  For example, ::
+
+        <section>
+            <title>
+                Title
+            <section>
+                <title>
+                    Subtitle
+                ...
+
+    is transformed into ::
+
+        <section>
+            <title>
+                Title
+            <subtitle>
+                Subtitle
+            ...
+
+    For details refer to the docstring of DocTitle.
+    """
+
+    default_priority = 350
+
+    def apply(self):
+        if not getattr(self.document.settings, 'sectsubtitle_xform', 1):
+            return
+        for section in self.document.traverse(nodes.section):
+            # On our way through the node tree, we are deleting
+            # sections, but we call self.promote_subtitle for those
+            # sections nonetheless.  To do: Write a test case which
+            # shows the problem and discuss on Docutils-develop.
+            self.promote_subtitle(section)
+
+
+class DocInfo(Transform):
+
+    """
+    This transform is specific to the reStructuredText_ markup syntax;
+    see "Bibliographic Fields" in the `reStructuredText Markup
+    Specification`_ for a high-level description. This transform
+    should be run *after* the `DocTitle` transform.
+
+    Given a field list as the first non-comment element after the
+    document title and subtitle (if present), registered bibliographic
+    field names are transformed to the corresponding DTD elements,
+    becoming child elements of the "docinfo" element (except for a
+    dedication and/or an abstract, which become "topic" elements after
+    "docinfo").
+
+    For example, given this document fragment after parsing::
+
+        <document>
+            <title>
+                Document Title
+            <field_list>
+                <field>
+                    <field_name>
+                        Author
+                    <field_body>
+                        <paragraph>
+                            A. Name
+                <field>
+                    <field_name>
+                        Status
+                    <field_body>
+                        <paragraph>
+                            $RCSfile$
+            ...
+
+    After running the bibliographic field list transform, the
+    resulting document tree would look like this::
+
+        <document>
+            <title>
+                Document Title
+            <docinfo>
+                <author>
+                    A. Name
+                <status>
+                    frontmatter.py
+            ...
+
+    The "Status" field contained an expanded RCS keyword, which is
+    normally (but optionally) cleaned up by the transform. The sole
+    contents of the field body must be a paragraph containing an
+    expanded RCS keyword of the form "$keyword: expansion text $". Any
+    RCS keyword can be processed in any bibliographic field. The
+    dollar signs and leading RCS keyword name are removed. Extra
+    processing is done for the following RCS keywords:
+
+    - "RCSfile" expands to the name of the file in the RCS or CVS
+      repository, which is the name of the source file with a ",v"
+      suffix appended. The transform will remove the ",v" suffix.
+
+    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
+      time zone). The RCS Keywords transform will extract just the
+      date itself and transform it to an ISO 8601 format date, as in
+      "2000-12-31".
+
+      (Since the source file for this text is itself stored under CVS,
+      we can't show an example of the "Date" RCS keyword because we
+      can't prevent any RCS keywords used in this explanation from
+      being expanded. Only the "RCSfile" keyword is stable; its
+      expansion text changes only if the file name changes.)
+
+    .. _reStructuredText: http://docutils.sf.net/rst.html
+    .. _reStructuredText Markup Specification:
+       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
+    """
+
+    default_priority = 340
+
+    biblio_nodes = {
+          'author': nodes.author,
+          'authors': nodes.authors,
+          'organization': nodes.organization,
+          'address': nodes.address,
+          'contact': nodes.contact,
+          'version': nodes.version,
+          'revision': nodes.revision,
+          'status': nodes.status,
+          'date': nodes.date,
+          'copyright': nodes.copyright,
+          'dedication': nodes.topic,
+          'abstract': nodes.topic}
+    """Canonical field name (lowcased) to node class name mapping for
+    bibliographic fields (field_list)."""
+
+    def apply(self):
+        if not getattr(self.document.settings, 'docinfo_xform', 1):
+            return
+        document = self.document
+        index = document.first_child_not_matching_class(
+              nodes.PreBibliographic)
+        if index is None:
+            return
+        candidate = document[index]
+        if isinstance(candidate, nodes.field_list):
+            biblioindex = document.first_child_not_matching_class(
+                  (nodes.Titular, nodes.Decorative))
+            nodelist = self.extract_bibliographic(candidate)
+            del document[index]         # untransformed field list (candidate)
+            document[biblioindex:biblioindex] = nodelist
+
+    def extract_bibliographic(self, field_list):
+        docinfo = nodes.docinfo()
+        bibliofields = self.language.bibliographic_fields
+        labels = self.language.labels
+        topics = {'dedication': None, 'abstract': None}
+        for field in field_list:
+            try:
+                name = field[0][0].astext()
+                normedname = nodes.fully_normalize_name(name)
+                if not (len(field) == 2 and bibliofields.has_key(normedname)
+                        and self.check_empty_biblio_field(field, name)):
+                    raise TransformError
+                canonical = bibliofields[normedname]
+                biblioclass = self.biblio_nodes[canonical]
+                if issubclass(biblioclass, nodes.TextElement):
+                    if not self.check_compound_biblio_field(field, name):
+                        raise TransformError
+                    utils.clean_rcs_keywords(
+                          field[1][0], self.rcs_keyword_substitutions)
+                    docinfo.append(biblioclass('', '', *field[1][0]))
+                elif issubclass(biblioclass, nodes.authors):
+                    self.extract_authors(field, name, docinfo)
+                elif issubclass(biblioclass, nodes.topic):
+                    if topics[canonical]:
+                        field[-1] += self.document.reporter.warning(
+                            'There can only be one "%s" field.' % name,
+                            base_node=field)
+                        raise TransformError
+                    title = nodes.title(name, labels[canonical])
+                    topics[canonical] = biblioclass(
+                        '', title, classes=[canonical], *field[1].children)
+                else:
+                    docinfo.append(biblioclass('', *field[1].children))
+            except TransformError:
+                if len(field[-1]) == 1 \
+                       and isinstance(field[-1][0], nodes.paragraph):
+                    utils.clean_rcs_keywords(
+                        field[-1][0], self.rcs_keyword_substitutions)
+                docinfo.append(field)
+        nodelist = []
+        if len(docinfo) != 0:
+            nodelist.append(docinfo)
+        for name in ('dedication', 'abstract'):
+            if topics[name]:
+                nodelist.append(topics[name])
+        return nodelist
+
+    def check_empty_biblio_field(self, field, name):
+        if len(field[-1]) < 1:
+            field[-1] += self.document.reporter.warning(
+                  'Cannot extract empty bibliographic field "%s".' % name,
+                  base_node=field)
+            return None
+        return 1
+
+    def check_compound_biblio_field(self, field, name):
+        if len(field[-1]) > 1:
+            field[-1] += self.document.reporter.warning(
+                  'Cannot extract compound bibliographic field "%s".' % name,
+                  base_node=field)
+            return None
+        if not isinstance(field[-1][0], nodes.paragraph):
+            field[-1] += self.document.reporter.warning(
+                  'Cannot extract bibliographic field "%s" containing '
+                  'anything other than a single paragraph.' % name,
+                  base_node=field)
+            return None
+        return 1
+
+    rcs_keyword_substitutions = [
+          (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
+                      r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
+          (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
+          (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]
+
+    def extract_authors(self, field, name, docinfo):
+        try:
+            if len(field[1]) == 1:
+                if isinstance(field[1][0], nodes.paragraph):
+                    authors = self.authors_from_one_paragraph(field)
+                elif isinstance(field[1][0], nodes.bullet_list):
+                    authors = self.authors_from_bullet_list(field)
+                else:
+                    raise TransformError
+            else:
+                authors = self.authors_from_paragraphs(field)
+            authornodes = [nodes.author('', '', *author)
+                           for author in authors if author]
+            if len(authornodes) >= 1:
+                docinfo.append(nodes.authors('', *authornodes))
+            else:
+                raise TransformError
+        except TransformError:
+            field[-1] += self.document.reporter.warning(
+                  'Bibliographic field "%s" incompatible with extraction: '
+                  'it must contain either a single paragraph (with authors '
+                  'separated by one of "%s"), multiple paragraphs (one per '
+                  'author), or a bullet list with one paragraph (one author) '
+                  'per item.'
+                  % (name, ''.join(self.language.author_separators)),
+                  base_node=field)
+            raise
+
+    def authors_from_one_paragraph(self, field):
+        text = field[1][0].astext().strip()
+        if not text:
+            raise TransformError
+        for authorsep in self.language.author_separators:
+            authornames = text.split(authorsep)
+            if len(authornames) > 1:
+                break
+        authornames = [author.strip() for author in authornames]
+        authors = [[nodes.Text(author)] for author in authornames if author]
+        return authors
+
+    def authors_from_bullet_list(self, field):
+        authors = []
+        for item in field[1][0]:
+            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
+                raise TransformError
+            authors.append(item[0].children)
+        if not authors:
+            raise TransformError
+        return authors
+
+    def authors_from_paragraphs(self, field):
+        for item in field[1]:
+            if not isinstance(item, nodes.paragraph):
+                raise TransformError
+        authors = [item.children for item in field[1]]
+        return authors
author	wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2006-01-09 20:44:25 +0000
committer	wiemann <wiemann@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2006-01-09 20:44:25 +0000
commit	d77fdfef70e08114f57cbef5d91707df8717ea9f (patch)
tree	49444e3486c0c333cb7b33dfa721296c08ee4ece /docutils/transforms/frontmatter.py
parent	53cd16ca6ca5f638cbe5956988e88f9339e355cf (diff)
parent	3993c4097756e9885bcfbd07cb1cc1e4e95e50e4 (diff)
download	docutils-0.4.tar.gz