diff options
Diffstat (limited to 'docutils/transforms')
-rw-r--r-- | docutils/transforms/__init__.py | 176 | ||||
-rw-r--r-- | docutils/transforms/components.py | 54 | ||||
-rw-r--r-- | docutils/transforms/frontmatter.py | 514 | ||||
-rw-r--r-- | docutils/transforms/misc.py | 145 | ||||
-rw-r--r-- | docutils/transforms/parts.py | 171 | ||||
-rw-r--r-- | docutils/transforms/peps.py | 306 | ||||
-rw-r--r-- | docutils/transforms/references.py | 906 | ||||
-rw-r--r-- | docutils/transforms/universal.py | 171 | ||||
-rw-r--r-- | docutils/transforms/writer_aux.py | 52 |
9 files changed, 2495 insertions, 0 deletions
diff --git a/docutils/transforms/__init__.py b/docutils/transforms/__init__.py new file mode 100644 index 000000000..36aa4e735 --- /dev/null +++ b/docutils/transforms/__init__.py @@ -0,0 +1,176 @@ +# Authors: David Goodger, Ueli Schlaepfer +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +This package contains modules for standard tree transforms available +to Docutils components. Tree transforms serve a variety of purposes: + +- To tie up certain syntax-specific "loose ends" that remain after the + initial parsing of the input plaintext. These transforms are used to + supplement a limited syntax. + +- To automate the internal linking of the document tree (hyperlink + references, footnote references, etc.). + +- To extract useful information from the document tree. These + transforms may be used to construct (for example) indexes and tables + of contents. + +Each transform is an optional step that a Docutils Reader may choose to +perform on the parsed document, depending on the input context. A Docutils +Reader may also perform Reader-specific transforms before or after performing +these standard transforms. +""" + +__docformat__ = 'reStructuredText' + + +from docutils import languages, ApplicationError, TransformSpec + + +class TransformError(ApplicationError): pass + + +class Transform: + + """ + Docutils transform component abstract base class. + """ + + default_priority = None + """Numerical priority of this transform, 0 through 999 (override).""" + + def __init__(self, document, startnode=None): + """ + Initial setup for in-place document transforms. + """ + + self.document = document + """The document tree to transform.""" + + self.startnode = startnode + """Node from which to begin the transform. For many transforms which + apply to the document as a whole, `startnode` is not set (i.e. 
its + value is `None`).""" + + self.language = languages.get_language( + document.settings.language_code) + """Language module local to this document.""" + + def apply(self, **kwargs): + """Override to apply the transform to the document tree.""" + raise NotImplementedError('subclass must override this method') + + +class Transformer(TransformSpec): + + """ + Stores transforms (`Transform` classes) and applies them to document + trees. Also keeps track of components by component type name. + """ + + def __init__(self, document): + self.transforms = [] + """List of transforms to apply. Each item is a 4-tuple: + ``(priority string, transform class, pending node or None, kwargs)``.""" + + self.unknown_reference_resolvers = [] + """List of hook functions which assist in resolving references""" + + self.document = document + """The `nodes.document` object this Transformer is attached to.""" + + self.applied = [] + """Transforms already applied, in order.""" + + self.sorted = 0 + """Boolean: is `self.transforms` sorted?""" + + self.components = {} + """Mapping of component type name to component object. Set by + `self.populate_from_components()`.""" + + self.serialno = 0 + """Internal serial number to keep track of the add order of + transforms.""" + + def add_transform(self, transform_class, priority=None, **kwargs): + """ + Store a single transform. Use `priority` to override the default. + `kwargs` is a dictionary whose contents are passed as keyword + arguments to the `apply` method of the transform. This can be used to + pass application-specific data to the transform instance. 
+ """ + if priority is None: + priority = transform_class.default_priority + priority_string = self.get_priority_string(priority) + self.transforms.append( + (priority_string, transform_class, None, kwargs)) + self.sorted = 0 + + def add_transforms(self, transform_list): + """Store multiple transforms, with default priorities.""" + for transform_class in transform_list: + priority_string = self.get_priority_string( + transform_class.default_priority) + self.transforms.append( + (priority_string, transform_class, None, {})) + self.sorted = 0 + + def add_pending(self, pending, priority=None): + """Store a transform with an associated `pending` node.""" + transform_class = pending.transform + if priority is None: + priority = transform_class.default_priority + priority_string = self.get_priority_string(priority) + self.transforms.append( + (priority_string, transform_class, pending, {})) + self.sorted = 0 + + def get_priority_string(self, priority): + """ + Return a string, `priority` combined with `self.serialno`. + + This ensures FIFO order on transforms with identical priority. + """ + self.serialno += 1 + return '%03d-%03d' % (priority, self.serialno) + + def populate_from_components(self, components): + """ + Store each component's default transforms, with default priorities. + Also, store components by type name in a mapping for later lookup. + """ + for component in components: + if component is None: + continue + self.add_transforms(component.get_transforms()) + self.components[component.component_type] = component + self.sorted = 0 + # Set up all of the reference resolvers for this transformer. Each + # component of this transformer is able to register its own helper + # functions to help resolve references. 
+ unknown_reference_resolvers = [] + for i in components: + unknown_reference_resolvers.extend(i.unknown_reference_resolvers) + decorated_list = [(f.priority, f) for f in unknown_reference_resolvers] + decorated_list.sort() + self.unknown_reference_resolvers.extend([f[1] for f in decorated_list]) + + def apply_transforms(self): + """Apply all of the stored transforms, in priority order.""" + self.document.reporter.attach_observer( + self.document.note_transform_message) + while self.transforms: + if not self.sorted: + # Unsorted initially, and whenever a transform is added. + self.transforms.sort() + self.transforms.reverse() + self.sorted = 1 + priority, transform_class, pending, kwargs = self.transforms.pop() + transform = transform_class(self.document, startnode=pending) + transform.apply(**kwargs) + self.applied.append((priority, transform_class, pending, kwargs)) diff --git a/docutils/transforms/components.py b/docutils/transforms/components.py new file mode 100644 index 000000000..1c3ecbef6 --- /dev/null +++ b/docutils/transforms/components.py @@ -0,0 +1,54 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Docutils component-related transforms. +""" + +__docformat__ = 'reStructuredText' + +import sys +import os +import re +import time +from docutils import nodes, utils +from docutils import ApplicationError, DataError +from docutils.transforms import Transform, TransformError + + +class Filter(Transform): + + """ + Include or exclude elements which depend on a specific Docutils component. + + For use with `nodes.pending` elements. A "pending" element's dictionary + attribute ``details`` must contain the keys "component" and "format". The + value of ``details['component']`` must match the type name of the + component the elements depend on (e.g. "writer"). 
The value of + ``details['format']`` is the name of a specific format or context of that + component (e.g. "html"). If the matching Docutils component supports that + format or context, the "pending" element is replaced by the contents of + ``details['nodes']`` (a list of nodes); otherwise, the "pending" element + is removed. + + For example, the reStructuredText "meta" directive creates a "pending" + element containing a "meta" element (in ``pending.details['nodes']``). + Only writers (``pending.details['component'] == 'writer'``) supporting the + "html" format (``pending.details['format'] == 'html'``) will include the + "meta" element; it will be deleted from the output of all other writers. + """ + + default_priority = 780 + + def apply(self): + pending = self.startnode + component_type = pending.details['component'] # 'reader' or 'writer' + format = pending.details['format'] + component = self.document.transformer.components[component_type] + if component.supports(format): + pending.replace_self(pending.details['nodes']) + else: + pending.parent.remove(pending) diff --git a/docutils/transforms/frontmatter.py b/docutils/transforms/frontmatter.py new file mode 100644 index 000000000..14a3aa8cc --- /dev/null +++ b/docutils/transforms/frontmatter.py @@ -0,0 +1,514 @@ +# Authors: David Goodger, Ueli Schlaepfer +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Transforms related to the front matter of a document or a section +(information found before the main text): + +- `DocTitle`: Used to transform a lone top level section's title to + the document title, promote a remaining lone top-level section's + title to the document subtitle, and determine the document's title + metadata (document['title']) based on the document title and/or the + "title" setting. + +- `SectionSubTitle`: Used to transform a lone subsection into a + subtitle. 
+ +- `DocInfo`: Used to transform a bibliographic field list into docinfo + elements. +""" + +__docformat__ = 'reStructuredText' + +import re +from docutils import nodes, utils +from docutils.transforms import TransformError, Transform + + +class TitlePromoter(Transform): + + """ + Abstract base class for DocTitle and SectionSubTitle transforms. + """ + + def promote_title(self, node): + """ + Transform the following tree:: + + <node> + <section> + <title> + ... + + into :: + + <node> + <title> + ... + + `node` is normally a document. + """ + # `node` must not have a title yet. + assert not (len(node) and isinstance(node[0], nodes.title)) + section, index = self.candidate_index(node) + if index is None: + return None + # Transfer the section's attributes to the node: + node.attributes.update(section.attributes) + # setup_child is called automatically for all nodes. + node[:] = (section[:1] # section title + + node[:index] # everything that was in the + # node before the section + + section[1:]) # everything that was in the section + assert isinstance(node[0], nodes.title) + return 1 + + def promote_subtitle(self, node): + """ + Transform the following node tree:: + + <node> + <title> + <section> + <title> + ... + + into :: + + <node> + <title> + <subtitle> + ... + """ + subsection, index = self.candidate_index(node) + if index is None: + return None + subtitle = nodes.subtitle() + # Transfer the subsection's attributes to the new subtitle: + # This causes trouble with list attributes! To do: Write a + # test case which catches direct access to the `attributes` + # dictionary and/or write a test case which shows problems in + # this particular case. + subtitle.attributes.update(subsection.attributes) + # We're losing the subtitle's attributes here! To do: Write a + # test case which shows this behavior. 
+ # Transfer the contents of the subsection's title to the + # subtitle: + subtitle[:] = subsection[0][:] + node[:] = (node[:1] # title + + [subtitle] + # everything that was before the section: + + node[1:index] + # everything that was in the subsection: + + subsection[1:]) + return 1 + + def candidate_index(self, node): + """ + Find and return the promotion candidate and its index. + + Return (None, None) if no valid candidate was found. + """ + index = node.first_child_not_matching_class( + nodes.PreBibliographic) + if index is None or len(node) > (index + 1) or \ + not isinstance(node[index], nodes.section): + return None, None + else: + return node[index], index + + +class DocTitle(TitlePromoter): + + """ + In reStructuredText_, there is no way to specify a document title + and subtitle explicitly. Instead, we can supply the document title + (and possibly the subtitle as well) implicitly, and use this + two-step transform to "raise" or "promote" the title(s) (and their + corresponding section contents) to the document level. + + 1. If the document contains a single top-level section as its + first non-comment element, the top-level section's title + becomes the document's title, and the top-level section's + contents become the document's immediate contents. The lone + top-level section header must be the first non-comment element + in the document. + + For example, take this input text:: + + ================= + Top-Level Title + ================= + + A paragraph. + + Once parsed, it looks like this:: + + <document> + <section names="top-level title"> + <title> + Top-Level Title + <paragraph> + A paragraph. + + After running the DocTitle transform, we have:: + + <document names="top-level title"> + <title> + Top-Level Title + <paragraph> + A paragraph. + + 2. If step 1 successfully determines the document title, we + continue by checking for a subtitle. 
+ + If the lone top-level section itself contains a single + second-level section as its first non-comment element, that + section's title is promoted to the document's subtitle, and + that section's contents become the document's immediate + contents. Given this input text:: + + ================= + Top-Level Title + ================= + + Second-Level Title + ~~~~~~~~~~~~~~~~~~ + + A paragraph. + + After parsing and running the Section Promotion transform, the + result is:: + + <document names="top-level title"> + <title> + Top-Level Title + <subtitle names="second-level title"> + Second-Level Title + <paragraph> + A paragraph. + + (Note that the implicit hyperlink target generated by the + "Second-Level Title" is preserved on the "subtitle" element + itself.) + + Any comment elements occurring before the document title or + subtitle are accumulated and inserted as the first body elements + after the title(s). + + This transform also sets the document's metadata title + (document['title']). + + .. _reStructuredText: http://docutils.sf.net/rst.html + """ + + default_priority = 320 + + def set_metadata(self): + """ + Set document['title'] metadata title from the following + sources, listed in order of priority: + + * Existing document['title'] attribute. + * "title" setting. + * Document title node (as promoted by promote_title). + """ + if not self.document.hasattr('title'): + if self.document.settings.title is not None: + self.document['title'] = self.document.settings.title + elif len(self.document) and isinstance(self.document[0], nodes.title): + self.document['title'] = self.document[0].astext() + + def apply(self): + if getattr(self.document.settings, 'doctitle_xform', 1): + # promote_(sub)title defined in TitlePromoter base class. + if self.promote_title(self.document): + # If a title has been promoted, also try to promote a + # subtitle. + self.promote_subtitle(self.document) + # Set document['title']. 
+ self.set_metadata() + + +class SectionSubTitle(TitlePromoter): + + """ + This works like document subtitles, but for sections. For example, :: + + <section> + <title> + Title + <section> + <title> + Subtitle + ... + + is transformed into :: + + <section> + <title> + Title + <subtitle> + Subtitle + ... + + For details refer to the docstring of DocTitle. + """ + + default_priority = 350 + + def apply(self): + if not getattr(self.document.settings, 'sectsubtitle_xform', 1): + return + for section in self.document.traverse(nodes.section): + # On our way through the node tree, we are deleting + # sections, but we call self.promote_subtitle for those + # sections nonetheless. To do: Write a test case which + # shows the problem and discuss on Docutils-develop. + self.promote_subtitle(section) + + +class DocInfo(Transform): + + """ + This transform is specific to the reStructuredText_ markup syntax; + see "Bibliographic Fields" in the `reStructuredText Markup + Specification`_ for a high-level description. This transform + should be run *after* the `DocTitle` transform. + + Given a field list as the first non-comment element after the + document title and subtitle (if present), registered bibliographic + field names are transformed to the corresponding DTD elements, + becoming child elements of the "docinfo" element (except for a + dedication and/or an abstract, which become "topic" elements after + "docinfo"). + + For example, given this document fragment after parsing:: + + <document> + <title> + Document Title + <field_list> + <field> + <field_name> + Author + <field_body> + <paragraph> + A. Name + <field> + <field_name> + Status + <field_body> + <paragraph> + $RCSfile$ + ... + + After running the bibliographic field list transform, the + resulting document tree would look like this:: + + <document> + <title> + Document Title + <docinfo> + <author> + A. Name + <status> + frontmatter.py + ... 
+ + The "Status" field contained an expanded RCS keyword, which is + normally (but optionally) cleaned up by the transform. The sole + contents of the field body must be a paragraph containing an + expanded RCS keyword of the form "$keyword: expansion text $". Any + RCS keyword can be processed in any bibliographic field. The + dollar signs and leading RCS keyword name are removed. Extra + processing is done for the following RCS keywords: + + - "RCSfile" expands to the name of the file in the RCS or CVS + repository, which is the name of the source file with a ",v" + suffix appended. The transform will remove the ",v" suffix. + + - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC + time zone). The RCS Keywords transform will extract just the + date itself and transform it to an ISO 8601 format date, as in + "2000-12-31". + + (Since the source file for this text is itself stored under CVS, + we can't show an example of the "Date" RCS keyword because we + can't prevent any RCS keywords used in this explanation from + being expanded. Only the "RCSfile" keyword is stable; its + expansion text changes only if the file name changes.) + + .. _reStructuredText: http://docutils.sf.net/rst.html + .. 
_reStructuredText Markup Specification: + http://docutils.sf.net/docs/ref/rst/restructuredtext.html + """ + + default_priority = 340 + + biblio_nodes = { + 'author': nodes.author, + 'authors': nodes.authors, + 'organization': nodes.organization, + 'address': nodes.address, + 'contact': nodes.contact, + 'version': nodes.version, + 'revision': nodes.revision, + 'status': nodes.status, + 'date': nodes.date, + 'copyright': nodes.copyright, + 'dedication': nodes.topic, + 'abstract': nodes.topic} + """Canonical field name (lowcased) to node class name mapping for + bibliographic fields (field_list).""" + + def apply(self): + if not getattr(self.document.settings, 'docinfo_xform', 1): + return + document = self.document + index = document.first_child_not_matching_class( + nodes.PreBibliographic) + if index is None: + return + candidate = document[index] + if isinstance(candidate, nodes.field_list): + biblioindex = document.first_child_not_matching_class( + (nodes.Titular, nodes.Decorative)) + nodelist = self.extract_bibliographic(candidate) + del document[index] # untransformed field list (candidate) + document[biblioindex:biblioindex] = nodelist + + def extract_bibliographic(self, field_list): + docinfo = nodes.docinfo() + bibliofields = self.language.bibliographic_fields + labels = self.language.labels + topics = {'dedication': None, 'abstract': None} + for field in field_list: + try: + name = field[0][0].astext() + normedname = nodes.fully_normalize_name(name) + if not (len(field) == 2 and bibliofields.has_key(normedname) + and self.check_empty_biblio_field(field, name)): + raise TransformError + canonical = bibliofields[normedname] + biblioclass = self.biblio_nodes[canonical] + if issubclass(biblioclass, nodes.TextElement): + if not self.check_compound_biblio_field(field, name): + raise TransformError + utils.clean_rcs_keywords( + field[1][0], self.rcs_keyword_substitutions) + docinfo.append(biblioclass('', '', *field[1][0])) + elif issubclass(biblioclass, 
nodes.authors): + self.extract_authors(field, name, docinfo) + elif issubclass(biblioclass, nodes.topic): + if topics[canonical]: + field[-1] += self.document.reporter.warning( + 'There can only be one "%s" field.' % name, + base_node=field) + raise TransformError + title = nodes.title(name, labels[canonical]) + topics[canonical] = biblioclass( + '', title, classes=[canonical], *field[1].children) + else: + docinfo.append(biblioclass('', *field[1].children)) + except TransformError: + if len(field[-1]) == 1 \ + and isinstance(field[-1][0], nodes.paragraph): + utils.clean_rcs_keywords( + field[-1][0], self.rcs_keyword_substitutions) + docinfo.append(field) + nodelist = [] + if len(docinfo) != 0: + nodelist.append(docinfo) + for name in ('dedication', 'abstract'): + if topics[name]: + nodelist.append(topics[name]) + return nodelist + + def check_empty_biblio_field(self, field, name): + if len(field[-1]) < 1: + field[-1] += self.document.reporter.warning( + 'Cannot extract empty bibliographic field "%s".' % name, + base_node=field) + return None + return 1 + + def check_compound_biblio_field(self, field, name): + if len(field[-1]) > 1: + field[-1] += self.document.reporter.warning( + 'Cannot extract compound bibliographic field "%s".' % name, + base_node=field) + return None + if not isinstance(field[-1][0], nodes.paragraph): + field[-1] += self.document.reporter.warning( + 'Cannot extract bibliographic field "%s" containing ' + 'anything other than a single paragraph.' 
% name, + base_node=field) + return None + return 1 + + rcs_keyword_substitutions = [ + (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+' + r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'), + (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'), + (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),] + + def extract_authors(self, field, name, docinfo): + try: + if len(field[1]) == 1: + if isinstance(field[1][0], nodes.paragraph): + authors = self.authors_from_one_paragraph(field) + elif isinstance(field[1][0], nodes.bullet_list): + authors = self.authors_from_bullet_list(field) + else: + raise TransformError + else: + authors = self.authors_from_paragraphs(field) + authornodes = [nodes.author('', '', *author) + for author in authors if author] + if len(authornodes) >= 1: + docinfo.append(nodes.authors('', *authornodes)) + else: + raise TransformError + except TransformError: + field[-1] += self.document.reporter.warning( + 'Bibliographic field "%s" incompatible with extraction: ' + 'it must contain either a single paragraph (with authors ' + 'separated by one of "%s"), multiple paragraphs (one per ' + 'author), or a bullet list with one paragraph (one author) ' + 'per item.' 
+ % (name, ''.join(self.language.author_separators)), + base_node=field) + raise + + def authors_from_one_paragraph(self, field): + text = field[1][0].astext().strip() + if not text: + raise TransformError + for authorsep in self.language.author_separators: + authornames = text.split(authorsep) + if len(authornames) > 1: + break + authornames = [author.strip() for author in authornames] + authors = [[nodes.Text(author)] for author in authornames if author] + return authors + + def authors_from_bullet_list(self, field): + authors = [] + for item in field[1][0]: + if len(item) != 1 or not isinstance(item[0], nodes.paragraph): + raise TransformError + authors.append(item[0].children) + if not authors: + raise TransformError + return authors + + def authors_from_paragraphs(self, field): + for item in field[1]: + if not isinstance(item, nodes.paragraph): + raise TransformError + authors = [item.children for item in field[1]] + return authors diff --git a/docutils/transforms/misc.py b/docutils/transforms/misc.py new file mode 100644 index 000000000..9567055f9 --- /dev/null +++ b/docutils/transforms/misc.py @@ -0,0 +1,145 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Miscellaneous transforms. +""" + +__docformat__ = 'reStructuredText' + +from docutils import nodes +from docutils.transforms import Transform, TransformError + + +class CallBack(Transform): + + """ + Inserts a callback into a document. The callback is called when the + transform is applied, which is determined by its priority. + + For use with `nodes.pending` elements. Requires a ``details['callback']`` + entry, a bound method or function which takes one parameter: the pending + node. Other data can be stored in the ``details`` attribute or in the + object hosting the callback method. 
+ """ + + default_priority = 990 + + def apply(self): + pending = self.startnode + pending.details['callback'](pending) + pending.parent.remove(pending) + + +class ClassAttribute(Transform): + + """ + Move the "class" attribute specified in the "pending" node into the + immediately following non-comment element. + """ + + default_priority = 210 + + def apply(self): + pending = self.startnode + parent = pending.parent + child = pending + while parent: + # Check for appropriate following siblings: + for index in range(parent.index(child) + 1, len(parent)): + element = parent[index] + if (isinstance(element, nodes.Invisible) or + isinstance(element, nodes.system_message)): + continue + element['classes'] += pending.details['class'] + pending.parent.remove(pending) + return + else: + # At end of section or container; apply to sibling + child = parent + parent = parent.parent + error = self.document.reporter.error( + 'No suitable element following "%s" directive' + % pending.details['directive'], + nodes.literal_block(pending.rawsource, pending.rawsource), + line=pending.line) + pending.replace_self(error) + + +class Transitions(Transform): + + """ + Move transitions at the end of sections up the tree. Complain + on transitions after a title, at the beginning or end of the + document, and after another transition. + + For example, transform this:: + + <section> + ... + <transition> + <section> + ... + + into this:: + + <section> + ... + <transition> + <section> + ... 
+ """ + + default_priority = 830 + + def apply(self): + for node in self.document.traverse(nodes.transition): + self.visit_transition(node) + + def visit_transition(self, node): + index = node.parent.index(node) + error = None + if (index == 0 or + isinstance(node.parent[0], nodes.title) and + (index == 1 or + isinstance(node.parent[1], nodes.subtitle) and + index == 2)): + assert (isinstance(node.parent, nodes.document) or + isinstance(node.parent, nodes.section)) + error = self.document.reporter.error( + 'Document or section may not begin with a transition.', + line=node.line) + elif isinstance(node.parent[index - 1], nodes.transition): + error = self.document.reporter.error( + 'At least one body element must separate transitions; ' + 'adjacent transitions are not allowed.', line=node.line) + if error: + # Insert before node and update index. + node.parent.insert(index, error) + index += 1 + assert index < len(node.parent) + if index != len(node.parent) - 1: + # No need to move the node. + return + # Node behind which the transition is to be moved. + sibling = node + # While sibling is the last node of its parent. + while index == len(sibling.parent) - 1: + sibling = sibling.parent + # If sibling is the whole document (i.e. it has no parent). + if sibling.parent is None: + # Transition at the end of document. Do not move the + # transition up, and place an error behind. + error = self.document.reporter.error( + 'Document may not end with a transition.', + line=node.line) + node.parent.insert(node.parent.index(node) + 1, error) + return + index = sibling.parent.index(sibling) + # Remove the original transition node. + node.parent.remove(node) + # Insert the transition after the sibling. 
+ sibling.parent.insert(index + 1, node) diff --git a/docutils/transforms/parts.py b/docutils/transforms/parts.py new file mode 100644 index 000000000..1e275c78a --- /dev/null +++ b/docutils/transforms/parts.py @@ -0,0 +1,171 @@ +# Authors: David Goodger, Ueli Schlaepfer, Dmitry Jemerov +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Transforms related to document parts. +""" + +__docformat__ = 'reStructuredText' + + +import re +import sys +from docutils import nodes, utils +from docutils.transforms import TransformError, Transform + + +class SectNum(Transform): + + """ + Automatically assigns numbers to the titles of document sections. + + It is possible to limit the maximum section level for which the numbers + are added. For those sections that are auto-numbered, the "auto" + attribute is set, informing the contents table generator that a different + form of the TOC should be used. 
+ """ + + default_priority = 710 + """Should be applied before `Contents`.""" + + def apply(self): + self.maxdepth = self.startnode.details.get('depth', sys.maxint) + self.startvalue = self.startnode.details.get('start', 1) + self.prefix = self.startnode.details.get('prefix', '') + self.suffix = self.startnode.details.get('suffix', '') + self.startnode.parent.remove(self.startnode) + if self.document.settings.sectnum_xform: + self.update_section_numbers(self.document) + + def update_section_numbers(self, node, prefix=(), depth=0): + depth += 1 + if prefix: + sectnum = 1 + else: + sectnum = self.startvalue + for child in node: + if isinstance(child, nodes.section): + numbers = prefix + (str(sectnum),) + title = child[0] + # Use non-breaking spaces (U+00A0) for spacing: + generated = nodes.generated( + '', (self.prefix + '.'.join(numbers) + self.suffix + + u'\u00a0' * 3), + classes=['sectnum']) + title.insert(0, generated) + title['auto'] = 1 + if depth < self.maxdepth: + self.update_section_numbers(child, numbers, depth) + sectnum += 1 + + +class Contents(Transform): + + """ + This transform generates a table of contents from the entire document tree + or from a single branch. It locates "section" elements and builds them + into a nested bullet list, which is placed within a "topic" created by the + contents directive. A title is either explicitly specified, taken from + the appropriate language module, or omitted (local table of contents). + The depth may be specified. Two-way references between the table of + contents and section titles are generated (requires Writer support). + + This transform requires a startnode, which contains generation + options and provides the location for the generated table of contents (the + startnode is replaced by the table of contents "topic"). 
+ """ + + default_priority = 720 + + def apply(self): + details = self.startnode.details + if details.has_key('local'): + startnode = self.startnode.parent.parent + while not (isinstance(startnode, nodes.section) + or isinstance(startnode, nodes.document)): + # find the ToC root: a direct ancestor of startnode + startnode = startnode.parent + else: + startnode = self.document + self.toc_id = self.startnode.parent['ids'][0] + if details.has_key('backlinks'): + self.backlinks = details['backlinks'] + else: + self.backlinks = self.document.settings.toc_backlinks + contents = self.build_contents(startnode) + if len(contents): + self.startnode.replace_self(contents) + else: + self.startnode.parent.parent.remove(self.startnode.parent) + + def build_contents(self, node, level=0): + level += 1 + sections = [] + i = len(node) - 1 + while i >= 0 and isinstance(node[i], nodes.section): + sections.append(node[i]) + i -= 1 + sections.reverse() + entries = [] + autonum = 0 + depth = self.startnode.details.get('depth', sys.maxint) + for section in sections: + title = section[0] + auto = title.get('auto') # May be set by SectNum. + entrytext = self.copy_and_filter(title) + reference = nodes.reference('', '', refid=section['ids'][0], + *entrytext) + ref_id = self.document.set_id(reference) + entry = nodes.paragraph('', '', reference) + item = nodes.list_item('', entry) + if ( self.backlinks in ('entry', 'top') + and title.next_node(nodes.reference) is None): + if self.backlinks == 'entry': + title['refid'] = ref_id + elif self.backlinks == 'top': + title['refid'] = self.toc_id + if level < depth: + subsects = self.build_contents(section, level) + item += subsects + entries.append(item) + if entries: + contents = nodes.bullet_list('', *entries) + if auto: + contents['classes'].append('auto-toc') + return contents + else: + return [] + + def copy_and_filter(self, node): + """Return a copy of a title, with references, images, etc. 
removed.""" + visitor = ContentsFilter(self.document) + node.walkabout(visitor) + return visitor.get_entry_text() + + +class ContentsFilter(nodes.TreeCopyVisitor): + + def get_entry_text(self): + return self.get_tree_copy().children + + def visit_citation_reference(self, node): + raise nodes.SkipNode + + def visit_footnote_reference(self, node): + raise nodes.SkipNode + + def visit_image(self, node): + if node.hasattr('alt'): + self.parent.append(nodes.Text(node['alt'])) + raise nodes.SkipNode + + def ignore_node_but_process_children(self, node): + raise nodes.SkipDeparture + + visit_interpreted = ignore_node_but_process_children + visit_problematic = ignore_node_but_process_children + visit_reference = ignore_node_but_process_children + visit_target = ignore_node_but_process_children diff --git a/docutils/transforms/peps.py b/docutils/transforms/peps.py new file mode 100644 index 000000000..edaba2557 --- /dev/null +++ b/docutils/transforms/peps.py @@ -0,0 +1,306 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Transforms for PEP processing. + +- `Headers`: Used to transform a PEP's initial RFC-2822 header. It remains a + field list, but some entries get processed. +- `Contents`: Auto-inserts a table of contents. +- `PEPZero`: Special processing for PEP 0. +""" + +__docformat__ = 'reStructuredText' + +import sys +import os +import re +import time +from docutils import nodes, utils, languages +from docutils import ApplicationError, DataError +from docutils.transforms import Transform, TransformError +from docutils.transforms import parts, references, misc + + +class Headers(Transform): + + """ + Process fields in a PEP's initial RFC-2822 header. 
+ """ + + default_priority = 360 + + pep_url = 'pep-%04d.html' + pep_cvs_url = ('http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/python/' + 'python/nondist/peps/pep-%04d.txt') + rcs_keyword_substitutions = ( + (re.compile(r'\$' r'RCSfile: (.+),v \$$', re.IGNORECASE), r'\1'), + (re.compile(r'\$[a-zA-Z]+: (.+) \$$'), r'\1'),) + + def apply(self): + if not len(self.document): + # @@@ replace these DataErrors with proper system messages + raise DataError('Document tree is empty.') + header = self.document[0] + if not isinstance(header, nodes.field_list) or \ + 'rfc2822' not in header['classes']: + raise DataError('Document does not begin with an RFC-2822 ' + 'header; it is not a PEP.') + pep = None + for field in header: + if field[0].astext().lower() == 'pep': # should be the first field + value = field[1].astext() + try: + pep = int(value) + cvs_url = self.pep_cvs_url % pep + except ValueError: + pep = value + cvs_url = None + msg = self.document.reporter.warning( + '"PEP" header must contain an integer; "%s" is an ' + 'invalid value.' % pep, base_node=field) + msgid = self.document.set_id(msg) + prb = nodes.problematic(value, value or '(none)', + refid=msgid) + prbid = self.document.set_id(prb) + msg.add_backref(prbid) + if len(field[1]): + field[1][0][:] = [prb] + else: + field[1] += nodes.paragraph('', '', prb) + break + if pep is None: + raise DataError('Document does not contain an RFC-2822 "PEP" ' + 'header.') + if pep == 0: + # Special processing for PEP 0. 
+ pending = nodes.pending(PEPZero) + self.document.insert(1, pending) + self.document.note_pending(pending) + if len(header) < 2 or header[1][0].astext().lower() != 'title': + raise DataError('No title!') + for field in header: + name = field[0].astext().lower() + body = field[1] + if len(body) > 1: + raise DataError('PEP header field body contains multiple ' + 'elements:\n%s' % field.pformat(level=1)) + elif len(body) == 1: + if not isinstance(body[0], nodes.paragraph): + raise DataError('PEP header field body may only contain ' + 'a single paragraph:\n%s' + % field.pformat(level=1)) + elif name == 'last-modified': + date = time.strftime( + '%d-%b-%Y', + time.localtime(os.stat(self.document['source'])[8])) + if cvs_url: + body += nodes.paragraph( + '', '', nodes.reference('', date, refuri=cvs_url)) + else: + # empty + continue + para = body[0] + if name == 'author': + for node in para: + if isinstance(node, nodes.reference): + node.replace_self(mask_email(node)) + elif name == 'discussions-to': + for node in para: + if isinstance(node, nodes.reference): + node.replace_self(mask_email(node, pep)) + elif name in ('replaces', 'replaced-by', 'requires'): + newbody = [] + space = nodes.Text(' ') + for refpep in re.split(',?\s+', body.astext()): + pepno = int(refpep) + newbody.append(nodes.reference( + refpep, refpep, + refuri=(self.document.settings.pep_base_url + + self.pep_url % pepno))) + newbody.append(space) + para[:] = newbody[:-1] # drop trailing space + elif name == 'last-modified': + utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions) + if cvs_url: + date = para.astext() + para[:] = [nodes.reference('', date, refuri=cvs_url)] + elif name == 'content-type': + pep_type = para.astext() + uri = self.document.settings.pep_base_url + self.pep_url % 12 + para[:] = [nodes.reference('', pep_type, refuri=uri)] + elif name == 'version' and len(body): + utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions) + + +class Contents(Transform): + + """ + 
Insert an empty table of contents topic and a transform placeholder into + the document after the RFC 2822 header. + """ + + default_priority = 380 + + def apply(self): + language = languages.get_language(self.document.settings.language_code) + name = language.labels['contents'] + title = nodes.title('', name) + topic = nodes.topic('', title, classes=['contents']) + name = nodes.fully_normalize_name(name) + if not self.document.has_name(name): + topic['names'].append(name) + self.document.note_implicit_target(topic) + pending = nodes.pending(parts.Contents) + topic += pending + self.document.insert(1, topic) + self.document.note_pending(pending) + + +class TargetNotes(Transform): + + """ + Locate the "References" section, insert a placeholder for an external + target footnote insertion transform at the end, and schedule the + transform to run immediately. + """ + + default_priority = 520 + + def apply(self): + doc = self.document + i = len(doc) - 1 + refsect = copyright = None + while i >= 0 and isinstance(doc[i], nodes.section): + title_words = doc[i][0].astext().lower().split() + if 'references' in title_words: + refsect = doc[i] + break + elif 'copyright' in title_words: + copyright = i + i -= 1 + if not refsect: + refsect = nodes.section() + refsect += nodes.title('', 'References') + doc.set_id(refsect) + if copyright: + # Put the new "References" section before "Copyright": + doc.insert(copyright, refsect) + else: + # Put the new "References" section at end of doc: + doc.append(refsect) + pending = nodes.pending(references.TargetNotes) + refsect.append(pending) + self.document.note_pending(pending, 0) + pending = nodes.pending(misc.CallBack, + details={'callback': self.cleanup_callback}) + refsect.append(pending) + self.document.note_pending(pending, 1) + + def cleanup_callback(self, pending): + """ + Remove an empty "References" section. + + Called after the `references.TargetNotes` transform is complete. 
+ """ + if len(pending.parent) == 2: # <title> and <pending> + pending.parent.parent.remove(pending.parent) + + +class PEPZero(Transform): + + """ + Special processing for PEP 0. + """ + + default_priority =760 + + def apply(self): + visitor = PEPZeroSpecial(self.document) + self.document.walk(visitor) + self.startnode.parent.remove(self.startnode) + + +class PEPZeroSpecial(nodes.SparseNodeVisitor): + + """ + Perform the special processing needed by PEP 0: + + - Mask email addresses. + + - Link PEP numbers in the second column of 4-column tables to the PEPs + themselves. + """ + + pep_url = Headers.pep_url + + def unknown_visit(self, node): + pass + + def visit_reference(self, node): + node.replace_self(mask_email(node)) + + def visit_field_list(self, node): + if 'rfc2822' in node['classes']: + raise nodes.SkipNode + + def visit_tgroup(self, node): + self.pep_table = node['cols'] == 4 + self.entry = 0 + + def visit_colspec(self, node): + self.entry += 1 + if self.pep_table and self.entry == 2: + node['classes'].append('num') + + def visit_row(self, node): + self.entry = 0 + + def visit_entry(self, node): + self.entry += 1 + if self.pep_table and self.entry == 2 and len(node) == 1: + node['classes'].append('num') + p = node[0] + if isinstance(p, nodes.paragraph) and len(p) == 1: + text = p.astext() + try: + pep = int(text) + ref = (self.document.settings.pep_base_url + + self.pep_url % pep) + p[0] = nodes.reference(text, text, refuri=ref) + except ValueError: + pass + + +non_masked_addresses = ('peps@python.org', + 'python-list@python.org', + 'python-dev@python.org') + +def mask_email(ref, pepno=None): + """ + Mask the email address in `ref` and return a replacement node. + + `ref` is returned unchanged if it contains no email address. + + For email addresses such as "user@host", mask the address as "user at + host" (text) to thwart simple email address harvesters (except for those + listed in `non_masked_addresses`). 
If a PEP number (`pepno`) is given, + return a reference including a default email subject. + """ + if ref.hasattr('refuri') and ref['refuri'].startswith('mailto:'): + if ref['refuri'][8:] in non_masked_addresses: + replacement = ref[0] + else: + replacement_text = ref.astext().replace('@', ' at ') + replacement = nodes.raw('', replacement_text, format='html') + if pepno is None: + return replacement + else: + ref['refuri'] += '?subject=PEP%%20%s' % pepno + ref[:] = [replacement] + return ref + else: + return ref diff --git a/docutils/transforms/references.py b/docutils/transforms/references.py new file mode 100644 index 000000000..c7e17b50e --- /dev/null +++ b/docutils/transforms/references.py @@ -0,0 +1,906 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Transforms for resolving references. +""" + +__docformat__ = 'reStructuredText' + +import sys +import re +from docutils import nodes, utils +from docutils.transforms import TransformError, Transform + + +class PropagateTargets(Transform): + + """ + Propagate empty internal targets to the next element. + + Given the following nodes:: + + <target ids="internal1" names="internal1"> + <target anonymous="1" ids="id1"> + <target ids="internal2" names="internal2"> + <paragraph> + This is a test. + + PropagateTargets propagates the ids and names of the internal + targets preceding the paragraph to the paragraph itself:: + + <target refid="internal1"> + <target anonymous="1" refid="id1"> + <target refid="internal2"> + <paragraph ids="internal2 id1 internal1" names="internal2 internal1"> + This is a test. + """ + + default_priority = 260 + + def apply(self): + for target in self.document.traverse(nodes.target): + # Only block-level targets without reference (like ".. 
target:"): + if (isinstance(target.parent, nodes.TextElement) or + (target.hasattr('refid') or target.hasattr('refuri') or + target.hasattr('refname'))): + continue + assert len(target) == 0, 'error: block-level target has children' + next_node = target.next_node(ascend=1) + # Do not move names and ids into Invisibles (we'd lose the + # attributes) or different Targetables (e.g. footnotes). + if (next_node is not None and + ((not isinstance(next_node, nodes.Invisible) and + not isinstance(next_node, nodes.Targetable)) or + isinstance(next_node, nodes.target))): + next_node['ids'].extend(target['ids']) + next_node['names'].extend(target['names']) + # Set defaults for next_node.expect_referenced_by_name/id. + if not hasattr(next_node, 'expect_referenced_by_name'): + next_node.expect_referenced_by_name = {} + if not hasattr(next_node, 'expect_referenced_by_id'): + next_node.expect_referenced_by_id = {} + for id in target['ids']: + # Update IDs to node mapping. + self.document.ids[id] = next_node + # If next_node is referenced by id ``id``, this + # target shall be marked as referenced. + next_node.expect_referenced_by_id[id] = target + for name in target['names']: + next_node.expect_referenced_by_name[name] = target + # If there are any expect_referenced_by_... attributes + # in target set, copy them to next_node. + next_node.expect_referenced_by_name.update( + getattr(target, 'expect_referenced_by_name', {})) + next_node.expect_referenced_by_id.update( + getattr(target, 'expect_referenced_by_id', {})) + # Set refid to point to the first former ID of target + # which is now an ID of next_node. + target['refid'] = target['ids'][0] + # Clear ids and names; they have been moved to + # next_node. + target['ids'] = [] + target['names'] = [] + self.document.note_refid(target) + + +class AnonymousHyperlinks(Transform): + + """ + Link anonymous references to targets. 
Given:: + + <paragraph> + <reference anonymous="1"> + internal + <reference anonymous="1"> + external + <target anonymous="1" ids="id1"> + <target anonymous="1" ids="id2" refuri="http://external"> + + Corresponding references are linked via "refid" or resolved via "refuri":: + + <paragraph> + <reference anonymous="1" refid="id1"> + text + <reference anonymous="1" refuri="http://external"> + external + <target anonymous="1" ids="id1"> + <target anonymous="1" ids="id2" refuri="http://external"> + """ + + default_priority = 440 + + def apply(self): + anonymous_refs = [] + anonymous_targets = [] + for node in self.document.traverse(nodes.reference): + if node.get('anonymous'): + anonymous_refs.append(node) + for node in self.document.traverse(nodes.target): + if node.get('anonymous'): + anonymous_targets.append(node) + if len(anonymous_refs) \ + != len(anonymous_targets): + msg = self.document.reporter.error( + 'Anonymous hyperlink mismatch: %s references but %s ' + 'targets.\nSee "backrefs" attribute for IDs.' + % (len(anonymous_refs), len(anonymous_targets))) + msgid = self.document.set_id(msg) + for ref in anonymous_refs: + prb = nodes.problematic( + ref.rawsource, ref.rawsource, refid=msgid) + prbid = self.document.set_id(prb) + msg.add_backref(prbid) + ref.replace_self(prb) + return + for ref, target in zip(anonymous_refs, anonymous_targets): + target.referenced = 1 + while 1: + if target.hasattr('refuri'): + ref['refuri'] = target['refuri'] + ref.resolved = 1 + break + else: + if not target['ids']: + # Propagated target. 
+ target = self.document.ids[target['refid']] + continue + ref['refid'] = target['ids'][0] + self.document.note_refid(ref) + break + + +class IndirectHyperlinks(Transform): + + """ + a) Indirect external references:: + + <paragraph> + <reference refname="indirect external"> + indirect external + <target id="id1" name="direct external" + refuri="http://indirect"> + <target id="id2" name="indirect external" + refname="direct external"> + + The "refuri" attribute is migrated back to all indirect targets + from the final direct target (i.e. a target not referring to + another indirect target):: + + <paragraph> + <reference refname="indirect external"> + indirect external + <target id="id1" name="direct external" + refuri="http://indirect"> + <target id="id2" name="indirect external" + refuri="http://indirect"> + + Once the attribute is migrated, the preexisting "refname" attribute + is dropped. + + b) Indirect internal references:: + + <target id="id1" name="final target"> + <paragraph> + <reference refname="indirect internal"> + indirect internal + <target id="id2" name="indirect internal 2" + refname="final target"> + <target id="id3" name="indirect internal" + refname="indirect internal 2"> + + Targets which indirectly refer to an internal target become one-hop + indirect (their "refid" attributes are directly set to the internal + target's "id"). 
References which indirectly refer to an internal + target become direct internal references:: + + <target id="id1" name="final target"> + <paragraph> + <reference refid="id1"> + indirect internal + <target id="id2" name="indirect internal 2" refid="id1"> + <target id="id3" name="indirect internal" refid="id1"> + """ + + default_priority = 460 + + def apply(self): + for target in self.document.indirect_targets: + if not target.resolved: + self.resolve_indirect_target(target) + self.resolve_indirect_references(target) + + def resolve_indirect_target(self, target): + refname = target.get('refname') + if refname is None: + reftarget_id = target['refid'] + else: + reftarget_id = self.document.nameids.get(refname) + if not reftarget_id: + # Check the unknown_reference_resolvers + for resolver_function in \ + self.document.transformer.unknown_reference_resolvers: + if resolver_function(target): + break + else: + self.nonexistent_indirect_target(target) + return + reftarget = self.document.ids[reftarget_id] + reftarget.note_referenced_by(id=reftarget_id) + if isinstance(reftarget, nodes.target) \ + and not reftarget.resolved and reftarget.hasattr('refname'): + if hasattr(target, 'multiply_indirect'): + #and target.multiply_indirect): + #del target.multiply_indirect + self.circular_indirect_reference(target) + return + target.multiply_indirect = 1 + self.resolve_indirect_target(reftarget) # multiply indirect + del target.multiply_indirect + if reftarget.hasattr('refuri'): + target['refuri'] = reftarget['refuri'] + if target.has_key('refid'): + del target['refid'] + elif reftarget.hasattr('refid'): + target['refid'] = reftarget['refid'] + self.document.note_refid(target) + else: + if reftarget['ids']: + target['refid'] = reftarget_id + self.document.note_refid(target) + else: + self.nonexistent_indirect_target(target) + return + if refname is not None: + del target['refname'] + target.resolved = 1 + + def nonexistent_indirect_target(self, target): + if 
self.document.nameids.has_key(target['refname']): + self.indirect_target_error(target, 'which is a duplicate, and ' + 'cannot be used as a unique reference') + else: + self.indirect_target_error(target, 'which does not exist') + + def circular_indirect_reference(self, target): + self.indirect_target_error(target, 'forming a circular reference') + + def indirect_target_error(self, target, explanation): + naming = '' + reflist = [] + if target['names']: + naming = '"%s" ' % target['names'][0] + for name in target['names']: + reflist.extend(self.document.refnames.get(name, [])) + for id in target['ids']: + reflist.extend(self.document.refids.get(id, [])) + naming += '(id="%s")' % target['ids'][0] + msg = self.document.reporter.error( + 'Indirect hyperlink target %s refers to target "%s", %s.' + % (naming, target['refname'], explanation), base_node=target) + msgid = self.document.set_id(msg) + for ref in uniq(reflist): + prb = nodes.problematic( + ref.rawsource, ref.rawsource, refid=msgid) + prbid = self.document.set_id(prb) + msg.add_backref(prbid) + ref.replace_self(prb) + target.resolved = 1 + + def resolve_indirect_references(self, target): + if target.hasattr('refid'): + attname = 'refid' + call_method = self.document.note_refid + elif target.hasattr('refuri'): + attname = 'refuri' + call_method = None + else: + return + attval = target[attname] + for name in target['names']: + reflist = self.document.refnames.get(name, []) + if reflist: + target.note_referenced_by(name=name) + for ref in reflist: + if ref.resolved: + continue + del ref['refname'] + ref[attname] = attval + if call_method: + call_method(ref) + ref.resolved = 1 + if isinstance(ref, nodes.target): + self.resolve_indirect_references(ref) + for id in target['ids']: + reflist = self.document.refids.get(id, []) + if reflist: + target.note_referenced_by(id=id) + for ref in reflist: + if ref.resolved: + continue + del ref['refid'] + ref[attname] = attval + if call_method: + call_method(ref) + ref.resolved 
= 1 + if isinstance(ref, nodes.target): + self.resolve_indirect_references(ref) + + +class ExternalTargets(Transform): + + """ + Given:: + + <paragraph> + <reference refname="direct external"> + direct external + <target id="id1" name="direct external" refuri="http://direct"> + + The "refname" attribute is replaced by the direct "refuri" attribute:: + + <paragraph> + <reference refuri="http://direct"> + direct external + <target id="id1" name="direct external" refuri="http://direct"> + """ + + default_priority = 640 + + def apply(self): + for target in self.document.traverse(nodes.target): + if target.hasattr('refuri'): + refuri = target['refuri'] + for name in target['names']: + reflist = self.document.refnames.get(name, []) + if reflist: + target.note_referenced_by(name=name) + for ref in reflist: + if ref.resolved: + continue + del ref['refname'] + ref['refuri'] = refuri + ref.resolved = 1 + + +class InternalTargets(Transform): + + default_priority = 660 + + def apply(self): + for target in self.document.traverse(nodes.target): + if not target.hasattr('refuri') and not target.hasattr('refid'): + self.resolve_reference_ids(target) + + def resolve_reference_ids(self, target): + """ + Given:: + + <paragraph> + <reference refname="direct internal"> + direct internal + <target id="id1" name="direct internal"> + + The "refname" attribute is replaced by "refid" linking to the target's + "id":: + + <paragraph> + <reference refid="id1"> + direct internal + <target id="id1" name="direct internal"> + """ + for name in target['names']: + refid = self.document.nameids[name] + reflist = self.document.refnames.get(name, []) + if reflist: + target.note_referenced_by(name=name) + for ref in reflist: + if ref.resolved: + continue + del ref['refname'] + ref['refid'] = refid + ref.resolved = 1 + + +class Footnotes(Transform): + + """ + Assign numbers to autonumbered footnotes, and resolve links to footnotes, + citations, and their references. 
+ + Given the following ``document`` as input:: + + <document> + <paragraph> + A labeled autonumbered footnote reference: + <footnote_reference auto="1" id="id1" refname="footnote"> + <paragraph> + An unlabeled autonumbered footnote reference: + <footnote_reference auto="1" id="id2"> + <footnote auto="1" id="id3"> + <paragraph> + Unlabeled autonumbered footnote. + <footnote auto="1" id="footnote" name="footnote"> + <paragraph> + Labeled autonumbered footnote. + + Auto-numbered footnotes have attribute ``auto="1"`` and no label. + Auto-numbered footnote_references have no reference text (they're + empty elements). When resolving the numbering, a ``label`` element + is added to the beginning of the ``footnote``, and reference text + to the ``footnote_reference``. + + The transformed result will be:: + + <document> + <paragraph> + A labeled autonumbered footnote reference: + <footnote_reference auto="1" id="id1" refid="footnote"> + 2 + <paragraph> + An unlabeled autonumbered footnote reference: + <footnote_reference auto="1" id="id2" refid="id3"> + 1 + <footnote auto="1" id="id3" backrefs="id2"> + <label> + 1 + <paragraph> + Unlabeled autonumbered footnote. + <footnote auto="1" id="footnote" name="footnote" backrefs="id1"> + <label> + 2 + <paragraph> + Labeled autonumbered footnote. + + Note that the footnotes are not in the same order as the references. + + The labels and reference text are added to the auto-numbered ``footnote`` + and ``footnote_reference`` elements. Footnote elements are backlinked to + their references via "backrefs" attributes. References are assigned "id" + and "refid" attributes. + + After adding labels and reference text, the "auto" attributes can be + ignored. + """ + + default_priority = 620 + + autofootnote_labels = None + """Keep track of unlabeled autonumbered footnotes.""" + + symbols = [ + # Entries 1-4 and 6 below are from section 12.51 of + # The Chicago Manual of Style, 14th edition. 
+ '*', # asterisk/star + u'\u2020', # dagger † + u'\u2021', # double dagger ‡ + u'\u00A7', # section mark § + u'\u00B6', # paragraph mark (pilcrow) ¶ + # (parallels ['||'] in CMoS) + '#', # number sign + # The entries below were chosen arbitrarily. + u'\u2660', # spade suit ♠ + u'\u2665', # heart suit ♥ + u'\u2666', # diamond suit ♦ + u'\u2663', # club suit ♣ + ] + + def apply(self): + self.autofootnote_labels = [] + startnum = self.document.autofootnote_start + self.document.autofootnote_start = self.number_footnotes(startnum) + self.number_footnote_references(startnum) + self.symbolize_footnotes() + self.resolve_footnotes_and_citations() + + def number_footnotes(self, startnum): + """ + Assign numbers to autonumbered footnotes. + + For labeled autonumbered footnotes, copy the number over to + corresponding footnote references. + """ + for footnote in self.document.autofootnotes: + while 1: + label = str(startnum) + startnum += 1 + if not self.document.nameids.has_key(label): + break + footnote.insert(0, nodes.label('', label)) + for name in footnote['names']: + for ref in self.document.footnote_refs.get(name, []): + ref += nodes.Text(label) + ref.delattr('refname') + assert len(footnote['ids']) == len(ref['ids']) == 1 + ref['refid'] = footnote['ids'][0] + footnote.add_backref(ref['ids'][0]) + self.document.note_refid(ref) + ref.resolved = 1 + if not footnote['names'] and not footnote['dupnames']: + footnote['names'].append(label) + self.document.note_explicit_target(footnote, footnote) + self.autofootnote_labels.append(label) + return startnum + + def number_footnote_references(self, startnum): + """Assign numbers to autonumbered footnote references.""" + i = 0 + for ref in self.document.autofootnote_refs: + if ref.resolved or ref.hasattr('refid'): + continue + try: + label = self.autofootnote_labels[i] + except IndexError: + msg = self.document.reporter.error( + 'Too many autonumbered footnote references: only %s ' + 'corresponding footnotes available.' 
+ % len(self.autofootnote_labels), base_node=ref) + msgid = self.document.set_id(msg) + for ref in self.document.autofootnote_refs[i:]: + if ref.resolved or ref.hasattr('refname'): + continue + prb = nodes.problematic( + ref.rawsource, ref.rawsource, refid=msgid) + prbid = self.document.set_id(prb) + msg.add_backref(prbid) + ref.replace_self(prb) + break + ref += nodes.Text(label) + id = self.document.nameids[label] + footnote = self.document.ids[id] + ref['refid'] = id + self.document.note_refid(ref) + assert len(ref['ids']) == 1 + footnote.add_backref(ref['ids'][0]) + ref.resolved = 1 + i += 1 + + def symbolize_footnotes(self): + """Add symbols indexes to "[*]"-style footnotes and references.""" + labels = [] + for footnote in self.document.symbol_footnotes: + reps, index = divmod(self.document.symbol_footnote_start, + len(self.symbols)) + labeltext = self.symbols[index] * (reps + 1) + labels.append(labeltext) + footnote.insert(0, nodes.label('', labeltext)) + self.document.symbol_footnote_start += 1 + self.document.set_id(footnote) + i = 0 + for ref in self.document.symbol_footnote_refs: + try: + ref += nodes.Text(labels[i]) + except IndexError: + msg = self.document.reporter.error( + 'Too many symbol footnote references: only %s ' + 'corresponding footnotes available.' % len(labels), + base_node=ref) + msgid = self.document.set_id(msg) + for ref in self.document.symbol_footnote_refs[i:]: + if ref.resolved or ref.hasattr('refid'): + continue + prb = nodes.problematic( + ref.rawsource, ref.rawsource, refid=msgid) + prbid = self.document.set_id(prb) + msg.add_backref(prbid) + ref.replace_self(prb) + break + footnote = self.document.symbol_footnotes[i] + assert len(footnote['ids']) == 1 + ref['refid'] = footnote['ids'][0] + self.document.note_refid(ref) + footnote.add_backref(ref['ids'][0]) + i += 1 + + def resolve_footnotes_and_citations(self): + """ + Link manually-labeled footnotes and citations to/from their + references. 
+ """ + for footnote in self.document.footnotes: + for label in footnote['names']: + if self.document.footnote_refs.has_key(label): + reflist = self.document.footnote_refs[label] + self.resolve_references(footnote, reflist) + for citation in self.document.citations: + for label in citation['names']: + if self.document.citation_refs.has_key(label): + reflist = self.document.citation_refs[label] + self.resolve_references(citation, reflist) + + def resolve_references(self, note, reflist): + assert len(note['ids']) == 1 + id = note['ids'][0] + for ref in reflist: + if ref.resolved: + continue + ref.delattr('refname') + ref['refid'] = id + assert len(ref['ids']) == 1 + note.add_backref(ref['ids'][0]) + ref.resolved = 1 + note.resolved = 1 + + +class CircularSubstitutionDefinitionError(Exception): pass + + +class Substitutions(Transform): + + """ + Given the following ``document`` as input:: + + <document> + <paragraph> + The + <substitution_reference refname="biohazard"> + biohazard + symbol is deservedly scary-looking. + <substitution_definition name="biohazard"> + <image alt="biohazard" uri="biohazard.png"> + + The ``substitution_reference`` will simply be replaced by the + contents of the corresponding ``substitution_definition``. + + The transformed result will be:: + + <document> + <paragraph> + The + <image alt="biohazard" uri="biohazard.png"> + symbol is deservedly scary-looking. 
+ <substitution_definition name="biohazard"> + <image alt="biohazard" uri="biohazard.png"> + """ + + default_priority = 220 + """The Substitutions transform has to be applied very early, before + `docutils.transforms.frontmatter.DocTitle` and others.""" + + def apply(self): + defs = self.document.substitution_defs + normed = self.document.substitution_names + subreflist = self.document.traverse(nodes.substitution_reference) + nested = {} + for ref in subreflist: + refname = ref['refname'] + key = None + if defs.has_key(refname): + key = refname + else: + normed_name = refname.lower() + if normed.has_key(normed_name): + key = normed[normed_name] + if key is None: + msg = self.document.reporter.error( + 'Undefined substitution referenced: "%s".' + % refname, base_node=ref) + msgid = self.document.set_id(msg) + prb = nodes.problematic( + ref.rawsource, ref.rawsource, refid=msgid) + prbid = self.document.set_id(prb) + msg.add_backref(prbid) + ref.replace_self(prb) + else: + subdef = defs[key] + parent = ref.parent + index = parent.index(ref) + if (subdef.attributes.has_key('ltrim') + or subdef.attributes.has_key('trim')): + if index > 0 and isinstance(parent[index - 1], + nodes.Text): + parent.replace(parent[index - 1], + parent[index - 1].rstrip()) + if (subdef.attributes.has_key('rtrim') + or subdef.attributes.has_key('trim')): + if (len(parent) > index + 1 + and isinstance(parent[index + 1], nodes.Text)): + parent.replace(parent[index + 1], + parent[index + 1].lstrip()) + subdef_copy = subdef.deepcopy() + try: + # Take care of nested substitution references: + for nested_ref in subdef_copy.traverse( + nodes.substitution_reference): + nested_name = normed[nested_ref['refname'].lower()] + if nested_name in nested.setdefault(nested_name, []): + raise CircularSubstitutionDefinitionError + else: + nested[nested_name].append(key) + subreflist.append(nested_ref) + except CircularSubstitutionDefinitionError: + parent = ref.parent + if isinstance(parent, 
class TargetNotes(Transform):

    """
    Creates a footnote for each external target in the text, and corresponding
    footnote references after each reference.
    """

    default_priority = 540
    """The TargetNotes transform has to be applied after `IndirectHyperlinks`
    but before `Footnotes`."""

    def __init__(self, document, startnode):
        """
        Set up the transform and remember the ``class`` detail of
        `startnode` (an empty list if absent) in ``self.classes``.
        """
        Transform.__init__(self, document, startnode=startnode)

        self.classes = startnode.details.get('class', [])

    def apply(self):
        """
        Build one footnote per distinct ``refuri`` and replace
        ``self.startnode`` with the list of footnotes.

        Named targets carrying a ``refuri`` are handled first, then
        anonymous references carrying a ``refuri``.
        """
        # Maps refuri -> footnote, so that each URI gets a single footnote.
        notes = {}
        nodelist = []
        for target in self.document.traverse(nodes.target):
            # Only external targets.
            if not target.hasattr('refuri'):
                continue
            names = target['names']
            refs = []
            for name in names:
                refs.extend(self.document.refnames.get(name, []))
            if not refs:
                continue
            footnote = self.make_target_footnote(target['refuri'], refs,
                                                 notes)
            if target['refuri'] not in notes:
                notes[target['refuri']] = footnote
                nodelist.append(footnote)
        # Take care of anonymous references.
        for ref in self.document.traverse(nodes.reference):
            if not ref.get('anonymous'):
                continue
            if ref.hasattr('refuri'):
                footnote = self.make_target_footnote(ref['refuri'], [ref],
                                                     notes)
                if ref['refuri'] not in notes:
                    notes[ref['refuri']] = footnote
                    nodelist.append(footnote)
        self.startnode.replace_self(nodelist)

    def make_target_footnote(self, refuri, refs, notes):
        """
        Return the footnote for `refuri` and insert a footnote reference
        after every node in `refs` (``nodes.target`` instances are skipped).

        If `refuri` is already a key of `notes`, that footnote is reused;
        otherwise a new auto-numbered footnote is created and registered
        with the document.  `notes` itself is not modified here.
        """
        if refuri in notes:             # duplicate?
            footnote = notes[refuri]
            assert len(footnote['names']) == 1
            footnote_name = footnote['names'][0]
        else:                           # original
            footnote = nodes.footnote()
            footnote_id = self.document.set_id(footnote)
            # Use uppercase letters and a colon; they can't be
            # produced inside names by the parser.
            footnote_name = 'TARGET_NOTE: ' + footnote_id
            footnote['auto'] = 1
            footnote['names'] = [footnote_name]
            footnote_paragraph = nodes.paragraph()
            footnote_paragraph += nodes.reference('', refuri, refuri=refuri)
            footnote += footnote_paragraph
            self.document.note_autofootnote(footnote)
            self.document.note_explicit_target(footnote, footnote)
        for ref in refs:
            if isinstance(ref, nodes.target):
                continue
            refnode = nodes.footnote_reference(
                refname=footnote_name, auto=1)
            refnode['classes'] += self.classes
            self.document.note_autofootnote_ref(refnode)
            self.document.note_footnote_ref(refnode)
            index = ref.parent.index(ref) + 1
            reflist = [refnode]
            # Separate the reference from the preceding text with a space
            # unless the settings ask for that space to be trimmed.
            if not utils.get_trim_footnote_ref_space(self.document.settings):
                if self.classes:
                    reflist.insert(0, nodes.inline(text=' ',
                                                   Classes=self.classes))
                else:
                    reflist.insert(0, nodes.Text(' '))
            ref.parent.insert(index, reflist)
        return footnote
class DanglingReferences(Transform):

    """
    Check for dangling references (incl. footnote & citation) and for
    unreferenced targets.
    """

    default_priority = 850

    def apply(self):
        """
        Walk the document with a `DanglingReferencesVisitor`, then report
        (at "info" level) every non-anonymous target that is unreferenced.
        """
        visitor = DanglingReferencesVisitor(
            self.document,
            self.document.transformer.unknown_reference_resolvers)
        self.document.walk(visitor)
        # *After* resolving all references, check for unreferenced
        # targets:
        for target in self.document.traverse(nodes.target):
            if not target.referenced:
                if target.get('anonymous'):
                    # If we have unreferenced anonymous targets, there
                    # is already an error message about anonymous
                    # hyperlink mismatch; no need to generate another
                    # message.
                    continue
                if target['names']:
                    naming = target['names'][0]
                elif target['ids']:
                    naming = target['ids'][0]
                else:
                    # Hack: Propagated targets always have their refid
                    # attribute set.
                    naming = target['refid']
                self.document.reporter.info(
                    'Hyperlink target "%s" is not referenced.'
                    % naming, base_node=target)


class DanglingReferencesVisitor(nodes.SparseNodeVisitor):

    """
    Resolve references that still carry a ``refname``; replace the ones
    that cannot be resolved with ``problematic`` nodes.
    """

    def __init__(self, document, unknown_reference_resolvers):
        """
        Store `document` and the sequence of resolver callables that are
        tried for references whose name has no id.
        """
        nodes.SparseNodeVisitor.__init__(self, document)
        self.document = document
        self.unknown_reference_resolvers = unknown_reference_resolvers

    def unknown_visit(self, node):
        """Ignore every node type without a dedicated ``visit_`` method."""
        pass

    def visit_reference(self, node):
        """
        Resolve `node` via ``document.nameids``.

        If the name maps to an id, ``refname`` is replaced by ``refid`` and
        the node is marked resolved.  Otherwise each resolver is called in
        turn; if none returns a true value an error is reported and `node`
        is replaced by a ``problematic`` node.
        """
        if node.resolved or not node.hasattr('refname'):
            return
        refname = node['refname']
        # Named ``refid`` rather than ``id`` to avoid shadowing the builtin.
        refid = self.document.nameids.get(refname)
        if refid is None:
            for resolver_function in self.unknown_reference_resolvers:
                if resolver_function(node):
                    break
            else:
                # A name that is present but maps to None is reported as a
                # duplicate; a missing name is reported as unknown.
                if refname in self.document.nameids:
                    msg = self.document.reporter.error(
                        'Duplicate target name, cannot be used as a unique '
                        'reference: "%s".' % (node['refname']), base_node=node)
                else:
                    msg = self.document.reporter.error(
                        'Unknown target name: "%s".' % (node['refname']),
                        base_node=node)
                msgid = self.document.set_id(msg)
                prb = nodes.problematic(
                    node.rawsource, node.rawsource, refid=msgid)
                prbid = self.document.set_id(prb)
                msg.add_backref(prbid)
                node.replace_self(prb)
        else:
            del node['refname']
            node['refid'] = refid
            self.document.ids[refid].note_referenced_by(id=refid)
            node.resolved = 1

    visit_footnote_reference = visit_citation_reference = visit_reference


def uniq(L):
    """
    Return a new list with the items of `L` in first-occurrence order and
    duplicates (by equality) removed.

    Membership is tested against a list, so items need not be hashable.
    """
    r = []
    for item in L:
        if item not in r:
            r.append(item)
    return r
class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self):
        """Append generated header and footer nodes to the decoration."""
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the header nodes; there are none in this implementation."""
        return None

    def generate_footer(self):
        """
        Return a one-element list holding the footer paragraph, or None if
        none of the ``generator``, ``datestamp``, ``source_link`` and
        ``source_url`` settings is set.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None
        parts = []
        if settings.source_link and settings._source \
               or settings.source_url:
            # An explicit URL wins over the computed relative path.
            source = settings.source_url
            if not source:
                source = utils.relative_path(settings._destination,
                                             settings._source)
            link = nodes.reference('', 'View document source', refuri=source)
            parts.append(link)
            parts.append(nodes.Text('.\n'))
        if settings.datestamp:
            stamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text('Generated on: ' + stamp + '.\n'))
        if settings.generator:
            parts.append(nodes.Text('Generated by '))
            parts.append(nodes.reference(
                '', 'Docutils', refuri='http://docutils.sourceforge.net/'))
            parts.append(nodes.Text(' from '))
            parts.append(nodes.reference(
                '', 'reStructuredText',
                refuri='http://docutils.sourceforge.net/rst.html'))
            parts.append(nodes.Text(' source.\n'))
        return [nodes.paragraph('', '', *parts)]
class ExposeInternals(Transform):

    """
    Expose internal attributes if ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Return true for every node that is not a ``nodes.Text``."""
        return not isinstance(node, nodes.Text)

    def apply(self):
        """
        Copy each attribute named in the ``expose_internals`` setting onto
        the non-Text nodes as ``internal:<name>``, skipping None values.
        """
        if not self.document.settings.expose_internals:
            return
        for node in self.document.traverse(self.not_Text):
            for att in self.document.settings.expose_internals:
                value = getattr(node, att, None)
                if value is None:
                    continue
                node['internal:' + att] = value


class Messages(Transform):

    """
    Place any system messages generated after parsing into a dedicated section
    of the document.
    """

    default_priority = 860

    def apply(self):
        """
        Collect the parentless transform messages at or above the report
        level and, if there are any, append them to the document in a
        "system-messages" section and clear ``transform_messages``.
        """
        pending = self.document.transform_messages
        threshold = self.document.reporter.report_level
        messages = [msg for msg in pending
                    if msg['level'] >= threshold and not msg.parent]
        if not messages:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += messages
        self.document.transform_messages[:] = []
        self.document += section


class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.
    """

    default_priority = 870

    def apply(self):
        """Detach every system message whose level is below the report level."""
        for node in self.document.traverse(nodes.system_message):
            if node['level'] >= self.document.reporter.report_level:
                continue
            node.parent.remove(node)


class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    default_priority = 880

    def apply(self):
        """Append each transform message that has no parent yet."""
        for msg in self.document.transform_messages:
            if msg.parent:
                continue
            self.document += msg
class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Detach every comment node when ``strip_comments`` is set."""
        if not self.document.settings.strip_comments:
            return
        for node in self.document.traverse(nodes.comment):
            node.parent.remove(node)


class Compound(Transform):

    """
    Flatten all compound paragraphs.  For example, transform ::

        <compound>
            <paragraph>
            <literal_block>
            <paragraph>

    into ::

        <paragraph>
        <literal_block classes="continued">
        <paragraph classes="continued">
    """

    default_priority = 810

    def apply(self):
        """
        Replace each compound by its children; every child that follows
        the first non-Invisible child gets the class "continued".
        """
        for compound in self.document.traverse(nodes.compound):
            continuing = 0
            for child in compound:
                if continuing:
                    child['classes'].append('continued')
                elif not isinstance(child, nodes.Invisible):
                    # First visible child found; later ones are continuations.
                    continuing = 1
            # Substitute children for compound.
            compound.replace_self(compound[:])