diff options
Diffstat (limited to 'doc/tools/sgmlconv/docfixer.py')
-rwxr-xr-x | doc/tools/sgmlconv/docfixer.py | 1033 |
1 files changed, 0 insertions, 1033 deletions
diff --git a/doc/tools/sgmlconv/docfixer.py b/doc/tools/sgmlconv/docfixer.py deleted file mode 100755 index 463276b..0000000 --- a/doc/tools/sgmlconv/docfixer.py +++ /dev/null @@ -1,1033 +0,0 @@ -#! /usr/bin/env python - -"""Perform massive transformations on a document tree created from the LaTeX -of the Python documentation, and dump the ESIS data for the transformed tree. -""" - - -import errno -import esistools -import re -import string -import sys -import xml.dom -import xml.dom.minidom - -ELEMENT = xml.dom.Node.ELEMENT_NODE -ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE -TEXT = xml.dom.Node.TEXT_NODE - - -class ConversionError(Exception): - pass - - -ewrite = sys.stderr.write -try: - # We can only do this trick on Unix (if tput is on $PATH)! - if sys.platform != "posix" or not sys.stderr.isatty(): - raise ImportError - import commands -except ImportError: - bwrite = ewrite -else: - def bwrite(s, BOLDON=commands.getoutput("tput bold"), - BOLDOFF=commands.getoutput("tput sgr0")): - ewrite("%s%s%s" % (BOLDON, s, BOLDOFF)) - - -PARA_ELEMENT = "para" - -DEBUG_PARA_FIXER = 0 - -if DEBUG_PARA_FIXER: - def para_msg(s): - ewrite("*** %s\n" % s) -else: - def para_msg(s): - pass - - -def get_first_element(doc, gi): - for n in doc.childNodes: - if n.nodeName == gi: - return n - -def extract_first_element(doc, gi): - node = get_first_element(doc, gi) - if node is not None: - doc.removeChild(node) - return node - - -def get_documentElement(node): - result = None - for child in node.childNodes: - if child.nodeType == ELEMENT: - result = child - return result - - -def set_tagName(elem, gi): - elem.nodeName = elem.tagName = gi - - -def find_all_elements(doc, gi): - nodes = [] - if doc.nodeName == gi: - nodes.append(doc) - for child in doc.childNodes: - if child.nodeType == ELEMENT: - if child.tagName == gi: - nodes.append(child) - for node in child.getElementsByTagName(gi): - nodes.append(node) - return nodes - -def find_all_child_elements(doc, gi): - nodes = [] - for child in doc.childNodes: - if child.nodeName == gi: - nodes.append(child) - return nodes - - -def find_all_elements_from_set(doc, gi_set): - return __find_all_elements_from_set(doc, gi_set, []) - -def __find_all_elements_from_set(doc, gi_set, nodes): - if doc.nodeName in gi_set: - nodes.append(doc) - for child in doc.childNodes: - if child.nodeType == ELEMENT: - __find_all_elements_from_set(child, gi_set, nodes) - return nodes - - -def simplify(doc, fragment): - # Try to rationalize the document a bit, since these things are simply - # not valid SGML/XML documents as they stand, and need a little work. - documentclass = "document" - inputs = [] - node = extract_first_element(fragment, "documentclass") - if node is not None: - documentclass = node.getAttribute("classname") - node = extract_first_element(fragment, "title") - if node is not None: - inputs.append(node) - # update the name of the root element - node = get_first_element(fragment, "document") - if node is not None: - set_tagName(node, documentclass) - while 1: - node = extract_first_element(fragment, "input") - if node is None: - break - inputs.append(node) - if inputs: - docelem = get_documentElement(fragment) - inputs.reverse() - for node in inputs: - text = doc.createTextNode("\n") - docelem.insertBefore(text, docelem.firstChild) - docelem.insertBefore(node, text) - docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild) - while fragment.firstChild and fragment.firstChild.nodeType == TEXT: - fragment.removeChild(fragment.firstChild) - - -def cleanup_root_text(doc): - discards = [] - skip = 0 - for n in doc.childNodes: - prevskip = skip - skip = 0 - if n.nodeType == TEXT and not prevskip: - discards.append(n) - elif n.nodeName == "COMMENT": - skip = 1 - for node in discards: - doc.removeChild(node) - - -DESCRIPTOR_ELEMENTS = ( - "cfuncdesc", "cvardesc", "ctypedesc", - "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni", - "excdesc", "funcdesc", "funcdescni", "opcodedesc", - "datadesc", "datadescni", - ) - -def fixup_descriptors(doc, fragment): - sections = find_all_elements(fragment, "section") - for section in sections: - find_and_fix_descriptors(doc, section) - - -def find_and_fix_descriptors(doc, container): - children = container.childNodes - for child in children: - if child.nodeType == ELEMENT: - tagName = child.tagName - if tagName in DESCRIPTOR_ELEMENTS: - rewrite_descriptor(doc, child) - elif tagName == "subsection": - find_and_fix_descriptors(doc, child) - - -def rewrite_descriptor(doc, descriptor): - # - # Do these things: - # 1. Add an "index='no'" attribute to the element if the tagName - # ends in 'ni', removing the 'ni' from the name. - # 2. Create a <signature> from the name attribute - # 2a.Create an <args> if it appears to be available. - # 3. Create additional <signature>s from <*line{,ni}> elements, - # if found. - # 4. If a <versionadded> is found, move it to an attribute on the - # descriptor. - # 5. Move remaining child nodes to a <description> element. - # 6. Put it back together. - # - # 1. - descname = descriptor.tagName - index = 1 - if descname[-2:] == "ni": - descname = descname[:-2] - descriptor.setAttribute("index", "no") - set_tagName(descriptor, descname) - index = 0 - desctype = descname[:-4] # remove 'desc' - linename = desctype + "line" - if not index: - linename = linename + "ni" - # 2. - signature = doc.createElement("signature") - name = doc.createElement("name") - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(name) - name.appendChild(doc.createTextNode(descriptor.getAttribute("name"))) - descriptor.removeAttribute("name") - # 2a. - if descriptor.hasAttribute("var"): - if descname != "opcodedesc": - raise RuntimeError, \ - "got 'var' attribute on descriptor other than opcodedesc" - variable = descriptor.getAttribute("var") - if variable: - args = doc.createElement("args") - args.appendChild(doc.createTextNode(variable)) - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(args) - descriptor.removeAttribute("var") - newchildren = [signature] - children = descriptor.childNodes - pos = skip_leading_nodes(children) - if pos < len(children): - child = children[pos] - if child.nodeName == "args": - # move <args> to <signature>, or remove if empty: - child.parentNode.removeChild(child) - if len(child.childNodes): - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(child) - signature.appendChild(doc.createTextNode("\n ")) - # 3, 4. - pos = skip_leading_nodes(children, pos) - while pos < len(children) \ - and children[pos].nodeName in (linename, "versionadded"): - if children[pos].tagName == linename: - # this is really a supplemental signature, create <signature> - oldchild = children[pos].cloneNode(1) - try: - sig = methodline_to_signature(doc, children[pos]) - except KeyError: - print oldchild.toxml() - raise - newchildren.append(sig) - else: - # <versionadded added=...> - descriptor.setAttribute( - "added", children[pos].getAttribute("version")) - pos = skip_leading_nodes(children, pos + 1) - # 5. - description = doc.createElement("description") - description.appendChild(doc.createTextNode("\n")) - newchildren.append(description) - move_children(descriptor, description, pos) - last = description.childNodes[-1] - if last.nodeType == TEXT: - last.data = string.rstrip(last.data) + "\n " - # 6. - # should have nothing but whitespace and signature lines in <descriptor>; - # discard them - while descriptor.childNodes: - descriptor.removeChild(descriptor.childNodes[0]) - for node in newchildren: - descriptor.appendChild(doc.createTextNode("\n ")) - descriptor.appendChild(node) - descriptor.appendChild(doc.createTextNode("\n")) - - -def methodline_to_signature(doc, methodline): - signature = doc.createElement("signature") - signature.appendChild(doc.createTextNode("\n ")) - name = doc.createElement("name") - name.appendChild(doc.createTextNode(methodline.getAttribute("name"))) - methodline.removeAttribute("name") - signature.appendChild(name) - if len(methodline.childNodes): - args = doc.createElement("args") - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(args) - move_children(methodline, args) - signature.appendChild(doc.createTextNode("\n ")) - return signature - - -def move_children(origin, dest, start=0): - children = origin.childNodes - while start < len(children): - node = children[start] - origin.removeChild(node) - dest.appendChild(node) - - -def handle_appendix(doc, fragment): - # must be called after simplfy() if document is multi-rooted to begin with - docelem = get_documentElement(fragment) - toplevel = docelem.tagName == "manual" and "chapter" or "section" - appendices = 0 - nodes = [] - for node in docelem.childNodes: - if appendices: - nodes.append(node) - elif node.nodeType == ELEMENT: - appnodes = node.getElementsByTagName("appendix") - if appnodes: - appendices = 1 - parent = appnodes[0].parentNode - parent.removeChild(appnodes[0]) - parent.normalize() - if nodes: - map(docelem.removeChild, nodes) - docelem.appendChild(doc.createTextNode("\n\n\n")) - back = doc.createElement("back-matter") - docelem.appendChild(back) - back.appendChild(doc.createTextNode("\n")) - while nodes and nodes[0].nodeType == TEXT \ - and not string.strip(nodes[0].data): - del nodes[0] - map(back.appendChild, nodes) - docelem.appendChild(doc.createTextNode("\n")) - - -def handle_labels(doc, fragment): - for label in find_all_elements(fragment, "label"): - id = label.getAttribute("id") - if not id: - continue - parent = label.parentNode - parentTagName = parent.tagName - if parentTagName == "title": - parent.parentNode.setAttribute("id", id) - else: - parent.setAttribute("id", id) - # now, remove <label id="..."/> from parent: - parent.removeChild(label) - if parentTagName == "title": - parent.normalize() - children = parent.childNodes - if children[-1].nodeType == TEXT: - children[-1].data = string.rstrip(children[-1].data) - - -def fixup_trailing_whitespace(doc, wsmap): - queue = [doc] - while queue: - node = queue[0] - del queue[0] - if wsmap.has_key(node.nodeName): - ws = wsmap[node.tagName] - children = node.childNodes - children.reverse() - if children[0].nodeType == TEXT: - data = string.rstrip(children[0].data) + ws - children[0].data = data - children.reverse() - # hack to get the title in place: - if node.tagName == "title" \ - and node.parentNode.firstChild.nodeType == ELEMENT: - node.parentNode.insertBefore(doc.createText("\n "), - node.parentNode.firstChild) - for child in node.childNodes: - if child.nodeType == ELEMENT: - queue.append(child) - - -def normalize(doc): - for node in doc.childNodes: - if node.nodeType == ELEMENT: - node.normalize() - - -def cleanup_trailing_parens(doc, element_names): - d = {} - for gi in element_names: - d[gi] = gi - rewrite_element = d.has_key - queue = [] - for node in doc.childNodes: - if node.nodeType == ELEMENT: - queue.append(node) - while queue: - node = queue[0] - del queue[0] - if rewrite_element(node.tagName): - children = node.childNodes - if len(children) == 1 \ - and children[0].nodeType == TEXT: - data = children[0].data - if data[-2:] == "()": - children[0].data = data[:-2] - else: - for child in node.childNodes: - if child.nodeType == ELEMENT: - queue.append(child) - - -def contents_match(left, right): - left_children = left.childNodes - right_children = right.childNodes - if len(left_children) != len(right_children): - return 0 - for l, r in map(None, left_children, right_children): - nodeType = l.nodeType - if nodeType != r.nodeType: - return 0 - if nodeType == ELEMENT: - if l.tagName != r.tagName: - return 0 - # should check attributes, but that's not a problem here - if not contents_match(l, r): - return 0 - elif nodeType == TEXT: - if l.data != r.data: - return 0 - else: - # not quite right, but good enough - return 0 - return 1 - - -def create_module_info(doc, section): - # Heavy. - node = extract_first_element(section, "modulesynopsis") - if node is None: - return - set_tagName(node, "synopsis") - lastchild = node.childNodes[-1] - if lastchild.nodeType == TEXT \ - and lastchild.data[-1:] == ".": - lastchild.data = lastchild.data[:-1] - modauthor = extract_first_element(section, "moduleauthor") - if modauthor: - set_tagName(modauthor, "author") - modauthor.appendChild(doc.createTextNode( - modauthor.getAttribute("name"))) - modauthor.removeAttribute("name") - platform = extract_first_element(section, "platform") - if section.tagName == "section": - modinfo_pos = 2 - modinfo = doc.createElement("moduleinfo") - moddecl = extract_first_element(section, "declaremodule") - name = None - if moddecl: - modinfo.appendChild(doc.createTextNode("\n ")) - name = moddecl.attributes["name"].value - namenode = doc.createElement("name") - namenode.appendChild(doc.createTextNode(name)) - modinfo.appendChild(namenode) - type = moddecl.attributes.get("type") - if type: - type = type.value - modinfo.appendChild(doc.createTextNode("\n ")) - typenode = doc.createElement("type") - typenode.appendChild(doc.createTextNode(type)) - modinfo.appendChild(typenode) - versionadded = extract_first_element(section, "versionadded") - if versionadded: - modinfo.setAttribute("added", versionadded.getAttribute("version")) - title = get_first_element(section, "title") - if title: - children = title.childNodes - if len(children) >= 2 \ - and children[0].nodeName == "module" \ - and children[0].childNodes[0].data == name: - # this is it; morph the <title> into <short-synopsis> - first_data = children[1] - if first_data.data[:4] == " ---": - first_data.data = string.lstrip(first_data.data[4:]) - set_tagName(title, "short-synopsis") - if children[-1].nodeType == TEXT \ - and children[-1].data[-1:] == ".": - children[-1].data = children[-1].data[:-1] - section.removeChild(title) - section.removeChild(section.childNodes[0]) - title.removeChild(children[0]) - modinfo_pos = 0 - else: - ewrite("module name in title doesn't match" - " <declaremodule/>; no <short-synopsis/>\n") - else: - ewrite("Unexpected condition: <section/> without <title/>\n") - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(node) - if title and not contents_match(title, node): - # The short synopsis is actually different, - # and needs to be stored: - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(title) - if modauthor: - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(modauthor) - if platform: - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(platform) - modinfo.appendChild(doc.createTextNode("\n ")) - section.insertBefore(modinfo, section.childNodes[modinfo_pos]) - section.insertBefore(doc.createTextNode("\n "), modinfo) - # - # The rest of this removes extra newlines from where we cut out - # a lot of elements. A lot of code for minimal value, but keeps - # keeps the generated *ML from being too funny looking. - # - section.normalize() - children = section.childNodes - for i in range(len(children)): - node = children[i] - if node.nodeName == "moduleinfo": - nextnode = children[i+1] - if nextnode.nodeType == TEXT: - data = nextnode.data - if len(string.lstrip(data)) < (len(data) - 4): - nextnode.data = "\n\n\n" + string.lstrip(data) - - -def cleanup_synopses(doc, fragment): - for node in find_all_elements(fragment, "section"): - create_module_info(doc, node) - - -def fixup_table_structures(doc, fragment): - for table in find_all_elements(fragment, "table"): - fixup_table(doc, table) - - -def fixup_table(doc, table): - # create the table head - thead = doc.createElement("thead") - row = doc.createElement("row") - move_elements_by_name(doc, table, row, "entry") - thead.appendChild(doc.createTextNode("\n ")) - thead.appendChild(row) - thead.appendChild(doc.createTextNode("\n ")) - # create the table body - tbody = doc.createElement("tbody") - prev_row = None - last_was_hline = 0 - children = table.childNodes - for child in children: - if child.nodeType == ELEMENT: - tagName = child.tagName - if tagName == "hline" and prev_row is not None: - prev_row.setAttribute("rowsep", "1") - elif tagName == "row": - prev_row = child - # save the rows: - tbody.appendChild(doc.createTextNode("\n ")) - move_elements_by_name(doc, table, tbody, "row", sep="\n ") - # and toss the rest: - while children: - child = children[0] - nodeType = child.nodeType - if nodeType == TEXT: - if string.strip(child.data): - raise ConversionError("unexpected free data in <%s>: %r" - % (table.tagName, child.data)) - table.removeChild(child) - continue - if nodeType == ELEMENT: - if child.tagName != "hline": - raise ConversionError( - "unexpected <%s> in table" % child.tagName) - table.removeChild(child) - continue - raise ConversionError( - "unexpected %s node in table" % child.__class__.__name__) - # nothing left in the <table>; add the <thead> and <tbody> - tgroup = doc.createElement("tgroup") - tgroup.appendChild(doc.createTextNode("\n ")) - tgroup.appendChild(thead) - tgroup.appendChild(doc.createTextNode("\n ")) - tgroup.appendChild(tbody) - tgroup.appendChild(doc.createTextNode("\n ")) - table.appendChild(tgroup) - # now make the <entry>s look nice: - for row in table.getElementsByTagName("row"): - fixup_row(doc, row) - - -def fixup_row(doc, row): - entries = [] - map(entries.append, row.childNodes[1:]) - for entry in entries: - row.insertBefore(doc.createTextNode("\n "), entry) -# row.appendChild(doc.createTextNode("\n ")) - - -def move_elements_by_name(doc, source, dest, name, sep=None): - nodes = [] - for child in source.childNodes: - if child.nodeName == name: - nodes.append(child) - for node in nodes: - source.removeChild(node) - dest.appendChild(node) - if sep: - dest.appendChild(doc.createTextNode(sep)) - - -RECURSE_INTO_PARA_CONTAINERS = ( - "chapter", "abstract", "enumerate", - "section", "subsection", "subsubsection", - "paragraph", "subparagraph", "back-matter", - "howto", "manual", - "item", "itemize", "fulllineitems", "enumeration", "descriptionlist", - "definitionlist", "definition", - ) - -PARA_LEVEL_ELEMENTS = ( - "moduleinfo", "title", "verbatim", "enumerate", "item", - "interpreter-session", "back-matter", "interactive-session", - "opcodedesc", "classdesc", "datadesc", - "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni", - "funcdescni", "methoddescni", "excdescni", - "tableii", "tableiii", "tableiv", "localmoduletable", - "sectionauthor", "seealso", "itemize", - # include <para>, so we can just do it again to get subsequent paras: - PARA_ELEMENT, - ) - -PARA_LEVEL_PRECEEDERS = ( - "setindexsubitem", "author", - "stindex", "obindex", "COMMENT", "label", "input", "title", - "versionadded", "versionchanged", "declaremodule", "modulesynopsis", - "moduleauthor", "indexterm", "leader", - ) - - -def fixup_paras(doc, fragment): - for child in fragment.childNodes: - if child.nodeName in RECURSE_INTO_PARA_CONTAINERS: - fixup_paras_helper(doc, child) - descriptions = find_all_elements(fragment, "description") - for description in descriptions: - fixup_paras_helper(doc, description) - - -def fixup_paras_helper(doc, container, depth=0): - # document is already normalized - children = container.childNodes - start = skip_leading_nodes(children) - while len(children) > start: - if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS: - # Something to recurse into: - fixup_paras_helper(doc, children[start]) - else: - # Paragraph material: - build_para(doc, container, start, len(children)) - if DEBUG_PARA_FIXER and depth == 10: - sys.exit(1) - start = skip_leading_nodes(children, start + 1) - - -def build_para(doc, parent, start, i): - children = parent.childNodes - after = start + 1 - have_last = 0 - BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS - # Collect all children until \n\n+ is found in a text node or a - # member of BREAK_ELEMENTS is found. - for j in range(start, i): - after = j + 1 - child = children[j] - nodeType = child.nodeType - if nodeType == ELEMENT: - if child.tagName in BREAK_ELEMENTS: - after = j - break - elif nodeType == TEXT: - pos = string.find(child.data, "\n\n") - if pos == 0: - after = j - break - if pos >= 1: - child.splitText(pos) - break - else: - have_last = 1 - if (start + 1) > after: - raise ConversionError( - "build_para() could not identify content to turn into a paragraph") - if children[after - 1].nodeType == TEXT: - # we may need to split off trailing white space: - child = children[after - 1] - data = child.data - if string.rstrip(data) != data: - have_last = 0 - child.splitText(len(string.rstrip(data))) - para = doc.createElement(PARA_ELEMENT) - prev = None - indexes = range(start, after) - indexes.reverse() - for j in indexes: - node = parent.childNodes[j] - parent.removeChild(node) - para.insertBefore(node, prev) - prev = node - if have_last: - parent.appendChild(para) - parent.appendChild(doc.createTextNode("\n\n")) - return len(parent.childNodes) - else: - nextnode = parent.childNodes[start] - if nextnode.nodeType == TEXT: - if nextnode.data and nextnode.data[0] != "\n": - nextnode.data = "\n" + nextnode.data - else: - newnode = doc.createTextNode("\n") - parent.insertBefore(newnode, nextnode) - nextnode = newnode - start = start + 1 - parent.insertBefore(para, nextnode) - return start + 1 - - -def skip_leading_nodes(children, start=0): - """Return index into children of a node at which paragraph building should - begin or a recursive call to fixup_paras_helper() should be made (for - subsections, etc.). - - When the return value >= len(children), we've built all the paras we can - from this list of children. - """ - i = len(children) - while i > start: - # skip over leading comments and whitespace: - child = children[start] - nodeType = child.nodeType - if nodeType == TEXT: - data = child.data - shortened = string.lstrip(data) - if shortened: - if data != shortened: - # break into two nodes: whitespace and non-whitespace - child.splitText(len(data) - len(shortened)) - return start + 1 - return start - # all whitespace, just skip - elif nodeType == ELEMENT: - tagName = child.tagName - if tagName in RECURSE_INTO_PARA_CONTAINERS: - return start - if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS: - return start - start = start + 1 - return start - - -def fixup_rfc_references(doc, fragment): - for rfcnode in find_all_elements(fragment, "rfc"): - rfcnode.appendChild(doc.createTextNode( - "RFC " + rfcnode.getAttribute("num"))) - - -def fixup_signatures(doc, fragment): - for child in fragment.childNodes: - if child.nodeType == ELEMENT: - args = child.getElementsByTagName("args") - for arg in args: - fixup_args(doc, arg) - arg.normalize() - args = child.getElementsByTagName("constructor-args") - for arg in args: - fixup_args(doc, arg) - arg.normalize() - - -def fixup_args(doc, arglist): - for child in arglist.childNodes: - if child.nodeName == "optional": - # found it; fix and return - arglist.insertBefore(doc.createTextNode("["), child) - optkids = child.childNodes - while optkids: - k = optkids[0] - child.removeChild(k) - arglist.insertBefore(k, child) - arglist.insertBefore(doc.createTextNode("]"), child) - arglist.removeChild(child) - return fixup_args(doc, arglist) - - -def fixup_sectionauthors(doc, fragment): - for sectauth in find_all_elements(fragment, "sectionauthor"): - section = sectauth.parentNode - section.removeChild(sectauth) - set_tagName(sectauth, "author") - sectauth.appendChild(doc.createTextNode( - sectauth.getAttribute("name"))) - sectauth.removeAttribute("name") - after = section.childNodes[2] - title = section.childNodes[1] - if title.nodeName != "title": - after = section.childNodes[0] - section.insertBefore(doc.createTextNode("\n "), after) - section.insertBefore(sectauth, after) - - -def fixup_verbatims(doc): - for verbatim in find_all_elements(doc, "verbatim"): - child = verbatim.childNodes[0] - if child.nodeType == TEXT \ - and string.lstrip(child.data)[:3] == ">>>": - set_tagName(verbatim, "interactive-session") - - -def add_node_ids(fragment, counter=0): - fragment.node_id = counter - for node in fragment.childNodes: - counter = counter + 1 - if node.nodeType == ELEMENT: - counter = add_node_ids(node, counter) - else: - node.node_id = counter - return counter + 1 - - -REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex', - 'refexmodindex', 'refstmodindex') - -def fixup_refmodindexes(fragment): - # Locate <ref*modindex>...</> co-located with <module>...</>, and - # remove the <ref*modindex>, replacing it with index=index on the - # <module> element. - nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS) - d = {} - for node in nodes: - parent = node.parentNode - d[parent.node_id] = parent - del nodes - map(fixup_refmodindexes_chunk, d.values()) - - -def fixup_refmodindexes_chunk(container): - # node is probably a <para>; let's see how often it isn't: - if container.tagName != PARA_ELEMENT: - bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container) - module_entries = find_all_elements(container, "module") - if not module_entries: - return - index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS) - removes = [] - for entry in index_entries: - children = entry.childNodes - if len(children) != 0: - bwrite("--- unexpected number of children for %s node:\n" - % entry.tagName) - ewrite(entry.toxml() + "\n") - continue - found = 0 - module_name = entry.getAttribute("module") - for node in module_entries: - if len(node.childNodes) != 1: - continue - this_name = node.childNodes[0].data - if this_name == module_name: - found = 1 - node.setAttribute("index", "yes") - if found: - removes.append(entry) - for node in removes: - container.removeChild(node) - - -def fixup_bifuncindexes(fragment): - nodes = find_all_elements(fragment, 'bifuncindex') - d = {} - # make sure that each parent is only processed once: - for node in nodes: - parent = node.parentNode - d[parent.node_id] = parent - del nodes - map(fixup_bifuncindexes_chunk, d.values()) - - -def fixup_bifuncindexes_chunk(container): - removes = [] - entries = find_all_child_elements(container, "bifuncindex") - function_entries = find_all_child_elements(container, "function") - for entry in entries: - function_name = entry.getAttribute("name") - found = 0 - for func_entry in function_entries: - t2 = func_entry.childNodes[0].data - if t2[-2:] != "()": - continue - t2 = t2[:-2] - if t2 == function_name: - func_entry.setAttribute("index", "yes") - func_entry.setAttribute("module", "__builtin__") - if not found: - found = 1 - removes.append(entry) - for entry in removes: - container.removeChild(entry) - - -def join_adjacent_elements(container, gi): - queue = [container] - while queue: - parent = queue.pop() - i = 0 - children = parent.childNodes - nchildren = len(children) - while i < (nchildren - 1): - child = children[i] - if child.nodeName == gi: - if children[i+1].nodeName == gi: - ewrite("--- merging two <%s/> elements\n" % gi) - child = children[i] - nextchild = children[i+1] - nextchildren = nextchild.childNodes - while len(nextchildren): - node = nextchildren[0] - nextchild.removeChild(node) - child.appendChild(node) - parent.removeChild(nextchild) - continue - if child.nodeType == ELEMENT: - queue.append(child) - i = i + 1 - - -_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") - -def write_esis(doc, ofp, knownempty): - for node in doc.childNodes: - nodeType = node.nodeType - if nodeType == ELEMENT: - gi = node.tagName - if knownempty(gi): - if node.hasChildNodes(): - raise ValueError, \ - "declared-empty node <%s> has children" % gi - ofp.write("e\n") - for k, value in node.attributes.items(): - if _token_rx.match(value): - dtype = "TOKEN" - else: - dtype = "CDATA" - ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value))) - ofp.write("(%s\n" % gi) - write_esis(node, ofp, knownempty) - ofp.write(")%s\n" % gi) - elif nodeType == TEXT: - ofp.write("-%s\n" % esistools.encode(node.data)) - elif nodeType == ENTITY_REFERENCE: - ofp.write("&%s\n" % node.nodeName) - else: - raise RuntimeError, "unsupported node type: %s" % nodeType - - -def convert(ifp, ofp): - events = esistools.parse(ifp) - toktype, doc = events.getEvent() - fragment = doc.createDocumentFragment() - events.expandNode(fragment) - - normalize(fragment) - simplify(doc, fragment) - handle_labels(doc, fragment) - handle_appendix(doc, fragment) - fixup_trailing_whitespace(doc, { - "abstract": "\n", - "title": "", - "chapter": "\n\n", - "section": "\n\n", - "subsection": "\n\n", - "subsubsection": "\n\n", - "paragraph": "\n\n", - "subparagraph": "\n\n", - }) - cleanup_root_text(doc) - cleanup_trailing_parens(fragment, ["function", "method", "cfunction"]) - cleanup_synopses(doc, fragment) - fixup_descriptors(doc, fragment) - fixup_verbatims(fragment) - normalize(fragment) - fixup_paras(doc, fragment) - fixup_sectionauthors(doc, fragment) - fixup_table_structures(doc, fragment) - fixup_rfc_references(doc, fragment) - fixup_signatures(doc, fragment) - add_node_ids(fragment) - fixup_refmodindexes(fragment) - fixup_bifuncindexes(fragment) - # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and - # LaTeX2HTML screwing with GNU-style long options (the '--' problem). - join_adjacent_elements(fragment, "option") - # - d = {} - for gi in events.parser.get_empties(): - d[gi] = gi - if d.has_key("author"): - del d["author"] - if d.has_key("rfc"): - del d["rfc"] - knownempty = d.has_key - # - try: - write_esis(fragment, ofp, knownempty) - except IOError, (err, msg): - # Ignore EPIPE; it just means that whoever we're writing to stopped - # reading. The rest of the output would be ignored. All other errors - # should still be reported, - if err != errno.EPIPE: - raise - - -def main(): - if len(sys.argv) == 1: - ifp = sys.stdin - ofp = sys.stdout - elif len(sys.argv) == 2: - ifp = open(sys.argv[1]) - ofp = sys.stdout - elif len(sys.argv) == 3: - ifp = open(sys.argv[1]) - import StringIO - ofp = StringIO.StringIO() - else: - usage() - sys.exit(2) - convert(ifp, ofp) - if len(sys.argv) == 3: - fp = open(sys.argv[2], "w") - fp.write(ofp.getvalue()) - fp.close() - ofp.close() - - -if __name__ == "__main__": - main() |