diff options
-rw-r--r-- | sandbox/docutils_xml/MANIFEST.in | 2 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml.rst | 59 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/__init__.py | 0 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/parsers/__init__.py | 0 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/parsers/xml.py | 318 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/parsers/xslt.py | 92 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/writers/__init__.py | 0 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/writers/xslt.py | 58 | ||||
-rw-r--r-- | sandbox/docutils_xml/global.log | 95 | ||||
-rwxr-xr-x | sandbox/docutils_xml/setup.py | 17 | ||||
-rw-r--r-- | sandbox/docutils_xml/tag.log | 1 | ||||
-rw-r--r-- | sandbox/docutils_xml/version.py | 1 |
12 files changed, 643 insertions, 0 deletions
diff --git a/sandbox/docutils_xml/MANIFEST.in b/sandbox/docutils_xml/MANIFEST.in new file mode 100644 index 000000000..d7a52a82f --- /dev/null +++ b/sandbox/docutils_xml/MANIFEST.in @@ -0,0 +1,2 @@ +include tests.tgz +include version.py diff --git a/sandbox/docutils_xml/docutils_xml.rst b/sandbox/docutils_xml/docutils_xml.rst new file mode 100644 index 000000000..f0c0339b6 --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml.rst @@ -0,0 +1,59 @@ +============ +docutils_xml +============ + +----------------------------------------- +Docutils support for XML based processing +----------------------------------------- + +:Author: smerten@oekonux.de +:Copyright: GPL v2 +:Manual section: 1 +:Manual group: text processing + +SYNOPSIS +======== + +:: + + from docutils_xml import parsers + from docutils_xml import writers + +DESCRIPTION +=========== + +Support code for processing XML in a Docutils environment. + +SEE ALSO +======== + +.. _Docutils: + +* Docutils + + http://docutils.sourceforge.net/ + +.. 
_reStructuredText: + +* reStructuredText + + http://docutils.sourceforge.net/rst.html + +* _`xml2rst` + + http://svn.code.sf.net/p/docutils/code/trunk/sandbox/xml2rst/ + +* _`odf2docutils` + + http://svn.code.sf.net/p/docutils/code/trunk/sandbox/odf2docutils/ + +AVAILABILITY +============ + +**docutils_xml** is available from + +http://www.merten-home.de/FreeSoftware/docutils_xml/ + +and the Docutils sandbox + +http://svn.code.sf.net/p/docutils/code/trunk/sandbox/docutils_xml diff --git a/sandbox/docutils_xml/docutils_xml/__init__.py b/sandbox/docutils_xml/docutils_xml/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml/__init__.py diff --git a/sandbox/docutils_xml/docutils_xml/parsers/__init__.py b/sandbox/docutils_xml/docutils_xml/parsers/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml/parsers/__init__.py diff --git a/sandbox/docutils_xml/docutils_xml/parsers/xml.py b/sandbox/docutils_xml/docutils_xml/parsers/xml.py new file mode 100644 index 000000000..1de562a40 --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml/parsers/xml.py @@ -0,0 +1,318 @@ +# Copyright (C) 2013 Stefan Merten + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. 
+ +""" +Base classes for parsing XML and transform it to a Docutils document. +""" + +__docformat__ = 'reStructuredText' # Formatted to be rendered by epydoc + +############################################################################### +############################################################################### +# Import + +import docutils.parsers, docutils.nodes + +from lxml import etree + +############################################################################### +############################################################################### +# Constants + +############################################################################### +############################################################################### +# Variables + +############################################################################### +############################################################################### +# Functions + +############################################################################### +############################################################################### +# Classes + +class SomeChildren(docutils.nodes.TreePruningException): + """ + Thrown to process only some children of the given node. + + Resembles :: + + <xsl:apply-templates + select="..."/> + """ + + def __init__(self, tags): + """ + :Parameters: + + tags : ( ( str | unicode, str | unicode ), ... ) + See `tags`. + + """ + self.tags = [ ( unicode(prefix), unicode(tag) ) + for ( prefix, tag ) in tags ] + """ + :type: ( ( unicode, unicode ), ... ) + + List of children tags to process. A tag is given as a pair of namespace + prefix and local name. + """ + +############################################################################### + +class XmlVisitor(object): + """ + Base visitor class for visiting an XML tree. + + See `event_prefix_tag` for a description of the methods in this visitor. + """ + + def event_prefix_tag(self, elem): + """ + Dummy method for documentation. 
+ + Visitor methods are named *event*\_\ *prefix*\_\ *tag* where *event* is + either ``visit`` or ``depart``, *prefix* is the namespace prefix and + *tag* is the local name of the tag with hyphens removed. + + :Parameters: + + elem : etree._Element + The element to process. + + :except docutils.nodes.TreePruningException: + Thrown to control the behavior of the caller. + + Note that there are more subclasses defined here. + + :rtype: void + + """ + raise NotImplementedError("'event_prefix_tag' is just for documentation") + + def __init__(self, parser, document): + """ + See instance attributes for a description of the parameters. + """ + self.parser = parser + """ + :type: XmlParser + + The parser using this visitor. + """ + self.document = document + """ + :type: docutils.nodes.document + + The target document. + """ + self.stack = [ document ] + """ + :type: [ docutils.nodes.Node, ... ] + + The stack of current nodes. The last one is the most recent current + node and the first one is the document itself. + """ + + def _applyMethod(self, elem, event): + """ + Find and apply method. + + :Parameters: + + elem : lxml.etree._Element + The element to apply the method to. + + event : str + The event to apply. + """ + ( prefix, name ) = self.parser.elem2PrefixName(elem) + if prefix is None: + prefix = "" + prefix = prefix.replace("-", "") + name = name.replace("-", "") + methodName = "_".join(( event, prefix, name )) + try: + method = getattr(self, methodName) + except AttributeError: + method = getattr(self, "".join(( event, "Default" ))) + return method(elem) + + def visit(self, elem): + """ + Visit an element. + + :Parameters: + + elem : lxml.etree._Element + The element to visit. + """ + self._applyMethod(elem, 'visit') + + def depart(self, elem): + """ + Depart an element. + + :Parameters: + + elem : lxml.etree._Element + The element to depart. 
+ """ + self._applyMethod(elem, 'depart') + + def visitDefault(self, elem): + """ + Used for elements without specific visit method. Does nothing. + + May be overridden in subclasses. + """ + pass + + def departDefault(self, elem): + """ + Used for elements without specific depart method. Does nothing. + + May be overridden in subclasses. + """ + pass + + def push(self, node): + """ + Add a node to the current node and make it the current node. + + :Parameters: + + node : docutils.nodes.Node + The new node to be made current. + + :return: The former current node which is now the parent. + :rtype: docutils.nodes.Node + """ + parent = self.stack[-1] + assert isinstance(parent, docutils.nodes.Element), \ + "Can not push to '%s'" % ( type(parent), ) + parent.append(node) + self.stack.append(node) + return parent + + def pop(self): + """ + Remove current node and make its parent the new current node. + + :return: The former current node which is now a child of the current + node. + :rtype: docutils.nodes.Node + """ + return self.stack.pop() + +############################################################################### + +class XmlParser(docutils.parsers.Parser): + """ + A generic XML parser for parsing XML input populating the Docutils doctree. + """ + + ns2Prefix = { } + """ + :type: { unicode: unicode, ... } + + Map namespace URI to namespace tag name. Usually overridden in subclasses. + """ + + visitorClass = XmlVisitor + """ + :type: class + + The visitor class to use. Must be a subclass of `XmlVisitor`. + + This is the essential thing to override in subclasses. All the logic on how + to parse and transform an XML document is in the visitor. + """ + + def parse(self, inputstring, document): + self.setup_parse(inputstring, document) + # This is a global setting in etree which is problematic because it is + # shared. However, this should work since it is overridden every time + # before it is used. 
+ [ etree.register_namespace(prefix, uri) + for ( uri, prefix ) in self.ns2Prefix.items() ] + inDoc = etree.fromstring(inputstring) + self.walk(inDoc, self.visitorClass(self, document)) + self.finish_parse() + + def walk(self, elem, visitor): + """ + Traverse the element tree and create the result. + + :Parameters: + + elem : etree._Element + The root element of the part of the input document to be traversed. + + visitor : XmlVisitor + The visitor to use. + + :return: ``True`` if the traversal should be stopped. + :rtype: bool + """ + stop = False + skipDeparture = False + someChildren = None + try: + try: + visitor.visit(elem) + except docutils.nodes.SkipNode: + return stop + except docutils.nodes.SkipDeparture: + skipDeparture = True + except SomeChildren, e: + someChildren = e.tags + try: + for child in elem: + if (someChildren is not None + and self.elem2PrefixName(child) not in someChildren): + continue + if self.walk(child, visitor): + stop = True + break + except docutils.nodes.SkipSiblings: + pass + except docutils.nodes.SkipChildren: + pass + except docutils.nodes.StopTraversal: + stop = True + if not skipDeparture: + visitor.depart(elem) + return stop + + def elem2PrefixName(self, elem): + """ + :Parameters: + + elem : etree._Element + The element to work for. + + :rtype: ( unicode | None, unicode ) + :return: Namespace prefix and localname of `elem`. Namespace prefix + may be ``None`` if no or unknown namespace. 
+ """ + qName = etree.QName(elem) + prefix = None + # elem.prefix would also work for lxml but using the namespace is safer + if qName.namespace: + prefix = self.ns2Prefix.get(qName.namespace, None) + return ( prefix, qName.localname ) diff --git a/sandbox/docutils_xml/docutils_xml/parsers/xslt.py b/sandbox/docutils_xml/docutils_xml/parsers/xslt.py new file mode 100644 index 000000000..4b2799fec --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml/parsers/xslt.py @@ -0,0 +1,92 @@ +# Copyright (C) 2013 Stefan Merten + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +""" +Parse and produce result string by XSLT. 
+""" + +__docformat__ = 'reStructuredText' # Formatted to be rendered by epydoc + +############################################################################### +############################################################################### +# Import + +import docutils.parsers + +from lxml import etree + +############################################################################### +############################################################################### +# Constants + +############################################################################### +############################################################################### +# Variables + +############################################################################### +############################################################################### +# Functions + +############################################################################### +############################################################################### +# Classes + +class XsltParser(docutils.parsers.Parser): + """ + Parses XML input by XSLT and stores the result in the attribute + `xslt_result` of the document. Works together with `XsltWriter`. + """ + + def __init__(self, xsltPath): + """ + See instance variables for parameter documentation. + """ + self.xsltPath = xsltPath + """ + Path to the XSLT to use. + """ + self.xslt = None + """ + :type: Return type of `etree.XSLT`() + + The XSLT to use for parsing. 
+ """ + + # Find XSLT + try: + xsltF = open(self.xsltPath) + except IOError, e: + raise Exception("Can't open main XSLT file %r: %s" + % ( self.xsltPath, e, )) + + # Parse and prepare XSLT + try: + xsltDoc = etree.parse(xsltF) + except Exception, e: + raise Exception("Error parsing main XSLT file %r: %s" + % ( self.xsltPath, e, )) + xsltF.close() + self.xslt = etree.XSLT(xsltDoc) + + def parse(self, inputstring, document): + self.setup_parse(inputstring, document) + inDoc = etree.fromstring(inputstring) + document.xslt_result = self.xslt(inDoc, sourceName="'%s'" + % ( document.current_source, )) + if self.xslt.error_log: + document.reporter.error(self.xslt.error_log) + self.finish_parse() diff --git a/sandbox/docutils_xml/docutils_xml/writers/__init__.py b/sandbox/docutils_xml/docutils_xml/writers/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml/writers/__init__.py diff --git a/sandbox/docutils_xml/docutils_xml/writers/xslt.py b/sandbox/docutils_xml/docutils_xml/writers/xslt.py new file mode 100644 index 000000000..5758951b1 --- /dev/null +++ b/sandbox/docutils_xml/docutils_xml/writers/xslt.py @@ -0,0 +1,58 @@ +# Copyright (C) 2013 Stefan Merten + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +""" +Support writer for `XsltParser`. 
+""" + +__docformat__ = 'reStructuredText' # Formatted to be rendered by epydoc + +############################################################################### +############################################################################### +# Import + +import docutils.writers + +############################################################################### +############################################################################### +# Constants + +############################################################################### +############################################################################### +# Variables + +############################################################################### +############################################################################### +# Functions + +############################################################################### +############################################################################### +# Classes + +############################################################################### + +class XsltWriter(docutils.writers.Writer): + """ + Writer transparently writing the result from an `XsltParser`. May be used + only together with `XsltParser`. + + Please note that `supported` must be set in subclass since the output + format is determined by the XSLT. 
+ """ + + def translate(self): + self.output = str(self.document.xslt_result) diff --git a/sandbox/docutils_xml/global.log b/sandbox/docutils_xml/global.log new file mode 100644 index 000000000..d231904d1 --- /dev/null +++ b/sandbox/docutils_xml/global.log @@ -0,0 +1,95 @@ +************************************** +Date: Sat Nov 16 13:51:23 CET 2013 +Author: stefan +Tag: docutils_xml_1_4 + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers +In directory eskebo:/home/stefan/free/docutils_xml/docutils_xml/parsers + +Added Files: + xml.py xslt.py + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/writers +In directory eskebo:/home/stefan/free/docutils_xml/docutils_xml/writers + +Added Files: + xslt.py + +-------------------------------------- +Log Message: +Added code from `odf2docutils` as of `odf2docutils_1_105`. +************************************** +Date: Sat Nov 16 13:46:18 CET 2013 +Author: stefan +Tag: docutils_xml_1_3 + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml +In directory eskebo:/home/stefan/free/docutils_xml + +Modified Files: + setup.py +Added Files: + docutils_xml.rst + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml +In directory eskebo:/home/stefan/free/docutils_xml/docutils_xml + +Added Files: + __init__.py + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers +In directory eskebo:/home/stefan/free/docutils_xml/docutils_xml/parsers + +Added Files: + __init__.py + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/writers +In directory eskebo:/home/stefan/free/docutils_xml/docutils_xml/writers + +Added Files: + __init__.py + +-------------------------------------- +Log Message: +Added structure for the module. 
+************************************** +Date: Sat Nov 16 13:36:25 CET 2013 +Author: stefan +Tag: docutils_xml_1_2 + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml +In directory eskebo:/home/stefan/free/docutils_xml + +Added Files: + Makefile setup.py + +-------------------------------------- +Log Message: +Added basic infrastructure. +************************************** +Date: Sat Nov 16 13:08:31 CET 2013 +Author: stefan + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml +In directory eskebo:/home/stefan/free/docutils_xml/docutils_xml + + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/test +In directory eskebo:/home/stefan/free/docutils_xml/test + + +-------------------------------------- +Log Message: +Directory /home/stefan/vault/sm/docutils_xml/docutils_xml put under version control + +Directory /home/stefan/vault/sm/docutils_xml/test put under version control + diff --git a/sandbox/docutils_xml/setup.py b/sandbox/docutils_xml/setup.py new file mode 100755 index 000000000..2a498f5fc --- /dev/null +++ b/sandbox/docutils_xml/setup.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +from distutils.core import setup + +from version import version + +setup(name='docutils_xml', + version=version, + description='Docutils support for XML based processing', + author='Stefan Merten', + author_email='smerten@oekonux.de', + url='http://docutils.sourceforge.net/sandbox/docutils_xml/', + license='GPL 2', + requires=[ 'lxml' ], + packages=[ 'docutils_xml', + 'docutils_xml/parsers', 'docutils_xml/writers' ], + ) diff --git a/sandbox/docutils_xml/tag.log b/sandbox/docutils_xml/tag.log new file mode 100644 index 000000000..519da394a --- /dev/null +++ b/sandbox/docutils_xml/tag.log @@ -0,0 +1 @@ +docutils_xml_1_4 diff --git a/sandbox/docutils_xml/version.py b/sandbox/docutils_xml/version.py new file mode 100644 index 000000000..b5e7c18f7 --- /dev/null +++ 
b/sandbox/docutils_xml/version.py @@ -0,0 +1 @@ +version = '0.0' |