summaryrefslogtreecommitdiff
path: root/sandbox/odf2docutils/odf2docutilslib/parsers/xml.py
diff options
context:
space:
mode:
Diffstat (limited to 'sandbox/odf2docutils/odf2docutilslib/parsers/xml.py')
-rw-r--r--sandbox/odf2docutils/odf2docutilslib/parsers/xml.py318
1 files changed, 0 insertions, 318 deletions
diff --git a/sandbox/odf2docutils/odf2docutilslib/parsers/xml.py b/sandbox/odf2docutils/odf2docutilslib/parsers/xml.py
deleted file mode 100644
index 1de562a40..000000000
--- a/sandbox/odf2docutils/odf2docutilslib/parsers/xml.py
+++ /dev/null
@@ -1,318 +0,0 @@
-# Copyright (C) 2013 Stefan Merten
-
-# This file is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 2 of the License,
-# or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-"""
-Base classes for parsing XML and transforming it into a Docutils document.
-"""
-
-__docformat__ = 'reStructuredText' # Formatted to be rendered by epydoc
-
-###############################################################################
-###############################################################################
-# Import
-
-import docutils.parsers, docutils.nodes
-
-from lxml import etree
-
-###############################################################################
-###############################################################################
-# Constants
-
-###############################################################################
-###############################################################################
-# Variables
-
-###############################################################################
-###############################################################################
-# Functions
-
-###############################################################################
-###############################################################################
-# Classes
-
-class SomeChildren(docutils.nodes.TreePruningException):
- """
- Thrown to process only some children of the given node.
-
- Resembles ::
-
- <xsl:apply-templates
- select="..."/>
- """
-
- def __init__(self, tags):
- """
- :Parameters:
-
- tags : ( ( str | unicode, str | unicode ), ... )
- See `tags`.
-
- """
- self.tags = [ ( unicode(prefix), unicode(tag) )
- for ( prefix, tag ) in tags ]
- """
- :type: ( ( unicode, unicode ), ... )
-
- List of children tags to process. A tag is given as a pair of namespace
- prefix and local name.
- """
-
-###############################################################################
-
-class XmlVisitor(object):
- """
- Base visitor class for visiting an XML tree.
-
- See `event_prefix_tag` for a description of the methods in this visitor.
- """
-
- def event_prefix_tag(self, elem):
- """
- Dummy method for documentation.
-
- Visitor methods are named *event*\_\ *prefix*\_\ *tag* where *event* is
- either ``visit`` or ``depart``, *prefix* is the namespace prefix and
- *tag* is the local name of the tag with hyphens removed.
-
- :Parameters:
-
- elem : etree._Element
- The element to process.
-
- :except docutils.nodes.TreePruningException:
- Thrown to control the behavior of the caller.
-
- Note that there are more subclasses defined here.
-
- :rtype: void
-
- """
- raise NotImplementedError("'event_prefix_tag' is just for documentation")
-
- def __init__(self, parser, document):
- """
- See instance attributes for a description of the parameters.
- """
- self.parser = parser
- """
- :type: XmlParser
-
- The parser using this visitor.
- """
- self.document = document
- """
- :type: docutils.nodes.document
-
- The target document.
- """
- self.stack = [ document ]
- """
- :type: [ docutils.nodes.Node, ... ]
-
- The stack of current nodes. The last one is the most recent current
- node and the first one is the document itself.
- """
-
- def _applyMethod(self, elem, event):
- """
- Find and apply method.
-
- :Parameters:
-
- elem : lxml.etree._Element
- The element to apply the method to.
-
- event : str
- The event to apply.
- """
- ( prefix, name ) = self.parser.elem2PrefixName(elem)
- if prefix is None:
- prefix = ""
- prefix = prefix.replace("-", "")
- name = name.replace("-", "")
- methodName = "_".join(( event, prefix, name ))
- try:
- method = getattr(self, methodName)
- except AttributeError:
- method = getattr(self, "".join(( event, "Default" )))
- return method(elem)
-
- def visit(self, elem):
- """
- Visit an element.
-
- :Parameters:
-
- elem : lxml.etree._Element
- The element to visit.
- """
- self._applyMethod(elem, 'visit')
-
- def depart(self, elem):
- """
- Depart an element.
-
- :Parameters:
-
- elem : lxml.etree._Element
- The element to depart.
- """
- self._applyMethod(elem, 'depart')
-
- def visitDefault(self, elem):
- """
- Used for elements without specific visit method. Does nothing.
-
- May be overridden in subclasses.
- """
- pass
-
- def departDefault(self, elem):
- """
- Used for elements without specific depart method. Does nothing.
-
- May be overridden in subclasses.
- """
- pass
-
- def push(self, node):
- """
- Add a node to the current node and make it the current node.
-
- :Parameters:
-
- node : docutils.nodes.Node
- The new node to be made current.
-
- :return: The former current node which is now the parent.
- :rtype: docutils.nodes.Node
- """
- parent = self.stack[-1]
- assert isinstance(parent, docutils.nodes.Element), \
- "Can not push to '%s'" % ( type(parent), )
- parent.append(node)
- self.stack.append(node)
- return parent
-
- def pop(self):
- """
- Remove current node and make its parent the new current node.
-
- :return: The former current node which is now a child of the current
- node.
- :rtype: docutils.nodes.Node
- """
- return self.stack.pop()
-
-###############################################################################
-
-class XmlParser(docutils.parsers.Parser):
- """
- A generic XML parser for parsing XML input populating the Docutils doctree.
- """
-
- ns2Prefix = { }
- """
- :type: { unicode: unicode, ... }
-
- Map namespace URI to namespace prefix. Usually overridden in subclasses.
- """
-
- visitorClass = XmlVisitor
- """
- :type: class
-
- The visitor class to use. Must be a subclass of `XmlVisitor`.
-
- This is the essential thing to override in subclasses. All the logic on how
- to parse and transform an XML document is in the visitor.
- """
-
- def parse(self, inputstring, document):
- self.setup_parse(inputstring, document)
- # This is a global setting in etree which is problematic because it is
- # shared. However, this should work since it is overridden every time
- # before it is used.
- [ etree.register_namespace(prefix, uri)
- for ( uri, prefix ) in self.ns2Prefix.items() ]
- inDoc = etree.fromstring(inputstring)
- self.walk(inDoc, self.visitorClass(self, document))
- self.finish_parse()
-
- def walk(self, elem, visitor):
- """
- Traverse the element tree and create the result.
-
- :Parameters:
-
- elem : etree._Element
- The root element of the part of the input document to be traversed.
-
- visitor : XmlVisitor
- The visitor to use.
-
- :return: ``True`` if the traversal should be stopped.
- :rtype: bool
- """
- stop = False
- skipDeparture = False
- someChildren = None
- try:
- try:
- visitor.visit(elem)
- except docutils.nodes.SkipNode:
- return stop
- except docutils.nodes.SkipDeparture:
- skipDeparture = True
- except SomeChildren, e:
- someChildren = e.tags
- try:
- for child in elem:
- if (someChildren is not None
- and self.elem2PrefixName(child) not in someChildren):
- continue
- if self.walk(child, visitor):
- stop = True
- break
- except docutils.nodes.SkipSiblings:
- pass
- except docutils.nodes.SkipChildren:
- pass
- except docutils.nodes.StopTraversal:
- stop = True
- if not skipDeparture:
- visitor.depart(elem)
- return stop
-
- def elem2PrefixName(self, elem):
- """
- :Parameters:
-
- elem : etree._Element
- The element to work for.
-
- :rtype: ( unicode | None, unicode )
- :return: Namespace prefix and localname of `elem`. Namespace prefix
- may be ``None`` if no or unknown namespace.
- """
- qName = etree.QName(elem)
- prefix = None
- # elem.prefix would also work for lxml but using the namespace is safer
- if qName.namespace:
- prefix = self.ns2Prefix.get(qName.namespace, None)
- return ( prefix, qName.localname )