summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- sandbox/docutils_xml/docutils_xml/parsers/xml.py 122
-rw-r--r-- sandbox/docutils_xml/docutils_xml/parsers/xslt.py 26
-rw-r--r-- sandbox/docutils_xml/global.log 60
-rw-r--r-- sandbox/docutils_xml/tag.log 2
-rw-r--r-- sandbox/docutils_xml/test/test_parsers/test_XmlParser.py 15
-rw-r--r-- sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py 8
-rw-r--r-- sandbox/docutils_xml/test/test_parsers/test_XsltParser.py 92
-rw-r--r-- sandbox/docutils_xml/version.py 2
8 files changed, 253 insertions, 74 deletions
diff --git a/sandbox/docutils_xml/docutils_xml/parsers/xml.py b/sandbox/docutils_xml/docutils_xml/parsers/xml.py
index 4a4bddb4c..6ee232e26 100644
--- a/sandbox/docutils_xml/docutils_xml/parsers/xml.py
+++ b/sandbox/docutils_xml/docutils_xml/parsers/xml.py
@@ -74,6 +74,72 @@ class SomeChildren(docutils.nodes.TreePruningException):
###############################################################################
+class Uri2Prefixes(object):
+ """
+ Maps namespace URI to prefixes.
+ """
+
+ uri2Prefixes = { }
+ """
+ :type: { unicode: [ unicode, ... ] , ... }
+
+ Map namespace URI to one or more namespace prefixes.
+ """
+
+ def __init__(self, uriPrefixes):
+ """
+ :Parameters:
+
+ uriPrefixes : ( ( basestring, basestring, ... ), ... )
+ For each tuple in the list use the first entry as a URI and the
+ remaining entries as prefixes defined for this URI. A URI may be
+ given more than once.
+
+ The strings must be convertible to unicode.
+ """
+ for uriPfxes in uriPrefixes:
+ pfxList = [ unicode(s)
+ for s in uriPfxes ]
+ uri = pfxList.pop(0)
+ if uri not in self.uri2Prefixes:
+ self.uri2Prefixes[uri] = [ ]
+ self.uri2Prefixes[uri].extend(pfxList)
+
+ def etreeRegister(self):
+ """
+ Register all namespace URIs in etree.
+
+ .. note:: In etree this is a global setting which is problematic
+ because it is shared. Take care to override the namespaces
+ every time the parser needs them.
+ """
+ for ( uri, prefixes ) in self.uri2Prefixes.items():
+ for prefix in prefixes:
+ etree.register_namespace(prefix, uri)
+
+ def elem2PrefixName(self, elem):
+ """
+ :Parameters:
+
+ elem : etree._Element
+ The element to work for.
+
+ :rtype: ( unicode | None, unicode )
+ :return: Namespace prefix and localname of `elem`. Namespace prefix may
+ be ``None`` if `elem` has no namespace or an unknown one. If the
+ namespace has more than one prefix the first one is used.
+ """
+ qName = etree.QName(elem)
+ prefix = None
+ # elem.prefix would also work for lxml but using the namespace is safer
+ if qName.namespace:
+ prefixes = self.uri2Prefixes.get(qName.namespace, None)
+ if prefixes is not None:
+ prefix = prefixes[0]
+ return ( prefix, qName.localname )
+
+###############################################################################
+
class XmlVisitor(object):
"""
Base visitor class for visiting an XML tree.
@@ -104,15 +170,16 @@ class XmlVisitor(object):
"""
raise NotImplementedError("'event_prefix_tag' is just for documentation")
- def __init__(self, parser, document):
+ def __init__(self, uri2Prefixes, document):
"""
See instance attributes for a description of the parameters.
"""
- self.parser = parser
+ self.uri2Prefixes = uri2Prefixes
"""
- :type: XmlParser
+ :type: Uri2Prefixes
- The parser using this visitor.
+ The namespace prefixes to use. These must match those of the parser
+ used.
"""
self.document = document
"""
@@ -140,7 +207,7 @@ class XmlVisitor(object):
event : str
The event to apply.
"""
- ( prefix, name ) = self.parser.elem2PrefixName(elem)
+ ( prefix, name ) = self.uri2Prefixes.elem2PrefixName(elem)
if prefix is None:
prefix = ""
prefix = prefix.replace("-", "")
@@ -226,12 +293,11 @@ class XmlParser(docutils.parsers.Parser):
A generic XML parser for parsing XML input populating the Docutils doctree.
"""
- ns2Prefixes = { }
+ uri2Prefixes = Uri2Prefixes(( ))
"""
- :type: { unicode: unicode | ( unicode, ... ) , ... }
+ :type: Uri2Prefixes
- Map namespace URI to one or more namespace prefixes. In case a unique
- prefix is needed for a namespace the first one is used.
+ Map namespace URI to prefixes for this parser.
Usually overridden in subclasses.
"""
@@ -248,16 +314,9 @@ class XmlParser(docutils.parsers.Parser):
def parse(self, inputstring, document):
self.setup_parse(inputstring, document)
- # This is a global setting in etree which is problematic because it is
- # shared. However, this should work since it is overridden every time
- # before it is used.
- for ( uri, prefixes ) in self.ns2Prefixes.items():
- if isinstance(prefixes, basestring):
- prefixes = ( prefixes, )
- for prefix in prefixes:
- etree.register_namespace(prefix, uri)
+ self.uri2Prefixes.etreeRegister()
inDoc = etree.fromstring(inputstring)
- self.walk(inDoc, self.visitorClass(self, document))
+ self.walk(inDoc, self.visitorClass(self.uri2Prefixes, document))
self.finish_parse()
def walk(self, elem, visitor):
@@ -291,7 +350,8 @@ class XmlParser(docutils.parsers.Parser):
try:
for child in elem:
if (someChildren is not None
- and self.elem2PrefixName(child) not in someChildren):
+ and self.uri2Prefixes.elem2PrefixName(child)
+ not in someChildren):
continue
if self.walk(child, visitor):
stop = True
@@ -307,27 +367,3 @@ class XmlParser(docutils.parsers.Parser):
if not skipDeparture:
visitor.depart(elem)
return stop
-
- def elem2PrefixName(self, elem):
- """
- :Parameters:
-
- elem : etree._Element
- The element to work for.
-
- :rtype: ( unicode | None, unicode )
- :return: Namespace prefix and localname of `elem`. Namespace prefix
- may be ``None`` if no or unknown namespace.
- """
- qName = etree.QName(elem)
- prefix = None
- # elem.prefix would also work for lxml but using the namespace is saver
- if qName.namespace:
- prefixes = self.ns2Prefixes.get(qName.namespace, None)
- if prefixes is None:
- prefix = None
- elif isinstance(prefixes, basestring):
- prefix = prefixes
- else:
- prefix = prefixes[0]
- return ( prefix, qName.localname )
diff --git a/sandbox/docutils_xml/docutils_xml/parsers/xslt.py b/sandbox/docutils_xml/docutils_xml/parsers/xslt.py
index 4b2799fec..003d6301a 100644
--- a/sandbox/docutils_xml/docutils_xml/parsers/xslt.py
+++ b/sandbox/docutils_xml/docutils_xml/parsers/xslt.py
@@ -51,13 +51,12 @@ class XsltParser(docutils.parsers.Parser):
`xslt_result` of the document. Works together with `XsltWriter`.
"""
- def __init__(self, xsltPath):
+ def __init__(self, xsltSource):
"""
- See instance variables for parameter documentation.
- """
- self.xsltPath = xsltPath
- """
- Path to the XSLT to use.
+ :Parameters:
+
+ xsltSource : file-like object
+ The source containing the XSLT. This is an open file-like object.
"""
self.xslt = None
"""
@@ -66,20 +65,11 @@ class XsltParser(docutils.parsers.Parser):
The XSLT to use for parsing.
"""
- # Find XSLT
- try:
- xsltF = open(self.xsltPath)
- except IOError, e:
- raise Exception("Can't open main XSLT file %r: %s"
- % ( self.xsltPath, e, ))
-
- # Parse and prepare XSLT
try:
- xsltDoc = etree.parse(xsltF)
+ xsltDoc = etree.parse(xsltSource)
except Exception, e:
- raise Exception("Error parsing main XSLT file %r: %s"
- % ( self.xsltPath, e, ))
- xsltF.close()
+ raise Exception("Error parsing XSLT: %s" % ( e, ))
+ xsltSource.close()
self.xslt = etree.XSLT(xsltDoc)
def parse(self, inputstring, document):
diff --git a/sandbox/docutils_xml/global.log b/sandbox/docutils_xml/global.log
index 5ebb59650..51348d16c 100644
--- a/sandbox/docutils_xml/global.log
+++ b/sandbox/docutils_xml/global.log
@@ -1,4 +1,64 @@
**************************************
+Date: Fri Nov 22 18:14:40 CET 2013
+Author: stefan
+Tag: docutils_xml_1_29
+
+--------------------------------------
+Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers
+In directory theowa:/home/stefan/free/docutils_xml/docutils_xml/parsers
+
+Modified Files:
+ xslt.py
+
+--------------------------------------
+Update of /home/stefan/vault/sm/docutils_xml/test/test_parsers
+In directory theowa:/home/stefan/free/docutils_xml/test/test_parsers
+
+Added Files:
+ test_XsltParser.py
+
+--------------------------------------
+Log Message:
+Added test for `XsltParser`.
+**************************************
+Date: Thu Nov 21 14:24:11 CET 2013
+Author: stefan
+Tag: docutils_xml_1_28
+
+--------------------------------------
+Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers
+In directory theowa:/home/stefan/free/docutils_xml/docutils_xml/parsers
+
+Modified Files:
+ xslt.py
+
+--------------------------------------
+Log Message:
+Changed interface of `XsltParser` to accept a file-like object.
+**************************************
+Date: Wed Nov 20 18:46:11 CET 2013
+Author: stefan
+Tag: docutils_xml_1_27
+
+--------------------------------------
+Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers
+In directory theowa:/home/stefan/free/docutils_xml/docutils_xml/parsers
+
+Modified Files:
+ xml.py
+
+--------------------------------------
+Update of /home/stefan/vault/sm/docutils_xml/test/test_parsers
+In directory theowa:/home/stefan/free/docutils_xml/test/test_parsers
+
+Modified Files:
+ test_XmlParser.py test_XmlVisitor.py
+
+--------------------------------------
+Log Message:
+Refactoring: Factored out `Uri2Prefixes` uncoupling `XmlVisitor` from
+`XmlParser`.
+**************************************
Date: Wed Nov 20 01:55:31 CET 2013
Author: stefan
Tag: docutils_xml_1_26
diff --git a/sandbox/docutils_xml/tag.log b/sandbox/docutils_xml/tag.log
index 444c60ef8..57282bf0f 100644
--- a/sandbox/docutils_xml/tag.log
+++ b/sandbox/docutils_xml/tag.log
@@ -1 +1 @@
-docutils_xml_1_26
+docutils_xml_1_29
diff --git a/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py b/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py
index bccd1c789..9268a491b 100644
--- a/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py
+++ b/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py
@@ -25,7 +25,7 @@ from docutils.nodes import Text
from __init__ import DocutilsTestSupport
-from docutils_xml.parsers.xml import XmlVisitor, XmlParser, SomeChildren
+from docutils_xml.parsers.xml import Uri2Prefixes, XmlVisitor, XmlParser, SomeChildren
###############################################################################
@@ -63,7 +63,7 @@ class XmlVisitorMock(XmlVisitor):
"""
def __recordVisit(self, elem):
- ( pfx, nm ) = self.parser.elem2PrefixName(elem)
+ ( pfx, nm ) = self.uri2Prefixes.elem2PrefixName(elem)
attrs = ""
for attr in sorted(elem.keys()):
attrs += " %s=%r" % ( attr, elem.get(attr) )
@@ -88,7 +88,7 @@ class XmlVisitorMock(XmlVisitor):
return None
def __recordDepart(self, elem):
- ( pfx, nm ) = self.parser.elem2PrefixName(elem)
+ ( pfx, nm ) = self.uri2Prefixes.elem2PrefixName(elem)
self.depth -= 1
self.document += Text("%s} %s:%s\n"
% ( self.depth * self.indent,
@@ -110,10 +110,11 @@ class XmlParserMock(XmlParser):
Mock class recording visited nodes in the output document.
"""
- ns2Prefixes = { u'urn:example': ( u'ex', u'alias', u'int', ),
- # u'urn:empty': u'', # Empty tag is not accepted by lxml
- u'urn:other': u'ot',
- }
+ uri2Prefixes = Uri2Prefixes((
+ ( 'urn:example', 'ex', 'alias', 'int' ),
+ # ( 'urn:empty', u'' ), # Empty prefix is not accepted by lxml
+ ( 'urn:other', 'ot' ),
+ ))
visitorClass = XmlVisitorMock
diff --git a/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py b/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py
index 154c299a3..f79b14453 100644
--- a/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py
+++ b/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py
@@ -23,7 +23,7 @@ import unittest
from lxml import etree
import docutils.nodes, docutils.utils
-from docutils_xml.parsers.xml import XmlVisitor, XmlParser
+from docutils_xml.parsers.xml import XmlVisitor, Uri2Prefixes
###############################################################################
@@ -32,7 +32,7 @@ class XmlVisitorMock(XmlVisitor):
Mock class recording calls.
"""
- def __init__(self, parser, document):
+ def __init__(self, uri2Prefixes, document):
self.calls = [ ]
"""
@@ -50,7 +50,7 @@ class XmlVisitorMock(XmlVisitor):
The name of the current call.
"""
- XmlVisitor.__init__(self, parser, document)
+ XmlVisitor.__init__(self, uri2Prefixes, document)
def __record(self, *args, **kwargs):
self.calls.append(( self.currentCall, args, kwargs ))
@@ -77,7 +77,7 @@ class XmlVisitorMock(XmlVisitor):
class XmlVisitorTests(unittest.TestCase):
def setUp(self):
- self.visitor = XmlVisitorMock(XmlParser(),
+ self.visitor = XmlVisitorMock(Uri2Prefixes(( )),
docutils.utils.new_document(None))
def test__init__(self):
diff --git a/sandbox/docutils_xml/test/test_parsers/test_XsltParser.py b/sandbox/docutils_xml/test/test_parsers/test_XsltParser.py
new file mode 100644
index 000000000..687f357b3
--- /dev/null
+++ b/sandbox/docutils_xml/test/test_parsers/test_XsltParser.py
@@ -0,0 +1,92 @@
+# Copyright (C) 2013 Stefan Merten
+
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 2 of the License,
+# or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+"""
+Test XsltParser.
+"""
+
+import unittest
+import docutils.utils, docutils.frontend
+import StringIO
+
+from __init__ import DocutilsTestSupport
+
+from docutils_xml.parsers.xslt import XsltParser
+from docutils_xml.writers.xslt import XsltWriter
+
+###############################################################################
+
+class XsltParserTestCase(DocutilsTestSupport.ParserTestCase):
+ """
+ Output checker for XsltParser.
+ """
+
+ identityXslt = u"""\
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+ <xsl:template match="/">
+ <xsl:copy-of select="/"/>
+ </xsl:template>
+</xsl:stylesheet>
+"""
+
+ parser = XsltParser(StringIO.StringIO(identityXslt))
+ """Parser shared by all XsltParserTestCases."""
+
+ option_parser = docutils.frontend.OptionParser(components=(
+ XsltParser, ))
+
+ def test_parser(self):
+ settings = self.settings.copy()
+ settings.__dict__.update(self.suite_settings)
+ document = docutils.utils.new_document('test data', settings)
+ self.parser.parse(self.input, document)
+ writer = XsltWriter()
+ output = writer.write(document, docutils.io.StringOutput())
+ self.compare_output(self.input, output, self.expected)
+
+###############################################################################
+
+class XsltParserTestSuite(DocutilsTestSupport.ParserTestSuite):
+
+ test_case_class = XsltParserTestCase
+
+###############################################################################
+
+totest = {}
+
+totest['simple'] = (
+ ( u"""<?xml version="1.0"?>
+<rootOnly/>
+""",
+ u"""<?xml version="1.0"?>
+<rootOnly/>
+""" ),
+ )
+
+###############################################################################
+
+def suite():
+ s = XsltParserTestSuite()
+ s.generateTests(totest)
+ return s
+
+###############################################################################
+
+if __name__ == '__main__':
+ import unittest
+ unittest.main(defaultTest='suite')
diff --git a/sandbox/docutils_xml/version.py b/sandbox/docutils_xml/version.py
index d4f4d8123..01673f641 100644
--- a/sandbox/docutils_xml/version.py
+++ b/sandbox/docutils_xml/version.py
@@ -1 +1 @@
-version = '0.1.1'
+version = '0.2.1'