diff options
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/parsers/xml.py | 122 | ||||
-rw-r--r-- | sandbox/docutils_xml/docutils_xml/parsers/xslt.py | 26 | ||||
-rw-r--r-- | sandbox/docutils_xml/global.log | 60 | ||||
-rw-r--r-- | sandbox/docutils_xml/tag.log | 2 | ||||
-rw-r--r-- | sandbox/docutils_xml/test/test_parsers/test_XmlParser.py | 15 | ||||
-rw-r--r-- | sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py | 8 | ||||
-rw-r--r-- | sandbox/docutils_xml/test/test_parsers/test_XsltParser.py | 92 | ||||
-rw-r--r-- | sandbox/docutils_xml/version.py | 2 |
8 files changed, 253 insertions, 74 deletions
diff --git a/sandbox/docutils_xml/docutils_xml/parsers/xml.py b/sandbox/docutils_xml/docutils_xml/parsers/xml.py index 4a4bddb4c..6ee232e26 100644 --- a/sandbox/docutils_xml/docutils_xml/parsers/xml.py +++ b/sandbox/docutils_xml/docutils_xml/parsers/xml.py @@ -74,6 +74,72 @@ class SomeChildren(docutils.nodes.TreePruningException): ############################################################################### +class Uri2Prefixes(object): + """ + Maps namespace URI to prefixes. + """ + + uri2Prefixes = { } + """ + :type: { unicode: [ unicode, ... ] , ... } + + Map namespace URI to one or more namespace prefixes. + """ + + def __init__(self, uriPrefixes): + """ + :Parameters: + + uriPrefixes : ( ( basestring, basestring, ... ), ... ) + For each tuple in the list use the first entry as an URI and the + remaining entries as prefixes defined for this URI. An URI may be + given more than once + + The strings must be convertable to unicode. + """ + for uriPfxes in uriPrefixes: + pfxList = [ unicode(s) + for s in uriPfxes ] + uri = pfxList.pop(0) + if uri not in self.uri2Prefixes: + self.uri2Prefixes[uri] = [ ] + self.uri2Prefixes[uri].extend(pfxList) + + def etreeRegister(self): + """ + Register all namespace URIs in etree. + + .. note:: In etree this is a global setting which is problematic + because it is shared. Take care to override the namespaces + every time the parser needs them. + """ + for ( uri, prefixes ) in self.uri2Prefixes.items(): + for prefix in prefixes: + etree.register_namespace(prefix, uri) + + def elem2PrefixName(self, elem): + """ + :Parameters: + + elem : etree._Element + The element to work for. + + :rtype: ( unicode | None, unicode ) + :return: Namespace prefix and localname of `elem`. Namespace prefix may + be ``None`` if no or unknown namespace. If the namespace has + more than one prefixes the first one is used. + """ + qName = etree.QName(elem) + prefix = None + # elem.prefix would also work for lxml but using the namespace is saver + if qName.namespace: + prefixes = self.uri2Prefixes.get(qName.namespace, None) + if prefixes is not None: + prefix = prefixes[0] + return ( prefix, qName.localname ) + +############################################################################### + class XmlVisitor(object): """ Base visitor class for visiting an XML tree. @@ -104,15 +170,16 @@ class XmlVisitor(object): """ raise NotImplementedError("'event_prefix_tag' is just for documentation") - def __init__(self, parser, document): + def __init__(self, uri2Prefixes, document): """ See instance attributes for a description of the parameters. """ - self.parser = parser + self.uri2Prefixes = uri2Prefixes """ - :type: XmlParser + :type: Uri2Prefixes - The parser using this visitor. + The namespace prefixes to use. These must match those of the parser + used. """ self.document = document """ @@ -140,7 +207,7 @@ class XmlVisitor(object): event : str The event to apply. """ - ( prefix, name ) = self.parser.elem2PrefixName(elem) + ( prefix, name ) = self.uri2Prefixes.elem2PrefixName(elem) if prefix is None: prefix = "" prefix = prefix.replace("-", "") @@ -226,12 +293,11 @@ class XmlParser(docutils.parsers.Parser): A generic XML parser for parsing XML input populating the Docutils doctree. """ - ns2Prefixes = { } + uri2Prefixes = Uri2Prefixes(( )) """ - :type: { unicode: unicode | ( unicode, ... ) , ... } + :type: Uri2Prefixes - Map namespace URI to one or more namespace prefixes. In case a unique - prefix is needed for a namespace the first one is used. + Map namespace URI to prefixes for this parser. Usually overridden in subclasses. """ @@ -248,16 +314,9 @@ class XmlParser(docutils.parsers.Parser): def parse(self, inputstring, document): self.setup_parse(inputstring, document) - # This is a global setting in etree which is problematic because it is - # shared. However, this should work since it is overridden every time - # before it is used. - for ( uri, prefixes ) in self.ns2Prefixes.items(): - if isinstance(prefixes, basestring): - prefixes = ( prefixes, ) - for prefix in prefixes: - etree.register_namespace(prefix, uri) + self.uri2Prefixes.etreeRegister() inDoc = etree.fromstring(inputstring) - self.walk(inDoc, self.visitorClass(self, document)) + self.walk(inDoc, self.visitorClass(self.uri2Prefixes, document)) self.finish_parse() def walk(self, elem, visitor): @@ -291,7 +350,8 @@ class XmlParser(docutils.parsers.Parser): try: for child in elem: if (someChildren is not None - and self.elem2PrefixName(child) not in someChildren): + and self.uri2Prefixes.elem2PrefixName(child) + not in someChildren): continue if self.walk(child, visitor): stop = True @@ -307,27 +367,3 @@ class XmlParser(docutils.parsers.Parser): if not skipDeparture: visitor.depart(elem) return stop - - def elem2PrefixName(self, elem): - """ - :Parameters: - - elem : etree._Element - The element to work for. - - :rtype: ( unicode | None, unicode ) - :return: Namespace prefix and localname of `elem`. Namespace prefix - may be ``None`` if no or unknown namespace. - """ - qName = etree.QName(elem) - prefix = None - # elem.prefix would also work for lxml but using the namespace is saver - if qName.namespace: - prefixes = self.ns2Prefixes.get(qName.namespace, None) - if prefixes is None: - prefix = None - elif isinstance(prefixes, basestring): - prefix = prefixes - else: - prefix = prefixes[0] - return ( prefix, qName.localname ) diff --git a/sandbox/docutils_xml/docutils_xml/parsers/xslt.py b/sandbox/docutils_xml/docutils_xml/parsers/xslt.py index 4b2799fec..003d6301a 100644 --- a/sandbox/docutils_xml/docutils_xml/parsers/xslt.py +++ b/sandbox/docutils_xml/docutils_xml/parsers/xslt.py @@ -51,13 +51,12 @@ class XsltParser(docutils.parsers.Parser): `xslt_result` of the document. Works together with `XsltWriter`. """ - def __init__(self, xsltPath): + def __init__(self, xsltSource): """ - See instance variables for parameter documentation. - """ - self.xsltPath = xsltPath - """ - Path to the XSLT to use. + :Parameters: + + xsltSource : file-like object + The source containing the XSLT. This is an open file-like object. """ self.xslt = None """ @@ -66,20 +65,11 @@ class XsltParser(docutils.parsers.Parser): The XSLT to use for parsing. """ - # Find XSLT - try: - xsltF = open(self.xsltPath) - except IOError, e: - raise Exception("Can't open main XSLT file %r: %s" - % ( self.xsltPath, e, )) - - # Parse and prepare XSLT try: - xsltDoc = etree.parse(xsltF) + xsltDoc = etree.parse(xsltSource) except Exception, e: - raise Exception("Error parsing main XSLT file %r: %s" - % ( self.xsltPath, e, )) - xsltF.close() + raise Exception("Error parsing XSLT: %s" % ( e, )) + xsltSource.close() self.xslt = etree.XSLT(xsltDoc) def parse(self, inputstring, document): diff --git a/sandbox/docutils_xml/global.log b/sandbox/docutils_xml/global.log index 5ebb59650..51348d16c 100644 --- a/sandbox/docutils_xml/global.log +++ b/sandbox/docutils_xml/global.log @@ -1,4 +1,64 @@ ************************************** +Date: Fri Nov 22 18:14:40 CET 2013 +Author: stefan +Tag: docutils_xml_1_29 + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers +In directory theowa:/home/stefan/free/docutils_xml/docutils_xml/parsers + +Modified Files: + xslt.py + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/test/test_parsers +In directory theowa:/home/stefan/free/docutils_xml/test/test_parsers + +Added Files: + test_XsltParser.py + +-------------------------------------- +Log Message: +Added test for `XsltParser`. +************************************** +Date: Thu Nov 21 14:24:11 CET 2013 +Author: stefan +Tag: docutils_xml_1_28 + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers +In directory theowa:/home/stefan/free/docutils_xml/docutils_xml/parsers + +Modified Files: + xslt.py + +-------------------------------------- +Log Message: +Changed interface of `XsltParser` to accept a file-like object. +************************************** +Date: Wed Nov 20 18:46:11 CET 2013 +Author: stefan +Tag: docutils_xml_1_27 + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/docutils_xml/parsers +In directory theowa:/home/stefan/free/docutils_xml/docutils_xml/parsers + +Modified Files: + xml.py + +-------------------------------------- +Update of /home/stefan/vault/sm/docutils_xml/test/test_parsers +In directory theowa:/home/stefan/free/docutils_xml/test/test_parsers + +Modified Files: + test_XmlParser.py test_XmlVisitor.py + +-------------------------------------- +Log Message: +Refactoring: Factored out `Uri2Prefixes` uncoupling `XmlVisitor` from +`XmlParser`. +************************************** Date: Wed Nov 20 01:55:31 CET 2013 Author: stefan Tag: docutils_xml_1_26 diff --git a/sandbox/docutils_xml/tag.log b/sandbox/docutils_xml/tag.log index 444c60ef8..57282bf0f 100644 --- a/sandbox/docutils_xml/tag.log +++ b/sandbox/docutils_xml/tag.log @@ -1 +1 @@ -docutils_xml_1_26 +docutils_xml_1_29 diff --git a/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py b/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py index bccd1c789..9268a491b 100644 --- a/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py +++ b/sandbox/docutils_xml/test/test_parsers/test_XmlParser.py @@ -25,7 +25,7 @@ from docutils.nodes import Text from __init__ import DocutilsTestSupport -from docutils_xml.parsers.xml import XmlVisitor, XmlParser, SomeChildren +from docutils_xml.parsers.xml import Uri2Prefixes, XmlVisitor, XmlParser, SomeChildren ############################################################################### @@ -63,7 +63,7 @@ class XmlVisitorMock(XmlVisitor): """ def __recordVisit(self, elem): - ( pfx, nm ) = self.parser.elem2PrefixName(elem) + ( pfx, nm ) = self.uri2Prefixes.elem2PrefixName(elem) attrs = "" for attr in sorted(elem.keys()): attrs += " %s=%r" % ( attr, elem.get(attr) ) @@ -88,7 +88,7 @@ class XmlVisitorMock(XmlVisitor): return None def __recordDepart(self, elem): - ( pfx, nm ) = self.parser.elem2PrefixName(elem) + ( pfx, nm ) = self.uri2Prefixes.elem2PrefixName(elem) self.depth -= 1 self.document += Text("%s} %s:%s\n" % ( self.depth * self.indent, @@ -110,10 +110,11 @@ class XmlParserMock(XmlParser): Mock class recording visited nodes in the output document. """ - ns2Prefixes = { u'urn:example': ( u'ex', u'alias', u'int', ), - # u'urn:empty': u'', # Empty tag is not accepted by lxml - u'urn:other': u'ot', - } + uri2Prefixes = Uri2Prefixes(( + ( 'urn:example', 'ex', 'alias', 'int' ), + # ( 'urn:empty', u'' ), # Empty tag is not accepted by lxml + ( 'urn:other', 'ot' ), + )) visitorClass = XmlVisitorMock diff --git a/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py b/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py index 154c299a3..f79b14453 100644 --- a/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py +++ b/sandbox/docutils_xml/test/test_parsers/test_XmlVisitor.py @@ -23,7 +23,7 @@ import unittest from lxml import etree import docutils.nodes, docutils.utils -from docutils_xml.parsers.xml import XmlVisitor, XmlParser +from docutils_xml.parsers.xml import XmlVisitor, Uri2Prefixes ############################################################################### @@ -32,7 +32,7 @@ class XmlVisitorMock(XmlVisitor): Mock class recording calls. """ - def __init__(self, parser, document): + def __init__(self, uri2Prefixes, document): self.calls = [ ] """ @@ -50,7 +50,7 @@ class XmlVisitorMock(XmlVisitor): The name of the current call. """ - XmlVisitor.__init__(self, parser, document) + XmlVisitor.__init__(self, uri2Prefixes, document) def __record(self, *args, **kwargs): self.calls.append(( self.currentCall, args, kwargs )) @@ -77,7 +77,7 @@ class XmlVisitorMock(XmlVisitor): class XmlVisitorTests(unittest.TestCase): def setUp(self): - self.visitor = XmlVisitorMock(XmlParser(), + self.visitor = XmlVisitorMock(Uri2Prefixes(( )), docutils.utils.new_document(None)) def test__init__(self): diff --git a/sandbox/docutils_xml/test/test_parsers/test_XsltParser.py b/sandbox/docutils_xml/test/test_parsers/test_XsltParser.py new file mode 100644 index 000000000..687f357b3 --- /dev/null +++ b/sandbox/docutils_xml/test/test_parsers/test_XsltParser.py @@ -0,0 +1,92 @@ +# Copyright (C) 2013 Stefan Merten + +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +""" +Test XmlParser. +""" + +import unittest +import docutils.utils, docutils.frontend +import StringIO + +from __init__ import DocutilsTestSupport + +from docutils_xml.parsers.xslt import XsltParser +from docutils_xml.writers.xslt import XsltWriter + +############################################################################### + +class XsltParserTestCase(DocutilsTestSupport.ParserTestCase): + """ + Output checker for XsltParser. + """ + + identityXslt = u"""\ +<?xml version="1.0"?> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:template match="/"> + <xsl:copy-of select="/"/> + </xsl:template> +</xsl:stylesheet> +""" + + parser = XsltParser(StringIO.StringIO(identityXslt)) + """Parser shared by all XsltParserTestCases.""" + + option_parser = docutils.frontend.OptionParser(components=( + XsltParser, )) + + def test_parser(self): + settings = self.settings.copy() + settings.__dict__.update(self.suite_settings) + document = docutils.utils.new_document('test data', settings) + self.parser.parse(self.input, document) + writer = XsltWriter() + output = writer.write(document, docutils.io.StringOutput()) + self.compare_output(self.input, output, self.expected) + +############################################################################### + +class XsltParserTestSuite(DocutilsTestSupport.ParserTestSuite): + + test_case_class = XsltParserTestCase + +############################################################################### + +totest = {} + +totest['simple'] = ( + ( u"""<?xml version="1.0"?> +<rootOnly/> +""", + u"""<?xml version="1.0"?> +<rootOnly/> +""" ), + ) + +############################################################################### + +def suite(): + s = XsltParserTestSuite() + s.generateTests(totest) + return s + +############################################################################### + +if __name__ == '__main__': + import unittest + unittest.main(defaultTest='suite') diff --git a/sandbox/docutils_xml/version.py b/sandbox/docutils_xml/version.py index d4f4d8123..01673f641 100644 --- a/sandbox/docutils_xml/version.py +++ b/sandbox/docutils_xml/version.py @@ -1 +1 @@ -version = '0.1.1' +version = '0.2.1' |