diff options
author | scoder <none@none> | 2006-12-02 22:07:16 +0100 |
---|---|---|
committer | scoder <none@none> | 2006-12-02 22:07:16 +0100 |
commit | 5b54e9465117f17b80953d261223445a2122edf1 (patch) | |
tree | f587e12263ee6a60f494fefd2e34a0c566115e87 /src/lxml/ElementInclude.py | |
parent | 2dd4bab1ecffcb2cfbfdd41529edb53a12ac7709 (diff) | |
download | python-lxml-5b54e9465117f17b80953d261223445a2122edf1.tar.gz |
[svn r2085] integrated a modified ElementInclude.py: reuse the parser of the original document, recursive includes, based on Element.getiterator()
--HG--
branch : trunk
Diffstat (limited to 'src/lxml/ElementInclude.py')
-rw-r--r-- | src/lxml/ElementInclude.py | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/src/lxml/ElementInclude.py b/src/lxml/ElementInclude.py new file mode 100644 index 00000000..6a449a5d --- /dev/null +++ b/src/lxml/ElementInclude.py @@ -0,0 +1,196 @@ +# +# ElementTree +# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ +# +# limited xinclude support for element trees +# +# history: +# 2003-08-15 fl created +# 2003-11-14 fl fixed default loader +# +# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +## +# Limited XInclude support for the ElementTree package. +## + +import copy +from lxml import etree + +try: + set +except NameError: + from sets import Set as set + +XINCLUDE = "{http://www.w3.org/2001/XInclude}" + +XINCLUDE_INCLUDE = XINCLUDE + "include" +XINCLUDE_FALLBACK = XINCLUDE + "fallback" + +## +# Fatal include error. + +class FatalIncludeError(etree.LxmlSyntaxError): + pass + +## +# ET compatible default loader. +# This loader reads an included resource from disk. +# +# @param href Resource reference. +# @param parse Parse mode. Either "xml" or "text". +# @param encoding Optional text encoding. +# @return The expanded resource. If the parse mode is "xml", this +# is an ElementTree instance. If the parse mode is "text", this +# is a Unicode string. If the loader fails, it can return None +# or raise an IOError exception. +# @throws IOError If the loader fails to load the resource. + +def default_loader(href, parse, encoding=None): + file = open(href) + if parse == "xml": + data = etree.parse(file).getroot() + else: + data = file.read() + if encoding: + data = data.decode(encoding) + file.close() + return data + +## +# Default loader used by lxml.etree - handles custom resolvers properly +# + +def _lxml_default_loader(href, parse, encoding=None, parser=None): + if parse == "xml": + data = etree.parse(href, parser).getroot() + else: + data = open(href).read() + if encoding: + data = data.decode(encoding) + return data + +## +# Wrapper for ET compatibility - drops the parser + +def _wrap_et_loader(loader): + def load(href, parse, encoding=None, parser=None): + return loader(href, parse, encoding) + return load + + +## +# Expand XInclude directives. +# +# @param elem Root element. +# @param loader Optional resource loader. If omitted, it defaults +# to {@link default_loader}. If given, it should be a callable +# that implements the same interface as <b>default_loader</b>. +# @throws FatalIncludeError If the function fails to include a given +# resource, or if the tree contains malformed XInclude elements. +# @throws IOError If the function fails to load a given resource. +# @returns the node or its replacement if it was an XInclude node + +def include(elem, loader=None): + if hasattr(elem, 'getroot'): + #if hasattr(elem, 'docinfo'): + # base_url = elem.docinfo.URL + _include(elem.getroot(), loader) + else: + _include(elem, loader) + +def _include(elem, loader=None, _parent_hrefs=None): + if loader is not None: + load_include = _wrap_et_loader(loader) + else: + load_include = _lxml_default_loader + + if _parent_hrefs is None: + _parent_hrefs = set() + + parser = elem.getroottree().parser + + include_elements = list( + elem.getiterator('{http://www.w3.org/2001/XInclude}*')) + + for e in include_elements: + if e.tag == XINCLUDE_INCLUDE: + # process xinclude directive + href = e.get("href") + parse = e.get("parse", "xml") + parent = e.getparent() + if parse == "xml": + if href in _parent_hrefs: + raise FatalIncludeError( + "recursive include of %r detected" % href + ) + _parent_hrefs.add(href) + node = load_include(href, parse, parser=parser) + if node is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + node = _include(node, loader, _parent_hrefs) + if e.tail: + node.tail = (node.tail or "") + e.tail + if parent is None: + return node # replaced the root node! + parent.replace(e, node) + elif parse == "text": + text = load_include(href, parse, encoding=e.get("encoding")) + if text is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + predecessor = e.getprevious() + if predecessor is not None: + predecessor.tail = (predecessor.tail or "") + text + elif parent is None: + return text # replaced the root node! + else: + parent.text = (parent.text or "") + text + (e.tail or "") + parent.remove(e) + else: + raise FatalIncludeError( + "unknown parse type in xi:include tag (%r)" % parse + ) + elif e.tag == XINCLUDE_FALLBACK: + raise FatalIncludeError( + "xi:fallback tag must be child of xi:include (%r)" % e.tag + ) + else: + raise FatalIncludeError( + "Invalid element found in XInclude namespace (%r)" % e.tag + ) + return elem |