diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-04-14 15:17:25 +0200 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-04-14 15:17:25 +0200 |
commit | 359f693b972c2e6b0d83d26a329d2d20b7581c48 (patch) | |
tree | 977959d0bdb53b272156702fab6f2b0405028598 /src/lxml/ElementInclude.py | |
parent | 604c5939bd8807d55e9365d7c6e787b6607dd3df (diff) | |
download | python-lxml-359f693b972c2e6b0d83d26a329d2d20b7581c48.tar.gz |
Add a `max_depth` argument to ElementInclude to prevent content explosion. Limit it to 6 by default.
Diffstat (limited to 'src/lxml/ElementInclude.py')
-rw-r--r-- | src/lxml/ElementInclude.py | 34 |
1 file changed, 30 insertions, 4 deletions
diff --git a/src/lxml/ElementInclude.py b/src/lxml/ElementInclude.py index 8badf8b4..10af448c 100644 --- a/src/lxml/ElementInclude.py +++ b/src/lxml/ElementInclude.py @@ -65,12 +65,21 @@ XINCLUDE_INCLUDE = XINCLUDE + "include" XINCLUDE_FALLBACK = XINCLUDE + "fallback" XINCLUDE_ITER_TAG = XINCLUDE + "*" +# For security reasons, the inclusion depth is limited to this read-only value by default. +DEFAULT_MAX_INCLUSION_DEPTH = 6 + + ## # Fatal include error. class FatalIncludeError(etree.LxmlSyntaxError): pass + +class LimitedRecursiveIncludeError(FatalIncludeError): + pass + + ## # ET compatible default loader. # This loader reads an included resource from disk. @@ -96,6 +105,7 @@ def default_loader(href, parse, encoding=None): file.close() return data + ## # Default loader used by lxml.etree - handles custom resolvers properly # @@ -115,6 +125,7 @@ def _lxml_default_loader(href, parse, encoding=None, parser=None): data = data.decode(encoding) return data + ## # Wrapper for ET compatibility - drops the parser @@ -133,12 +144,22 @@ def _wrap_et_loader(loader): # that implements the same interface as <b>default_loader</b>. # @param base_url The base URL of the original file, to resolve # relative include file references. +# @param max_depth The maximum number of recursive inclusions. +# Limited to reduce the risk of malicious content explosion. +# Pass None to disable the limitation. +# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded. # @throws FatalIncludeError If the function fails to include a given # resource, or if the tree contains malformed XInclude elements. # @throws IOError If the function fails to load a given resource. 
# @returns the node or its replacement if it was an XInclude node -def include(elem, loader=None, base_url=None): +def include(elem, loader=None, base_url=None, + max_depth=DEFAULT_MAX_INCLUSION_DEPTH): + if max_depth is None: + max_depth = -1 + elif max_depth < 0: + raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth) + if base_url is None: if hasattr(elem, 'getroot'): tree = elem @@ -149,9 +170,11 @@ def include(elem, loader=None, base_url=None): base_url = tree.docinfo.URL elif hasattr(elem, 'getroot'): elem = elem.getroot() - _include(elem, loader, base_url=base_url) + _include(elem, loader, base_url, max_depth) + -def _include(elem, loader=None, _parent_hrefs=None, base_url=None): +def _include(elem, loader=None, base_url=None, + max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None): if loader is not None: load_include = _wrap_et_loader(loader) else: @@ -176,13 +199,16 @@ def _include(elem, loader=None, _parent_hrefs=None, base_url=None): raise FatalIncludeError( "recursive include of %r detected" % href ) + if max_depth == 0: + raise LimitedRecursiveIncludeError( + "maximum xinclude depth reached when including file %s" % href) _parent_hrefs.add(href) node = load_include(href, parse, parser=parser) if node is None: raise FatalIncludeError( "cannot load %r as %r" % (href, parse) ) - node = _include(node, loader, _parent_hrefs) + node = _include(node, loader, href, max_depth - 1, _parent_hrefs) if e.tail: node.tail = (node.tail or "") + e.tail if parent is None: |