summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2019-02-28 20:53:29 +0100
committer Stefan Behnel <stefan_ml@behnel.de> 2019-02-28 20:53:29 +0100
commit f529aeb1bb234cf7dc0cf23e1e7fd98ce4953e85 (patch)
tree 68b16a4f04e39d9b8e173fc35a7fd00400e9e953
parent f8bb21857f8cfad0c707b6785ae0ec1832011fbf (diff)
download python-lxml-f529aeb1bb234cf7dc0cf23e1e7fd98ce4953e85.tar.gz
Fix crash due to incorrect dict handling for text nodes.
The C doc link needs to be set after removing text from the dict and before putting it there. Thus, it is best to separate the adaptations into two traversals again.
-rw-r--r-- src/lxml/apihelpers.pxi 4
-rw-r--r-- src/lxml/proxy.pxi 35
2 files changed, 23 insertions, 16 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index bccf5fbb..cf932d43 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -1269,7 +1269,7 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node,
cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1:
- """Simple version of 'xmlAddChild()' that does not deep-fix the document links.
+ """Adaptation of 'xmlAddChild()' that deep-fix the document links iteratively.
"""
assert _isElement(c_node)
c_node.parent = c_parent
@@ -1279,6 +1279,8 @@ cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1:
c_node.prev = c_parent.last
c_parent.last.next = c_node
c_parent.last = c_node
+
+ _setTreeDoc(c_node, c_parent.doc)
return 0
diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi
index 2f8e76c5..ff277c53 100644
--- a/src/lxml/proxy.pxi
+++ b/src/lxml/proxy.pxi
@@ -332,16 +332,11 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
cdef proxy_count = 0
if not tree._isElementOrXInclude(c_element):
- c_element.doc = c_doc
- _fixDocChildren(c_element.children, c_doc)
return 0
c_start_node = c_element
tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
- # 0) set C doc link
- c_element.doc = c_doc
-
if tree._isElementOrXInclude(c_element):
if hasProxy(c_element):
proxy_count += 1
@@ -352,7 +347,6 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
try:
_stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list)
except:
- _fixDocChildren(c_start_node.children, c_doc)
_cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list)
raise
@@ -365,14 +359,6 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
while c_node is not NULL:
if c_node.ns is not NULL:
_fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list)
-
- # remove attribute from ID table (see xmlSetTreeDoc() in libxml2's tree.c)
- c_attr = <tree.xmlAttr*>c_node
- if c_attr.atype == tree.XML_ATTRIBUTE_ID:
- tree.xmlRemoveID(c_source_doc, c_attr)
- # set C doc link also for attributes
- c_node.doc = c_doc
- _fixDocChildren(c_node.children, c_doc)
c_node = c_node.next
tree.END_FOR_EACH_FROM(c_element)
@@ -405,10 +391,29 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
return 0
+cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc):
+ """Adaptation of 'xmlSetTreeDoc()' that deep-fix the document links iteratively.
+ It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42
+ """
+ tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1)
+ if c_node.type == tree.XML_ELEMENT_NODE:
+ c_attr = <tree.xmlAttr*>c_node.properties
+ while c_attr:
+ if c_attr.atype == tree.XML_ATTRIBUTE_ID:
+ tree.xmlRemoveID(c_node.doc, c_attr)
+ c_attr.doc = c_doc
+ _fixDocChildren(c_attr.children, c_doc)
+ c_attr = c_attr.next
+ # Set doc link for all nodes, not only elements.
+ c_node.doc = c_doc
+ tree.END_FOR_EACH_FROM(c_node)
+
+
cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc):
while c_child:
c_child.doc = c_doc
- _fixDocChildren(c_child.children, c_doc)
+ if c_child.children:
+ _fixDocChildren(c_child.children, c_doc)
c_child = c_child.next