summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2015-06-28 09:23:50 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2015-06-28 09:23:50 -0400
commit: 62d68526a2cd08e74170c736a5f7ce96f62e726c (patch)
tree: 4528d809559b9f54cf0ab4a1c563867c12503a95
parent: 573f1021beaaec5cf48149bde8b30f3c5b82d6b8 (diff)
download: beautifulsoup4-62d68526a2cd08e74170c736a5f7ce96f62e726c.tar.gz
Fixed a bug where Element.extract() could create an infinite loop in
the remaining tree.
-rw-r--r-- NEWS.txt 3
-rw-r--r-- bs4/builder/_html5lib.py 13
-rw-r--r-- bs4/element.py 11
3 files changed, 17 insertions, 10 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 9126ddf..cf7e2b8 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -37,6 +37,9 @@
* Fixed yet another bug that caused a disconnected tree when html5lib
copied an element from one part of the tree to another. [bug=1270611]
+* Fixed a bug where Element.extract() could create an infinite loop in
+ the remaining tree.
+
* The select() method now supports selector grouping. Patch by
Francisco Canas [bug=1191917]
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 7788063..4eaaaec 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -2,6 +2,7 @@ __all__ = [
'HTML5TreeBuilder',
]
+from pdb import set_trace
import warnings
from bs4.builder import (
PERMISSIVE,
@@ -243,9 +244,9 @@ class Element(html5lib.treebuilders._base.Node):
def reparentChildren(self, new_parent):
"""Move all of this tag's children into another tag."""
- # print "MOVE", self.element.contents
- # print "FROM", self.element
- # print "TO", new_parent.element
+ print "MOVE", self.element.contents
+ print "FROM", self.element
+ print "TO", new_parent.element
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
@@ -296,9 +297,9 @@ class Element(html5lib.treebuilders._base.Node):
element.contents = []
element.next_element = final_next_element
- # print "DONE WITH MOVE"
- # print "FROM", self.element
- # print "TO", new_parent_element
+ print "DONE WITH MOVE"
+ print "FROM", self.element
+ print "TO", new_parent_element
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
diff --git a/bs4/element.py b/bs4/element.py
index 7c787b1..d1b7c12 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -261,17 +261,20 @@ class PageElement(object):
last_child = self._last_descendant()
next_element = last_child.next_element
- if self.previous_element is not None:
+ if (self.previous_element is not None and
+ self.previous_element != next_element):
self.previous_element.next_element = next_element
- if next_element is not None:
+ if next_element is not None and next_element != self.previous_element:
next_element.previous_element = self.previous_element
self.previous_element = None
last_child.next_element = None
self.parent = None
- if self.previous_sibling is not None:
+ if (self.previous_sibling is not None
+ and self.previous_sibling != self.next_sibling):
self.previous_sibling.next_sibling = self.next_sibling
- if self.next_sibling is not None:
+ if (self.next_sibling is not None
+ and self.next_sibling != self.previous_sibling):
self.next_sibling.previous_sibling = self.previous_sibling
self.previous_sibling = self.next_sibling = None
return self