summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2016-12-19 18:43:56 -0500
committer: Leonard Richardson <leonardr@segfault.org> 2016-12-19 18:43:56 -0500
commit: 4f3f7ec5587f2f1f37416c6129dd1a6b3eafabf6 (patch)
tree: 925f83e2ffa6c7effd486018f9e1ce441f0bfaf6
parent: fad293d237aadcbd2576e3fd347831724fe847cf (diff)
download: beautifulsoup4-4f3f7ec5587f2f1f37416c6129dd1a6b3eafabf6.tar.gz
Fixed yet another problem that caused the html5lib tree builder to create a disconnected parse tree. [bug=1629825]
-rw-r--r-- NEWS.txt 5
-rw-r--r-- bs4/builder/_html5lib.py 21
-rw-r--r-- bs4/tests/test_html5lib.py 17
3 files changed, 37 insertions, 6 deletions
diff --git a/NEWS.txt b/NEWS.txt
index d5134c4..12f979f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,3 +1,8 @@
+= Unreleased =
+
+* Fixed yet another problem that caused the html5lib tree builder to
+ create a disconnected parse tree. [bug=1629825]
+
= 4.5.1 (20160802) =
* Fixed a crash when passing Unicode markup that contained a
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index c46f882..ad0deea 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -221,6 +221,8 @@ class Element(treebuilder_base.Node):
most_recent_element=most_recent_element)
def getAttributes(self):
+ if isinstance(self.element, Comment):
+ return {}
return AttrList(self.element)
def setAttributes(self, attributes):
@@ -274,6 +276,7 @@ class Element(treebuilder_base.Node):
# print "MOVE", self.element.contents
# print "FROM", self.element
# print "TO", new_parent.element
+
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
@@ -292,7 +295,6 @@ class Element(treebuilder_base.Node):
new_parents_last_descendant_next_element = new_parent_element.next_element
to_append = element.contents
- append_after = new_parent_element.contents
if len(to_append) > 0:
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
@@ -309,12 +311,19 @@ class Element(treebuilder_base.Node):
if new_parents_last_child:
new_parents_last_child.next_sibling = first_child
- # Fix the last child's next_element and next_sibling
- last_child = to_append[-1]
- last_child.next_element = new_parents_last_descendant_next_element
+ # Find the very last element being moved. It is now the
+ # parent's last descendant. It has no .next_sibling and
+ # its .next_element is whatever the previous last
+ # descendant had.
+ last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
+
+ last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
if new_parents_last_descendant_next_element:
- new_parents_last_descendant_next_element.previous_element = last_child
- last_child.next_sibling = None
+ # TODO: This code has no test coverage and I'm not sure
+ # how to get html5lib to go through this path, but it's
+ # just the other side of the previous line.
+ new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
+ last_childs_last_descendant.next_sibling = None
for child in to_append:
child.parent = new_parent_element
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
index 8e3cba6..2eb41b3 100644
--- a/bs4/tests/test_html5lib.py
+++ b/bs4/tests/test_html5lib.py
@@ -95,6 +95,23 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
assert space1.next_element is tbody1
assert tbody2.next_element is space2
+ def test_reparented_markup_containing_children(self):
+ markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
+ soup = self.soup(markup)
+ noscript = soup.noscript
+ self.assertEqual("target", noscript.next_element)
+ target = soup.find(string='target')
+
+ # The 'aftermath' string was duplicated; we want the second one.
+ final_aftermath = soup.find_all(string='aftermath')[-1]
+ import pdb; pdb.set_trace()
+
+ # The <noscript> tag was moved beneath a copy of the <a> tag,
+ # but the 'target' string within is still connected to the
+ # (second) 'aftermath' string.
+ self.assertEqual(final_aftermath, target.next_element)
+ self.assertEqual(target, final_aftermath.previous_element)
+
def test_processing_instruction(self):
"""Processing instructions become comments."""
markup = b"""<?PITarget PIContent?>"""