summary refs log tree commit diff
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2018-07-14 22:49:10 -0400
committer Leonard Richardson <leonardr@segfault.org> 2018-07-14 22:49:10 -0400
commit b2836d45288e0de1474ecc555f6e3aac51f3168c (patch)
tree 695b12bc3288289bab365cc53b68f6261cb8b2f5
parent c60b55e4362c41825098504c177784395624272c (diff)
download beautifulsoup4-b2836d45288e0de1474ecc555f6e3aac51f3168c.tar.gz
Fixed a disconnected parse tree when one BeautifulSoup object was
inserted into another. [bug=1105148]
-rw-r--r-- bs4/element.py 8
-rw-r--r-- bs4/tests/test_tree.py 20
2 files changed, 28 insertions, 0 deletions
diff --git a/bs4/element.py b/bs4/element.py
index e4f2303..5ee9887 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -319,6 +319,14 @@ class PageElement(object):
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
+ from bs4 import BeautifulSoup
+ if isinstance(new_child, BeautifulSoup):
+ # We don't want to end up with a situation where one BeautifulSoup
+ # object contains another. Insert the children one at a time.
+ for subchild in list(new_child.contents):
+ self.insert(position, subchild)
+ position += 1
+ return
position = min(position, len(self.contents))
if hasattr(new_child, 'parent') and new_child.parent is not None:
# We're 'inserting' an element that's already one
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 297b4aa..e75cf1d 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -821,6 +821,26 @@ class TestTreeModification(SoupTest):
soup = self.soup(text)
self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
+ def test_insert_beautifulsoup_object_inserts_children(self):
+ """Inserting one BeautifulSoup object into another actually inserts all
+ of its children -- you'll never combine BeautifulSoup objects.
+ """
+ soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
+
+ text = "<p>p2</p><p>p3</p>"
+ to_insert = self.soup(text)
+ soup.insert(1, to_insert)
+
+ for i in soup.descendants:
+ assert not isinstance(i, BeautifulSoup)
+
+ p1, p2, p3, p4 = list(soup.children)
+ self.assertEquals("And now, a word:", p1.string)
+ self.assertEquals("p2", p2.string)
+ self.assertEquals("p3", p3.string)
+ self.assertEquals("And we're back.", p4.string)
+
+
def test_replace_with_maintains_next_element_throughout(self):
soup = self.soup('<p><a>one</a><b>three</b></p>')
a = soup.a