diff options
author | Leonard Richardson <leonardr@segfault.org> | 2018-07-14 22:49:10 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2018-07-14 22:49:10 -0400 |
commit | b2836d45288e0de1474ecc555f6e3aac51f3168c (patch) | |
tree | 695b12bc3288289bab365cc53b68f6261cb8b2f5 | |
parent | c60b55e4362c41825098504c177784395624272c (diff) | |
download | beautifulsoup4-b2836d45288e0de1474ecc555f6e3aac51f3168c.tar.gz |
Fixed a disconnected parse tree when one BeautifulSoup object was
inserted into another. [bug=1105148]
-rw-r--r-- | bs4/element.py | 8 ++++++++
-rw-r--r-- | bs4/tests/test_tree.py | 20 ++++++++++++++++++++
2 files changed, 28 insertions, 0 deletions
```diff
diff --git a/bs4/element.py b/bs4/element.py
index e4f2303..5ee9887 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -319,6 +319,14 @@ class PageElement(object):
             and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
 
+        from bs4 import BeautifulSoup
+        if isinstance(new_child, BeautifulSoup):
+            # We don't want to end up with a situation where one BeautifulSoup
+            # object contains another. Insert the children one at a time.
+            for subchild in list(new_child.contents):
+                self.insert(position, subchild)
+                position += 1
+            return
         position = min(position, len(self.contents))
         if hasattr(new_child, 'parent') and new_child.parent is not None:
             # We're 'inserting' an element that's already one
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index 297b4aa..e75cf1d 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -821,6 +821,26 @@ class TestTreeModification(SoupTest):
         soup = self.soup(text)
         self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
 
+    def test_insert_beautifulsoup_object_inserts_children(self):
+        """Inserting one BeautifulSoup object into another actually inserts all
+        of its children -- you'll never combine BeautifulSoup objects.
+        """
+        soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
+
+        text = "<p>p2</p><p>p3</p>"
+        to_insert = self.soup(text)
+        soup.insert(1, to_insert)
+
+        for i in soup.descendants:
+            assert not isinstance(i, BeautifulSoup)
+
+        p1, p2, p3, p4 = list(soup.children)
+        self.assertEquals("And now, a word:", p1.string)
+        self.assertEquals("p2", p2.string)
+        self.assertEquals("p3", p3.string)
+        self.assertEquals("And we're back.", p4.string)
+
+
     def test_replace_with_maintains_next_element_throughout(self):
         soup = self.soup('<p><a>one</a><b>three</b></p>')
         a = soup.a
```