From b2836d45288e0de1474ecc555f6e3aac51f3168c Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sat, 14 Jul 2018 22:49:10 -0400 Subject: Fixed a disconnected parse tree when one BeautifulSoup object was inserted into another. [bug=1105148] --- bs4/element.py | 8 ++++++++ bs4/tests/test_tree.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/bs4/element.py b/bs4/element.py index e4f2303..5ee9887 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -319,6 +319,14 @@ class PageElement(object): and not isinstance(new_child, NavigableString)): new_child = NavigableString(new_child) + from bs4 import BeautifulSoup + if isinstance(new_child, BeautifulSoup): + # We don't want to end up with a situation where one BeautifulSoup + # object contains another. Insert the children one at a time. + for subchild in list(new_child.contents): + self.insert(position, subchild) + position += 1 + return position = min(position, len(self.contents)) if hasattr(new_child, 'parent') and new_child.parent is not None: # We're 'inserting' an element that's already one diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 297b4aa..e75cf1d 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -821,6 +821,26 @@ class TestTreeModification(SoupTest): soup = self.soup(text) self.assertRaises(ValueError, soup.a.insert, 0, soup.a) + def test_insert_beautifulsoup_object_inserts_children(self): + """Inserting one BeautifulSoup object into another actually inserts all + of its children -- you'll never combine BeautifulSoup objects. + """ + soup = self.soup("

<p>And now, a word:</p><p>And we're back.</p>

") + + text = "

<p>p2</p><p>p3</p>

" + to_insert = self.soup(text) + soup.insert(1, to_insert) + + for i in soup.descendants: + assert not isinstance(i, BeautifulSoup) + + p1, p2, p3, p4 = list(soup.children) + self.assertEquals("And now, a word:", p1.string) + self.assertEquals("p2", p2.string) + self.assertEquals("p3", p3.string) + self.assertEquals("And we're back.", p4.string) + + def test_replace_with_maintains_next_element_throughout(self): soup = self.soup('

onethree

') a = soup.a -- cgit v1.2.1