Added a sanity check helper method that makes sure all the elements of a tree are properly connected via .next_element and .previous_element.

author: Leonard Richardson <leonardr@segfault.org> 2015-06-26 07:06:55 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2015-06-26 07:06:55 -0400
commit: 800d1971dcbdc6316a013a4c6ce86e8c18d48dca (patch)
tree: bb0f4f28db26827247b60a23b6b1fa2965e82248
parent: fc32a6eb0fe0e981b4f41362b97576099b8c4a4e (diff)
download: beautifulsoup4-800d1971dcbdc6316a013a4c6ce86e8c18d48dca.tar.gz
3 files changed, 21 insertions, 9 deletions
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index ad3c6ef..0778dde 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -236,9 +236,9 @@ class Element(html5lib.treebuilders._base.Node):
 
     def reparentChildren(self, new_parent):
         """Move all of this tag's children into another tag."""
-        #print "MOVE", self.element.contents
-        #print "FROM", self.element
-        #print "TO", new_parent.element
+        # print "MOVE", self.element.contents
+        # print "FROM", self.element
+        # print "TO", new_parent.element
         element = self.element
         new_parent_element = new_parent.element
         # Determine what this tag's next_element will be once all the children
@@ -289,9 +289,9 @@ class Element(html5lib.treebuilders._base.Node):
         element.contents = []
         element.next_element = final_next_element
 
-        #print "DONE WITH MOVE"
-        #print "FROM", self.element
-        #print "TO", new_parent_element
+        # print "DONE WITH MOVE"
+        # print "FROM", self.element
+        # print "TO", new_parent_element
 
     def cloneNode(self):
         tag = self.soup.new_tag(self.element.name, self.namespace)
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 68ed81f..7ced3a5 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -3,10 +3,11 @@
 
 This library converts a bytestream to Unicode through any means
 necessary. It is heavily based on code from Mark Pilgrim's Universal
-Feed Parser. It works best on XML and XML, but it does not rewrite the
+Feed Parser. It works best on XML and HTML, but it does not rewrite the
 XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
 
+from pdb import set_trace
 import codecs
 from htmlentitydefs import codepoint2name
 import re
diff --git a/bs4/testing.py b/bs4/testing.py
index 8ca3878..7232513 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -43,6 +43,16 @@ class SoupTest(unittest.TestCase):
 
         self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
 
+    def assertConnectedness(self, element):
+        """Ensure each consecutive pair yielded by element.descendants is
+        linked both ways through next_element and previous_element."""
+        earlier = None
+        for e in element.descendants:
+            # Test against None so a falsy node (e.g. empty text) isn't skipped.
+            if earlier is not None:
+                self.assertEqual(e, earlier.next_element)
+                self.assertEqual(earlier, e.previous_element)
+            earlier = e
 
 class HTMLTreeBuilderSmokeTest(object):
 
@@ -283,6 +293,7 @@ Hello, world!
         soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
         self.assertEqual("p", soup.h2.string.next_element.name)
         self.assertEqual("p", soup.p.name)
+        self.assertConnectedness(soup)
 
     def test_head_tag_between_head_and_body(self):
         "Prevent recurrence of a bug in the html5lib treebuilder."
@@ -293,6 +304,7 @@ Hello, world!
 """
         soup = self.soup(content)
         self.assertNotEqual(None, soup.html.body)
+        self.assertConnectedness(soup)
 
     def test_multiple_copies_of_a_tag(self):
         "Prevent recurrence of a bug in the html5lib treebuilder."
@@ -309,8 +321,7 @@ Hello, world!
 </html>
 """
         soup = self.soup(content)
-        [x for x in soup.article.descendants]
-
+        self.assertConnectedness(soup.article)
 
     def test_basic_namespaces(self):
         """Parsers don't need to *understand* namespaces, but at the
author	Leonard Richardson <leonardr@segfault.org>	2015-06-26 07:06:55 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2015-06-26 07:06:55 -0400
commit	800d1971dcbdc6316a013a4c6ce86e8c18d48dca (patch)
tree	bb0f4f28db26827247b60a23b6b1fa2965e82248
parent	fc32a6eb0fe0e981b4f41362b97576099b8c4a4e (diff)
download	beautifulsoup4-800d1971dcbdc6316a013a4c6ce86e8c18d48dca.tar.gz