diff options
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/builder/_htmlparser.py | 1 | ||||
-rw-r--r-- | bs4/testing.py | 2 | ||||
-rw-r--r-- | bs4/tests/test_htmlparser.py | 5 |
3 files changed, 7 insertions, 1 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index 71604c5..ef9fd1e 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -224,6 +224,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): parser.soup = self.soup try: parser.feed(markup) + parser.close() except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) diff --git a/bs4/testing.py b/bs4/testing.py index 6ba2506..9d42702 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -334,7 +334,7 @@ Hello, world! self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) - + def test_multipart_strings(self): "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." soup = self.soup("<html><h2>\nfoo</h2><p></p></html>") diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py index c13d59f..0381c7d 100644 --- a/bs4/tests/test_htmlparser.py +++ b/bs4/tests/test_htmlparser.py @@ -34,6 +34,11 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>") self.assertSoupEquals('</br></br></br>', "") + def test_empty_element(self): + # This verifies that any buffered data present when the parser + # finishes working is handled. + self.assertSoupEquals("foo &# bar", "foo &# bar") + class TestHTMLParserSubclass(SoupTest): def test_error(self): |