Stop data loss when encountering an empty numeric entity, and
possibly in other cases. Thanks to tos.kamiya for the fix. [bug=1698503]
author: Leonard Richardson <leonardr@segfault.org> 2018-07-15 08:27:40 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2018-07-15 08:27:40 -0400
commit: b5ffba33327acaf51a2274f9e0a5305b5fb8bdf9 (patch)
tree: e627743f2434291a3cb12139c4ae45b8088d8592
parent: b2836d45288e0de1474ecc555f6e3aac51f3168c (diff)
download: beautifulsoup4-b5ffba33327acaf51a2274f9e0a5305b5fb8bdf9.tar.gz
4 files changed, 10 insertions, 1 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 909de65..4788489 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,5 +1,8 @@
 = Unreleased
 
+* Stop data loss when encountering an empty numeric entity, and
+  possibly in other cases.  Thanks to tos.kamiya for the fix. [bug=1698503]
+
 * Improved the warning given when no parser is specified. [bug=1780571]
 
 * Fixed code that was causing deprecation warnings in recent Python 3
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 71604c5..ef9fd1e 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -224,6 +224,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser.soup = self.soup
         try:
             parser.feed(markup)
+            parser.close()
         except HTMLParseError, e:
             warnings.warn(RuntimeWarning(
                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
diff --git a/bs4/testing.py b/bs4/testing.py
index 6ba2506..9d42702 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -334,7 +334,7 @@ Hello, world!
         self.assertSoupEquals("&#10000000000000;", expect)
         self.assertSoupEquals("&#x10000000000000;", expect)
         self.assertSoupEquals("&#1000000000;", expect)
-
+        
     def test_multipart_strings(self):
         "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
         soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index c13d59f..0381c7d 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -34,6 +34,11 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
         self.assertSoupEquals('</br></br></br>', "")
 
+    def test_empty_element(self):
+        # This verifies that any buffered data present when the parser
+        # finishes working is handled.
+        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
+
 
 class TestHTMLParserSubclass(SoupTest):
     def test_error(self):
author	Leonard Richardson <leonardr@segfault.org>	2018-07-15 08:27:40 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2018-07-15 08:27:40 -0400
commit	b5ffba33327acaf51a2274f9e0a5305b5fb8bdf9 (patch)
tree	e627743f2434291a3cb12139c4ae45b8088d8592
parent	b2836d45288e0de1474ecc555f6e3aac51f3168c (diff)
download	beautifulsoup4-b5ffba33327acaf51a2274f9e0a5305b5fb8bdf9.tar.gz