summary | refs | log | tree | commit | diff
path: root/bs4/builder
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2018-07-14 15:35:24 -0400
committer Leonard Richardson <leonardr@segfault.org> 2018-07-14 15:35:24 -0400
commit 670ecfdeb3b315cf05777e6bd361539a21604907 (patch)
tree a6e66959686d963952a4110deff53319974ef491 /bs4/builder
parent d48fd72468023868ade770abe8ea824bff7df4cc (diff)
download beautifulsoup4-670ecfdeb3b315cf05777e6bd361539a21604907.tar.gz
Stopped HTMLParser from raising an exception in very rare cases of
bad markup. [bug=1708831]
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/_htmlparser.py 13
-rw-r--r-- bs4/builder/_lxml.py 8
2 files changed, 18 insertions, 3 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 67890b3..71604c5 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -64,7 +64,18 @@ class BeautifulSoupHTMLParser(HTMLParser):
# order. It's a list of closing tags we've already handled and
# will ignore, assuming they ever show up.
self.already_closed_empty_element = []
-
+
+ def error(self, msg):
+ """In Python 3, HTMLParser subclasses must implement error(), although this
+ requirement doesn't appear to be documented.
+
+ In Python 2, HTMLParser implements error() as raising an exception.
+
+ In any event, this method is called only on very strange markup and our best strategy
+ is to pretend it didn't happen and keep going.
+ """
+ warnings.warn(msg)
+
def handle_startendtag(self, name, attrs):
# This is only called when the markup looks like
# <tag/>.
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index d2ca287..3439271 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -5,9 +5,13 @@ __all__ = [
'LXMLTreeBuilder',
]
+try:
+ from collections.abc import Callable # Python 3.3+
+except ImportError , e:
+ from collections import Callable
+
from io import BytesIO
from StringIO import StringIO
-import collections
from lxml import etree
from bs4.element import (
Comment,
@@ -58,7 +62,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# Use the default parser.
parser = self.default_parser(encoding)
- if isinstance(parser, collections.Callable):
+ if isinstance(parser, Callable):
# Instantiate the parser with default arguments
parser = parser(target=self, strip_cdata=False, encoding=encoding)
return parser