diff options
author | Leonard Richardson <leonardr@segfault.org> | 2014-12-07 09:31:30 -0500 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2014-12-07 09:31:30 -0500 |
commit | 237b8f895f056fdac201e54009bc20fdc05faee0 (patch) | |
tree | a70bcedc008c367fd565b5d9a6937a687fe39727 /bs4/builder | |
parent | bcd7af0e9159d97aa511fb2d879424d1c1c5aadf (diff) | |
download | beautifulsoup4-237b8f895f056fdac201e54009bc20fdc05faee0.tar.gz |
Issue a warning if the BeautifulSoup constructor arguments do not explicitly name a parser.
Diffstat (limited to 'bs4/builder')
-rw-r--r-- | bs4/builder/__init__.py | 1 |
-rw-r--r-- | bs4/builder/_html5lib.py | 4 |
-rw-r--r-- | bs4/builder/_htmlparser.py | 3 |
-rw-r--r-- | bs4/builder/_lxml.py | 8 |
4 files changed, 12 insertions, 4 deletions
```diff
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 740f5f2..0e84fae 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -80,6 +80,7 @@ builder_registry = TreeBuilderRegistry()
 class TreeBuilder(object):
     """Turn a document into a Beautiful Soup object tree."""
+    NAME = "[Unknown tree builder]"
     features = []
     is_xml = False
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 6446c2e..6013575 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -22,7 +22,9 @@ from bs4.element import (
 class HTML5TreeBuilder(HTMLTreeBuilder):
     """Use html5lib to build a tree."""
-    features = ['html5lib', PERMISSIVE, HTML_5, HTML]
+    NAME = "html5lib"
+
+    features = [NAME, PERMISSIVE, HTML_5, HTML]
     def prepare_markup(self, markup, user_specified_encoding):
         # Store the user-specified encoding for use later on.
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ca8d8b8..3e78c65 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -128,7 +128,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
 class HTMLParserTreeBuilder(HTMLTreeBuilder):
     is_xml = False
-    features = [HTML, STRICT, HTMLPARSER]
+    NAME = HTMLPARSER
+    features = [NAME, HTML, STRICT]
     def __init__(self, *args, **kwargs):
         if CONSTRUCTOR_TAKES_STRICT:
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index fa5d498..110e9d2 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -25,8 +25,10 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     is_xml = True
+    NAME = "lxml-xml"
+
     # Well, it's permissive by XML parser standards.
-    features = [LXML, XML, FAST, PERMISSIVE]
+    features = [NAME, LXML, XML, FAST, PERMISSIVE]
     CHUNK_SIZE = 512
@@ -212,7 +214,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
-    features = [LXML, HTML, FAST, PERMISSIVE]
+    NAME = [LXML, "lxml-html"]
+
+    features = NAME + [HTML, FAST, PERMISSIVE]
     is_xml = False
     def default_parser(self, encoding):
```