summary refs log tree commit diff
path: root/bs4/builder
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2014-12-07 09:31:30 -0500
committer Leonard Richardson <leonardr@segfault.org> 2014-12-07 09:31:30 -0500
commit 237b8f895f056fdac201e54009bc20fdc05faee0 (patch)
tree a70bcedc008c367fd565b5d9a6937a687fe39727 /bs4/builder
parent bcd7af0e9159d97aa511fb2d879424d1c1c5aadf (diff)
download beautifulsoup4-237b8f895f056fdac201e54009bc20fdc05faee0.tar.gz
Issue a warning if the BeautifulSoup constructor arguments do not explicitly name a parser.
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/__init__.py 1
-rw-r--r-- bs4/builder/_html5lib.py 4
-rw-r--r-- bs4/builder/_htmlparser.py 3
-rw-r--r-- bs4/builder/_lxml.py 8
4 files changed, 12 insertions, 4 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 740f5f2..0e84fae 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -80,6 +80,7 @@ builder_registry = TreeBuilderRegistry()
class TreeBuilder(object):
"""Turn a document into a Beautiful Soup object tree."""
+ NAME = "[Unknown tree builder]"
features = []
is_xml = False
diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
index 6446c2e..6013575 100644
--- a/bs4/builder/_html5lib.py
+++ b/bs4/builder/_html5lib.py
@@ -22,7 +22,9 @@ from bs4.element import (
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use html5lib to build a tree."""
- features = ['html5lib', PERMISSIVE, HTML_5, HTML]
+ NAME = "html5lib"
+
+ features = [NAME, PERMISSIVE, HTML_5, HTML]
def prepare_markup(self, markup, user_specified_encoding):
# Store the user-specified encoding for use later on.
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index ca8d8b8..3e78c65 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -128,7 +128,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
class HTMLParserTreeBuilder(HTMLTreeBuilder):
is_xml = False
- features = [HTML, STRICT, HTMLPARSER]
+ NAME = HTMLPARSER
+ features = [NAME, HTML, STRICT]
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT:
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index fa5d498..110e9d2 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -25,8 +25,10 @@ class LXMLTreeBuilderForXML(TreeBuilder):
is_xml = True
+ NAME = "lxml-xml"
+
# Well, it's permissive by XML parser standards.
- features = [LXML, XML, FAST, PERMISSIVE]
+ features = [NAME, LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE = 512
@@ -212,7 +214,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
- features = [LXML, HTML, FAST, PERMISSIVE]
+ NAME = [LXML, "lxml-html"]
+
+ features = NAME + [HTML, FAST, PERMISSIVE]
is_xml = False
def default_parser(self, encoding):