From 66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sat, 14 Jul 2018 14:05:04 -0400 Subject: Improve the warning given when no parser is specified. [bug=1780571] --- NEWS.txt | 4 ++++ bs4/__init__.py | 46 ++++++++++++++++++++++++++++++++++++++++------ prepare-release.sh | 1 + 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/NEWS.txt b/NEWS.txt index f213b5d..c3e4755 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -1,3 +1,7 @@ += Unreleased + +* Improved the warning given when no parser is specified. + = 4.6.0 (20170507) = * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for diff --git a/bs4/__init__.py b/bs4/__init__.py index 7a80452..e184cce 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -82,14 +82,46 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n" + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, **kwargs): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser.""" + """Constructor. + + :param markup: A string or a file-like object representing + markup to be parsed. + + :param features: Desirable features of the parser to be used. This + may be the name of a specific parser ("lxml", "lxml-xml", + "html.parser", or "html5lib") or it may be the type of markup + to be used ("html", "html5", "xml"). It's recommended that you + name a specific parser, so that Beautiful Soup gives you the + same results across platforms and virtual environments. + + :param builder: A specific TreeBuilder to use instead of looking one + up based on `features`. You shouldn't need to use this. + + :param parse_only: A SoupStrainer. Only parts of the document + matching the SoupStrainer will be considered. This is useful + when parsing part of a document that would otherwise be too + large to fit into memory. + + :param from_encoding: A string indicating the encoding of the + document to be parsed. Pass this in if Beautiful Soup is + guessing wrongly about the document's encoding. + + :param exclude_encodings: A list of strings indicating + encodings known to be wrong. Pass this in if you don't know + the document's encoding but you know Beautiful Soup's guess is + wrong. + + :param kwargs: For backwards compatibility purposes, the + constructor accepts certain keyword arguments used in + Beautiful Soup 3. None of these arguments do anything in + Beautiful Soup 4 and there's no need to actually pass keyword + arguments into the constructor. + """ if 'convertEntities' in kwargs: warnings.warn( @@ -174,11 +206,13 @@ class BeautifulSoup(Tag): caller = traceback.extract_stack()[0] filename = caller[0] line_number = caller[1] - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + values = dict( filename=filename, line_number=line_number, parser=builder.NAME, - markup_type=markup_type)) + markup_type=markup_type + ) + warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2) self.builder = builder self.is_xml = builder.is_xml diff --git a/prepare-release.sh b/prepare-release.sh index d88ff1e..c278b67 100644 --- a/prepare-release.sh +++ b/prepare-release.sh @@ -55,6 +55,7 @@ source ../py2-install-test-virtualenv/bin/activate python setup.py install echo "EXPECT HTML ON LINE BELOW" (cd .. && python -c "from bs4 import _s; print(_s('foo', 'html.parser'))") +echo # That should print 'foo' deactivate rm -rf ../py2-install-test-virtualenv -- cgit v1.2.1