Improve the warning given when no parser is specified. [bug=1780571]

author: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:05:04 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:05:04 -0400
commit: 66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d (patch)
tree: ffbe1e1371a3aa0d0461f6548f2350854fdb834a
parent: ca3ce2a593aa3ca3442c4762625ea3b26f5e9efa (diff)
download: beautifulsoup4-66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d.tar.gz
3 files changed, 45 insertions, 6 deletions
diff --git a/NEWS.txt b/NEWS.txt
index f213b5d..c3e4755 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,3 +1,7 @@
+= Unreleased =
+
+* Improved the warning given when no parser is specified.
+
 = 4.6.0 (20170507) =
 
 * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for
diff --git a/bs4/__init__.py b/bs4/__init__.py
index 7a80452..e184cce 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -82,14 +82,46 @@ class BeautifulSoup(Tag):
 
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
 
-    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
 
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  **kwargs):
-        """The Soup object is initialized as the 'root tag', and the
-        provided markup (which can be a string or a file-like object)
-        is fed into the underlying parser."""
+        """Constructor.
+
+        :param markup: A string or a file-like object representing
+        markup to be parsed.
+
+        :param features: Desirable features of the parser to be used. This
+        may be the name of a specific parser ("lxml", "lxml-xml",
+        "html.parser", or "html5lib") or it may be the type of markup
+        to be used ("html", "html5", "xml"). It's recommended that you
+        name a specific parser, so that Beautiful Soup gives you the
+        same results across platforms and virtual environments.
+
+        :param builder: A specific TreeBuilder to use instead of looking one
+        up based on `features`. You shouldn't need to use this.
+
+        :param parse_only: A SoupStrainer. Only parts of the document
+        matching the SoupStrainer will be considered. This is useful
+        when parsing part of a document that would otherwise be too
+        large to fit into memory.
+
+        :param from_encoding: A string indicating the encoding of the
+        document to be parsed. Pass this in if Beautiful Soup is
+        guessing wrongly about the document's encoding.
+
+        :param exclude_encodings: A list of strings indicating
+        encodings known to be wrong. Pass this in if you don't know
+        the document's encoding but you know Beautiful Soup's guess is
+        wrong.
+
+        :param kwargs: For backwards compatibility purposes, the
+        constructor accepts certain keyword arguments used in
+        Beautiful Soup 3. None of these arguments do anything in
+        Beautiful Soup 4 and there's no need to actually pass keyword
+        arguments into the constructor.
+        """
 
         if 'convertEntities' in kwargs:
             warnings.warn(
@@ -174,11 +206,13 @@ class BeautifulSoup(Tag):
                 caller = traceback.extract_stack()[0]
                 filename = caller[0]
                 line_number = caller[1]
-                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
+                values = dict(
                     filename=filename,
                     line_number=line_number,
                     parser=builder.NAME,
-                    markup_type=markup_type))
+                    markup_type=markup_type
+                )
+                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
 
         self.builder = builder
         self.is_xml = builder.is_xml
diff --git a/prepare-release.sh b/prepare-release.sh
index d88ff1e..c278b67 100644
--- a/prepare-release.sh
+++ b/prepare-release.sh
@@ -55,6 +55,7 @@ source ../py2-install-test-virtualenv/bin/activate
 python setup.py install
 echo "EXPECT HTML ON LINE BELOW"
 (cd .. && python -c "from bs4 import _s; print(_s('<a>foo', 'html.parser'))")
+echo
 # That should print '<a>foo</a>'
 deactivate
 rm -rf ../py2-install-test-virtualenv
author	Leonard Richardson <leonardr@segfault.org>	2018-07-14 14:05:04 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2018-07-14 14:05:04 -0400
commit	66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d (patch)
tree	ffbe1e1371a3aa0d0461f6548f2350854fdb834a
parent	ca3ce2a593aa3ca3442c4762625ea3b26f5e9efa (diff)
download	beautifulsoup4-66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d.tar.gz