Fixed code that was causing deprecation warnings in recent Python 3
versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
author: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:15:29 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:15:29 -0400
commit: 73b0fdbccb599c5bb77d7727af74c0d73a72e41d (patch)
tree: 871311ccaca71521e6a78a04ea9938f66dbe2303
parent: 66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d (diff)
download: beautifulsoup4-73b0fdbccb599c5bb77d7727af74c0d73a72e41d.tar.gz
4 files changed, 17 insertions, 11 deletions
diff --git a/NEWS.txt b/NEWS.txt
index c3e4755..2fc0a6e 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,6 +1,9 @@
 = Unreleased
 
-* Improved the warning given when no parser is specified.
+* Improved the warning given when no parser is specified. [bug=1780571]
+
+* Fixed code that was causing deprecation warnings in recent Python 3
+  versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
 
 = 4.6.0 (20170507) =
 
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 7965565..be46b39 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -46,9 +46,9 @@ except ImportError:
     pass
 
 xml_encoding_re = re.compile(
-    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+    '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I)
 html_meta_re = re.compile(
-    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+    '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
 
 class EntitySubstitution(object):
 
@@ -82,7 +82,7 @@ class EntitySubstitution(object):
         }
 
     BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
-                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+                                           "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
                                            ")")
 
     AMPERSAND_OR_BRACKET = re.compile("([<>&])")
diff --git a/bs4/element.py b/bs4/element.py
index 9ef75f8..e4f2303 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -2,7 +2,10 @@
 # found in the LICENSE file.
 __license__ = "MIT"
 
-import collections
+try:
+    from collections.abc import Callable # Python 3.6
+except ImportError , e:
+    from collections import Callable
 import re
 import shlex
 import sys
@@ -12,7 +15,7 @@ from bs4.dammit import EntitySubstitution
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 PY3K = (sys.version_info[0] > 2)
 
-whitespace_re = re.compile("\s+")
+whitespace_re = re.compile(r"\s+")
 
 def _alias(attr):
     """Alias one attribute name to another for backward compatibility"""
@@ -69,7 +72,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
     The value of the 'content' attribute will be one of these objects.
     """
 
-    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
+    CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
 
     def __new__(cls, original_value):
         match = cls.CHARSET_RE.search(original_value)
@@ -1418,7 +1421,7 @@ class Tag(PageElement):
                 if tag_name == '':
                     raise ValueError(
                         "A pseudo-class must be prefixed with a tag name.")
-                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
+                pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
                 found = []
                 if pseudo_attributes is None:
                     pseudo_type = pseudo
@@ -1652,7 +1655,7 @@ class SoupStrainer(object):
             markup = markup_name
             markup_attrs = markup
         call_function_with_tag_data = (
-            isinstance(self.name, collections.Callable)
+            isinstance(self.name, Callable)
             and not isinstance(markup_name, Tag))
 
         if ((not self.name)
@@ -1732,7 +1735,7 @@ class SoupStrainer(object):
             # True matches any non-None value.
             return markup is not None
 
-        if isinstance(match_against, collections.Callable):
+        if isinstance(match_against, Callable):
             return match_against(markup)
 
         # Custom callables take the tag as an argument, but all
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index c0e7c40..297b4aa 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -605,7 +605,7 @@ class SiblingTest(TreeTest):
                     </html>'''
         # All that whitespace looks good but makes the tests more
         # difficult. Get rid of it.
-        markup = re.compile("\n\s*").sub("", markup)
+        markup = re.compile(r"\n\s*").sub("", markup)
         self.tree = self.soup(markup)
author	Leonard Richardson <leonardr@segfault.org>	2018-07-14 14:15:29 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2018-07-14 14:15:29 -0400
commit	73b0fdbccb599c5bb77d7727af74c0d73a72e41d (patch)
tree	871311ccaca71521e6a78a04ea9938f66dbe2303
parent	66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d (diff)
download	beautifulsoup4-73b0fdbccb599c5bb77d7727af74c0d73a72e41d.tar.gz