summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:15:29 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:15:29 -0400
commit: 73b0fdbccb599c5bb77d7727af74c0d73a72e41d (patch)
tree: 871311ccaca71521e6a78a04ea9938f66dbe2303
parent: 66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d (diff)
download: beautifulsoup4-73b0fdbccb599c5bb77d7727af74c0d73a72e41d.tar.gz
Fixed code that was causing deprecation warnings in recent Python 3
versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
-rw-r--r-- NEWS.txt 5
-rw-r--r-- bs4/dammit.py 6
-rw-r--r-- bs4/element.py 15
-rw-r--r-- bs4/tests/test_tree.py 2
4 files changed, 17 insertions, 11 deletions
diff --git a/NEWS.txt b/NEWS.txt
index c3e4755..2fc0a6e 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,6 +1,9 @@
= Unreleased
-* Improved the warning given when no parser is specified.
+* Improved the warning given when no parser is specified. [bug=1780571]
+
+* Fixed code that was causing deprecation warnings in recent Python 3
+ versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
= 4.6.0 (20170507) =
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 7965565..be46b39 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -46,9 +46,9 @@ except ImportError:
pass
xml_encoding_re = re.compile(
- '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+ '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I)
html_meta_re = re.compile(
- '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+ '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):
@@ -82,7 +82,7 @@ class EntitySubstitution(object):
}
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+ "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
")")
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
diff --git a/bs4/element.py b/bs4/element.py
index 9ef75f8..e4f2303 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -2,7 +2,10 @@
# found in the LICENSE file.
__license__ = "MIT"
-import collections
+try:
+ from collections.abc import Callable # Python 3.6
+except ImportError , e:
+ from collections import Callable
import re
import shlex
import sys
@@ -12,7 +15,7 @@ from bs4.dammit import EntitySubstitution
DEFAULT_OUTPUT_ENCODING = "utf-8"
PY3K = (sys.version_info[0] > 2)
-whitespace_re = re.compile("\s+")
+whitespace_re = re.compile(r"\s+")
def _alias(attr):
"""Alias one attribute name to another for backward compatibility"""
@@ -69,7 +72,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
The value of the 'content' attribute will be one of these objects.
"""
- CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
+ CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
def __new__(cls, original_value):
match = cls.CHARSET_RE.search(original_value)
@@ -1418,7 +1421,7 @@ class Tag(PageElement):
if tag_name == '':
raise ValueError(
"A pseudo-class must be prefixed with a tag name.")
- pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
+ pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
found = []
if pseudo_attributes is None:
pseudo_type = pseudo
@@ -1652,7 +1655,7 @@ class SoupStrainer(object):
markup = markup_name
markup_attrs = markup
call_function_with_tag_data = (
- isinstance(self.name, collections.Callable)
+ isinstance(self.name, Callable)
and not isinstance(markup_name, Tag))
if ((not self.name)
@@ -1732,7 +1735,7 @@ class SoupStrainer(object):
# True matches any non-None value.
return markup is not None
- if isinstance(match_against, collections.Callable):
+ if isinstance(match_against, Callable):
return match_against(markup)
# Custom callables take the tag as an argument, but all
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index c0e7c40..297b4aa 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -605,7 +605,7 @@ class SiblingTest(TreeTest):
</html>'''
# All that whitespace looks good but makes the tests more
# difficult. Get rid of it.
- markup = re.compile("\n\s*").sub("", markup)
+ markup = re.compile(r"\n\s*").sub("", markup)
self.tree = self.soup(markup)