diff options
author | Leonard Richardson <leonardr@segfault.org> | 2018-07-14 14:15:29 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2018-07-14 14:15:29 -0400 |
commit | 73b0fdbccb599c5bb77d7727af74c0d73a72e41d (patch) | |
tree | 871311ccaca71521e6a78a04ea9938f66dbe2303 /bs4 | |
parent | 66274ce5781cfcd9af98f1dc2891f8d8f2d9d83d (diff) | |
download | beautifulsoup4-73b0fdbccb599c5bb77d7727af74c0d73a72e41d.tar.gz |
Fixed code that was causing deprecation warnings in recent Python 3
versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
Diffstat (limited to 'bs4')
-rw-r--r-- | bs4/dammit.py | 6 | ||||
-rw-r--r-- | bs4/element.py | 15 | ||||
-rw-r--r-- | bs4/tests/test_tree.py | 2 |
3 files changed, 13 insertions, 10 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py index 7965565..be46b39 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -46,9 +46,9 @@ except ImportError: pass xml_encoding_re = re.compile( - '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I) + '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I) html_meta_re = re.compile( - '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) + '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) class EntitySubstitution(object): @@ -82,7 +82,7 @@ class EntitySubstitution(object): } BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)" ")") AMPERSAND_OR_BRACKET = re.compile("([<>&])") diff --git a/bs4/element.py b/bs4/element.py index 9ef75f8..e4f2303 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -2,7 +2,10 @@ # found in the LICENSE file. __license__ = "MIT" -import collections +try: + from collections.abc import Callable # Python 3.6 +except ImportError , e: + from collections import Callable import re import shlex import sys @@ -12,7 +15,7 @@ from bs4.dammit import EntitySubstitution DEFAULT_OUTPUT_ENCODING = "utf-8" PY3K = (sys.version_info[0] > 2) -whitespace_re = re.compile("\s+") +whitespace_re = re.compile(r"\s+") def _alias(attr): """Alias one attribute name to another for backward compatibility""" @@ -69,7 +72,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): The value of the 'content' attribute will be one of these objects. """ - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M) def __new__(cls, original_value): match = cls.CHARSET_RE.search(original_value) @@ -1418,7 +1421,7 @@ class Tag(PageElement): if tag_name == '': raise ValueError( "A pseudo-class must be prefixed with a tag name.") - pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) + pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) found = [] if pseudo_attributes is None: pseudo_type = pseudo @@ -1652,7 +1655,7 @@ class SoupStrainer(object): markup = markup_name markup_attrs = markup call_function_with_tag_data = ( - isinstance(self.name, collections.Callable) + isinstance(self.name, Callable) and not isinstance(markup_name, Tag)) if ((not self.name) @@ -1732,7 +1735,7 @@ class SoupStrainer(object): # True matches any non-None value. return markup is not None - if isinstance(match_against, collections.Callable): + if isinstance(match_against, Callable): return match_against(markup) # Custom callables take the tag as an argument, but all diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index c0e7c40..297b4aa 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -605,7 +605,7 @@ class SiblingTest(TreeTest): </html>''' # All that whitespace looks good but makes the tests more # difficult. Get rid of it. - markup = re.compile("\n\s*").sub("", markup) + markup = re.compile(r"\n\s*").sub("", markup) self.tree = self.soup(markup) |