diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:31:20 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:31:20 -0500 |
commit | 0ba6c9cf1b5d88722418d1eb63a7285a89288206 (patch) | |
tree | 4ed246bdeffc62ca3daf68fcf2a9c606db2cdf1b /bs4/dammit.py | |
parent | aea4cf30a7c58597defcc11d5f9e8f764a881206 (diff) | |
download | beautifulsoup4-0ba6c9cf1b5d88722418d1eb63a7285a89288206.tar.gz |
Issue a warning if characters were replaced with REPLACEMENT CHARACTER during Unicode conversion.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- | bs4/dammit.py | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 76ac9ce..a35c213 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,6 +9,7 @@ encoding; that's the tree builder's job.
 import codecs
 from htmlentitydefs import codepoint2name
 import re
+import warnings
 
 # Autodetects character encodings. Very useful.
 # Download from http://chardet.feedparser.org/
@@ -212,6 +213,10 @@ class UnicodeDammit:
                 if proposed_encoding != "ascii":
                     u = self._convert_from(proposed_encoding, "replace")
                 if u is not None:
+                    warnings.warn(
+                        UnicodeWarning(
+                            "Some characters could not be decoded, and were "
+                            "replaced with REPLACEMENT CHARACTER."))
                     self.contains_replacement_characters = True
                     break
 
```