summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2012-02-16 13:31:20 -0500
committer Leonard Richardson <leonard.richardson@canonical.com> 2012-02-16 13:31:20 -0500
commit 0ba6c9cf1b5d88722418d1eb63a7285a89288206 (patch)
tree 4ed246bdeffc62ca3daf68fcf2a9c606db2cdf1b /bs4/dammit.py
parent aea4cf30a7c58597defcc11d5f9e8f764a881206 (diff)
download beautifulsoup4-0ba6c9cf1b5d88722418d1eb63a7285a89288206.tar.gz
Issue a warning if characters were replaced with REPLACEMENT CHARACTER during Unicode conversion.
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r-- bs4/dammit.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 76ac9ce..a35c213 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -9,6 +9,7 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
+import warnings
# Autodetects character encodings. Very useful.
# Download from http://chardet.feedparser.org/
@@ -212,6 +213,10 @@ class UnicodeDammit:
if proposed_encoding != "ascii":
u = self._convert_from(proposed_encoding, "replace")
if u is not None:
+ warnings.warn(
+ UnicodeWarning(
+ "Some characters could not be decoded, and were "
+ "replaced with REPLACEMENT CHARACTER."))
self.contains_replacement_characters = True
break