summary | refs | log | tree | commit | diff
path: root/bs4/tests/test_soup.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-16 13:31:20 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-16 13:31:20 -0500
commit: 0ba6c9cf1b5d88722418d1eb63a7285a89288206 (patch)
tree: 4ed246bdeffc62ca3daf68fcf2a9c606db2cdf1b /bs4/tests/test_soup.py
parent: aea4cf30a7c58597defcc11d5f9e8f764a881206 (diff)
download: beautifulsoup4-0ba6c9cf1b5d88722418d1eb63a7285a89288206.tar.gz
Issue a warning if characters were replaced with REPLACEMENT CHARACTER during Unicode conversion.
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r-- bs4/tests/test_soup.py 15
1 files changed, 10 insertions, 5 deletions
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index d744694..997a01f 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -177,9 +177,14 @@ class TestUnicodeDammit(unittest.TestCase):
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
- dammit = UnicodeDammit(doc)
- self.assertEqual(True, dammit.contains_replacement_characters)
- self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+ with warnings.catch_warnings(record=True) as w:
+ dammit = UnicodeDammit(doc)
+ self.assertEqual(True, dammit.contains_replacement_characters)
+ self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+
+ soup = BeautifulSoup(doc)
+ self.assertTrue(soup.contains_replacement_characters)
- soup = BeautifulSoup(doc)
- self.assertTrue(soup.contains_replacement_characters)
+ msg = w[0].message
+ self.assertTrue(isinstance(msg, UnicodeWarning))
+ self.assertTrue("Some characters could not be decoded" in str(msg))