diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:31:20 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-02-16 13:31:20 -0500 |
commit | 0ba6c9cf1b5d88722418d1eb63a7285a89288206 (patch) | |
tree | 4ed246bdeffc62ca3daf68fcf2a9c606db2cdf1b /bs4/tests/test_soup.py | |
parent | aea4cf30a7c58597defcc11d5f9e8f764a881206 (diff) | |
download | beautifulsoup4-0ba6c9cf1b5d88722418d1eb63a7285a89288206.tar.gz |
Issue a warning if characters were replaced with REPLACEMENT CHARACTER during Unicode conversion.
Diffstat (limited to 'bs4/tests/test_soup.py')
-rw-r--r-- | bs4/tests/test_soup.py | 15 |
1 file changed, 10 insertions, 5 deletions
```diff
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
index d744694..997a01f 100644
--- a/bs4/tests/test_soup.py
+++ b/bs4/tests/test_soup.py
@@ -177,9 +177,14 @@ class TestUnicodeDammit(unittest.TestCase):
         doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
 <html><b>\330\250\330\252\330\261</b>
 <i>\310\322\321\220\312\321\355\344</i></html>"""
-        dammit = UnicodeDammit(doc)
-        self.assertEqual(True, dammit.contains_replacement_characters)
-        self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+        with warnings.catch_warnings(record=True) as w:
+            dammit = UnicodeDammit(doc)
+            self.assertEqual(True, dammit.contains_replacement_characters)
+            self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+
+            soup = BeautifulSoup(doc)
+            self.assertTrue(soup.contains_replacement_characters)
 
-        soup = BeautifulSoup(doc)
-        self.assertTrue(soup.contains_replacement_characters)
+        msg = w[0].message
+        self.assertTrue(isinstance(msg, UnicodeWarning))
+        self.assertTrue("Some characters could not be decoded" in str(msg))
```