diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-04-18 14:37:44 -0400 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-04-18 14:37:44 -0400 |
commit | 34b1cb5540ce0e6ff26b0cfc1db25235024cbbaa (patch) | |
tree | 4adfbb9a4e82aab5d1963cc1d532566908d591f3 /bs4/testing.py | |
parent | eefff699de9666ffff43d50ba943505efd97b2ca (diff) | |
download | beautifulsoup4-34b1cb5540ce0e6ff26b0cfc1db25235024cbbaa.tar.gz |
Made encoding substitution in <meta> tags completely transparent (no more %SOUP-ENCODING%).
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- | bs4/testing.py | 31 |
1 files changed, 22 insertions, 9 deletions
```diff
diff --git a/bs4/testing.py b/bs4/testing.py
index 41c8783..94c87c9 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -6,7 +6,9 @@ import unittest
 from unittest import TestCase
 from bs4 import BeautifulSoup
 from bs4.element import (
+    CharsetMetaAttributeValue,
     Comment,
+    ContentMetaAttributeValue,
     Doctype,
     SoupStrainer,
 )
@@ -371,12 +373,17 @@ class HTMLTreeBuilderSmokeTest(object):
             '</head><body>Shift-JIS markup goes here.') % meta_tag
         soup = self.soup(shift_jis_html)
 
-        # Parse the document, and the charset is replaced with a
-        # generic value.
+        # Parse the document, and the charset is seemingly unaffected.
         parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
-        self.assertEqual(parsed_meta['content'],
-                          'text/html; charset=%SOUP-ENCODING%')
-        self.assertEqual(parsed_meta.contains_substitutions, True)
+        content = parsed_meta['content']
+        self.assertEqual('text/html; charset=x-sjis', content)
+
+        # But that value is actually a ContentMetaAttributeValue object.
+        self.assertTrue(isinstance(content, ContentMetaAttributeValue))
+
+        # And it will take on a value that reflects its current
+        # encoding.
+        self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
 
         # For the rest of the story, see TestSubstitutions in
         # test_tree.py.
@@ -393,11 +400,17 @@ class HTMLTreeBuilderSmokeTest(object):
             '</head><body>Shift-JIS markup goes here.') % meta_tag
         soup = self.soup(shift_jis_html)
 
-        # Parse the document, and the charset is replaced with a
-        # generic value.
+        # Parse the document, and the charset is seemingly unaffected.
         parsed_meta = soup.find('meta', id="encoding")
-        self.assertEqual('%SOUP-ENCODING%', parsed_meta['charset'])
-        self.assertEqual(True, parsed_meta.contains_substitutions)
+        charset = parsed_meta['charset']
+        self.assertEqual('x-sjis', charset)
+
+        # But that value is actually a CharsetMetaAttributeValue object.
+        self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
+
+        # And it will take on a value that reflects its current
+        # encoding.
+        self.assertEqual('utf8', charset.encode("utf8"))
 
 
 class XMLTreeBuilderSmokeTest(object):
```