diff options
author | Leonard Richardson <leonard.richardson@canonical.com> | 2012-03-01 13:37:42 -0500 |
---|---|---|
committer | Leonard Richardson <leonard.richardson@canonical.com> | 2012-03-01 13:37:42 -0500 |
commit | 76e93195ae2c62cba701bf9452caaea8a48f7833 (patch) | |
tree | bc480fac48cc2e24c14b1a803c77d01482a5f705 /bs4/testing.py | |
parent | 870c2c422aa77bc3cb3a49b39fa8f8b633ec18ad (diff) | |
download | beautifulsoup4-76e93195ae2c62cba701bf9452caaea8a48f7833.tar.gz |
In HTML5-style <meta charset="foo"> tags, the value of the "charset" attribute is now replaced with the appropriate encoding on output. [bug=942714]
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- | bs4/testing.py | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/bs4/testing.py b/bs4/testing.py
index a3e0b38..1b73160 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -368,6 +368,24 @@ class HTMLTreeBuilderSmokeTest(object):
         # For the rest of the story, see TestSubstitutions in
         # test_tree.py.
 
+    def test_html5_style_meta_tag_reflects_current_encoding(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta id="encoding" charset="x-sjis" />')
+
+        # Here's a document incorporating that meta tag.
+        shift_jis_html = (
+            '<html><head>\n%s\n'
+            '<meta http-equiv="Content-language" content="ja"/>'
+            '</head><body>Shift-JIS markup goes here.') % meta_tag
+        soup = self.soup(shift_jis_html)
+
+        # Parse the document, and the charset is replaced with a
+        # generic value.
+        parsed_meta = soup.find('meta', id="encoding")
+        self.assertEqual('%SOUP-ENCODING%', parsed_meta['charset'])
+        self.assertEqual(True, parsed_meta.contains_substitutions)
+
 class XMLTreeBuilderSmokeTest(object):
 
     def test_docstring_generated(self):
```