summary refs log tree commit diff
path: root/bs4/testing.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2012-04-18 14:37:44 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2012-04-18 14:37:44 -0400
commit 34b1cb5540ce0e6ff26b0cfc1db25235024cbbaa (patch)
tree 4adfbb9a4e82aab5d1963cc1d532566908d591f3 /bs4/testing.py
parent eefff699de9666ffff43d50ba943505efd97b2ca (diff)
download beautifulsoup4-34b1cb5540ce0e6ff26b0cfc1db25235024cbbaa.tar.gz
Made encoding substitution in <meta> tags completely transparent (no more %SOUP-ENCODING%).
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- bs4/testing.py 31
1 file changed, 22 insertions, 9 deletions
diff --git a/bs4/testing.py b/bs4/testing.py
index 41c8783..94c87c9 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -6,7 +6,9 @@ import unittest
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
+ CharsetMetaAttributeValue,
Comment,
+ ContentMetaAttributeValue,
Doctype,
SoupStrainer,
)
@@ -371,12 +373,17 @@ class HTMLTreeBuilderSmokeTest(object):
'</head><body>Shift-JIS markup goes here.') % meta_tag
soup = self.soup(shift_jis_html)
- # Parse the document, and the charset is replaced with a
- # generic value.
+ # Parse the document, and the charset is seemingly unaffected.
parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
- self.assertEqual(parsed_meta['content'],
- 'text/html; charset=%SOUP-ENCODING%')
- self.assertEqual(parsed_meta.contains_substitutions, True)
+ content = parsed_meta['content']
+ self.assertEqual('text/html; charset=x-sjis', content)
+
+ # But that value is actually a ContentMetaAttributeValue object.
+ self.assertTrue(isinstance(content, ContentMetaAttributeValue))
+
+ # And it will take on a value that reflects its current
+ # encoding.
+ self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
# For the rest of the story, see TestSubstitutions in
# test_tree.py.
@@ -393,11 +400,17 @@ class HTMLTreeBuilderSmokeTest(object):
'</head><body>Shift-JIS markup goes here.') % meta_tag
soup = self.soup(shift_jis_html)
- # Parse the document, and the charset is replaced with a
- # generic value.
+ # Parse the document, and the charset is seemingly unaffected.
parsed_meta = soup.find('meta', id="encoding")
- self.assertEqual('%SOUP-ENCODING%', parsed_meta['charset'])
- self.assertEqual(True, parsed_meta.contains_substitutions)
+ charset = parsed_meta['charset']
+ self.assertEqual('x-sjis', charset)
+
+ # But that value is actually a CharsetMetaAttributeValue object.
+ self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
+
+ # And it will take on a value that reflects its current
+ # encoding.
+ self.assertEqual('utf8', charset.encode("utf8"))
class XMLTreeBuilderSmokeTest(object):