summary refs log tree commit diff
path: root/bs4/testing.py
diff options
context:
space:
mode:
author Leonard Richardson <leonard.richardson@canonical.com> 2013-06-02 22:19:37 -0400
committer Leonard Richardson <leonard.richardson@canonical.com> 2013-06-02 22:19:37 -0400
commit b42a4ece63de739ad7a37973a4e10af23346ffd1 (patch)
tree a65794b5422a1e12a8ddf943c9afd0e0f798f6c4 /bs4/testing.py
parent b8b0711b903509e4b88e878fb6ca3731738ca99e (diff)
parent 847a8e08e21de9036783feeecd8de93b112f3868 (diff)
download beautifulsoup4-b42a4ece63de739ad7a37973a4e10af23346ffd1.tar.gz
Merged in big encoding-detection refactoring branch.
Diffstat (limited to 'bs4/testing.py')
-rw-r--r-- bs4/testing.py 13
1 files changed, 13 insertions, 0 deletions
diff --git a/bs4/testing.py b/bs4/testing.py
index 23b26f1..fd4495a 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -281,6 +281,14 @@ class HTMLTreeBuilderSmokeTest(object):
# to detect any differences between them.
#
+ def test_can_parse_unicode_document(self):
+ # A seemingly innocuous document... but it's in Unicode! And
+ # it contains characters that can't be represented in the
+ # encoding found in the declaration! The horror!
+ markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
+ soup = self.soup(markup)
+ self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
+
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
strainer = SoupStrainer("b")
@@ -484,6 +492,11 @@ class XMLTreeBuilderSmokeTest(object):
encoded = soup.encode()
self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
+ def test_can_parse_unicode_document(self):
+ markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
+ soup = self.soup(markup)
+ self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
+
def test_popping_namespaced_tag(self):
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
soup = self.soup(markup)