summary | refs | log | tree | commit | diff
path: root/bs4/dammit.py
diff options
context:
space:
mode:
author: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-09 10:38:14 -0500
committer: Leonard Richardson <leonard.richardson@canonical.com> 2012-02-09 10:38:14 -0500
commit: e4f4a83c3963397bfc47b5e04c11245efaecab10 (patch)
tree: 27942e7d8a2bc849f2b586bade20ae0b6f29bce9 /bs4/dammit.py
parent: 02b01471645ad4b315d848d7c5d5a75ec38b1370 (diff)
download: beautifulsoup4-e4f4a83c3963397bfc47b5e04c11245efaecab10.tar.gz
Unicode, Dammit now detects the encoding in HTML 5-style <meta> tags like <meta charset="utf-8" />. [bug=837268]
Diffstat (limited to 'bs4/dammit.py')
-rw-r--r--  bs4/dammit.py  6
1 files changed, 4 insertions, 2 deletions
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 09ac89e..0c4bf17 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -27,8 +27,10 @@ try:
except ImportError:
pass
-xml_encoding_re = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
-html_meta_re = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode(), re.I)
+xml_encoding_re = re.compile(
+ '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+html_meta_re = re.compile(
+ '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):