summary refs log tree commit diff
path: root/src/lxml/html/html5parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/lxml/html/html5parser.py')
-rw-r--r-- src/lxml/html/html5parser.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/src/lxml/html/html5parser.py b/src/lxml/html/html5parser.py
index 7188c7ea..ba9d41b3 100644
--- a/src/lxml/html/html5parser.py
+++ b/src/lxml/html/html5parser.py
@@ -147,7 +147,14 @@ def fromstring(html, guess_charset=True, parser=None):
guess_charset=guess_charset)
# document starts with doctype or <html>, full document!
- start = html[:50].lstrip().lower()
+ start = html[:50]
+ if hasattr(start, 'decode'):
+ # In Python 3, we may have been handed a bytes object.
+ # Decoding as ASCII (with replacement) is enough here: the markers we
+ # look for are pure ASCII, and are encoded identically in latin-1 and UTF-8.
+ start = start.decode('ascii', 'replace')
+
+ start = start.lstrip().lower()
if start.startswith('<html') or start.startswith('<!doctype'):
return doc