diff options
Diffstat (limited to 'src/lxml/html/html5parser.py')
-rw-r--r-- | src/lxml/html/html5parser.py | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/src/lxml/html/html5parser.py b/src/lxml/html/html5parser.py
index 7188c7ea..ba9d41b3 100644
--- a/src/lxml/html/html5parser.py
+++ b/src/lxml/html/html5parser.py
@@ -147,7 +147,14 @@ def fromstring(html, guess_charset=True, parser=None):
                               guess_charset=guess_charset)
 
     # document starts with doctype or <html>, full document!
-    start = html[:50].lstrip().lower()
+    start = html[:50]
+    if hasattr(start, 'decode'):
+        # In python3, we may have been presented with a bytes object.
+        # Decode in ascii, that also covers latin-1 and utf-8 for the
+        # characters we need
+        start = start.decode('ascii', 'replace')
+
+    start = start.lstrip().lower()
     if start.startswith('<html') or start.startswith('<!doctype'):
         return doc
 