diff options
-rw-r--r--  pygments/lexer.py          | 5 ++++-
-rw-r--r--  tests/test_examplefiles.py | 3 ++-
2 files changed, 6 insertions, 2 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 6f466a77..2280a250 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -163,8 +163,11 @@ class Lexer(object):
                 text = decoded
             else:
                 text = text.decode(self.encoding)
+        else:
+            if text.startswith(u'\ufeff'):
+                text = text[len(u'\ufeff'):]
+
         # text now *is* a unicode string
-        text = text.lstrip(u'\xef\xbb\xbf\ufeff') # remove BOM
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
         if self.stripall:
diff --git a/tests/test_examplefiles.py b/tests/test_examplefiles.py
index 1d3515df..a938ebaa 100644
--- a/tests/test_examplefiles.py
+++ b/tests/test_examplefiles.py
@@ -54,11 +54,12 @@ def check_lexer(lx, absfn, outfn):
         text = fp.read()
     finally:
         fp.close()
-    text = text.lstrip(u'\xef\xbb\xbf\ufeff') #remove BOM
     text = text.replace(b('\r\n'), b('\n'))
     text = text.strip(b('\n')) + b('\n')
     try:
         text = text.decode('utf-8')
+        if text.startswith(u'\ufeff'):
+            text = text[len(u'\ufeff'):]
     except UnicodeError:
         text = text.decode('latin1')
     ntext = []