summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- pygments/lexer.py 5
-rw-r--r-- tests/test_examplefiles.py 3
2 files changed, 6 insertions, 2 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 6f466a77..2280a250 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -163,8 +163,11 @@ class Lexer(object):
text = decoded
else:
text = text.decode(self.encoding)
+ else:
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+
# text now *is* a unicode string
- text = text.lstrip(u'\xef\xbb\xbf\ufeff') # remove BOM
text = text.replace('\r\n', '\n')
text = text.replace('\r', '\n')
if self.stripall:
diff --git a/tests/test_examplefiles.py b/tests/test_examplefiles.py
index 1d3515df..a938ebaa 100644
--- a/tests/test_examplefiles.py
+++ b/tests/test_examplefiles.py
@@ -54,11 +54,12 @@ def check_lexer(lx, absfn, outfn):
text = fp.read()
finally:
fp.close()
- text = text.lstrip(u'\xef\xbb\xbf\ufeff') #remove BOM
text = text.replace(b('\r\n'), b('\n'))
text = text.strip(b('\n')) + b('\n')
try:
text = text.decode('utf-8')
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
except UnicodeError:
text = text.decode('latin1')
ntext = []