summary | refs | log | tree | commit | diff
path: root/pygments/lexer.py
diff options
context:
space:
mode:
author: Andy Li <andy@onthewings.net> 2012-12-12 18:11:34 +0800
committer: Andy Li <andy@onthewings.net> 2012-12-12 18:11:34 +0800
commit: a0fb320fd7264c3804fd846277f47d40b9013282 (patch)
tree: 72f3db02e15c2e5dbc3f80469f2351134c9ddfb8 /pygments/lexer.py
parent: d523209a898076e118144e6260de6bdb1778c1a4 (diff)
download: pygments-a0fb320fd7264c3804fd846277f47d40b9013282.tar.gz
Remove BOM when the input is unicode.
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r-- pygments/lexer.py | 5
1 files changed, 4 insertions, 1 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 6f466a77..2280a250 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -163,8 +163,11 @@ class Lexer(object):
text = decoded
else:
text = text.decode(self.encoding)
+ else:
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+
# text now *is* a unicode string
- text = text.lstrip(u'\xef\xbb\xbf\ufeff') # remove BOM
text = text.replace('\r\n', '\n')
text = text.replace('\r', '\n')
if self.stripall: