diff options
author | Andy Li <andy@onthewings.net> | 2012-12-12 18:11:34 +0800 |
---|---|---|
committer | Andy Li <andy@onthewings.net> | 2012-12-12 18:11:34 +0800 |
commit | a0fb320fd7264c3804fd846277f47d40b9013282 (patch) | |
tree | 72f3db02e15c2e5dbc3f80469f2351134c9ddfb8 /pygments/lexer.py | |
parent | d523209a898076e118144e6260de6bdb1778c1a4 (diff) | |
download | pygments-a0fb320fd7264c3804fd846277f47d40b9013282.tar.gz |
Remove BOM when the input is unicode.
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r-- | pygments/lexer.py | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 6f466a77..2280a250 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -163,8 +163,11 @@ class Lexer(object):
                 text = decoded
             else:
                 text = text.decode(self.encoding)
+        else:
+            if text.startswith(u'\ufeff'):
+                text = text[len(u'\ufeff'):]
+
         # text now *is* a unicode string
-        text = text.lstrip(u'\xef\xbb\xbf\ufeff') # remove BOM
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
         if self.stripall:
```