summary refs log tree commit diff
path: root/pygments/lexer.py
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2013-01-09 13:15:36 +0100
committer Georg Brandl <georg@python.org> 2013-01-09 13:15:36 +0100
commit 1b551e169d6c29d49c90baa62ea8886a7e8be46f (patch)
tree 4c57f637cf7d23a1a52f058b1dba36d32d1ed73d /pygments/lexer.py
parent 01ff62d16bcb567d2c7becd241e2b5bfb1bd3a93 (diff)
parent 73a4b067b9ec07088e7c2e77e3f3c040aedcd50c (diff)
download pygments-1b551e169d6c29d49c90baa62ea8886a7e8be46f.tar.gz
Merged in alastairh/pygments (pull request #141: Improve and unify C family language lexers)
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r-- pygments/lexer.py 35
1 files changed, 18 insertions, 17 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index b8cf69fb..53539df7 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -5,7 +5,7 @@
Base lexer classes.
- :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re, itertools
@@ -72,13 +72,13 @@ class Lexer(object):
#: Shortcuts for the lexer
aliases = []
- #: fn match rules
+ #: File name globs
filenames = []
- #: fn alias filenames
+ #: Secondary file name globs
alias_filenames = []
- #: mime types
+ #: MIME types
mimetypes = []
#: Priority, should multiple lexers match and no content is provided
@@ -166,6 +166,10 @@ class Lexer(object):
text = decoded
else:
text = text.decode(self.encoding)
+ else:
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+
# text now *is* a unicode string
text = text.replace('\r\n', '\n')
text = text.replace('\r', '\n')
@@ -240,6 +244,7 @@ class include(str):
"""
pass
+
class _inherit(object):
"""
Indicates that a state should inherit from its superclass.
@@ -249,6 +254,7 @@ class _inherit(object):
inherit = _inherit()
+
class combined(tuple):
"""
Indicates a state combined from multiple states.
@@ -472,15 +478,15 @@ class RegexLexerMeta(LexerMeta):
def get_tokendefs(cls):
"""
- Merge tokens from superclasses in MRO order, returning a single
- tokendef dictionary.
+ Merge tokens from superclasses in MRO order, returning a single tokendef
+ dictionary.
Any state that is not defined by a subclass will be inherited
automatically. States that *are* defined by subclasses will, by
- default, override that state in the superclass. If a subclass
- wishes to inherit definitions from a superclass, it can use the
- special value "inherit", which will cause the superclass' state
- definition to be included at that point in the state.
+ default, override that state in the superclass. If a subclass wishes to
+ inherit definitions from a superclass, it can use the special value
+ "inherit", which will cause the superclass' state definition to be
+ included at that point in the state.
"""
tokens = {}
inheritable = {}
@@ -491,12 +497,10 @@ class RegexLexerMeta(LexerMeta):
curitems = tokens.get(state)
if curitems is None:
tokens[state] = items
-
try:
inherit_ndx = items.index(inherit)
except ValueError:
continue
-
inheritable[state] = inherit_ndx
continue
@@ -506,13 +510,11 @@ class RegexLexerMeta(LexerMeta):
# Replace the "inherit" value with the items
curitems[inherit_ndx:inherit_ndx+1] = items
-
try:
new_inh_ndx = items.index(inherit)
except ValueError:
- new_inh_ndx = -1
-
- if new_inh_ndx != -1:
+ pass
+ else:
inheritable[state] = inherit_ndx + new_inh_ndx
return tokens
@@ -755,4 +757,3 @@ def do_insertions(insertions, tokens):
except StopIteration:
insleft = False
break # not strictly necessary
-