author     Georg Brandl <georg@python.org>  2013-01-09 13:24:33 +0100
committer  Georg Brandl <georg@python.org>  2013-01-09 13:24:33 +0100
commit     c7baf27b4058f53f8d26be23e45fb3e7772656eb (patch)
tree       cbe61af680e678990e108760272381501a80edbb  /pygments/lexer.py
parent     d3f74b1897d2cc5887527bf5a3faefcdcc20cb08 (diff)
parent     07dc045f5ad2b94cb8c99313184e7306a62813ed (diff)
download   pygments-c7baf27b4058f53f8d26be23e45fb3e7772656eb.tar.gz
Merged in andyli/pygments-main/ExtendedRegexLexer-tuple-newstate (pull request #131: ExtendedRegexLexer handles tuple new_state the same way as RegexLexer)
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r--  pygments/lexer.py  78
1 file changed, 70 insertions(+), 8 deletions(-)
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 134471f6..b2af789b 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -5,10 +5,10 @@
Base lexer classes.
- :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import re
+import re, itertools
from pygments.filter import apply_filters, Filter
from pygments.filters import get_filter_by_name
@@ -18,7 +18,7 @@ from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
- 'LexerContext', 'include', 'bygroups', 'using', 'this']
+ 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
@@ -72,15 +72,18 @@ class Lexer(object):
#: Shortcuts for the lexer
aliases = []
- #: fn match rules
+ #: File name globs
filenames = []
- #: fn alias filenames
+ #: Secondary file name globs
alias_filenames = []
- #: mime types
+ #: MIME types
mimetypes = []
+ #: Priority, should multiple lexers match and no content is provided
+ priority = 0
+
__metaclass__ = LexerMeta
def __init__(self, **options):
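
Note: the new priority attribute is a tie-breaker for lexer guessing. A minimal sketch of a lexer that opts in (MyLexer is hypothetical, and exactly how guess_lexer() weighs priority is an assumption here, not shown in this diff):

    from pygments.lexer import RegexLexer
    from pygments.token import Text

    class MyLexer(RegexLexer):
        name = 'My'
        aliases = ['my']
        filenames = ['*.my']
        # Assumption: a higher value wins when several lexers match
        # equally well and no content-based score separates them.
        priority = 0.1
        tokens = {
            'root': [(r'.+', Text)],
        }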
@@ -163,6 +166,10 @@ class Lexer(object):
text = decoded
else:
text = text.decode(self.encoding)
+ else:
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+
# text now *is* a unicode string
text = text.replace('\r\n', '\n')
text = text.replace('\r', '\n')
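
Note: the new else branch covers callers that pass in text that is already unicode: a leading byte-order mark is dropped before lexing. A standalone illustration mirroring the added lines:

    text = u'\ufeffdef f(): pass'
    if text.startswith(u'\ufeff'):
        text = text[len(u'\ufeff'):]
    assert text == u'def f(): pass'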
@@ -238,6 +245,16 @@ class include(str):
pass
+class _inherit(object):
+ """
+ Indicates that a state should inherit from its superclass.
+ """
+ def __repr__(self):
+ return 'inherit'
+
+inherit = _inherit()
+
+
class combined(tuple):
"""
Indicates a state combined from multiple states.
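
Note: in use, a subclass places the inherit marker inside a state to splice the superclass rules in at exactly that position. A minimal sketch (BaseLexer and DerivedLexer are illustrative, not part of this change):

    from pygments.lexer import RegexLexer, inherit
    from pygments.token import Comment, Keyword, Text

    class BaseLexer(RegexLexer):
        tokens = {
            'root': [
                (r'\bif\b', Keyword),
                (r'\s+', Text),
            ],
        }

    class DerivedLexer(BaseLexer):
        tokens = {
            'root': [
                (r'#.*?$', Comment),  # subclass rule, tried first
                inherit,              # BaseLexer's 'root' rules go here
            ],
        }

If no superclass defines the state, the marker is left over and simply skipped, which is what the isinstance(tdef, _inherit) check in the next hunk handles.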
@@ -428,6 +445,9 @@ class RegexLexerMeta(LexerMeta):
tokens.extend(cls._process_state(unprocessed, processed,
str(tdef)))
continue
+ if isinstance(tdef, _inherit):
+ # processed already
+ continue
assert type(tdef) is tuple, "wrong rule def %r" % tdef
@@ -456,6 +476,49 @@ class RegexLexerMeta(LexerMeta):
cls._process_state(tokendefs, processed, state)
return processed
+ def get_tokendefs(cls):
+ """
+ Merge tokens from superclasses in MRO order, returning a single tokendef
+ dictionary.
+
+ Any state that is not defined by a subclass will be inherited
+ automatically. States that *are* defined by subclasses will, by
+ default, override that state in the superclass. If a subclass wishes to
+ inherit definitions from a superclass, it can use the special value
+ "inherit", which will cause the superclass' state definition to be
+ included at that point in the state.
+ """
+ tokens = {}
+ inheritable = {}
+ for c in itertools.chain((cls,), cls.__mro__):
+ toks = c.__dict__.get('tokens', {})
+
+ for state, items in toks.iteritems():
+ curitems = tokens.get(state)
+ if curitems is None:
+ tokens[state] = items
+ try:
+ inherit_ndx = items.index(inherit)
+ except ValueError:
+ continue
+ inheritable[state] = inherit_ndx
+ continue
+
+ inherit_ndx = inheritable.pop(state, None)
+ if inherit_ndx is None:
+ continue
+
+ # Replace the "inherit" value with the items
+ curitems[inherit_ndx:inherit_ndx+1] = items
+ try:
+ new_inh_ndx = items.index(inherit)
+ except ValueError:
+ pass
+ else:
+ inheritable[state] = inherit_ndx + new_inh_ndx
+
+ return tokens
+
def __call__(cls, *args, **kwds):
"""Instantiate cls after preprocessing its token definitions."""
if '_tokens' not in cls.__dict__:
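
Note: for the sketch above, the merge performed by get_tokendefs() would resolve DerivedLexer's 'root' state to, in effect, the following list (illustrative; assumes a single inherit per state):

    # [
    #     (r'#.*?$', Comment),   # from DerivedLexer
    #     (r'\bif\b', Keyword),  # spliced in from BaseLexer
    #     (r'\s+', Text),        # spliced in from BaseLexer
    # ]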
@@ -465,7 +528,7 @@ class RegexLexerMeta(LexerMeta):
# don't process yet
pass
else:
- cls._tokens = cls.process_tokendef('', cls.tokens)
+ cls._tokens = cls.process_tokendef('', cls.get_tokendefs())
return type.__call__(cls, *args, **kwds)
@@ -700,4 +763,3 @@ def do_insertions(insertions, tokens):
except StopIteration:
insleft = False
break # not strictly necessary
-