author     Georg Brandl <georg@python.org>  2013-01-09 13:24:33 +0100
committer  Georg Brandl <georg@python.org>  2013-01-09 13:24:33 +0100
commit     c7baf27b4058f53f8d26be23e45fb3e7772656eb (patch)
tree       cbe61af680e678990e108760272381501a80edbb  /pygments/lexer.py
parent     d3f74b1897d2cc5887527bf5a3faefcdcc20cb08 (diff)
parent     07dc045f5ad2b94cb8c99313184e7306a62813ed (diff)
download   pygments-c7baf27b4058f53f8d26be23e45fb3e7772656eb.tar.gz
Merged in andyli/pygments-main/ExtendedRegexLexer-tuple-newstate (pull request #131: ExtendedRegexLexer handles tuple new_state the same way as RegexLexer)
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r--  pygments/lexer.py  78
1 file changed, 70 insertions(+), 8 deletions(-)
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 134471f6..b2af789b 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -5,10 +5,10 @@
Base lexer classes.
- :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import re
+import re, itertools
from pygments.filter import apply_filters, Filter
from pygments.filters import get_filter_by_name
@@ -18,7 +18,7 @@ from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
- 'LexerContext', 'include', 'bygroups', 'using', 'this']
+ 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
@@ -72,15 +72,18 @@ class Lexer(object):
#: Shortcuts for the lexer
aliases = []
- #: fn match rules
+ #: File name globs
filenames = []
- #: fn alias filenames
+ #: Secondary file name globs
alias_filenames = []
- #: mime types
+ #: MIME types
mimetypes = []
+ #: Priority, should multiple lexers match and no content is provided
+ priority = 0
+
__metaclass__ = LexerMeta
def __init__(self, **options):
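
Note: the new priority attribute is a tie-breaker for lexer guessing. A minimal sketch of a lexer that opts in (MyLexer is hypothetical, and exactly how guess_lexer() weighs priority is an assumption here, not shown in this diff):

    from pygments.lexer import RegexLexer
    from pygments.token import Text

    class MyLexer(RegexLexer):
        name = 'My'
        aliases = ['my']
        filenames = ['*.my']
        # Assumption: a higher value wins when several lexers match
        # equally well and no content-based score separates them.
        priority = 0.1
        tokens = {
            'root': [(r'.+', Text)],
        }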
@@ -163,6 +166,10 @@ class Lexer(object):
text = decoded
else:
text = text.decode(self.encoding)
+ else:
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+
# text now *is* a unicode string
text = text.replace('\r\n', '\n')
text = text.replace('\r', '\n')
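
Note: the new else branch covers callers that pass in text that is already unicode: a leading byte-order mark is dropped before lexing. A standalone illustration mirroring the added lines:

    text = u'\ufeffdef f(): pass'
    if text.startswith(u'\ufeff'):
        text = text[len(u'\ufeff'):]
    assert text == u'def f(): pass'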
@@ -238,6 +245,16 @@ class include(str):
pass
+class _inherit(object):
+ """
+ Indicates that a state should inherit from its superclass.
+ """
+ def __repr__(self):
+ return 'inherit'
+
+inherit = _inherit()
+
+
class combined(tuple):
"""
Indicates a state combined from multiple states.
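
Note: in use, a subclass places the inherit marker inside a state to splice the superclass rules in at exactly that position. A minimal sketch (BaseLexer and DerivedLexer are illustrative, not part of this change):

    from pygments.lexer import RegexLexer, inherit
    from pygments.token import Comment, Keyword, Text

    class BaseLexer(RegexLexer):
        tokens = {
            'root': [
                (r'\bif\b', Keyword),
                (r'\s+', Text),
            ],
        }

    class DerivedLexer(BaseLexer):
        tokens = {
            'root': [
                (r'#.*?$', Comment),  # subclass rule, tried first
                inherit,              # BaseLexer's 'root' rules go here
            ],
        }

If no superclass defines the state, the marker is left over and simply skipped, which is what the isinstance(tdef, _inherit) check in the next hunk handles.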
@@ -428,6 +445,9 @@ class RegexLexerMeta(LexerMeta):
tokens.extend(cls._process_state(unprocessed, processed,
str(tdef)))
continue
+ if isinstance(tdef, _inherit):
+ # processed already
+ continue
assert type(tdef) is tuple, "wrong rule def %r" % tdef
@@ -456,6 +476,49 @@ class RegexLexerMeta(LexerMeta):
cls._process_state(tokendefs, processed, state)
return processed
+ def get_tokendefs(cls):
+ """
+ Merge tokens from superclasses in MRO order, returning a single tokendef
+ dictionary.
+
+ Any state that is not defined by a subclass will be inherited
+ automatically. States that *are* defined by subclasses will, by
+ default, override that state in the superclass. If a subclass wishes to
+ inherit definitions from a superclass, it can use the special value
+ "inherit", which will cause the superclass' state definition to be
+ included at that point in the state.
+ """
+ tokens = {}
+ inheritable = {}
+ for c in itertools.chain((cls,), cls.__mro__):
+ toks = c.__dict__.get('tokens', {})
+
+ for state, items in toks.iteritems():
+ curitems = tokens.get(state)
+ if curitems is None:
+ tokens[state] = items
+ try:
+ inherit_ndx = items.index(inherit)
+ except ValueError:
+ continue
+ inheritable[state] = inherit_ndx
+ continue
+
+ inherit_ndx = inheritable.pop(state, None)
+ if inherit_ndx is None:
+ continue
+
+ # Replace the "inherit" value with the items
+ curitems[inherit_ndx:inherit_ndx+1] = items
+ try:
+ new_inh_ndx = items.index(inherit)
+ except ValueError:
+ pass
+ else:
+ inheritable[state] = inherit_ndx + new_inh_ndx
+
+ return tokens
+
def __call__(cls, *args, **kwds):
"""Instantiate cls after preprocessing its token definitions."""
if '_tokens' not in cls.__dict__:
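
Note: for the sketch above, the merge performed by get_tokendefs() would resolve DerivedLexer's 'root' state to, in effect, the following list (illustrative; assumes a single inherit per state):

    # [
    #     (r'#.*?$', Comment),   # from DerivedLexer
    #     (r'\bif\b', Keyword),  # spliced in from BaseLexer
    #     (r'\s+', Text),        # spliced in from BaseLexer
    # ]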
@@ -465,7 +528,7 @@ class RegexLexerMeta(LexerMeta):
# don't process yet
pass
else:
- cls._tokens = cls.process_tokendef('', cls.tokens)
+ cls._tokens = cls.process_tokendef('', cls.get_tokendefs())
return type.__call__(cls, *args, **kwds)
@@ -700,4 +763,3 @@ def do_insertions(insertions, tokens):
except StopIteration:
insleft = False
break # not strictly necessary
-