Add ability to specify default state transition

Certain lexers are forced to match an extra empty token if they want to make a state transition. By adding the ability to specify a default state transition to take, we can avoid creating extra tokens.
author: Gaurav Jain <gaurav@gauravjain.org> 2014-04-29 22:38:50 -0400
committer: Gaurav Jain <gaurav@gauravjain.org> 2014-04-29 22:38:50 -0400
commit: 56d9659a5bccd4010f2fdc7becb8389aff363f83 (patch)
tree: a679327c6bc2dd4ccb8d97c70127b5bd7177b042
parent: f16d0dbfaececc1b868c11355f4bca13e06dca53 (diff)
download: pygments-56d9659a5bccd4010f2fdc7becb8389aff363f83.tar.gz
1 files changed, 19 insertions, 6 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 567e85f8..3ef1c8d1 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -18,7 +18,7 @@ from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
 
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
-           'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
+           'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this', 'default']
 
 
 _encoding_map = [(b'\xef\xbb\xbf', 'utf-8'),
@@ -383,6 +383,13 @@ def using(_other, **kwargs):
     return callback
 
 
+class default(str):
+    """
+    Indicates the state transition to take, without emitting a token, if no earlier rule matches.
+    """
+    pass
+
+
 class RegexLexerMeta(LexerMeta):
     """
     Metaclass for RegexLexer, creates the self._tokens attribute from
@@ -452,6 +459,11 @@ class RegexLexerMeta(LexerMeta):
             if isinstance(tdef, _inherit):
                 # processed already
                 continue
+            if isinstance(tdef, default):
+                new_state = cls._process_new_state(str(tdef),
+                                                   unprocessed, processed)
+                tokens.append((re.compile('').match, None, new_state))
+                continue
 
             assert type(tdef) is tuple, "wrong rule def %r" % tdef
 
@@ -582,11 +594,12 @@ class RegexLexer(Lexer):
             for rexmatch, action, new_state in statetokens:
                 m = rexmatch(text, pos)
                 if m:
-                    if type(action) is _TokenType:
-                        yield pos, action, m.group()
-                    else:
-                        for item in action(self, m):
-                            yield item
+                    if action is not None:
+                        if type(action) is _TokenType:
+                            yield pos, action, m.group()
+                        else:
+                            for item in action(self, m):
+                                yield item
                     pos = m.end()
                     if new_state is not None:
                         # state transition
author	Gaurav Jain <gaurav@gauravjain.org>	2014-04-29 22:38:50 -0400
committer	Gaurav Jain <gaurav@gauravjain.org>	2014-04-29 22:38:50 -0400
commit	56d9659a5bccd4010f2fdc7becb8389aff363f83 (patch)
tree	a679327c6bc2dd4ccb8d97c70127b5bd7177b042
parent	f16d0dbfaececc1b868c11355f4bca13e06dca53 (diff)
download	pygments-56d9659a5bccd4010f2fdc7becb8389aff363f83.tar.gz