diff options
author | Tim Hatch <tim@timhatch.com> | 2014-05-18 07:39:47 -0700 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2014-05-18 07:39:47 -0700 |
commit | 22603938a79b5c42fd607849608991977ff4ff81 (patch) | |
tree | e28671a7b1a06d6a066b55cf6ea67287b4c3a175 | |
parent | cb7de16b36d3ea35968c94b9e6fddab8df0a10c9 (diff) | |
parent | 99fba293818c780ce48e990e973c9df24ec6c8c0 (diff) | |
download | pygments-22603938a79b5c42fd607849608991977ff4ff81.tar.gz |
Merged in jaingaurav2/pygments-main-c-cleanup (pull request #342)
Add ability to specify default state transition
-rw-r--r-- | pygments/lexer.py | 46 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 24 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 79 | ||||
-rw-r--r-- | pygments/lexers/dotnet.py | 8 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 30 | ||||
-rw-r--r-- | pygments/lexers/jvm.py | 10 | ||||
-rw-r--r-- | pygments/lexers/other.py | 8 | ||||
-rw-r--r-- | pygments/lexers/rdf.py | 4 | ||||
-rw-r--r-- | pygments/lexers/templates.py | 4 | ||||
-rw-r--r-- | pygments/lexers/text.py | 4 | ||||
-rw-r--r-- | pygments/lexers/web.py | 150 | ||||
-rw-r--r-- | tests/test_clexer.py | 22 | ||||
-rw-r--r-- | tests/test_objectiveclexer.py | 10 | ||||
-rw-r--r-- | tests/test_regexlexer.py | 7 |
14 files changed, 199 insertions, 207 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py index 567e85f8..0ede7927 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -18,7 +18,7 @@ from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer', - 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this'] + 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this', 'default'] _encoding_map = [(b'\xef\xbb\xbf', 'utf-8'), @@ -383,6 +383,16 @@ def using(_other, **kwargs): return callback +class default: + """ + Indicates a state or state action (e.g. #pop) to apply. + For example default('#pop') is equivalent to ('', Token, '#pop') + Note that state tuples may be used as well + """ + def __init__(self, state): + self.state = state + + class RegexLexerMeta(LexerMeta): """ Metaclass for RegexLexer, creates the self._tokens attribute from @@ -452,6 +462,10 @@ class RegexLexerMeta(LexerMeta): if isinstance(tdef, _inherit): # processed already continue + if isinstance(tdef, default): + new_state = cls._process_new_state(tdef.state, unprocessed, processed) + tokens.append((re.compile('').match, None, new_state)) + continue assert type(tdef) is tuple, "wrong rule def %r" % tdef @@ -582,11 +596,12 @@ class RegexLexer(Lexer): for rexmatch, action, new_state in statetokens: m = rexmatch(text, pos) if m: - if type(action) is _TokenType: - yield pos, action, m.group() - else: - for item in action(self, m): - yield item + if action is not None: + if type(action) is _TokenType: + yield pos, action, m.group() + else: + for item in action(self, m): + yield item pos = m.end() if new_state is not None: # state transition @@ -660,15 +675,16 @@ class ExtendedRegexLexer(RegexLexer): for rexmatch, action, new_state in statetokens: m = rexmatch(text, ctx.pos, ctx.end) if m: - if type(action) is _TokenType: - yield ctx.pos, action, m.group() - ctx.pos = m.end() - else: - for item in action(self, m, ctx): - yield item - if not new_state: - # altered the state stack? - statetokens = tokendefs[ctx.stack[-1]] + if action is not None: + if type(action) is _TokenType: + yield ctx.pos, action, m.group() + ctx.pos = m.end() + else: + for item in action(self, m, ctx): + yield item + if not new_state: + # altered the state stack? + statetokens = tokendefs[ctx.stack[-1]] # CAUTION: callback must set ctx.pos! if new_state is not None: # state transition diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index cd105126..0c2d02ac 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ - LexerContext, include, combined, do_insertions, bygroups, using, this + LexerContext, include, combined, do_insertions, bygroups, using, this, default from pygments.token import Error, Text, Other, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches, iteritems @@ -133,7 +133,7 @@ class PythonLexer(RegexLexer): (r'as\b', Keyword.Namespace), (r',', Operator), (r'[a-zA-Z_][\w.]*', Name.Namespace), - (r'', Text, '#pop') # all else: go back + default('#pop') # all else: go back ], 'fromimport': [ (r'(?:[ \t]|\\\n)+', Text), @@ -145,7 +145,7 @@ class PythonLexer(RegexLexer): (r'[a-zA-Z_.][\w.]*', Name.Namespace), # anything else here also means "raise x from y" and is therefore # not an error - (r'', Text, '#pop'), + default('#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' @@ -269,13 +269,13 @@ class Python3Lexer(RegexLexer): (r'\.', Name.Namespace), (uni_name, Name.Namespace), (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)), - (r'', Text, '#pop') # all else: go back + default('#pop') # all else: go back ] tokens['fromimport'] = [ (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'), (r'\.', Name.Namespace), (uni_name, Name.Namespace), - (r'', Text, '#pop'), + default('#pop'), ] # don't highlight "%s" substitutions tokens['strings'] = [ @@ -743,13 +743,13 @@ class RubyLexer(ExtendedRegexLexer): r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', bygroups(Name.Class, Operator, Name.Function), '#pop'), - (r'', Text, '#pop') + default('#pop') ], 'classname': [ (r'\(', Punctuation, 'defexpr'), (r'<<', Operator, '#pop'), (r'[A-Z_]\w*', Name.Class, '#pop'), - (r'', Text, '#pop') + default('#pop') ], 'defexpr': [ (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'), @@ -1058,7 +1058,7 @@ class LuaLexer(RegexLexer): 'root': [ # lua allows a file to start with a shebang (r'#!(.*?)$', Comment.Preproc), - (r'', Text, 'base'), + default('base'), ], 'base': [ (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline), @@ -1164,7 +1164,7 @@ class MoonScriptLexer(LuaLexer): tokens = { 'root': [ (r'#!(.*?)$', Comment.Preproc), - (r'', Text, 'base'), + default('base'), ], 'base': [ ('--.*$', Comment.Single), @@ -1650,7 +1650,7 @@ class FactorLexer(RegexLexer): 'root': [ # factor allows a file to start with a shebang (r'#!.*$', Comment.Preproc), - (r'', Text, 'base'), + default('base'), ], 'base': [ (r'\s+', Text), @@ -2302,7 +2302,7 @@ class Perl6Lexer(ExtendedRegexLexer): ], 'token-sym-brackets' : [ (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')), - (r'', Name, ('#pop', 'pre-token')), + default(('#pop', 'pre-token')), ], 'token': [ (r'}', Text, '#pop'), @@ -2512,7 +2512,7 @@ class ChaiscriptLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop') ], 'badregex': [ ('\n', Text, '#pop') diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index ec3015a5..b0b95c62 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -13,7 +13,7 @@ import re from string import Template from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - this, combined, inherit, do_insertions + this, combined, inherit, do_insertions, default from pygments.util import get_bool_opt, get_list_opt from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Error, Literal, Generic @@ -107,7 +107,7 @@ class CFamilyLexer(RegexLexer): r'(' + _ws + r')?(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), - ('', Text, 'statement'), + default('statement'), ], 'statement' : [ include('whitespace'), @@ -1894,13 +1894,13 @@ class CythonLexer(RegexLexer): (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)), (r'[a-zA-Z_][\w.]*', Name.Namespace), (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)), - (r'', Text, '#pop') # all else: go back + default('#pop') # all else: go back ], 'fromimport': [ (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'), (r'[a-zA-Z_.][\w.]*', Name.Namespace), # ``cdef foo from "header"``, or ``for foo from 0 < i < 10`` - (r'', Text, '#pop'), + default('#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' @@ -2367,7 +2367,7 @@ class FelixLexer(RegexLexer): 'modulename': [ include('whitespace'), (r'\[', Punctuation, ('modulename2', 'tvarlist')), - (r'', Error, 'modulename2'), + default('modulename2'), ], 'modulename2': [ include('whitespace'), @@ -2528,7 +2528,7 @@ class AdaLexer(RegexLexer): ], 'import': [ (r'[a-z0-9_.]+', Name.Namespace, '#pop'), - (r'', Text, '#pop'), + default('#pop'), ], 'formal_part' : [ (r'\)', Punctuation, '#pop'), @@ -3052,16 +3052,16 @@ class NimrodLexer(RegexLexer): 'float-number': [ (r'\.(?!\.)[0-9_]*', Number.Float), (r'[eE][+-]?[0-9][0-9_]*', Number.Float), - (r'', Text, '#pop') + default('#pop') ], 'float-suffix': [ (r'\'[fF](32|64)', Number.Float), - (r'', Text, '#pop') + default('#pop') ], 'int-suffix': [ (r'\'[iI](32|64)', Number.Integer.Long), (r'\'[iI](8|16)', Number.Integer), - (r'', Text, '#pop') + default('#pop') ], } @@ -3167,7 +3167,7 @@ class FantomLexer(RegexLexer): 'inType': [ (r'[\[\]\|\->:\?]', Punctuation), (s(r'$id'), Name.Class), - (r'', Text, '#pop'), + default('#pop'), ], 'root': [ @@ -3272,19 +3272,19 @@ class FantomLexer(RegexLexer): (r'(\")?([\w\.]+)(\")?', bygroups(Punctuation, Name.Namespace, Punctuation)), #podname (r'::', Punctuation, 'usingClass'), - (r'', Text, '#pop') + default('#pop') ], 'usingClass': [ (r'[ \t]+', Text), # consume whitespaces (r'(as)(\s+)(\w+)', bygroups(Keyword.Declaration, Text, Name.Class), '#pop:2'), (r'[\w\$]+', Name.Class), - (r'', Text, '#pop:2') # jump out to root state + default('#pop:2') # jump out to root state ], 'facet': [ (r'\s+', Text), (r'{', Punctuation, 'facetFields'), - (r'', Text, '#pop') + default('#pop') ], 'facetFields': [ include('comments'), @@ -3587,7 +3587,7 @@ class MonkeyLexer(RegexLexer): (r'<', Punctuation, '#push'), (r'>', Punctuation, '#pop'), (r'\n', Text, '#pop'), - (r'', Text, '#pop') + default('#pop') ], 'variables': [ (r'%s\b' % name_constant, Name.Constant), @@ -3596,7 +3596,7 @@ class MonkeyLexer(RegexLexer): (r'\s+', Text), (r':', Punctuation, 'classname'), (r',', Punctuation, '#push'), - (r'', Text, '#pop') + default('#pop') ], 'string': [ (r'[^"~]+', String.Double), @@ -4048,7 +4048,7 @@ class Inform6Lexer(RegexLexer): (r'\[', Punctuation, 'many-values'), # Array initialization (r':|(?=;)', Punctuation, '#pop'), (r'<', Punctuation), # Second angle bracket in an action statement - (r'', Text, ('expression', '_expression')) + default(('expression', '_expression')) ], # Expressions @@ -4075,7 +4075,7 @@ class Inform6Lexer(RegexLexer): (r'sp\b', Name), (r'\?~?', Name.Label, 'label?'), (r'[@{]', Error), - (r'', Text, '#pop') + default('#pop') ], '_assembly-expression': [ (r'\(', Punctuation, ('#push', '_expression')), @@ -4272,13 +4272,13 @@ class Inform6Lexer(RegexLexer): (r';', Punctuation), (r'\]', Punctuation, '#pop'), (r':', Error), - (r'', Text, ('expression', '_expression')) + default(('expression', '_expression')) ], # Attribute, Property 'alias?': [ include('_whitespace'), (r'alias\b', Keyword, ('#pop', '_constant')), - (r'', Text, '#pop') + default('#pop') ], # Class, Object, Nearby 'class-name': [ @@ -4289,7 +4289,7 @@ class Inform6Lexer(RegexLexer): 'duplicates': [ include('_whitespace'), (r'\(', Punctuation, ('#pop', 'expression', '_expression')), - (r'', Text, '#pop') + default('#pop') ], '_object-head': [ (r'[%s]>' % _dash, Punctuation), @@ -4303,20 +4303,20 @@ class Inform6Lexer(RegexLexer): (r'class\b', Keyword.Declaration, 'class-segment'), (r'(has|private|with)\b', Keyword.Declaration), (r':', Error), - (r'', Text, ('_object-expression', '_expression')) + default(('_object-expression', '_expression')) ], 'class-segment': [ include('_whitespace'), (r'(?=[,;]|(class|has|private|with)\b)', Text, '#pop'), (_name, Name.Class), - (r'', Text, 'value') + default('value') ], # Extend, Verb 'grammar': [ include('_whitespace'), (r'=', Punctuation, ('#pop', 'default')), (r'\*', Punctuation, ('#pop', 'grammar-line')), - (r'', Text, '_directive-keyword') + default('_directive-keyword') ], 'grammar-line': [ include('_whitespace'), @@ -4324,12 +4324,12 @@ class Inform6Lexer(RegexLexer): (r'[/*]', Punctuation), (r'[%s]>' % _dash, Punctuation, 'value'), (r'(noun|scope)\b', Keyword, '=routine'), - (r'', Text, '_directive-keyword') + default('_directive-keyword') ], '=routine': [ include('_whitespace'), (r'=', Punctuation, 'routine-name?'), - (r'', Text, '#pop') + default('#pop') ], # Import 'manifest': [ @@ -4342,7 +4342,7 @@ class Inform6Lexer(RegexLexer): 'diagnostic': [ include('_whitespace'), (r'[%s]' % _dquote, String.Double, ('#pop', 'message-string')), - (r'', Text, ('#pop', 'before-plain-string', 'directive-keyword?')) + default(('#pop', 'before-plain-string', 'directive-keyword?')) ], 'before-plain-string': [ include('_whitespace'), @@ -4369,18 +4369,18 @@ class Inform6Lexer(RegexLexer): ], 'directive-keyword?': [ include('_directive-keyword!'), - (r'', Text, '#pop') + default('#pop') ], 'property-keyword*': [ include('_whitespace'), (r'(additive|long)\b', Keyword), - (r'', Text, '#pop') + default('#pop') ], 'trace-keyword?': [ include('_whitespace'), (r'(assembly|dictionary|expressions|lines|linker|objects|off|on|' r'symbols|tokens|verbs)\b', Keyword, '#pop'), - (r'', Text, '#pop') + default('#pop') ], # Statements @@ -4417,7 +4417,7 @@ class Inform6Lexer(RegexLexer): '#pop'), (r'%s(?=(\s+|(![^%s]*))*\))' % (_name, _newline), Name.Function, '#pop'), - (r'', Text, '#pop') + default('#pop') ], '(?': [ include('_whitespace'), @@ -4431,13 +4431,12 @@ class Inform6Lexer(RegexLexer): include('_whitespace'), (r';', Punctuation, '#pop'), (r':', Error), - (r'', Text, - ('_list-expression', '_expression', '_list-expression', 'form')) + default(('_list-expression', '_expression', '_list-expression', 'form')) ], 'form': [ include('_whitespace'), (r'\(', Punctuation, ('#pop', 'miscellaneous-keyword?')), - (r'', Text, '#pop') + default('#pop') ], # Assembly @@ -4448,7 +4447,7 @@ class Inform6Lexer(RegexLexer): ], 'operands': [ (r':', Error), - (r'', Text, ('_assembly-expression', '_expression')) + default(('_assembly-expression', '_expression')) ] } @@ -4522,7 +4521,7 @@ class Inform7Lexer(RegexLexer): (r'\[', Comment.Multiline, '+comment'), (r'[%s]' % _dquote, Generic.Heading, ('+main', '+titling', '+titling-string')), - (r'', Text, ('+main', '+heading?')) + default(('+main', '+heading?')) ], '+titling-string': [ (r'[^%s]+' % _dquote, Generic.Heading), @@ -4559,7 +4558,7 @@ class Inform7Lexer(RegexLexer): using(this, state=('+i6-root', 'directive', 'default', 'statements'), i6t='+i6t-inline'), Punctuation), '#pop'), - (r'', Text, '#pop') + default('#pop') ], '+use-option': [ (r'\s+', Text), @@ -4568,7 +4567,7 @@ class Inform7Lexer(RegexLexer): bygroups(Punctuation, using(this, state=('+i6-root', 'directive'), i6t='+i6t-use-option'), Punctuation), '#pop'), - (r'', Text, '#pop') + default('#pop') ], '+comment': [ (r'[^\[\]]+', Comment.Multiline), @@ -4587,19 +4586,19 @@ class Inform7Lexer(RegexLexer): (r'[%s]{1,3}' % _dash, Text), (r'(?i)(volume|book|part|chapter|section)\b[^%s]*' % _newline, Generic.Heading, '#pop'), - (r'', Text, '#pop') + default('#pop') ], '+documentation-heading': [ (r'\s+', Text), (r'\[', Comment.Multiline, '+comment'), (r'(?i)documentation\s+', Text, '+documentation-heading2'), - (r'', Text, '#pop') + default('#pop') ], '+documentation-heading2': [ (r'\s+', Text), (r'\[', Comment.Multiline, '+comment'), (r'[%s]{4}\s' % _dash, Text, '+documentation'), - (r'', Text, '#pop:2') + default('#pop:2') ], '+documentation': [ (r'(?i)(%s)\s*(chapter|example)\s*:[^%s]*' % diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index 5a07cd0d..a490314b 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -11,7 +11,7 @@ import re from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \ - using, this + using, this, default from pygments.token import Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other from pygments.util import get_choice_opt, iteritems @@ -440,7 +440,7 @@ class VbNetLexer(RegexLexer): ], 'dim': [ (r'[a-z_]\w*', Name.Variable, '#pop'), - (r'', Text, '#pop'), # any other syntax + default('#pop'), # any other syntax ], 'funcname': [ (r'[a-z_]\w*', Name.Function, '#pop'), @@ -455,7 +455,7 @@ class VbNetLexer(RegexLexer): (r'\s+', Text), (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b', Keyword, '#pop'), - (r'', Text, '#pop'), + default('#pop'), ] } @@ -645,7 +645,7 @@ class FSharpLexer(RegexLexer): (r'[A-Z][\w\']*', Name, '#pop'), (r'[a-z_][\w\']*', Name, '#pop'), # e.g. dictionary index access - (r'', Text, '#pop'), + default('#pop'), ], 'comment': [ (r'[^(*)@"]+', Comment), diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 0c978254..af3b2552 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions +from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions, default from pygments.token import Text, Comment, Operator, Keyword, Name, \ String, Number, Punctuation, Literal, Generic, Error from pygments import unistring as uni @@ -860,7 +860,7 @@ class RacketLexer(RegexLexer): (r'`|,@?', Operator), (_symbol, String.Symbol, '#pop'), (r'[|\\]', Error), - (r'', Text, '#pop') + default('#pop') ], 'list': [ (_closing_parenthesis, Punctuation, '#pop') @@ -1960,7 +1960,7 @@ class SMLLexer(RegexLexer): # Main parser (prevents errors in files that have scoping errors) - 'root': [ (r'', Text, 'main') ], + 'root': [ default('main') ], # In this scope, I expect '|' to not be followed by a function name, # and I expect 'and' to be followed by a binding site @@ -2013,7 +2013,7 @@ class SMLLexer(RegexLexer): include('breakout'), (r'(%s)' % alphanumid_re, Name.Namespace), - (r'', Text, '#pop'), + default('#pop'), ], # Dealing with what comes after the 'fun' (or 'and' or '|') keyword @@ -2026,7 +2026,7 @@ class SMLLexer(RegexLexer): (r'(%s)' % symbolicid_re, Name.Function, '#pop'), # Ignore interesting function declarations like "fun (x + y) = ..." - (r'', Text, '#pop'), + default('#pop'), ], # Dealing with what comes after the 'val' (or 'and') keyword @@ -2043,7 +2043,7 @@ class SMLLexer(RegexLexer): (r'(%s)' % symbolicid_re, Name.Variable, '#pop'), # Ignore interesting patterns like 'val (x, y)' - (r'', Text, '#pop'), + default('#pop'), ], # Dealing with what comes after the 'type' (or 'and') keyword @@ -2545,7 +2545,7 @@ class OpaLexer(RegexLexer): 'type': [ include('comments-and-spaces'), (r'->', Keyword.Type), - (r'', Keyword.Type, ('#pop', 'type-lhs-1', 'type-with-slash')), + default(('#pop', 'type-lhs-1', 'type-with-slash')), ], # parses all the atomic or closed constructions in the syntax of type @@ -2562,7 +2562,7 @@ class OpaLexer(RegexLexer): # we think we are parsing types when in fact we are parsing # some css, so we just pop the states until we get back into # the root state - (r'', Keyword.Type, '#pop'), + default('#pop'), ], # type-with-slash is either: @@ -2570,13 +2570,13 @@ class OpaLexer(RegexLexer): # * type-1 (/ type-1)+ 'type-with-slash': [ include('comments-and-spaces'), - (r'', Keyword.Type, ('#pop', 'slash-type-1', 'type-1')), + default(('#pop', 'slash-type-1', 'type-1')), ], 'slash-type-1': [ include('comments-and-spaces'), ('/', Keyword.Type, ('#pop', 'type-1')), # same remark as above - (r'', Keyword.Type, '#pop'), + default('#pop'), ], # we go in this state after having parsed a type-with-slash @@ -2588,7 +2588,7 @@ class OpaLexer(RegexLexer): include('comments-and-spaces'), (r'->', Keyword.Type, ('#pop', 'type')), (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')), - (r'', Keyword.Type, '#pop'), + default('#pop'), ], 'type-arrow': [ include('comments-and-spaces'), @@ -2597,7 +2597,7 @@ class OpaLexer(RegexLexer): (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'), (r'->', Keyword.Type, ('#pop', 'type')), # same remark as above - (r'', Keyword.Type, '#pop'), + default('#pop'), ], # no need to do precise parsing for tuples and records @@ -2624,7 +2624,7 @@ class OpaLexer(RegexLexer): # 'type-tuple': [ # include('comments-and-spaces'), # (r'\)', Keyword.Type, '#pop'), -# (r'', Keyword.Type, ('#pop', 'type-tuple-1', 'type-1')), +# default(('#pop', 'type-tuple-1', 'type-1')), # ], # 'type-tuple-1': [ # include('comments-and-spaces'), @@ -2846,7 +2846,7 @@ class CoqLexer(RegexLexer): (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace), (r'[A-Z][\w\']*', Name.Class, '#pop'), (r'[a-z][a-z0-9_\']*', Name, '#pop'), - (r'', Text, '#pop') + default('#pop') ], } @@ -3439,7 +3439,7 @@ class KokaLexer(RegexLexer): (r'::|\->|[\.:|]', tokenType), #catchall - (r'', Text, '#pop') + default('#pop') ], # comments and literals diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index e9c9be20..341d8eb2 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - this, combined + this, combined, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation from pygments import unistring as uni @@ -439,7 +439,7 @@ class GroovyLexer(RegexLexer): 'root': [ # Groovy allows a file to start with a shebang (r'#!(.*?)$', Comment.Preproc, 'base'), - (r'', Text, 'base'), + default('base'), ], 'base': [ # method names @@ -1401,7 +1401,7 @@ class JasminLexer(RegexLexer): (r'\n+', Text), (r'\.end%s' % _break, Keyword.Reserved, '#pop'), include('default'), - (r'', Text, ('annotation-items', 'descriptor/no-dots')) + default(('annotation-items', 'descriptor/no-dots')) ], 'annotation-items': [ (r"'", String.Single, 'quote'), @@ -1447,12 +1447,12 @@ class JasminLexer(RegexLexer): ], 'descriptors/convert-dots': [ (r'\)', Punctuation, '#pop'), - (r'', Text, 'descriptor/convert-dots') + default('descriptor/convert-dots') ], 'enclosing-method': [ (_ws, Text), (r'(?=[^%s]*\()' % _separator, Text, ('#pop', 'invocation')), - (r'', Text, ('#pop', 'class/convert-dots')) + default(('#pop', 'class/convert-dots')) ], 'exception': [ include('default'), diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 779cdece..0c4753ad 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -12,7 +12,7 @@ import re from pygments.lexer import RegexLexer, include, bygroups, using, \ - this, combined, ExtendedRegexLexer + this, combined, ExtendedRegexLexer, default from pygments.token import Error, Punctuation, Literal, Token, \ Text, Comment, Operator, Keyword, Name, String, Number, Generic, \ Whitespace @@ -403,7 +403,7 @@ class SmalltalkLexer(RegexLexer): (r'(:)(\s*)(\w+)', bygroups(Operator, Text, Name.Variable)), (r'\|', Operator, '#pop'), - (r'', Text, '#pop'), # else pop + default('#pop'), # else pop ], 'literals' : [ (r"'(''|[^'])*'", String, 'afterobject'), @@ -2607,7 +2607,7 @@ class AwkLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'\B', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop') ], 'badregex': [ (r'\n', Text, '#pop') @@ -3129,7 +3129,7 @@ class KconfigLexer(RegexLexer): return [ (_rx_indent(level), String.Doc), (r'\s*\n', Text), - (r'', Generic, '#pop:2') + default('#pop:2') ] tokens = { diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index f270cf44..8d16c14f 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, bygroups +from pygments.lexer import RegexLexer, bygroups, default from pygments.token import Keyword, Punctuation, String, Number, Operator, \ Whitespace, Name, Literal, Comment, Text @@ -94,6 +94,6 @@ class SparqlLexer(RegexLexer): (r'(@)([a-z]+(:?-[a-z0-9]+)*)', bygroups(Operator, Name.Function), '#pop:2'), (r'\^\^', Operator, '#pop:2'), - (r'', Text, '#pop:2'), + default('#pop:2'), ], } diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index 4d53dca6..89d29f0f 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -17,7 +17,7 @@ from pygments.lexers.agile import PythonLexer, PerlLexer from pygments.lexers.compiled import JavaLexer from pygments.lexers.jvm import TeaLangLexer from pygments.lexer import Lexer, DelegatingLexer, RegexLexer, bygroups, \ - include, using, this + include, using, this, default from pygments.token import Error, Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Other, Token from pygments.util import html_doctype_matches, looks_like_xml @@ -229,7 +229,7 @@ class VelocityLexer(RegexLexer): (r'(\.)(' + identifier + r')', bygroups(Punctuation, Name.Variable), '#push'), (r'\}', Punctuation, '#pop'), - (r'', Other, '#pop') + default('#pop') ], 'directiveparams': [ (r'(&&|\|\||==?|!=?|[-<>+*%&\|\^/])|\b(eq|ne|gt|lt|ge|le|not|in)\b', diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 10675654..8de3ded7 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -13,7 +13,7 @@ import re from bisect import bisect from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \ - bygroups, include, using, this, do_insertions + bygroups, include, using, this, do_insertions, default from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \ Generic, Operator, Number, Whitespace, Literal from pygments.util import get_bool_opt, ClassNotFound @@ -503,7 +503,7 @@ class TexLexer(RegexLexer): 'command': [ (r'\[.*?\]', Name.Attribute), (r'\*', Keyword), - (r'', Text, '#pop'), + default('#pop'), ], } diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index 2428ffcd..91b2a1eb 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -13,7 +13,7 @@ import re import copy from pygments.lexer import RegexLexer, ExtendedRegexLexer, bygroups, using, \ - include, this + include, this, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Other, Punctuation, Literal from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \ @@ -55,7 +55,7 @@ class JavascriptLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop') ], 'badregex': [ (r'\n', Text, '#pop') @@ -542,7 +542,7 @@ class ObjectiveJLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop'), + default('#pop'), ], 'badregex': [ (r'\n', Text, '#pop'), @@ -1258,7 +1258,7 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), (ident, Name.Namespace), (r'\.', Punctuation, 'import-ident'), - (r'', Text, '#pop'), + default('#pop'), ], 'import': [ @@ -1267,7 +1267,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'\*', Keyword), # wildcard import (r'\.', Punctuation, 'import-ident'), (r'in', Keyword.Namespace, 'ident'), - (r'', Text, '#pop'), + default('#pop'), ], 'import-ident': [ @@ -1280,14 +1280,14 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), (ident, Name.Namespace), (r'\.', Punctuation, 'import-ident'), - (r'', Text, '#pop'), + default('#pop'), ], 'preproc-error': [ (r'\s+', Comment.Preproc), (r"'", String.Single, ('#pop', 'string-single')), (r'"', String.Double, ('#pop', 'string-double')), - (r'', Text, '#pop'), + default('#pop'), ], 'preproc-expr': [ @@ -1322,7 +1322,7 @@ class HaxeLexer(ExtendedRegexLexer): 'preproc-expr-chain': [ (r'\s+', Comment.Preproc), (binop, Comment.Preproc, ('#pop', 'preproc-expr-in-parenthesis')), - (r'', Text, '#pop'), + default('#pop'), ], # same as 'preproc-expr' but able to chain 'preproc-expr-chain' @@ -1354,7 +1354,7 @@ class HaxeLexer(ExtendedRegexLexer): 'abstract' : [ include('spaces'), - (r'', Text, ('#pop', 'abstract-body', 'abstract-relation', + default(('#pop', 'abstract-body', 'abstract-relation', 'abstract-opaque', 'type-param-constraint', 'type-name')), ], @@ -1366,14 +1366,14 @@ class HaxeLexer(ExtendedRegexLexer): 'abstract-opaque' : [ include('spaces'), (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'type')), - (r'', Text, '#pop'), + default('#pop'), ], 'abstract-relation': [ include('spaces'), (r'(?:to|from)', Keyword.Declaration, 'type'), (r',', Punctuation), - (r'', Text, '#pop'), + default('#pop'), ], 'meta': [ @@ -1385,7 +1385,7 @@ class HaxeLexer(ExtendedRegexLexer): 'meta-colon': [ include('spaces'), (r':', Name.Decorator, '#pop'), - (r'', Text, '#pop'), + default('#pop'), ], # same as 'ident' but set token as Name.Decorator instead of Name @@ -1397,13 +1397,13 @@ class HaxeLexer(ExtendedRegexLexer): 'meta-body': [ include('spaces'), (r'\(', Name.Decorator, ('#pop', 'meta-call')), - (r'', Text, '#pop'), + default('#pop'), ], 'meta-call': [ include('spaces'), (r'\)', Name.Decorator, '#pop'), - (r'', Text, ('#pop', 'meta-call-sep', 'expr')), + default(('#pop', 'meta-call-sep', 'expr')), ], 'meta-call-sep': [ @@ -1414,7 +1414,7 @@ class HaxeLexer(ExtendedRegexLexer): 'typedef': [ include('spaces'), - (r'', Text, ('#pop', 'typedef-body', 'type-param-constraint', + default(('#pop', 'typedef-body', 'type-param-constraint', 'type-name')), ], @@ -1425,7 +1425,7 @@ class HaxeLexer(ExtendedRegexLexer): 'enum': [ include('spaces'), - (r'', Text, ('#pop', 'enum-body', 'bracket-open', + default(('#pop', 'enum-body', 'bracket-open', 'type-param-constraint', 'type-name')), ], @@ -1440,12 +1440,12 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), (r'\(', Punctuation, ('#pop', 'semicolon', 'flag', 'function-param')), - (r'', Punctuation, ('#pop', 'semicolon', 'flag')), + default(('#pop', 'semicolon', 'flag')), ], 'class': [ include('spaces'), - (r'', Text, ('#pop', 'class-body', 'bracket-open', 'extends', + default(('#pop', 'class-body', 'bracket-open', 'extends', 'type-param-constraint', 'type-name')), ], @@ -1454,7 +1454,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'(?:extends|implements)\b', Keyword.Declaration, 'type'), (r',', Punctuation), # the comma is made optional here, since haxe2 # requires the comma but haxe3 does not allow it - (r'', Text, '#pop'), + default('#pop'), ], 'bracket-open': [ @@ -1473,7 +1473,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'\}', Punctuation, '#pop'), (r'(?:static|public|private|override|dynamic|inline|macro)\b', Keyword.Declaration), - (r'', Text, 'class-member'), + default('class-member'), ], 'class-member': [ @@ -1495,7 +1495,7 @@ class HaxeLexer(ExtendedRegexLexer): 'optional-expr': [ include('spaces'), include('expr'), - (r'', Text, '#pop'), + default('#pop'), ], 'class-method': [ @@ -1524,7 +1524,7 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'prop-get-set-opt', 'comma', 'prop-get-set-opt')), - (r'', Text, '#pop'), + default('#pop'), ], 'prop-get-set-opt': [ @@ -1537,7 +1537,7 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), # makes semicolon optional here, just to avoid checking the last # one is bracket or not. - (r'', Text, ('#pop', 'optional-semicolon', 'expr')), + default(('#pop', 'optional-semicolon', 'expr')), ], 'expr': [ @@ -1600,7 +1600,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'(\.)(' + ident_no_keyword + ')', bygroups(Punctuation, Name)), (r'\[', Punctuation, 'array-access'), (r'\(', Punctuation, 'call'), - (r'', Text, '#pop'), + default('#pop'), ], # macro reification @@ -1615,7 +1615,7 @@ class HaxeLexer(ExtendedRegexLexer): (r'(?:enum)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'enum')), (r'(?:typedef)\b', Keyword.Declaration, ('#pop', 'optional-semicolon', 'typedef')), - (r'', Text, ('#pop', 'expr')), + default(('#pop', 'expr')), ], # cast can be written as "cast expr" or "cast(expr, type)" @@ -1623,27 +1623,27 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), (r'\(', Punctuation, ('#pop', 'parenthesis-close', 'cast-type', 'expr')), - (r'', Text, ('#pop', 'expr')), + default(('#pop', 'expr')), ], # optionally give a type as the 2nd argument of cast() 'cast-type': [ include('spaces'), (r',', Punctuation, ('#pop', 'type')), - (r'', Text, '#pop'), + default('#pop'), ], 'catch': [ include('spaces'), (r'(?:catch)\b', Keyword, ('expr', 'function-param', 'parenthesis-open')), - (r'', Text, '#pop'), + default('#pop'), ], # do-while loop 'do': [ include('spaces'), - (r'', Punctuation, ('#pop', 'do-while', 'expr')), + default(('#pop', 'do-while', 'expr')), ], # the while after do @@ -1672,12 +1672,12 @@ class HaxeLexer(ExtendedRegexLexer): 'else': [ include('spaces'), (r'(?:else)\b', Keyword, ('#pop', 'expr')), - (r'', Text, '#pop'), + default('#pop'), ], 'switch': [ include('spaces'), - (r'', Text, ('#pop', 'switch-body', 'bracket-open', 'expr')), + default(('#pop', 'switch-body', 'bracket-open', 'expr')), ], 'switch-body': [ @@ -1689,7 +1689,7 @@ class HaxeLexer(ExtendedRegexLexer): 'case': [ include('spaces'), (r':', Punctuation, '#pop'), - (r'', Text, ('#pop', 'case-sep', 'case-guard', 'expr')), + default(('#pop', 'case-sep', 'case-guard', 'expr')), ], 'case-sep': [ @@ -1701,25 +1701,25 @@ class HaxeLexer(ExtendedRegexLexer): 'case-guard': [ include('spaces'), (r'(?:if)\b', Keyword, ('#pop', 'parenthesis', 'parenthesis-open')), - (r'', Text, '#pop'), + default('#pop'), ], # optional multiple expr under a case 'case-block': [ include('spaces'), (r'(?!(?:case|default)\b|\})', Keyword, 'expr-statement'), - (r'', Text, '#pop'), + default('#pop'), ], 'new': [ include('spaces'), - (r'', Text, ('#pop', 'call', 'parenthesis-open', 'type')), + default(('#pop', 'call', 'parenthesis-open', 'type')), ], 'array-decl': [ include('spaces'), (r'\]', Punctuation, '#pop'), - (r'', Text, ('#pop', 'array-decl-sep', 'expr')), + default(('#pop', 'array-decl-sep', 'expr')), ], 'array-decl-sep': [ @@ -1730,7 +1730,7 @@ class HaxeLexer(ExtendedRegexLexer): 'array-access': [ include('spaces'), - (r'', Text, ('#pop', 'array-access-close', 'expr')), + default(('#pop', 'array-access-close', 'expr')), ], 'array-access-close': [ @@ -1756,7 +1756,7 @@ class HaxeLexer(ExtendedRegexLexer): 'optional-semicolon': [ include('spaces'), (r';', Punctuation, '#pop'), - (r'', Text, '#pop'), + default('#pop'), ], # identity that CAN be a Haxe keyword @@ -1768,7 +1768,7 @@ class HaxeLexer(ExtendedRegexLexer): 'dollar': [ include('spaces'), (r'\{', Punctuation, ('#pop', 'expr-chain', 'bracket-close', 'expr')), - (r'', Text, ('#pop', 'expr-chain')), + default(('#pop', 'expr-chain')), ], 'type-name': [ @@ -1779,7 +1779,7 @@ class HaxeLexer(ExtendedRegexLexer): 'type-full-name': [ include('spaces'), (r'\.', Punctuation, 'ident'), - (r'', Text, '#pop'), + default('#pop'), ], 'type': [ @@ -1792,14 +1792,14 @@ class HaxeLexer(ExtendedRegexLexer): 'type-parenthesis': [ include('spaces'), - (r'', Text, ('#pop', 'parenthesis-close', 'type')), + default(('#pop', 'parenthesis-close', 'type')), ], 'type-check': [ include('spaces'), (r'->', Punctuation, ('#pop', 'type')), (r'<(?!=)', Punctuation, 'type-param'), - (r'', Text, '#pop'), + default('#pop'), ], 'type-struct': [ @@ -1847,7 +1847,7 @@ class HaxeLexer(ExtendedRegexLexer): # ie. the <A,B> path in Map<A,B> 'type-param': [ include('spaces'), - (r'', Text, ('#pop', 'type-param-sep', 'type-param-type')), + default(('#pop', 'type-param-sep', 'type-param-type')), ], 'type-param-sep': [ @@ -1862,7 +1862,7 @@ class HaxeLexer(ExtendedRegexLexer): include('spaces'), (r'<(?!=)', Punctuation, ('#pop', 'type-param-constraint-sep', 'type-param-constraint-flag', 'type-name')), - (r'', Text, '#pop'), + default('#pop'), ], 'type-param-constraint-sep': [ @@ -1876,14 +1876,14 @@ class HaxeLexer(ExtendedRegexLexer): 'type-param-constraint-flag': [ include('spaces'), (r':', Punctuation, ('#pop', 'type-param-constraint-flag-type')), - (r'', Text, '#pop'), + default('#pop'), ], 'type-param-constraint-flag-type': [ include('spaces'), (r'\(', Punctuation, ('#pop', 'type-param-constraint-flag-type-sep', 'type')), - (r'', Text, ('#pop', 'type')), + default(('#pop', 'type')), ], 'type-param-constraint-flag-type-sep': [ @@ -1895,7 +1895,7 @@ class HaxeLexer(ExtendedRegexLexer): # a parenthesis expr that contain exactly one expr 'parenthesis': [ include('spaces'), - (r'', Text, ('#pop', 'parenthesis-close', 'flag', 'expr')), + default(('#pop', 'parenthesis-close', 'flag', 'expr')), ], 'parenthesis-open': [ @@ -1917,21 +1917,21 @@ class HaxeLexer(ExtendedRegexLexer): 'var-sep': [ include('spaces'), (r',', Punctuation, ('#pop', 'var')), - (r'', Text, '#pop'), + default('#pop'), ], # optional assignment 'assign': [ include('spaces'), (r'=', Operator, ('#pop', 'expr')), - (r'', Text, '#pop'), + default('#pop'), ], # optional type flag 'flag': [ include('spaces'), (r':', Punctuation, ('#pop', 'type')), - (r'', Text, '#pop'), + default('#pop'), ], # colon as part of a ternary operator (?:) @@ -1944,7 +1944,7 @@ class HaxeLexer(ExtendedRegexLexer): 'call': [ include('spaces'), (r'\)', Punctuation, '#pop'), - (r'', Text, ('#pop', 'call-sep', 'expr')), + default(('#pop', 'call-sep', 'expr')), ], # after a call param @@ -1961,27 +1961,27 @@ class HaxeLexer(ExtendedRegexLexer): ('#pop', 'bracket-check')), (r"'", String.Single, ('#pop', 'bracket-check', 'string-single')), (r'"', String.Double, ('#pop', 'bracket-check', 'string-double')), - (r'', Text, ('#pop', 'block')), + default(('#pop', 'block')), ], 'bracket-check': [ include('spaces'), (r':', Punctuation, ('#pop', 'object-sep', 'expr')), #is object - (r'', Text, ('#pop', 'block', 'optional-semicolon', 'expr-chain')), #is block + default(('#pop', 'block', 'optional-semicolon', 'expr-chain')), #is block ], # code block 'block': [ include('spaces'), (r'\}', Punctuation, '#pop'), - (r'', Text, 'expr-statement'), + default('expr-statement'), ], # object in key-value pairs 'object': [ include('spaces'), (r'\}', Punctuation, '#pop'), - (r'', Text, ('#pop', 'object-sep', 'expr', 'colon', 'ident-or-string')) + default(('#pop', 'object-sep', 'expr', 'colon', 'ident-or-string')) ], # a key of an object @@ -2074,7 +2074,7 @@ class HamlLexer(ExtendedRegexLexer): (r'([&!]?[=~])(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), 'root'), - (r'', Text, 'plain'), + default('plain'), ], 'content': [ @@ -2293,19 +2293,19 @@ common_sass_tokens = { 'pseudo-class': [ (r'[\w-]+', Name.Decorator), (r'#\{', String.Interpol, 'interpolation'), - (r'', Text, '#pop'), + default('#pop'), ], 'class': [ (r'[\w-]+', Name.Class), (r'#\{', String.Interpol, 'interpolation'), - (r'', Text, '#pop'), + default('#pop'), ], 'id': [ (r'[\w-]+', Name.Namespace), (r'#\{', String.Interpol, 'interpolation'), - (r'', Text, '#pop'), + default('#pop'), ], 'for': [ @@ -2351,7 +2351,7 @@ class SassLexer(ExtendedRegexLexer): bygroups(Name.Variable, Operator), 'value'), (r':', Name.Attribute, 'old-style-attr'), (r'(?=.+?[=:]([^a-z]|$))', Name.Attribute, 'new-style-attr'), - (r'', Text, 'selector'), + default('selector'), ], 'single-comment': [ @@ -2374,7 +2374,7 @@ class SassLexer(ExtendedRegexLexer): (r'[^\s:="\[]+', Name.Attribute), (r'#{', String.Interpol, 'interpolation'), (r'[ \t]*=', Operator, 'value'), - (r'', Text, 'value'), + default('value'), ], 'new-style-attr': [ @@ -2421,7 +2421,7 @@ class ScssLexer(RegexLexer): (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'), (r'(?=[^;{}][;}])', Name.Attribute, 'attr'), (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), - (r'', Text, 'selector'), + default('selector'), ], 'attr': [ @@ -2474,7 +2474,7 @@ class CoffeeScriptLexer(RegexLexer): (r'///', String.Regex, ('#pop', 'multilineregex')), (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), - (r'', Text, '#pop'), + default('#pop'), ], 'root': [ # this next expr leads to infinite loops root -> slashstartsregex @@ -2699,7 +2699,7 @@ class LiveScriptLexer(RegexLexer): (r'//', String.Regex, ('#pop', 'multilineregex')), (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), - (r'', Text, '#pop'), + default('#pop'), ], 'root': [ # this next expr leads to infinite loops root -> slashstartsregex @@ -2846,7 +2846,7 @@ class ScamlLexer(ExtendedRegexLexer): (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), - (r'', Text, 'plain'), + default('plain'), ], 'content': [ @@ -2955,7 +2955,7 @@ class JadeLexer(ExtendedRegexLexer): (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), - (r'', Text, 'plain'), + default('plain'), ], 'content': [ @@ -3535,7 +3535,7 @@ class XQueryLexer(ExtendedRegexLexer): (r'\(:', Comment, 'comment'), (r'\*|\?|\+', Operator, 'operator'), (r':=', Operator, 'root'), - (r'', Text, 'operator'), + default('operator'), ], 'option': [ include('whitespace'), @@ -3820,7 +3820,7 @@ class TypeScriptLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop') ], 'badregex': [ (r'\n', Text, '#pop') @@ -3906,7 +3906,7 @@ class LassoLexer(RegexLexer): ('delimiters', 'anglebrackets')), (r'<(!--.*?-->)?', Other, 'delimiters'), (r'\s+', Other), - (r'', Other, ('delimiters', 'lassofile')), + default(('delimiters', 'lassofile')), ], 'delimiters': [ (r'\[no_square_brackets\]', Comment.Preproc, 'nosquarebrackets'), @@ -4085,7 +4085,7 @@ class LassoLexer(RegexLexer): r'(?=\s*(\(([^()]*\([^()]*\))*[^)]*\)\s*)?(::[\w.\s]+)?=>))', Name.Function, 'signature'), include('whitespacecomments'), - (r'', Text, '#pop'), + default('#pop'), ], } @@ -4161,7 +4161,7 @@ class QmlLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop') ], 'badregex': [ (r'\n', Text, '#pop') @@ -4330,19 +4330,19 @@ class MaskLexer(RegexLexer): 'node-class': [ (r'[\w-]+', Name.Variable.Class), (r'~\[', String.Interpol, 'interpolation'), - (r'', Text, '#pop') + default('#pop') ], 'node-id': [ (r'[\w-]+', Name.Function), (r'~\[', String.Interpol, 'interpolation'), - (r'', Text, '#pop') + default('#pop') ], 'node-attr-value':[ (r'\s+', Text), (r'\w+', Name.Variable, '#pop'), (r"'", String, 'string-single-pop2'), (r'"', String, 'string-double-pop2'), - (r'', Text, '#pop') + default('#pop') ], 'node-attr-style-value':[ (r'\s+', Text), @@ -4405,7 +4405,7 @@ class ZephirLexer(RegexLexer): include('commentsandwhitespace'), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), - (r'', Text, '#pop') + default('#pop') ], 'badregex': [ (r'\n', Text, '#pop') diff --git a/tests/test_clexer.py b/tests/test_clexer.py index 188569dc..4aac6d39 100644 --- a/tests/test_clexer.py +++ b/tests/test_clexer.py @@ -28,7 +28,7 @@ class CLexerTest(unittest.TestCase): Number.Float, Number.Float], code.split()): wanted.append(item) wanted.append((Text, ' ')) - wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')] + wanted = wanted[:-1] + [(Text, '\n')] self.assertEqual(list(self.lexer.get_tokens(code)), wanted) def testSwitch(self): @@ -44,15 +44,12 @@ class CLexerTest(unittest.TestCase): } ''' tokens = [ - (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), - (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), - (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Text, u' '), @@ -83,7 +80,6 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) @@ -100,15 +96,12 @@ class CLexerTest(unittest.TestCase): } ''' tokens = [ - (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), - (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), - (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Text, u' '), @@ -141,7 +134,6 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) @@ -154,15 +146,12 @@ class CLexerTest(unittest.TestCase): } ''' tokens = [ - (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), - (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), - (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Name.Label, u'foo'), @@ -176,7 +165,6 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) @@ -189,15 +177,12 @@ class CLexerTest(unittest.TestCase): } ''' tokens = [ - (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), - (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), - (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Name.Label, u'foo'), @@ -212,7 +197,6 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) @@ -225,15 +209,12 @@ class CLexerTest(unittest.TestCase): } ''' tokens = [ - (Token.Text, u''), (Token.Keyword.Type, u'int'), (Token.Text, u' '), (Token.Name.Function, u'main'), - (Token.Text, u''), (Token.Punctuation, u'('), (Token.Punctuation, u')'), (Token.Text, u'\n'), - (Token.Text, u''), (Token.Punctuation, u'{'), (Token.Text, u'\n'), (Token.Name.Label, u'foo'), @@ -251,6 +232,5 @@ class CLexerTest(unittest.TestCase): (Token.Text, u'\n'), (Token.Punctuation, u'}'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment)))) diff --git a/tests/test_objectiveclexer.py b/tests/test_objectiveclexer.py index 46fdb6d2..7339f6f7 100644 --- a/tests/test_objectiveclexer.py +++ b/tests/test_objectiveclexer.py @@ -22,20 +22,17 @@ class ObjectiveCLexerTest(unittest.TestCase): def testLiteralNumberInt(self): fragment = u'@(1);\n' expected = [ - (Token.Text, u''), (Token.Literal, u'@('), (Token.Literal.Number.Integer, u'1'), (Token.Literal, u')'), (Token.Punctuation, u';'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) def testLiteralNumberExpression(self): fragment = u'@(1+2);\n' expected = [ - (Token.Text, u''), (Token.Literal, u'@('), (Token.Literal.Number.Integer, u'1'), (Token.Operator, u'+'), @@ -43,14 +40,12 @@ class ObjectiveCLexerTest(unittest.TestCase): (Token.Literal, u')'), (Token.Punctuation, u';'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) def testLiteralNumberNestedExpression(self): fragment = u'@(1+(2+3));\n' expected = [ - (Token.Text, u''), (Token.Literal, u'@('), (Token.Literal.Number.Integer, u'1'), (Token.Operator, u'+'), @@ -62,30 +57,25 @@ class ObjectiveCLexerTest(unittest.TestCase): (Token.Literal, u')'), (Token.Punctuation, u';'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) def testLiteralNumberBool(self): fragment = u'@NO;\n' expected = [ - (Token.Text, u''), (Token.Literal.Number, u'@NO'), (Token.Punctuation, u';'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) def testLieralNumberBoolExpression(self): fragment = u'@(YES);\n' expected = [ - (Token.Text, u''), (Token.Literal, u'@('), (Token.Name.Builtin, u'YES'), (Token.Literal, u')'), (Token.Punctuation, u';'), (Token.Text, u'\n'), - (Token.Text, u''), ] self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) diff --git a/tests/test_regexlexer.py b/tests/test_regexlexer.py index b12dce0a..546dfcae 100644 --- a/tests/test_regexlexer.py +++ b/tests/test_regexlexer.py @@ -12,6 +12,7 @@ import unittest from pygments.token import Text from pygments.lexer import RegexLexer from pygments.lexer import bygroups +from pygments.lexer import default class TestLexer(RegexLexer): @@ -20,6 +21,7 @@ class TestLexer(RegexLexer): 'root': [ ('a', Text.Root, 'rag'), ('e', Text.Root), + default(('beer', 'beer')) ], 'beer': [ ('d', Text.Beer, ('#pop', '#pop')), @@ -45,3 +47,8 @@ class TupleTransTest(unittest.TestCase): self.assertEqual(toks, [(0, Text.Root, 'a'), (1, Text, u'\n'), (2, Text.Root, 'e')]) + + def test_default(self): + lx = TestLexer() + toks = list(lx.get_tokens_unprocessed('d')) + self.assertEqual(toks, [(0, Text.Beer, 'd')]) |