diff options
author | Tim Hatch <tim@timhatch.com> | 2014-05-17 09:19:35 -0700 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2014-05-17 09:19:35 -0700 |
commit | a62cdcfaa5a260274303cb93b92a3f2e2ce3be98 (patch) | |
tree | e7d2b4cd4e10a6e43fe4b2dc4e03ab2e827ebc20 /pygments/lexers/jvm.py | |
parent | 70a10a2e423d9729b62c7b56faca28889c0d688a (diff) | |
parent | ff12540907fe9d98bf02c9508a171659457b14b2 (diff) | |
download | pygments-a62cdcfaa5a260274303cb93b92a3f2e2ce3be98.tar.gz |
Merged in timgilbert/pygments-main/clj-keyword-fix (pull request #326)
Tweaking clojure keyword lexing
Diffstat (limited to 'pygments/lexers/jvm.py')
-rw-r--r-- | pygments/lexers/jvm.py | 411 |
1 files changed, 339 insertions, 72 deletions
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 6d8a2584..e9c9be20 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -21,7 +21,7 @@ from pygments import unistring as uni __all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer', 'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'ClojureScriptLexer', 'KotlinLexer', 'XtendLexer', 'AspectJLexer', 'CeylonLexer', - 'PigLexer', 'GoloLexer'] + 'PigLexer', 'GoloLexer', 'JasminLexer'] class JavaLexer(RegexLexer): @@ -34,14 +34,19 @@ class JavaLexer(RegexLexer): filenames = ['*.java'] mimetypes = ['text/x-java'] - flags = re.MULTILINE | re.DOTALL + flags = re.MULTILINE | re.DOTALL | re.UNICODE tokens = { 'root': [ (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + # method names + (r'((?:(?:[^\W\d]|\$)[\w\.\[\]\$<>]*\s+)+?)' # return arguments + r'((?:[^\W\d]|\$)[\w\$]*)' # method name + r'(\s*)(\()', # signature start + bygroups(using(this), Name.Function, Text, Operator)), + (r'@[^\W\d][\w\.]*', Name.Decorator), (r'(assert|break|case|catch|continue|default|do|else|finally|for|' r'if|goto|instanceof|new|return|switch|this|throw|try|while)\b', Keyword), @@ -50,20 +55,15 @@ class JavaLexer(RegexLexer): r'transient|volatile)\b', Keyword.Declaration), (r'(boolean|byte|char|double|float|int|long|short|void)\b', Keyword.Type), - # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]<>]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start - bygroups(using(this), Name.Function, Text, Operator)), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'(true|false|null)\b', Keyword.Constant), (r'(class|interface)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'(\.)((?:[^\W\d]|\$)[\w\$]*)', bygroups(Operator, Name.Attribute)), + (r'([^\W\d]|\$)[\w\$]*:', Name.Label), + (r'([^\W\d]|\$)[\w\$]*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -71,10 +71,10 @@ class JavaLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'([^\W\d]|\$)[\w\$]*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } @@ -340,14 +340,14 @@ class GosuLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # modifiers etc. - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # modifiers etc. + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(in|as|typeof|statictypeof|typeis|typeas|if|else|foreach|for|' r'index|while|do|continue|break|return|try|catch|finally|this|' r'throw|new|switch|case|default|eval|super|outer|classpath|' @@ -360,16 +360,16 @@ class GosuLexer(RegexLexer): Keyword.Type), (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)), (r'(true|false|null|NaN|Infinity)\b', Keyword.Constant), - (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(class|interface|enhancement|enum)(\s+)([a-zA-Z_]\w*)', bygroups(Keyword.Declaration, Text, Name.Class)), - (r'(uses)(\s+)([a-zA-Z0-9_.]+\*?)', + (r'(uses)(\s+)([\w.]+\*?)', bygroups(Keyword.Namespace, Text, Name.Namespace)), (r'"', String, 'string'), - (r'(\??[\.#])([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(\??[\.#])([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'(:)([a-zA-Z_][a-zA-Z0-9_]*)', + (r'(:)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_\$]\w*', Name), (r'and|or|not|[\\~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'[0-9]+', Number.Integer), @@ -437,15 +437,20 @@ class GroovyLexer(RegexLexer): tokens = { 'root': [ + # Groovy allows a file to start with a shebang + (r'#!(.*?)$', Comment.Preproc, 'base'), + (r'', Text, 'base'), + ], + 'base': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(assert|break|case|catch|continue|default|do|else|finally|for|' r'if|goto|instanceof|new|return|switch|this|throw|try|while|in|as)\b', Keyword), @@ -464,9 +469,9 @@ class GroovyLexer(RegexLexer): (r'\$/((?!/\$).)*/\$', String), (r'/(\\\\|\\"|[^/])*/', String), (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -474,13 +479,16 @@ class GroovyLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } + def analyse_text(text): + return shebang_matches(text, r'groovy') + class IokeLexer(RegexLexer): """ @@ -550,8 +558,8 @@ class IokeLexer(RegexLexer): (r'#r\[', String.Regex, 'squareRegexp'), #Symbols - (r':[a-zA-Z0-9_!:?]+', String.Symbol), - (r'[a-zA-Z0-9_!:?]+:(?![a-zA-Z0-9_!?])', String.Other), + (r':[\w!:?]+', String.Symbol), + (r'[\w!:?]+:(?![\w!?])', String.Other), (r':"(\\\\|\\"|[^"])*"', String.Symbol), #Documentation @@ -564,10 +572,10 @@ class IokeLexer(RegexLexer): (r'#\[', String, 'squareText'), #Mimic - (r'[a-zA-Z0-9_][a-zA-Z0-9!?_:]+(?=\s*=.*mimic\s)', Name.Entity), + (r'\w[a-zA-Z0-9!?_:]+(?=\s*=.*mimic\s)', Name.Entity), #Assignment - (r'[a-zA-Z_][a-zA-Z0-9_!:?]*(?=[\s]*[+*/-]?=[^=].*($|\.))', + (r'[a-zA-Z_][\w!:?]*(?=[\s]*[+*/-]?=[^=].*($|\.))', Name.Variable), # keywords @@ -658,17 +666,17 @@ class IokeLexer(RegexLexer): r'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|' r'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|' u'\\?|#|\u2260|\u2218|\u2208|\u2209)', Operator), - (r'(and|nand|or|xor|nor|return|import)(?![a-zA-Z0-9_!?])', + (r'(and|nand|or|xor|nor|return|import)(?![\w!?])', Operator), # Punctuation (r'(\`\`|\`|\'\'|\'|\.|\,|@@|@|\[|\]|\(|\)|{|})', Punctuation), #kinds - (r'[A-Z][a-zA-Z0-9_!:?]*', Name.Class), + (r'[A-Z][\w!:?]*', Name.Class), #default cellnames - (r'[a-z_][a-zA-Z0-9_!:?]*', Name) + (r'[a-z_][\w!:?]*', Name) ] } @@ -840,14 +848,14 @@ class TeaLangLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(and|break|else|foreach|if|in|not|or|reverse)\b', Keyword), (r'(as|call|define)\b', Keyword.Declaration), @@ -856,9 +864,9 @@ class TeaLangLexer(RegexLexer): (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), (r'\'(\\\\|\\\'|[^\'])*\'', String), - (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'(\.)([a-zA-Z_]\w*)', bygroups(Operator, Name.Attribute)), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'(isa|[.]{3}|[.]{2}|[=#!<>+-/%&;,.\*\\\(\)\[\]\{\}])', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -866,10 +874,10 @@ class TeaLangLexer(RegexLexer): (r'\n', Text) ], 'template': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], } @@ -894,9 +902,9 @@ class CeylonLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_]\w*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), @@ -919,10 +927,10 @@ class CeylonLexer(RegexLexer): (r'"(\\\\|\\"|[^"])*"', String), (r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char), (r'".*``.*``.*"', String.Interpol), - (r'(\.)([a-z_][a-zA-Z0-9_]*)', + (r'(\.)([a-z_]\w*)', bygroups(Operator, Name.Attribute)), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'\d{1,3}(_\d{3})+\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float), (r'\d{1,3}(_\d{3})+\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?', @@ -939,10 +947,10 @@ class CeylonLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[A-Za-z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[A-Za-z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-z][a-zA-Z0-9_.]*', + (r'[a-z][\w.]*', Name.Namespace, '#pop') ], 'comment': [ @@ -1034,14 +1042,14 @@ class XtendLexer(RegexLexer): tokens = { 'root': [ # method names - (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments - r'([a-zA-Z_$][a-zA-Z0-9_$]*)' # method name - r'(\s*)(\()', # signature start + (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments + r'([a-zA-Z_$][\w$]*)' # method name + r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), - (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator), + (r'@[a-zA-Z_][\w\.]*', Name.Decorator), (r'(assert|break|case|catch|continue|default|do|else|finally|for|' r'if|goto|instanceof|new|return|switch|this|throw|try|while|IF|' r'ELSE|ELSEIF|ENDIF|FOR|ENDFOR|SEPARATOR|BEFORE|AFTER)\b', @@ -1060,8 +1068,8 @@ class XtendLexer(RegexLexer): (u'(\u00BB)', String, 'template'), (r'"(\\\\|\\"|[^"])*"', String), (r"'(\\\\|\\'|[^'])*'", String), - (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), - (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), + (r'[a-zA-Z_]\w*:', Name.Label), + (r'[a-zA-Z_\$]\w*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), @@ -1069,10 +1077,10 @@ class XtendLexer(RegexLexer): (r'\n', Text) ], 'class': [ - (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') + (r'[a-zA-Z_]\w*', Name.Class, '#pop') ], 'import': [ - (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop') + (r'[\w.]+\*?', Name.Namespace, '#pop') ], 'template': [ (r"'''", String, '#pop'), @@ -1112,7 +1120,7 @@ class PigLexer(RegexLexer): (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text), - (r'([a-z_][a-z0-9_]*)(\s*)(\()', + (r'([a-z_]\w*)(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), (r'[()#:]', Text), (r'[^(:#\'\")\s]+', Text), @@ -1175,8 +1183,8 @@ class GoloLexer(RegexLexer): (r'(module|import)(\s+)', bygroups(Keyword.Namespace, Text), 'modname'), - (r'\b([a-zA-Z_][a-z$A-Z0-9._]*)(::)', bygroups(Name.Namespace, Punctuation)), - (r'\b([a-zA-Z_][a-z$A-Z0-9_]*(?:\.[a-zA-Z_][a-z$A-Z0-9_]*)+)\b', Name.Namespace), + (r'\b([a-zA-Z_][\w$.]*)(::)', bygroups(Name.Namespace, Punctuation)), + (r'\b([a-zA-Z_][\w$]*(?:\.[a-zA-Z_][\w$]*)+)\b', Name.Namespace), (r'(let|var)(\s+)', bygroups(Keyword.Declaration, Text), @@ -1201,7 +1209,7 @@ class GoloLexer(RegexLexer): bygroups(Name.Builtin, Punctuation)), (r'(print|println|readln|raise|fun' r'|asInterfaceInstance)\b', Name.Builtin), - (r'(`?[a-zA-Z_][a-z$A-Z0-9_]*)(\()', + (r'(`?[a-zA-Z_][\w$]*)(\()', bygroups(Name.Function, Punctuation)), (r'-?[\d_]*\.[\d_]*([eE][+-]?\d[\d_]*)?F?', Number.Float), @@ -1210,7 +1218,7 @@ class GoloLexer(RegexLexer): (r'-?\d[\d_]*L', Number.Integer.Long), (r'-?\d[\d_]*', Number.Integer), - ('`?[a-zA-Z_][a-z$A-Z0-9_]*', Name), + ('`?[a-zA-Z_][\w$]*', Name), (r'"""', String, combined('stringescape', 'triplestring')), (r'"', String, combined('stringescape', 'doublestring')), @@ -1220,16 +1228,16 @@ class GoloLexer(RegexLexer): ], 'funcname': [ - (r'`?[a-zA-Z_][a-z$A-Z0-9_]*', Name.Function, '#pop'), + (r'`?[a-zA-Z_][\w$]*', Name.Function, '#pop'), ], 'modname': [ - (r'[a-zA-Z_][a-z$A-Z0-9._]*\*?', Name.Namespace, '#pop') + (r'[a-zA-Z_][\w$.]*\*?', Name.Namespace, '#pop') ], 'structname': [ - (r'`?[a-zA-Z0-9_.]+\*?', Name.Class, '#pop') + (r'`?[\w.]+\*?', Name.Class, '#pop') ], 'varname': [ - (r'`?[a-zA-Z_][a-z$A-Z0-9_]*', Name.Variable, '#pop'), + (r'`?[a-zA-Z_][\w$]*', Name.Variable, '#pop'), ], 'string': [ (r'[^\\\'"\n]+', String), @@ -1258,3 +1266,262 @@ class GoloLexer(RegexLexer): (r'(==|<=|<|>=|>|!=)', Operator), ], } + + +class JasminLexer(RegexLexer): + """ + For `Jasmin <http://jasmin.sourceforge.net/>`_ assembly code. + + .. versionadded:: 2.0 + """ + + name = 'Jasmin' + aliases = ['jasmin', 'jasminxt'] + filenames = ['*.j'] + + _whitespace = r' \n\t\r' + _ws = r'(?:[%s]+)' % _whitespace + _separator = r'%s:=' % _whitespace + _break = r'(?=[%s]|$)' % _separator + _name = r'[^%s]+' % _separator + _unqualified_name = r'(?:[^%s.;\[/]+)' % _separator + + tokens = { + 'default': [ + (r'\n', Text, '#pop'), + (r"'", String.Single, ('#pop', 'quote')), + (r'"', String.Double, 'string'), + (r'=', Punctuation), + (r':', Punctuation, 'label'), + (_ws, Text), + (r';.*', Comment.Single), + (r'(\$[-+])?0x-?[\da-fA-F]+%s' % _break, Number.Hex), + (r'(\$[-+]|\+)?-?\d+%s' % _break, Number.Integer), + (r'-?(\d+\.\d*|\.\d+)([eE][-+]?\d+)?[fFdD]?' + r'[\x00-\x08\x0b\x0c\x0e-\x1f]*%s' % _break, Number.Float), + (r'\$%s' % _name, Name.Variable), + + # Directives + (r'\.annotation%s' % _break, Keyword.Reserved, 'annotation'), + (r'(\.attribute|\.bytecode|\.debug|\.deprecated|\.enclosing|' + r'\.interface|\.line|\.signature|\.source|\.stack|\.var|abstract|' + r'annotation|bridge|class|default|enum|field|final|fpstrict|' + r'interface|native|private|protected|public|signature|static|' + r'synchronized|synthetic|transient|varargs|volatile)%s' % _break, + Keyword.Reserved), + (r'\.catch%s' % _break, Keyword.Reserved, 'caught-exception'), + (r'(\.class|\.implements|\.inner|\.super|inner|invisible|' + r'invisibleparam|outer|visible|visibleparam)%s' % _break, + Keyword.Reserved, 'class/convert-dots'), + (r'\.field%s' % _break, Keyword.Reserved, + ('descriptor/convert-dots', 'field')), + (r'(\.end|\.limit|use)%s' % _break, Keyword.Reserved, + 'no-verification'), + (r'\.method%s' % _break, Keyword.Reserved, 'method'), + (r'\.set%s' % _break, Keyword.Reserved, 'var'), + (r'\.throws%s' % _break, Keyword.Reserved, 'exception'), + (r'(from|offset|to|using)%s' % _break, Keyword.Reserved, 'label'), + (r'is%s' % _break, Keyword.Reserved, + ('descriptor/convert-dots', 'var')), + (r'(locals|stack)%s' % _break, Keyword.Reserved, 'verification'), + (r'method%s' % _break, Keyword.Reserved, 'enclosing-method'), + + # Instructions + (r'(aaload|aastore|aconst_null|aload|aload_0|aload_1|aload_2|' + r'aload_3|aload_w|areturn|arraylength|astore|astore_0|astore_1|' + r'astore_2|astore_3|astore_w|athrow|baload|bastore|bipush|' + r'breakpoint|caload|castore|d2f|d2i|d2l|dadd|daload|dastore|' + r'dcmpg|dcmpl|dconst_0|dconst_1|ddiv|dload|dload_0|dload_1|' + r'dload_2|dload_3|dload_w|dmul|dneg|drem|dreturn|dstore|dstore_0|' + r'dstore_1|dstore_2|dstore_3|dstore_w|dsub|dup|dup2|dup2_x1|' + r'dup2_x2|dup_x1|dup_x2|f2d|f2i|f2l|fadd|faload|fastore|fcmpg|' + r'fcmpl|fconst_0|fconst_1|fconst_2|fdiv|fload|fload_0|fload_1|' + r'fload_2|fload_3|fload_w|fmul|fneg|frem|freturn|fstore|fstore_0|' + r'fstore_1|fstore_2|fstore_3|fstore_w|fsub|i2b|i2c|i2d|i2f|i2l|' + r'i2s|iadd|iaload|iand|iastore|iconst_0|iconst_1|iconst_2|' + r'iconst_3|iconst_4|iconst_5|iconst_m1|idiv|iinc|iinc_w|iload|' + r'iload_0|iload_1|iload_2|iload_3|iload_w|imul|ineg|int2byte|' + r'int2char|int2short|ior|irem|ireturn|ishl|ishr|istore|istore_0|' + r'istore_1|istore_2|istore_3|istore_w|isub|iushr|ixor|l2d|l2f|' + r'l2i|ladd|laload|land|lastore|lcmp|lconst_0|lconst_1|ldc2_w|' + r'ldiv|lload|lload_0|lload_1|lload_2|lload_3|lload_w|lmul|lneg|' + r'lookupswitch|lor|lrem|lreturn|lshl|lshr|lstore|lstore_0|' + r'lstore_1|lstore_2|lstore_3|lstore_w|lsub|lushr|lxor|' + r'monitorenter|monitorexit|nop|pop|pop2|ret|ret_w|return|saload|' + r'sastore|sipush|swap)%s' % _break, Keyword.Reserved), + (r'(anewarray|checkcast|instanceof|ldc|ldc_w|new)%s' % _break, + Keyword.Reserved, 'class/no-dots'), + (r'(invokedynamic|invokeinterface|invokenonvirtual|invokespecial|' + r'invokestatic|invokevirtual)%s' % _break, Keyword.Reserved, + 'invocation'), + (r'(getfield|putfield)%s' % _break, Keyword.Reserved, + ('descriptor/no-dots', 'field')), + (r'(getstatic|putstatic)%s' % _break, Keyword.Reserved, + ('descriptor/no-dots', 'static')), + (r'(goto|goto_w|if_acmpeq|if_acmpne|if_icmpeq|if_icmpge|if_icmpgt|' + r'if_icmple|if_icmplt|if_icmpne|ifeq|ifge|ifgt|ifle|iflt|ifne|' + r'ifnonnull|ifnull|jsr|jsr_w)%s' % _break, Keyword.Reserved, + 'label'), + (r'(multianewarray|newarray)%s' % _break, Keyword.Reserved, + 'descriptor/convert-dots'), + (r'tableswitch%s' % _break, Keyword.Reserved, 'table') + ], + 'quote': [ + (r"'", String.Single, '#pop'), + (r'\\u[\da-fA-F]{4}', String.Escape), + (r"[^'\\]+", String.Single) + ], + 'string': [ + (r'"', String.Double, '#pop'), + (r'\\([nrtfb"\'\\]|u[\da-fA-F]{4}|[0-3]?[0-7]{1,2})', + String.Escape), + (r'[^"\\]+', String.Double) + ], + 'root': [ + (r'\n+', Text), + (r"'", String.Single, 'quote'), + include('default'), + (r'(%s)([ \t\r]*)(:)' % _name, + bygroups(Name.Label, Text, Punctuation)), + (_name, String.Other) + ], + 'annotation': [ + (r'\n', Text, ('#pop', 'annotation-body')), + (r'default%s' % _break, Keyword.Reserved, + ('#pop', 'annotation-default')), + include('default') + ], + 'annotation-body': [ + (r'\n+', Text), + (r'\.end%s' % _break, Keyword.Reserved, '#pop'), + include('default'), + (_name, String.Other, ('annotation-items', 'descriptor/no-dots')) + ], + 'annotation-default': [ + (r'\n+', Text), + (r'\.end%s' % _break, Keyword.Reserved, '#pop'), + include('default'), + (r'', Text, ('annotation-items', 'descriptor/no-dots')) + ], + 'annotation-items': [ + (r"'", String.Single, 'quote'), + include('default'), + (_name, String.Other) + ], + 'caught-exception': [ + (r'all%s' % _break, Keyword, '#pop'), + include('exception') + ], + 'class/convert-dots': [ + include('default'), + (r'(L)((?:%s[/.])*)(%s)(;)' % (_unqualified_name, _name), + bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation), + '#pop'), + (r'((?:%s[/.])*)(%s)' % (_unqualified_name, _name), + bygroups(Name.Namespace, Name.Class), '#pop') + ], + 'class/no-dots': [ + include('default'), + (r'\[+', Punctuation, ('#pop', 'descriptor/no-dots')), + (r'(L)((?:%s/)*)(%s)(;)' % (_unqualified_name, _name), + bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation), + '#pop'), + (r'((?:%s/)*)(%s)' % (_unqualified_name, _name), + bygroups(Name.Namespace, Name.Class), '#pop') + ], + 'descriptor/convert-dots': [ + include('default'), + (r'\[+', Punctuation), + (r'(L)((?:%s[/.])*)(%s?)(;)' % (_unqualified_name, _name), + bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation), + '#pop'), + (r'[^%s\[)L]*' % _separator, Keyword.Type, '#pop') + ], + 'descriptor/no-dots': [ + include('default'), + (r'\[+', Punctuation), + (r'(L)((?:%s/)*)(%s)(;)' % (_unqualified_name, _name), + bygroups(Keyword.Type, Name.Namespace, Name.Class, Punctuation), + '#pop'), + (r'[^%s\[)L]*' % _separator, Keyword.Type, '#pop') + ], + 'descriptors/convert-dots': [ + (r'\)', Punctuation, '#pop'), + (r'', Text, 'descriptor/convert-dots') + ], + 'enclosing-method': [ + (_ws, Text), + (r'(?=[^%s]*\()' % _separator, Text, ('#pop', 'invocation')), + (r'', Text, ('#pop', 'class/convert-dots')) + ], + 'exception': [ + include('default'), + (r'((?:%s[/.])*)(%s)' % (_unqualified_name, _name), + bygroups(Name.Namespace, Name.Exception), '#pop') + ], + 'field': [ + (r'static%s' % _break, Keyword.Reserved, ('#pop', 'static')), + include('default'), + (r'((?:%s[/.](?=[^%s]*[/.]))*)(%s[/.])?(%s)' % + (_unqualified_name, _separator, _unqualified_name, _name), + bygroups(Name.Namespace, Name.Class, Name.Variable.Instance), + '#pop') + ], + 'invocation': [ + include('default'), + (r'((?:%s[/.](?=[^%s(]*[/.]))*)(%s[/.])?(%s)(\()' % + (_unqualified_name, _separator, _unqualified_name, _name), + bygroups(Name.Namespace, Name.Class, Name.Function, Punctuation), + ('#pop', 'descriptor/convert-dots', 'descriptors/convert-dots', + 'descriptor/convert-dots')) + ], + 'label': [ + include('default'), + (_name, Name.Label, '#pop') + ], + 'method': [ + include('default'), + (r'(%s)(\()' % _name, bygroups(Name.Function, Punctuation), + ('#pop', 'descriptor/convert-dots', 'descriptors/convert-dots', + 'descriptor/convert-dots')) + ], + 'no-verification': [ + (r'(locals|method|stack)%s' % _break, Keyword.Reserved, '#pop'), + include('default') + ], + 'static': [ + include('default'), + (r'((?:%s[/.](?=[^%s]*[/.]))*)(%s[/.])?(%s)' % + (_unqualified_name, _separator, _unqualified_name, _name), + bygroups(Name.Namespace, Name.Class, Name.Variable.Class), '#pop') + ], + 'table': [ + (r'\n+', Text), + (r'default%s' % _break, Keyword.Reserved, '#pop'), + include('default'), + (_name, Name.Label) + ], + 'var': [ + include('default'), + (_name, Name.Variable, '#pop') + ], + 'verification': [ + include('default'), + (r'(Double|Float|Integer|Long|Null|Top|UninitializedThis)%s' % + _break, Keyword, '#pop'), + (r'Object%s' % _break, Keyword, ('#pop', 'class/no-dots')), + (r'Uninitialized%s' % _break, Keyword, ('#pop', 'label')) + ] + } + + def analyse_text(text): + score = 0 + if re.search(r'^\s*\.class\s', text, re.MULTILINE): + score += 0.5 + if re.search(r'^\s*[a-z]+_[a-z]+\b', text, re.MULTILINE): + score += 0.3 + if re.search(r'^\s*\.(attribute|bytecode|debug|deprecated|enclosing|' + r'inner|interface|limit|set|signature|stack)\b', text, + re.MULTILINE): + score += 0.6 + return score |