diff options
author | Georg Brandl <georg@python.org> | 2012-03-10 14:27:31 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2012-03-10 14:27:31 +0100 |
commit | c11b7995ce496ad1f0dc439187b0b8334428a78c (patch) | |
tree | ed004a956751bc1ebbb8002dc6ccb2b3a1c4f66f | |
parent | d761f76cb2491e4269638a4971dff6c26010c85f (diff) | |
parent | 08c53fd3c0e9ef11f66b9bf4dc8475677d2b7566 (diff) | |
download | pygments-c11b7995ce496ad1f0dc439187b0b8334428a78c.tar.gz |
Merged in rafaelmartins/pygments-main (pull request #60)
-rw-r--r-- | CHANGES | 6 | ||||
-rw-r--r-- | TODO | 2 | ||||
-rw-r--r-- | docs/src/tokens.txt | 2 | ||||
-rwxr-xr-x | pygments/formatters/_mapping.py | 2 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 30 | ||||
-rw-r--r-- | pygments/lexers/asm.py | 8 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 18 | ||||
-rw-r--r-- | pygments/lexers/dotnet.py | 10 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 20 | ||||
-rw-r--r-- | pygments/lexers/hdl.py | 43 | ||||
-rw-r--r-- | pygments/lexers/jvm.py | 16 | ||||
-rw-r--r-- | pygments/lexers/math.py | 8 | ||||
-rw-r--r-- | pygments/lexers/other.py | 62 | ||||
-rw-r--r-- | pygments/lexers/parsers.py | 6 | ||||
-rw-r--r-- | pygments/lexers/shell.py | 6 | ||||
-rw-r--r-- | pygments/lexers/sql.py | 8 | ||||
-rw-r--r-- | pygments/lexers/templates.py | 33 | ||||
-rw-r--r-- | pygments/lexers/text.py | 41 | ||||
-rw-r--r-- | pygments/lexers/web.py | 161 | ||||
-rw-r--r-- | tests/examplefiles/antlr_throws | 1 | ||||
-rw-r--r-- | tests/examplefiles/function.mu | 1 |
21 files changed, 249 insertions, 235 deletions
@@ -78,6 +78,12 @@ Version 1.5 - Add "rrt" style (#727). +- Fix infinite looping in Darcs Patch lexer. + +- Lots of misc fixes to character-eating bugs and ordering problems in many + different lexers. + + Version 1.4 ----------- (codename Unschärfe, released Jan 03, 2011) @@ -3,8 +3,6 @@ Todo - suggested new lexers * IPython sessions - * PostgreSQL/SQLite - * Nemerle - lexers that need work: * review perl lexer (numerous bugs, but so far no one had complaints ;) diff --git a/docs/src/tokens.txt b/docs/src/tokens.txt index 9ef0df8d..4900a9ab 100644 --- a/docs/src/tokens.txt +++ b/docs/src/tokens.txt @@ -303,7 +303,7 @@ Comments `Comment.Special` Special data in comments. For example code tags, author and license - informations etc. + information, etc. Generic Tokens diff --git a/pygments/formatters/_mapping.py b/pygments/formatters/_mapping.py index fd380b3a..b64eb685 100755 --- a/pygments/formatters/_mapping.py +++ b/pygments/formatters/_mapping.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ pygments.formatters._mapping - ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Formatter mapping defintions. This file is generated by itself. Everytime you change something on a builtin formatter defintion, run this script from diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 6e4e9d13..024ab8d5 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -381,7 +381,7 @@ class PythonTracebackLexer(RegexLexer): bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(PythonLexer), Text)), - (r'^([ \t]*)(...)(\n)', + (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^(.+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), @@ -417,7 +417,7 @@ class Python3TracebackLexer(RegexLexer): bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(Python3Lexer), Text)), - (r'^([ \t]*)(...)(\n)', + (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^(.+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), @@ -506,7 +506,7 @@ class RubyLexer(ExtendedRegexLexer): states = {} states['strings'] = [ # easy ones - (r'\:@{0,2}([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|' + (r'\:@{0,2}([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), @@ -703,7 +703,7 @@ class RubyLexer(ExtendedRegexLexer): # like keywords (class) or like this: ` ?!? (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])', bygroups(Operator, Name)), - (r'[a-zA-Z_][\w_]*[\!\?]?', Name), + (r'[a-zA-Z_]\w*[\!\?]?', Name), (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|' r'!~|&&?|\|\||\.{1,3})', Operator), (r'[-+/*%=<>&!^|~]=?', Operator), @@ -713,7 +713,7 @@ class RubyLexer(ExtendedRegexLexer): 'funcname': [ (r'\(', Punctuation, 'defexpr'), (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?' - r'([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|' + r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', bygroups(Name.Class, Operator, Name.Function), '#pop'), (r'', Text, '#pop') @@ -721,7 +721,7 @@ class RubyLexer(ExtendedRegexLexer): 'classname': [ (r'\(', Punctuation, 'defexpr'), (r'<<', Operator, '#pop'), - (r'[A-Z_][\w_]*', Name.Class, '#pop'), + (r'[A-Z_]\w*', Name.Class, '#pop'), (r'', Text, '#pop') ], 'defexpr': [ @@ -924,7 +924,7 @@ class PerlLexer(RegexLexer): (r'(q|qq|qw|qr|qx)([^a-zA-Z0-9])(.|\n)*?\2', String.Other), (r'package\s+', Keyword, 'modulename'), (r'sub\s+', Keyword, 'funcname'), - (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|' + (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|' r'!~|&&?|\|\||\.{1,3})', Operator), (r'[-+/*%=<>&^|!\\~]=?', Operator), (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage @@ -949,10 +949,10 @@ class PerlLexer(RegexLexer): (r'(?=[^a-zA-Z0-9_])', Text, '#pop'), ], 'modulename': [ - (r'[a-zA-Z_][\w_]*', Name.Namespace, '#pop') + (r'[a-zA-Z_]\w*', Name.Namespace, '#pop') ], 'funcname': [ - (r'[a-zA-Z_][\w_]*[\!\?]?', Name.Function), + (r'[a-zA-Z_]\w*[\!\?]?', Name.Function), (r'\s+', Text), # argument declaration (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)), @@ -1151,13 +1151,13 @@ class MoonScriptLexer(LuaLexer): (r'(?s)\[(=*)\[.*?\]\1\]', String), (r'(->|=>)', Name.Function), (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), - (r'(==|!=|~=|<=|>=|\.\.|\.\.\.|[=+\-*/%^<>#!.\\:])', Operator), + (r'(==|!=|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', Operator), (r'[;,]', Punctuation), (r'[\[\]\{\}\(\)]', Keyword.Type), (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable), (r"(class|extends|if|then|super|do|with|import|export|" - r"while|elseif|return|for|in|from|when|using|else|" - r"and|or|not|switch|break)\b", Keyword), + r"while|elseif|return|for|in|from|when|using|else|" + r"and|or|not|switch|break)\b", Keyword), (r'(true|false|nil)\b', Keyword.Constant), (r'(and|or|not)\b', Operator.Word), (r'(self)\b', Name.Builtin.Pseudo), @@ -1286,7 +1286,7 @@ class IoLexer(RegexLexer): # constants (r'(nil|false|true)\b', Name.Constant), # names - ('(Object|list|List|Map|args|Sequence|Coroutine|File)\b', + (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b', Name.Builtin), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), # numbers @@ -1378,7 +1378,7 @@ class TclLexer(RegexLexer): 'params': [ (r';', Keyword, '#pop'), (r'\n', Text, '#pop'), - (r'(else|elseif|then)', Keyword), + (r'(else|elseif|then)\b', Keyword), include('basic'), include('data'), ], @@ -1785,7 +1785,7 @@ class FancyLexer(RegexLexer): ('[A-Z][a-zA-Z0-9_]*', Name.Constant), ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance), ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class), - ('(@|@@)', Operator), + ('@@?', Operator), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), # numbers - / checks are necessary to avoid mismarking regexes, # see comment in RubyLexer diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index 26951f7e..11964a77 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -136,17 +136,17 @@ class ObjdumpLexer(RegexLexer): ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes - ('\t\.\.\.$', Text), + (r'\t\.\.\.$', Text), # Relocation line # (With offset) - ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', + (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) - ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)$', + (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), - ('[^\n]+\n', Other) + (r'[^\n]+\n', Other) ] } diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index eeab8d57..04077ec6 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -1058,7 +1058,7 @@ class DylanLexer(RegexLexer): tokens = { 'root': [ (r'\b(subclass|abstract|block|c(on(crete|stant)|lass)|domain' - r'|ex(c(eption|lude)|port)|f(unction(|al))|generic|handler' + r'|ex(c(eption|lude)|port)|f(unction(al)?)|generic|handler' r'|i(n(herited|line|stance|terface)|mport)|library|m(acro|ethod)' r'|open|primary|sealed|si(deways|ngleton)|slot' r'|v(ariable|irtual))\b', Name.Builtin), @@ -1068,7 +1068,7 @@ class DylanLexer(RegexLexer): (r'"', String, 'string'), (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)' - r'|define|else(|if)|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise' + r'|define|else(if)?|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise' r'|rename|s(elect|ignal)|t(hen|o)|u(n(less|til)|se)|wh(en|ile))\b', Keyword), (r'([ \t])([!\$%&\*\/:<=>\?~_^a-zA-Z0-9.+\-]*:)', @@ -2186,7 +2186,7 @@ class AdaLexer(RegexLexer): 'end' : [ ('(if|case|record|loop|select)', Keyword.Reserved), ('"[^"]+"|[a-zA-Z0-9_.]+', Name.Function), - ('[\n\s]+', Text), + ('\s+', Text), (';', Punctuation, '#pop'), ], 'type_def': [ @@ -2572,7 +2572,7 @@ class NimrodLexer(RegexLexer): (r'##.*$', String.Doc), (r'#.*$', Comment), (r'\*|=|>|<|\+|-|/|@|\$|~|&|%|\!|\?|\||\\|\[|\]', Operator), - (r'\.\.|\.|,|[\.|\.]|{\.|\.}|\(\.|\.\)|{|}|\(|\)|:|\^|`|;', + (r'\.\.|\.|,|\[\.|\.\]|{\.|\.}|\(\.|\.\)|{|}|\(|\)|:|\^|`|;', Punctuation), # Strings @@ -2679,7 +2679,7 @@ class FantomLexer(RegexLexer): id = r'[a-zA-Z_][a-zA-Z0-9_]*', # all chars which can be part of type definition. Starts with # either letter, or [ (maps), or | (funcs) - type = r'(?:\[|[a-zA-Z_]|\|)[:\w_\[\]\|\->\?]*?', + type = r'(?:\[|[a-zA-Z_]|\|)[:\w\[\]\|\->\?]*?', ) ) @@ -2810,7 +2810,7 @@ class FantomLexer(RegexLexer): ### ArgType argName, ##### (s(r'($type)(\s+)($id)(\s*)(,)'), - bygroups(using(this, state= 'inType'), Text, Name.Variable, + bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation)), #### ArgType argName) #### @@ -2818,13 +2818,13 @@ class FantomLexer(RegexLexer): ### ArgType argName -> ArgType| ### (s(r'($type)(\s+)($id)(\s*)(\->)(\s*)($type)(\|)'), - bygroups(using(this, state= 'inType'), Text, Name.Variable, + bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation, Text, using(this, state = 'inType'), Punctuation)), ### ArgType argName| ### (s(r'($type)(\s+)($id)(\s*)(\|)'), - bygroups(using(this, state= 'inType'), Text, Name.Variable, + bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation)), ### Type var @@ -2843,7 +2843,7 @@ class FantomLexer(RegexLexer): 'insideMethodDeclArgs': [ (r'\)', Punctuation, '#pop'), (s(r'($type)(\s+)($id)(\s*)(\))'), - bygroups(using(this, state= 'inType'), Text, Name.Variable, + bygroups(using(this, state='inType'), Text, Name.Variable, Text, Punctuation), '#pop'), include('root'), ], diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index 8b7a0b89..278ba546 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -344,7 +344,7 @@ class BooLexer(RegexLexer): (r"'(\\\\|\\'|[^']*?)'", String.Single), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), - (r'[0-9][0-9\.]*(m|ms|d|h|s)', Number), + (r'[0-9][0-9\.]*(ms?|d|h|s)', Number), (r'0\d+', Number.Oct), (r'0x[a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), @@ -559,10 +559,10 @@ class FSharpLexer(RegexLexer): 'with', 'yield', 'yield!' ] keyopts = [ - '!=','#','&','&&','\(','\)','\*','\+',',','-', - '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<', - '<-','>','>]','\?','\?\?','\[','\[<','\[>','\[\|', - ']','_','`','{','\|','\|]','}','~','<@','=','@>' + '!=','#','&&','&','\(','\)','\*','\+',',','-\.', + '->','-','\.\.','\.','::',':=',':>',':',';;',';','<-', + '<','>]','>','\?\?','\?','\[<','\[>','\[\|','\[', + ']','_','`','{','\|\]','\|','}','~','<@','=','@>' ] operators = r'[!$%&*+\./:<=>?@^|~-]' diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 42fd05a7..cc538dbc 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -113,7 +113,7 @@ class SchemeLexer(RegexLexer): # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), - (r"#\\([()/'\".'_!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), + (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), # constants (r'(#t|#f)', Name.Constant), @@ -560,7 +560,10 @@ class SMLLexer(RegexLexer): return [ (r'[^"\\]', whatkind), (r'\\[\\\"abtnvfr]', String.Escape), - (r'\\\^[@-^]', String.Escape), + # Control-character notation is used for codes < 32, + # where \^@ == \000 + (r'\\\^[\x40-\x5e]', String.Escape), + # Docs say 'decimal digits' (r'\\[0-9]{3}', String.Escape), (r'\\u[0-9a-fA-F]{4}', String.Escape), (r'\\\s+\\', String.Interpol), @@ -887,7 +890,7 @@ class OcamlLexer(RegexLexer): (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), (r'\(\*', Comment, 'comment'), (r'\b(%s)\b' % '|'.join(keywords), Keyword), - (r'(%s)' % '|'.join(keyopts), Operator), + (r'(%s)' % '|'.join(keyopts[::-1]), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), @@ -980,7 +983,7 @@ class ErlangLexer(RegexLexer): 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' ] - operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!|\?)' + operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)' word_operators = [ 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 'div', 'not', 'or', 'orelse', 'rem', 'xor' @@ -1494,7 +1497,7 @@ class CoqLexer(RegexLexer): (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', Name.Namespace, 'dotted'), (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), - (r'(%s)' % '|'.join(keyopts), Operator), + (r'(%s)' % '|'.join(keyopts[::-1]), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), @@ -1704,13 +1707,14 @@ class ElixirLexer(RegexLexer): r'<=>|&&?|%\(\)|%\[\]|%\{\}|\+\+?|\-\-?|\|\|?|\!|//|[%&`/\|]|' r'\*\*?|=?~|<\-)|([a-zA-Z_]\w*([?!])?)(:)(?!:)', String.Symbol), (r':"', String.Symbol, 'interpoling_symbol'), - (r'\b(nil|true|false)\b(?![?!])|\b[A-Z]\w*\b', Name.Constant), + (r'\b(nil|true|false)\b(?![?!])', Name.Constant), + (r'\b[A-Z]\w*\b', Name.Constant), (r'\b(__(FILE|LINE|MODULE|STOP_ITERATOR|EXCEPTION|OP|REF|FUNCTION|' r'BLOCK|KVBLOCK)__)\b(?![?!])', Name.Builtin.Pseudo), - (r'[a-zA-Z_!][\w_]*[!\?]?', Name), + (r'[a-zA-Z_!]\w*[!\?]?', Name), (r'[(){};,/\|:\\\[\]]', Punctuation), (r'@[a-zA-Z_]\w*|&\d', Name.Variable), - (r'\b(0[xX][0-9A-Fa-f]+|\d(_?\d)*(\.(?![^[:space:][:digit:]])' + (r'\b(0[xX][0-9A-Fa-f]+|\d(_?\d)*(\.(?![^\d\s])' r'(_?\d)*)?([eE][-+]?\d(_?\d)*)?|0[bB][01]+)\b', Number), include('strings'), ], diff --git a/pygments/lexers/hdl.py b/pygments/lexers/hdl.py index b4eccd3b..0db35e74 100644 --- a/pygments/lexers/hdl.py +++ b/pygments/lexers/hdl.py @@ -56,8 +56,9 @@ class VerilogLexer(RegexLexer): (r'[()\[\],.;\']', Punctuation), (r'`[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), - (r'^\s*(package)(\s+)', bygroups(Keyword.Namespace, Text)), - (r'^\s*(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), + (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)), + (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text), + 'import'), (r'(always|always_comb|always_ff|always_latch|and|assign|automatic|' r'begin|break|buf|bufif0|bufif1|case|casex|casez|cmos|const|' @@ -75,21 +76,21 @@ class VerilogLexer(RegexLexer): r'unsigned|var|vectored|void|wait|weak0|weak1|while|' r'xnor|xor)\b', Keyword), - (r'(`accelerate|`autoexpand_vectornets|`celldefine|`default_nettype|' - r'`else|`elsif|`endcelldefine|`endif|`endprotect|`endprotected|' - r'`expand_vectornets|`ifdef|`ifndef|`include|`noaccelerate|`noexpand_vectornets|' - r'`noremove_gatenames|`noremove_netnames|`nounconnected_drive|' - r'`protect|`protected|`remove_gatenames|`remove_netnames|`resetall|' - r'`timescale|`unconnected_drive|`undef)\b', Comment.Preproc), - - (r'(\$bits|\$bitstoreal|\$bitstoshortreal|\$countdrivers|\$display|\$fclose|' - r'\$fdisplay|\$finish|\$floor|\$fmonitor|\$fopen|\$fstrobe|\$fwrite|' - r'\$getpattern|\$history|\$incsave|\$input|\$itor|\$key|\$list|\$log|' - r'\$monitor|\$monitoroff|\$monitoron|\$nokey|\$nolog|\$printtimescale|' - r'\$random|\$readmemb|\$readmemh|\$realtime|\$realtobits|\$reset|\$reset_count|' - r'\$reset_value|\$restart|\$rtoi|\$save|\$scale|\$scope|\$shortrealtobits|' - r'\$showscopes|\$showvariables|\$showvars|\$sreadmemb|\$sreadmemh|' - r'\$stime|\$stop|\$strobe|\$time|\$timeformat|\$write)\b', Name.Builtin), + (r'`(accelerate|autoexpand_vectornets|celldefine|default_nettype|' + r'else|elsif|endcelldefine|endif|endprotect|endprotected|' + r'expand_vectornets|ifdef|ifndef|include|noaccelerate|noexpand_vectornets|' + r'noremove_gatenames|noremove_netnames|nounconnected_drive|' + r'protect|protected|remove_gatenames|remove_netnames|resetall|' + r'timescale|unconnected_drive|undef)\b', Comment.Preproc), + + (r'\$(bits|bitstoreal|bitstoshortreal|countdrivers|display|fclose|' + r'fdisplay|finish|floor|fmonitor|fopen|fstrobe|fwrite|' + r'getpattern|history|incsave|input|itor|key|list|log|' + r'monitor|monitoroff|monitoron|nokey|nolog|printtimescale|' + r'random|readmemb|readmemh|realtime|realtobits|reset|reset_count|' + r'reset_value|restart|rtoi|save|scale|scope|shortrealtobits|' + r'showscopes|showvariables|showvars|sreadmemb|sreadmemh|' + r'stime|stop|strobe|time|timeformat|write)\b', Name.Builtin), (r'(byte|shortint|int|longint|integer|time|' r'bit|logic|reg|' @@ -146,6 +147,9 @@ class SystemVerilogLexer(RegexLexer): tokens = { 'root': [ (r'^\s*`define', Comment.Preproc, 'macro'), + (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)), + (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text), 'import'), + (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation @@ -167,11 +171,6 @@ class SystemVerilogLexer(RegexLexer): (r'[()\[\],.;\']', Punctuation), (r'`[a-zA-Z_][a-zA-Z0-9_]*', Name.Constant), - (r'^\s*(package)(\s+)', bygroups(Keyword.Namespace, Text)), - (r'^\s*(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), - - - (r'(accept_on|alias|always|always_comb|always_ff|always_latch|' r'and|assert|assign|assume|automatic|before|begin|bind|bins|' r'binsof|bit|break|buf|bufif0|bufif1|byte|case|casex|casez|' diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 83b45613..4c183b98 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -117,7 +117,7 @@ class ScalaLexer(RegexLexer): ur'lazy|match|new|override|pr(?:ivate|otected)' ur'|re(?:quires|turn)|s(?:ealed|uper)|' ur't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\b|' - u'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\b|(?=\\s)|$)', Keyword), + u'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\\b|(?=\\s)|$)', Keyword), (ur':(?!%s)' % op, Keyword, 'type'), (ur'%s%s\b' % (upper, idrest), Name.Class), (r'(true|false|null)\b', Keyword.Constant), @@ -125,7 +125,7 @@ class ScalaLexer(RegexLexer): (r'(type)(\s+)', bygroups(Keyword, Text), 'type'), (r'""".*?"""', String), (r'"(\\\\|\\"|[^"])*"', String), - (ur"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), + (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), # (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, # Name.Attribute)), (idrest, Name), @@ -133,7 +133,7 @@ class ScalaLexer(RegexLexer): (r'\[', Operator, 'typeparam'), (r'[\(\)\{\};,.#]', Operator), (op, Operator), - (ur'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?', + (r'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?', Number.Float), (r'0x[0-9a-f]+', Number.Hex), (r'[0-9]+L?', Number.Integer), @@ -142,7 +142,7 @@ class ScalaLexer(RegexLexer): 'class': [ (ur'(%s|%s|`[^`]+`)(\s*)(\[)' % (idrest, op), bygroups(Name.Class, Text, Operator), 'typeparam'), - (r'[\s\n]+', Text), + (r'\s+', Text), (r'{', Operator, '#pop'), (r'\(', Operator, '#pop'), (r'//.*?\n', Comment.Single, '#pop'), @@ -151,7 +151,7 @@ class ScalaLexer(RegexLexer): 'type': [ (r'\s+', Text), (u'<[%:]|>:|[#_\u21D2]|forSome|type', Keyword), - (r'([,\);}]|=>|=)([\s\n]*)', bygroups(Operator, Text), '#pop'), + (r'([,\);}]|=>|=)(\s*)', bygroups(Operator, Text), '#pop'), (r'[\(\{]', Operator, '#push'), (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)(\[)' % (idrest, op, idrest, op), @@ -163,7 +163,7 @@ class ScalaLexer(RegexLexer): (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type) ], 'typeparam': [ - (r'[\s\n,]+', Text), + (r'[\s,]+', Text), (u'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword), (r'([\]\)\}])', Operator, '#pop'), (r'[\(\[\{]', Operator, '#push'), @@ -421,7 +421,7 @@ class IokeLexer(RegexLexer): #Documentation (r'((?<=fn\()|(?<=fnx\()|(?<=method\()|(?<=macro\()|(?<=lecro\()' r'|(?<=syntax\()|(?<=dmacro\()|(?<=dlecro\()|(?<=dlecrox\()' - r'|(?<=dsyntax\())[\s\n\r]*"', String.Doc, 'documentation'), + r'|(?<=dsyntax\())\s*"', String.Doc, 'documentation'), #Text (r'"', String, 'text'), @@ -526,7 +526,7 @@ class IokeLexer(RegexLexer): Operator), # Punctuation - (r'(\`\`|\`|\'\'|\'|\.|\,|@|@@|\[|\]|\(|\)|{|})', Punctuation), + (r'(\`\`|\`|\'\'|\'|\.|\,|@@|@|\[|\]|\(|\)|{|})', Punctuation), #kinds (r'[A-Z][a-zA-Z0-9_!:?]*', Name.Class), diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py index 9f7efb3b..f500231f 100644 --- a/pygments/lexers/math.py +++ b/pygments/lexers/math.py @@ -75,8 +75,8 @@ class MuPADLexer(RegexLexer): #(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin), (r'''(?x) ((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`) - (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)\s*([(])''', - bygroups(Name.Function, Punctuation)), + (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)(\s*)([(])''', + bygroups(Name.Function, Text, Punctuation)), (r'''(?x) (?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`) (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable), @@ -668,7 +668,7 @@ class ScilabLexer(RegexLexer): _scilab_builtins.macros_kw ) + r')\b', Name.Builtin), - ("(" + "|".join(_scilab_builtins.builtin_consts) + r')\b', + (r'(%s)\b' % "|".join(map(re.escape, _scilab_builtins.builtin_consts)), Name.Constant), # operators: @@ -677,7 +677,7 @@ class ScilabLexer(RegexLexer): (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator), # punctuation: - (r'[\[\](){}:@.,=:;]', Punctuation), + (r'[\[\](){}@.,=:;]', Punctuation), (r'"[^"]*"', String), diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index a64022e4..c9718142 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -79,7 +79,7 @@ class ECLLexer(RegexLexer): (r'^#.*$', Comment.Preproc), ], 'types': [ - (r'(RECORD|END)[^\d]', Keyword.Declaration), + (r'(RECORD|END)\D', Keyword.Declaration), (r'((?:ASCII|BIG_ENDIAN|BOOLEAN|DATA|DECIMAL|EBCDIC|INTEGER|PATTERN|' r'QSTRING|REAL|RECORD|RULE|SET OF|STRING|TOKEN|UDECIMAL|UNICODE|' r'UNSIGNED|VARSTRING|VARUNICODE)\d*)(\s+)', @@ -310,7 +310,7 @@ class SmalltalkLexer(RegexLexer): ], 'blockvariables' : [ include('whitespaces'), - (r'(:)(\s*)([A-Za-z\w]+)', + (r'(:)(\s*)(\w+)', bygroups(Operator, Text, Name.Variable)), (r'\|', Operator, '#pop'), (r'', Text, '#pop'), # else pop @@ -325,7 +325,7 @@ class SmalltalkLexer(RegexLexer): '_parenth_helper' : [ include('whitespaces'), (r'(\d+r)?-?\d+(\.\d+)?(e-?\d+)?', Number), - (r'[-+*/\\~<>=|&#!?,@%\w+:]+', String.Symbol), + (r'[-+*/\\~<>=|&#!?,@%\w:]+', String.Symbol), # literals (r'\'[^\']*\'', String), (r'\$.', String.Char), @@ -541,12 +541,12 @@ class LogtalkLexer(RegexLexer): (r'[()\[\],.|]', Text), # Atoms (r"[a-z][a-zA-Z0-9_]*", Text), - (r"[']", String, 'quoted_atom'), + (r"'", String, 'quoted_atom'), ], 'quoted_atom': [ - (r"['][']", String), - (r"[']", String, '#pop'), + (r"''", String), + (r"'", String, '#pop'), (r'\\([\\abfnrtv"\']|(x[a-fA-F0-9]+|[0-7]+)\\)', String.Escape), (r"[^\\'\n]+", String), (r'\\', String), @@ -588,7 +588,7 @@ class LogtalkLexer(RegexLexer): (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable), # Atoms (r"[a-z][a-zA-Z0-9_]*", Text), - (r"[']", String, 'quoted_atom'), + (r"'", String, 'quoted_atom'), # Strings (r'"(\\\\|\\"|[^"])*"', String), # End of entity-opening directive @@ -798,9 +798,9 @@ class PovrayLexer(RegexLexer): (r'/\*[\w\W]*?\*/', Comment.Multiline), (r'//.*\n', Comment.Single), (r'(?s)"(?:\\.|[^"\\])+"', String.Double), - (r'#(debug|default|else|end|error|fclose|fopen|if|ifdef|ifndef|' + (r'#(debug|default|else|end|error|fclose|fopen|ifdef|ifndef|' r'include|range|read|render|statistics|switch|undef|version|' - r'warning|while|write|define|macro|local|declare)', + r'warning|while|write|define|macro|local|declare)\b', Comment.Preproc), (r'\b(aa_level|aa_threshold|abs|acos|acosh|adaptive|adc_bailout|' r'agate|agate_turb|all|alpha|ambient|ambient_light|angle|' @@ -850,11 +850,11 @@ class PovrayLexer(RegexLexer): r'vnormalize|volume_object|volume_rendered|vol_with_light|' r'vrotate|v_steps|warning|warp|water_level|waves|while|width|' r'wood|wrinkles|yes)\b', Keyword), - (r'bicubic_patch|blob|box|camera|cone|cubic|cylinder|difference|' + (r'(bicubic_patch|blob|box|camera|cone|cubic|cylinder|difference|' r'disc|height_field|intersection|julia_fractal|lathe|' r'light_source|merge|mesh|object|plane|poly|polygon|prism|' r'quadric|quartic|smooth_triangle|sor|sphere|superellipsoid|' - r'text|torus|triangle|union', Name.Builtin), + r'text|torus|triangle|union)\b', Name.Builtin), # TODO: <=, etc (r'[\[\](){}<>;,]', Punctuation), (r'[-+*/=]', Operator), @@ -892,7 +892,7 @@ class AppleScriptLexer(RegexLexer): Classes = ['alias ', 'application ', 'boolean ', 'class ', 'constant ', 'date ', 'file ', 'integer ', 'list ', 'number ', 'POSIX file ', 'real ', 'record ', 'reference ', 'RGB color ', 'script ', - 'text ', 'unit types', '(Unicode )?text', 'string'] + 'text ', 'unit types', '(?:Unicode )?text', 'string'] BuiltIn = ['attachment', 'attribute run', 'character', 'day', 'month', 'paragraph', 'word', 'year'] HandlerParams = ['about', 'above', 'against', 'apart from', 'around', @@ -1158,7 +1158,7 @@ class AppleScriptLexer(RegexLexer): (ur'(-|\*|\+|&|≠|>=?|<=?|=|≥|≤|/|÷|\^)', Operator), (r"\b(%s)\b" % '|'.join(Operators), Operator.Word), (r'^(\s*(?:on|end)\s+)' - r'(%s)' % '|'.join(StudioEvents), + r'(%s)' % '|'.join(StudioEvents[::-1]), bygroups(Keyword, Name.Function)), (r'^(\s*)(in|on|script|to)(\s+)', bygroups(Text, Keyword, Text)), (r'\b(as )(%s)\b' % '|'.join(Classes), @@ -1240,11 +1240,11 @@ class ModelicaLexer(RegexLexer): ], 'functions': [ (r'(abs|acos|acosh|asin|asinh|atan|atan2|atan3|ceil|cos|cosh|' - r'cross|div|exp|floor|log|log10|mod|rem|sign|sin|sinh|size|' - r'sqrt|tan|tanh|zeros)\b', Name.Function) + r'cross|div|exp|floor|log|log10|mod|rem|semiLinear|sign|sin|' + r'sinh|size|sqrt|tan|tanh|zeros)\b', Name.Function) ], 'operators': [ - (r'(and|assert|cardinality|change|delay|der|edge|initial|' + (r'(and|assert|cardinality|change|delay|der|edge|homotopy|initial|' r'noEvent|not|or|pre|reinit|return|sample|smooth|' r'terminal|terminate)\b', Name.Builtin) ], @@ -1501,7 +1501,7 @@ class ABAPLexer(RegexLexer): ], 'variable-names': [ (r'<[\S_]+>', Name.Variable), - (r'[\w][\w_~]*(?:(\[\])|->\*)?', Name.Variable), + (r'\w[\w~]*(?:(\[\])|->\*)?', Name.Variable), ], 'root': [ include('common'), @@ -1511,21 +1511,21 @@ class ABAPLexer(RegexLexer): (r'(CALL\s+(?:DIALOG|SCREEN|SUBSCREEN|SELECTION-SCREEN|' r'TRANSACTION|TRANSFORMATION))\b', Keyword), - (r'(FORM|PERFORM)(\s+)([\w_]+)', + (r'(FORM|PERFORM)(\s+)(\w+)', bygroups(Keyword, Text, Name.Function)), - (r'(PERFORM)(\s+)(\()([\w_]+)(\))', + (r'(PERFORM)(\s+)(\()(\w+)(\))', bygroups(Keyword, Text, Punctuation, Name.Variable, Punctuation )), (r'(MODULE)(\s+)(\S+)(\s+)(INPUT|OUTPUT)', bygroups(Keyword, Text, Name.Function, Text, Keyword)), # method implementation - (r'(METHOD)(\s+)([\w_~]+)', + (r'(METHOD)(\s+)([\w~]+)', bygroups(Keyword, Text, Name.Function)), # method calls - (r'(\s+)([\w_\-]+)([=\-]>)([\w_\-~]+)', + (r'(\s+)([\w\-]+)([=\-]>)([\w\-~]+)', bygroups(Text, Name.Variable, Operator, Name.Function)), # call methodnames returning style - (r'(?<=(=|-)>)([\w_\-~]+)(?=\()', Name.Function), + (r'(?<=(=|-)>)([\w\-~]+)(?=\()', Name.Function), # keywords with dashes in them. # these need to be first, because for instance the -ID part @@ -2034,7 +2034,7 @@ class PostScriptLexer(RegexLexer): ], 'escape': [ - (r'([0-8]{3}|n|r|t|b|f|\\|\(|\)|)', String.Escape, '#pop'), + (r'([0-8]{3}|n|r|t|b|f|\\|\(|\))?', String.Escape, '#pop'), ], } @@ -2394,7 +2394,7 @@ class HybrisLexer(RegexLexer): # method names (r'^(\s*(?:function|method|operator\s+)+?)' r'([a-zA-Z_][a-zA-Z0-9_]*)' - r'(\s*)(\()', bygroups(Name.Function, Text, Operator)), + r'(\s*)(\()', bygroups(Keyword, Name.Function, Text, Operator)), (r'[^\S\n]+', Text), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), @@ -2478,7 +2478,7 @@ class AwkLexer(RegexLexer): (r'', Text, '#pop') ], 'badregex': [ - ('\n', Text, '#pop') + (r'\n', Text, '#pop') ], 'root': [ (r'^(?=\s|/)', Text, 'slashstartsregex'), @@ -2608,7 +2608,7 @@ class SnobolLexer(RegexLexer): # ASCII equivalents of original operators # | for the EBCDIC equivalent, ! likewise # \ for EBCDIC negation - (r'\*\*|[\?\$\.!%\*/#+\-@\|&\\!=]', Operator), + (r'\*\*|[\?\$\.!%\*/#+\-@\|&\\=]', Operator), (r'"[^"]*"', String), (r"'[^']*'", String), # Accept SPITBOL syntax for real numbers @@ -3244,8 +3244,8 @@ class OpenEdgeLexer(RegexLexer): (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'[0-9]+', Number.Integer), (r'\s+', Text), - (r'[\\+\\-\\*\\/\\=]', Operator), - (r'[\\.\\:\\(\\)]', Punctuation), + (r'[+*/=-]', Operator), + (r'[.:()]', Punctuation), (r'.', Name.Variable), # Lazy catch-all ], 'comment': [ @@ -3279,7 +3279,7 @@ class BroLexer(RegexLexer): tokens = { 'root': [ # Whitespace - ('^@.*?\n', Comment.Preproc), + (r'^@.*?\n', Comment.Preproc), (r'#.*?\n', Comment.Single), (r'\n', Text), (r'\s+', Text), @@ -3315,8 +3315,8 @@ class BroLexer(RegexLexer): (r'/', String.Regex, 'regex'), (r'"', String, 'string'), # Operators - (r'[!%*/+-:<=>?~|]', Operator), - (r'([-+=&|]{2}|[+-=!><]=)', Operator), + (r'[!%*/+:<=>?~|-]', Operator), + (r'([-+=&|]{2}|[+=!><-]=)', Operator), (r'(in|match)\b', Operator.Word), (r'[{}()\[\]$.,;]', Punctuation), # Identfier diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py index c4aacf22..2b5f954f 100644 --- a/pygments/lexers/parsers.py +++ b/pygments/lexers/parsers.py @@ -72,8 +72,8 @@ class RagelLexer(RegexLexer): ], 'operators': [ (r',', Operator), # Join - (r'\||&|-|--', Operator), # Union, Intersection and Subtraction - (r'\.|<:|:>|:>>', Operator), # Concatention + (r'\||&|--?', Operator), # Union, Intersection and Subtraction + (r'\.|<:|:>>?', Operator), # Concatention (r':', Operator), # Label (r'->', Operator), # Epsilon Transition (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions @@ -393,7 +393,7 @@ class AntlrLexer(RegexLexer): # throwsSpec (r'(throws)(\s+)(' + _id + ')', bygroups(Keyword, Whitespace, Name.Label)), - (r'(?:(,)(\s*)(' + _id + '))+', + (r'(,)(\s*)(' + _id + ')', bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws # optionsSpec (r'options\b', Keyword, 'options'), diff --git a/pygments/lexers/shell.py b/pygments/lexers/shell.py index 61069f72..df45317e 100644 --- a/pygments/lexers/shell.py +++ b/pygments/lexers/shell.py @@ -68,7 +68,7 @@ class BashLexer(RegexLexer): (r"(?s)\$?'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single), (r';', Text), (r'\s+', Text), - (r'[^=\s\n\[\]{}()$"\'`\\<]+', Text), + (r'[^=\s\[\]{}()$"\'`\\<]+', Text), (r'\d+(?= |\Z)', Number), (r'\$#?(\w+|.)', Name.Variable), (r'<', Text), @@ -252,7 +252,7 @@ class TcshLexer(RegexLexer): (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double), (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single), (r'\s+', Text), - (r'[^=\s\n\[\]{}()$"\'`\\]+', Text), + (r'[^=\s\[\]{}()$"\'`\\]+', Text), (r'\d+(?= |\Z)', Number), (r'\$#?(\w+|.)', Name.Variable), ], @@ -316,7 +316,7 @@ class PowerShellLexer(RegexLexer): commenthelp = ( 'component description example externalhelp forwardhelpcategory ' - 'forwardhelptargetname forwardhelptargetname functionality inputs link ' + 'forwardhelptargetname functionality inputs link ' 'notes outputs parameter remotehelprunspace role synopsis').split() tokens = { diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index d5444fcb..656d29a7 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -147,14 +147,14 @@ class PostgresLexer(PostgresBase, RegexLexer): for s in DATATYPES + PSEUDO_TYPES]) + r')\b', Name.Builtin), (r'(' + '|'.join(KEYWORDS) + r')\b', Keyword), - (r'[+*/<>=~!@#%^&|`?^-]+', Operator), + (r'[+*/<>=~!@#%^&|`?-]+', Operator), (r'::', Operator), # cast (r'\$\d+', Name.Variable), (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), (r'[0-9]+', Number.Integer), (r"(E|U&)?'(''|[^'])*'", String.Single), (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier - (r'(?ms)(\$[^\$]*\$)(.*?)(\1)', language_callback), + (r'(?s)(\$[^\$]*\$)(.*?)(\1)', language_callback), (r'[a-zA-Z_][a-zA-Z0-9_]*', Name), # psql variable in SQL @@ -434,7 +434,7 @@ class SqlLexer(RegexLexer): r'DEC|DECIMAL|FLOAT|INT|INTEGER|INTERVAL|NUMBER|NUMERIC|REAL|' r'SERIAL|SMALLINT|VARCHAR|VARYING|INT8|SERIAL8|TEXT)\b', Name.Builtin), - (r'[+*/<>=~!@#%^&|`?^-]', Operator), + (r'[+*/<>=~!@#%^&|`?-]', Operator), (r'[0-9]+', Number.Integer), # TODO: Backslash escapes? (r"'(''|[^'])*'", String.Single), @@ -472,7 +472,7 @@ class MySqlLexer(RegexLexer): (r"'(''|[^'])*'", String.Single), (r'"(""|[^"])*"', String.Double), (r"`(``|[^`])*`", String.Symbol), - (r'[+*/<>=~!@#%^&|`?^-]', Operator), + (r'[+*/<>=~!@#%^&|`?-]', Operator), (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|' r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|' r'tinyblob|mediumblob|longblob|blob|float|double|double\s+' diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index 717d689f..4693d434 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -168,7 +168,7 @@ class SmartyLexer(RegexLexer): (r'#[a-zA-Z_][a-zA-Z0-9_]*#', Name.Variable), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z0-9_]+)*', Name.Variable), (r'[~!%^&*()+=|\[\]:;,.<>/?{}@-]', Operator), - ('(true|false|null)\b', Keyword.Constant), + (r'(true|false|null)\b', Keyword.Constant), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r'"(\\\\|\\"|[^"])*"', String.Double), @@ -406,11 +406,11 @@ class MyghtyLexer(RegexLexer): tokens = { 'root': [ (r'\s+', Text), - (r'(<%(def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)', - bygroups(Name.Tag, None, Text, Name.Function, Name.Tag, + (r'(<%(?:def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)', + bygroups(Name.Tag, Text, Name.Function, Name.Tag, using(this), Name.Tag)), - (r'(<%(\w+))(.*?)(>)(.*?)(</%\2\s*>)(?s)', - bygroups(Name.Tag, None, Name.Function, Name.Tag, + (r'(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)(?s)', + bygroups(Name.Tag, Name.Function, Name.Tag, using(PythonLexer), Name.Tag)), (r'(<&[^|])(.*?)(,.*?)?(&>)', bygroups(Name.Tag, Name.Function, using(PythonLexer), Name.Tag)), @@ -525,11 +525,11 @@ class MasonLexer(RegexLexer): (r'\s+', Text), (r'(<%doc>)(.*?)(</%doc>)(?s)', bygroups(Name.Tag, Comment.Multiline, Name.Tag)), - (r'(<%(def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)', - bygroups(Name.Tag, None, Text, Name.Function, Name.Tag, + (r'(<%(?:def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)(?s)', + bygroups(Name.Tag, Text, Name.Function, Name.Tag, using(this), Name.Tag)), - (r'(<%(\w+))(.*?)(>)(.*?)(</%\2\s*>)(?s)', - bygroups(Name.Tag, None, Name.Function, Name.Tag, + (r'(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)(?s)', + bygroups(Name.Tag, Name.Function, Name.Tag, using(PerlLexer), Name.Tag)), (r'(<&[^|])(.*?)(,.*?)?(&>)(?s)', bygroups(Name.Tag, Name.Function, using(PerlLexer), Name.Tag)), @@ -616,8 +616,8 @@ class MakoLexer(RegexLexer): include('tag'), ], 'tag': [ - (r'((?:\w+)\s*=)\s*(".*?")', - bygroups(Name.Attribute, String)), + (r'((?:\w+)\s*=)(\s*)(".*?")', + bygroups(Name.Attribute, Text, String)), (r'/?\s*>', Comment.Preproc, '#pop'), (r'\s+', Text), ], @@ -1403,7 +1403,7 @@ class EvoqueLexer(RegexLexer): # directives: begin, end (r'(\$)(begin|end)(\{(%)?)(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, - String, Punctuation, None)), + String, Punctuation)), # directives: evoque, overlay # see doc for handling first name arg: /directives/evoque/ #+ minor inconsistency: the "name" in e.g. $overlay{name=site_base} @@ -1411,17 +1411,17 @@ class EvoqueLexer(RegexLexer): (r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?' r'(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, - String, using(PythonLexer), Punctuation, None)), + String, using(PythonLexer), Punctuation)), # directives: if, for, prefer, test (r'(\$)(\w+)(\{(%)?)(.*?)((?(4)%)\})', bygroups(Punctuation, Name.Builtin, Punctuation, None, - using(PythonLexer), Punctuation, None)), + using(PythonLexer), Punctuation)), # directive clauses (no {} expression) (r'(\$)(else|rof|fi)', bygroups(Punctuation, Name.Builtin)), # expressions (r'(\$\{(%)?)(.*?)((!)(.*?))?((?(2)%)\})', bygroups(Punctuation, None, using(PythonLexer), - Name.Builtin, None, None, Punctuation, None)), + Name.Builtin, None, None, Punctuation)), (r'#', Other), ], 'comment': [ @@ -1489,7 +1489,8 @@ class ColdfusionLexer(RegexLexer): (r"'.*?'", String.Single), (r'\d+', Number), (r'(if|else|len|var|case|default|break|switch)\b', Keyword), - (r'([A-Za-z_$][A-Za-z0-9_.]*)\s*(\()', bygroups(Name.Function, Punctuation)), + (r'([A-Za-z_$][A-Za-z0-9_.]*)(\s*)(\()', + bygroups(Name.Function, Text, Punctuation)), (r'[A-Za-z_$][A-Za-z0-9_.]*', Name.Variable), (r'[()\[\]{};:,.\\]', Punctuation), (r'\s+', Text), diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 65303efb..0796312f 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -292,12 +292,12 @@ class DarcsPatchLexer(RegexLexer): 'insert': [ include('specialText'), (r'\[', Generic.Inserted), - (r'[^\n\[]*', Generic.Inserted), + (r'[^\n\[]+', Generic.Inserted), ], 'delete': [ include('specialText'), (r'\[', Generic.Deleted), - (r'[^\n\[]*', Generic.Deleted), + (r'[^\n\[]+', Generic.Deleted), ], } @@ -346,18 +346,18 @@ class IrcLogsLexer(RegexLexer): # /me msgs ("^" + timestamp + r""" (\s*[*]\s+) # Star - ([^\s]+\s+.*?\n) # Nick + rest of message """, + (\S+\s+.*?\n) # Nick + rest of message """, bygroups(Comment.Preproc, Keyword, Generic.Inserted)), # join/part msgs ("^" + timestamp + r""" (\s*(?:\*{3}|<?-[!@=P]?->?)\s*) # Star(s) or symbols - ([^\s]+\s+) # Nick + Space + (\S+\s+) # Nick + Space (.*?\n) # Rest of message """, bygroups(Comment.Preproc, Keyword, String, Comment)), (r"^.*?\n", Text), ], 'msg': [ - (r"[^\s]+:(?!//)", Name.Attribute), # Prefix + (r"\S+:(?!//)", Name.Attribute), # Prefix (r".*\n", Text, '#pop'), ], } @@ -468,7 +468,7 @@ class GroffLexer(RegexLexer): tokens = { 'root': [ - (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'), + (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), (r'\.', Punctuation, 'request'), # Regular characters, slurp till we find a backslash or newline (r'[^\\\n]*', Text, 'textline'), @@ -482,7 +482,7 @@ class GroffLexer(RegexLexer): # groff has many ways to write escapes. (r'\\"[^\n]*', Comment), (r'\\[fn]\w', String.Escape), - (r'\\\(..', String.Escape), + (r'\\\(.{2}', String.Escape), (r'\\.\[.*\]', String.Escape), (r'\\.', String.Escape), (r'\\\n', Text, 'request'), @@ -1018,11 +1018,11 @@ class DebianControlLexer(RegexLexer): (r'^(Description)', Keyword, 'description'), (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'), (r'^((Build-)?Depends)', Keyword, 'depends'), - (r'^((?:Python-)?Version)(:\s*)([^\s]+)$', + (r'^((?:Python-)?Version)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), - (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$', + (r'^((?:Installed-)?Size)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), - (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$', + (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$', bygroups(Keyword, Whitespace, String)), @@ -1034,7 +1034,7 @@ class DebianControlLexer(RegexLexer): (r'.', Text), ], 'description': [ - (r'(.*)(Homepage)(: )([^\s]+)', + (r'(.*)(Homepage)(: )(\S+)', bygroups(Text, String, Name, Name.Class)), (r':.*\n', Generic.Strong), (r' .*\n', Text), @@ -1048,9 +1048,9 @@ class DebianControlLexer(RegexLexer): (r'\|', Operator), (r'[\s]+', Text), (r'[}\)]\s*$', Text, '#pop'), - (r'[}]', Text), + (r'}', Text), (r'[^,]$', Name.Function, '#pop'), - (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function), + (r'([\+\.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)), (r'\[.*?\]', Name.Entity), ], 'depend_vers': [ @@ -1387,7 +1387,8 @@ class YamlLexer(ExtendedRegexLexer): # ignored and regular whitespaces in quoted scalars 'quoted-scalar-whitespaces': [ # leading and trailing whitespaces are ignored - (r'^[ ]+|[ ]+$', Text), + (r'^[ ]+', Text), + (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text), # other whitespaces are a part of the value @@ -1456,7 +1457,8 @@ class YamlLexer(ExtendedRegexLexer): # the scalar ends with a comment (r'[ ]+(?=#)', Text, '#pop'), # leading and trailing whitespaces are ignored - (r'^[ ]+|[ ]+$', Text), + (r'^[ ]+', Text), + (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text), # other whitespaces are a part of the value @@ -1702,7 +1704,7 @@ class PyPyLogLexer(RegexLexer): (r"^\+\d+: ", Comment), (r"[ifp]\d+", Name), (r"ptr\d+", Name), - (r"(\()([\w_]+(?:\.[\w_]+)?)(\))", + (r"(\()(\w+(?:\.\w+)?)(\))", bygroups(Punctuation, Name.Builtin, Punctuation)), (r"[\[\]=,()]", Punctuation), (r"(\d+\.\d+|inf|-inf)", Number.Float), @@ -1728,8 +1730,7 @@ class PyPyLogLexer(RegexLexer): r"arraylen_gc|" r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|" r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|" - r"getfield_gc|getinteriorfield_gc|" - r"getinteriorfield_gc|setinteriorfield_gc|" + r"getfield_gc|getinteriorfield_gc|setinteriorfield_gc|" r"getfield_raw|setfield_gc|setfield_raw|" r"strgetitem|strsetitem|strlen|copystrcontent|" r"unicodegetitem|unicodesetitem|unicodelen|" @@ -1741,12 +1742,12 @@ class PyPyLogLexer(RegexLexer): ], "jit-backend-counts": [ (r"\[\w+\] jit-backend-counts}$", Keyword, "#pop"), - (r"[:]", Punctuation), + (r":", Punctuation), (r"\d+", Number), include("extra-stuff"), ], "extra-stuff": [ - (r"[\n\s]+", Text), + (r"\s+", Text), (r"#.*?$", Comment), ], } diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index 38f75619..f45abf57 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -56,7 +56,7 @@ class JavascriptLexer(RegexLexer): (r'', Text, '#pop') ], 'badregex': [ - ('\n', Text, '#pop') + (r'\n', Text, '#pop') ], 'root': [ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), @@ -296,7 +296,7 @@ class ActionScript3Lexer(RegexLexer): (r'[0-9]+', Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), - (r'[~\^\*!%&<>\|+=:;,/?\\{}\[\]();.-]+', Operator), + (r'[~\^\*!%&<>\|+=:;,/?\\{}\[\]().-]+', Operator), ], 'funcparams': [ (r'\s+', Text), @@ -383,7 +383,7 @@ class CssLexer(RegexLexer): r'list-style|margin-bottom|margin-left|margin-right|' r'margin-top|margin|marker-offset|marks|max-height|max-width|' r'min-height|min-width|opacity|orphans|outline|outline-color|' - r'outline-style|outline-width|overflow(?:-x|-y|)|padding-bottom|' + r'outline-style|outline-width|overflow(?:-x|-y)?|padding-bottom|' r'padding-left|padding-right|padding-top|padding|page|' r'page-break-after|page-break-before|page-break-inside|' r'pause-after|pause-before|pause|pitch|pitch-range|' @@ -499,13 +499,13 @@ class ObjectiveJLexer(RegexLexer): (';', Punctuation), ], 'whitespace': [ - (r'(@import)(\s+)("(\\\\|\\"|[^"])*")', + (r'(@import)(\s+)("(?:\\\\|\\"|[^"])*")', bygroups(Comment.Preproc, Text, String.Double)), - (r'(@import)(\s+)(<(\\\\|\\>|[^>])*>)', + (r'(@import)(\s+)(<(?:\\\\|\\>|[^>])*>)', bygroups(Comment.Preproc, Text, String.Double)), - (r'(#(?:include|import))(\s+)("(\\\\|\\"|[^"])*")', + (r'(#(?:include|import))(\s+)("(?:\\\\|\\"|[^"])*")', bygroups(Comment.Preproc, Text, String.Double)), - (r'(#(?:include|import))(\s+)(<(\\\\|\\>|[^>])*>)', + (r'(#(?:include|import))(\s+)(<(?:\\\\|\\>|[^>])*>)', bygroups(Comment.Preproc, Text, String.Double)), (r'#if\s+0', Comment.Preproc, 'if0'), @@ -526,7 +526,7 @@ class ObjectiveJLexer(RegexLexer): (r'', Text, '#pop'), ], 'badregex': [ - ('\n', Text, '#pop'), + (r'\n', Text, '#pop'), ], 'statements': [ (r'(L|@)?"', String, 'string'), @@ -633,7 +633,7 @@ class ObjectiveJLexer(RegexLexer): # parameters (r'(\(' + _ws + ')' # open paren r'([^\)]+)' # type - r'(' + _ws + r'\)' + _ws + r')+' # close paren + r'(' + _ws + r'\)' + _ws + r')' # close paren r'([$a-zA-Z_][a-zA-Z0-9_]+)', # param name bygroups(using(this), Keyword.Type, using(this), Text)), @@ -645,7 +645,7 @@ class ObjectiveJLexer(RegexLexer): (r'(:)', Name.Function), # var args - (r'(,' + _ws + r'...)', using(this)), + (r'(,' + _ws + r'\.\.\.)', using(this)), # param name (r'([$a-zA-Z_][a-zA-Z0-9_]+)', Text), @@ -816,7 +816,7 @@ class PhpLexer(RegexLexer): r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this|use|namespace)\b', Keyword), - ('(true|false|null)\b', Keyword.Constant), + (r'(true|false|null)\b', Keyword.Constant), (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable), (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), (r'[\\a-zA-Z_][\\a-zA-Z0-9_]*', Name.Other), @@ -912,18 +912,18 @@ class DtdLexer(RegexLexer): 'root': [ include('common'), - (r'(<!ELEMENT)(\s+)([^\s]+)', + (r'(<!ELEMENT)(\s+)(\S+)', bygroups(Keyword, Text, Name.Tag), 'element'), - (r'(<!ATTLIST)(\s+)([^\s]+)', + (r'(<!ATTLIST)(\s+)(\S+)', bygroups(Keyword, Text, Name.Tag), 'attlist'), - (r'(<!ENTITY)(\s+)([^\s]+)', + (r'(<!ENTITY)(\s+)(\S+)', bygroups(Keyword, Text, Name.Entity), 'entity'), - (r'(<!NOTATION)(\s+)([^\s]+)', + (r'(<!NOTATION)(\s+)(\S+)', bygroups(Keyword, Text, Name.Tag), 'notation'), (r'(<!\[)([^\[\s]+)(\s*)(\[)', # conditional sections bygroups(Keyword, Name.Entity, Text, Keyword)), - (r'(<!DOCTYPE)(\s+)([^>\s]+)', + (r'(<!DOCTYPE)(\s+)([^>\s]+)', bygroups(Keyword, Text, Name.Tag)), (r'PUBLIC|SYSTEM', Keyword.Constant), (r'[\[\]>]', Keyword), @@ -937,7 +937,7 @@ class DtdLexer(RegexLexer): (r'"[^"]*"', String.Double), (r'\'[^\']*\'', String.Single), ], - + 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), @@ -966,7 +966,7 @@ class DtdLexer(RegexLexer): (r'[^>\s\|()?+*,]+', Name.Entity), (r'>', Keyword, '#pop'), ], - + 'notation': [ include('common'), (r'SYSTEM|PUBLIC', Keyword.Constant), @@ -1169,7 +1169,7 @@ class HaxeLexer(RegexLexer): include('codekeywords'), (r'[();,\[\]]', Punctuation), (r'(?:=|\+=|-=|\*=|/=|%=|&=|\|=|\^=|<<=|>>=|>>>=|\|\||&&|' - r'\.\.\.|==|!=|>|<|>=|<=|\||&|\^|<<|>>|>>>|\+|\-|\*|/|%|' + r'\.\.\.|==|!=|>|<|>=|<=|\||&|\^|<<|>>>|>>|\+|\-|\*|/|%|' r'!|\+\+|\-\-|~|\.|\?|\:)', Operator), (ident, Name), @@ -1395,7 +1395,7 @@ class HamlLexer(ExtendedRegexLexer): 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), - (r'([&!]?[=~])(' + _comma_dot + '*\n)', + (r'([&!]?[=~])(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), 'root'), (r'', Text, 'plain'), @@ -1404,18 +1404,18 @@ class HamlLexer(ExtendedRegexLexer): 'content': [ include('css'), (r'%[a-z0-9_:-]+', Name.Tag, 'tag'), - (r'!!!' + _dot + '*\n', Name.Namespace, '#pop'), - (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)', + (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), - (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'), + (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), - (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc, + (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'), - (r'(-)(' + _comma_dot + '*\n)', + (r'(-)(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'), - (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'), + (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], @@ -1690,7 +1690,7 @@ class SassLexer(ExtendedRegexLexer): 'import': [ (r'[ \t]+', Text), - (r'[^\s]+', String), + (r'\S+', String), (r'\n', Text, 'root'), ], @@ -1932,7 +1932,7 @@ class ScamlLexer(ExtendedRegexLexer): 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), - (r'([&!]?[=~])(' + _dot + '*\n)', + (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), (r'', Text, 'plain'), @@ -1941,21 +1941,21 @@ class ScamlLexer(ExtendedRegexLexer): 'content': [ include('css'), (r'%[a-z0-9_:-]+', Name.Tag, 'tag'), - (r'!!!' + _dot + '*\n', Name.Namespace, '#pop'), - (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)', + (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), - (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'), + (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), - (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc, + (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), - (r'(-@\s*)(import)?(' + _dot + '*\n)', + (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), - (r'(-)(' + _dot + '*\n)', + (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), - (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'), + (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], @@ -2042,31 +2042,31 @@ class JadeLexer(ExtendedRegexLexer): 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), - (r'([&!]?[=~])(' + _dot + '*\n)', + (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), (r'', Text, 'plain'), ], 'content': [ include('css'), - (r'!!!' + _dot + '*\n', Name.Namespace, '#pop'), - (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)', + (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), - (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'), + (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), - (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc, + (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), - (r'(-@\s*)(import)?(' + _dot + '*\n)', + (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), - (r'(-)(' + _dot + '*\n)', + (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), - (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'), + (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), (r'[a-z0-9_:-]+', Name.Tag, 'tag'), - (r'|', Text, 'eval-or-plain'), + (r'\|', Text, 'eval-or-plain'), ], 'tag': [ @@ -2142,24 +2142,24 @@ class XQueryLexer(ExtendedRegexLexer): # ur"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|" # ur"[\u10000-\uEFFFF]" #) - ncnamestartchar = r"[A-Z]|_|[a-z]" + ncnamestartchar = r"(?:[A-Z]|_|[a-z])" # FIX UNICODE LATER #ncnamechar = ncnamestartchar + (ur"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|" # ur"[\u203F-\u2040]") - ncnamechar = ncnamestartchar + r"|-|\.|[0-9]" - ncname = "((%s)+(%s)*)" % (ncnamestartchar, ncnamechar) - pitarget_namestartchar = r"[A-KN-WY-Z]|_|:|[a-kn-wy-z]" - pitarget_namechar = pitarget_namestartchar + r"|-|\.|[0-9]" - pitarget = "(%s)+(%s)*" % (pitarget_namestartchar, pitarget_namechar) + ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])" + ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar) + pitarget_namestartchar = r"(?:[A-KN-WY-Z]|_|:|[a-kn-wy-z])" + pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])" + pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar) prefixedname = "%s:%s" % (ncname, ncname) unprefixedname = ncname - qname = "((%s)|(%s))" %(prefixedname, unprefixedname) + qname = "(?:%s|%s)" % (prefixedname, unprefixedname) - entityref = r'&(lt|gt|amp|quot|apos|nbsp);' - charref = r'&#[0-9]+;|&#x[0-9a-fA-F]+;' + entityref = r'(?:&(?:lt|gt|amp|quot|apos|nbsp);)' + charref = r'(?:&#[0-9]+;|&#x[0-9a-fA-F]+;)' - stringdouble = r'("((' + entityref + r')|(' + charref + r')|("")|([^&"]))*")' - stringsingle = r"('((" + entityref + r")|(" + charref + r")|('')|([^&']))*')" + stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")' + stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')" # FIX UNICODE LATER #elementcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' @@ -2407,11 +2407,13 @@ class XQueryLexer(ExtendedRegexLexer): operator_root_callback), (r'(::|;|\[|//|/|,)', punctuation_root_callback), - (r'(castable|cast)(\s+)(as)', + (r'(castable|cast)(\s+)(as)\b', bygroups(Keyword, Text, Keyword), 'singletype'), - (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword), 'itemtype'), - (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword), 'itemtype'), - (r'case|as', Keyword, 'itemtype'), + (r'(instance)(\s+)(of)\b', + bygroups(Keyword, Text, Keyword), 'itemtype'), + (r'(treat)(\s+)(as)\b', + bygroups(Keyword, Text, Keyword), 'itemtype'), + (r'(case|as)\b', Keyword, 'itemtype'), (r'(\))(\s*)(as)', bygroups(Punctuation, Text, Keyword), 'itemtype'), (r'\$', Name.Variable, 'varname'), @@ -2436,8 +2438,8 @@ class XQueryLexer(ExtendedRegexLexer): 'namespacedecl': [ include('whitespace'), (r'\(:', Comment, 'comment'), - (r'(at)(\s+)'+stringdouble, bygroups(Keyword, Text, String.Double)), - (r"(at)(\s+)"+stringsingle, bygroups(Keyword, Text, String.Single)), + (r'(at)(\s+)('+stringdouble+')', bygroups(Keyword, Text, String.Double)), + (r"(at)(\s+)("+stringsingle+')', bygroups(Keyword, Text, String.Single)), (stringdouble, String.Double), (stringsingle, String.Single), (r',', Punctuation), @@ -2484,9 +2486,9 @@ class XQueryLexer(ExtendedRegexLexer): (r'\(\#', Punctuation, 'pragma'), (r';', Punctuation, '#pop'), (r'then|else', Keyword, '#pop'), - (r'(at)(\s+)' + stringdouble, + (r'(at)(\s+)(' + stringdouble + ')', bygroups(Keyword, Text, String.Double), 'namespacedecl'), - (r'(at)(\s+)' + stringsingle, + (r'(at)(\s+)(' + stringsingle + ')', bygroups(Keyword, Text, String.Single), 'namespacedecl'), (r'except|intersect|in|is|return|satisfies|to|union|where', Keyword, 'root'), @@ -2501,12 +2503,12 @@ class XQueryLexer(ExtendedRegexLexer): (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword)), (r'case|as', Keyword, 'itemtype'), (r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'), - (ncname + r'(:\*)', Keyword.Type, 'operator'), + (ncname + r':\*', Keyword.Type, 'operator'), (qname, Keyword.Type, 'occurrenceindicator'), ], 'kindtest': [ (r'\(:', Comment, 'comment'), - (r'({)', Punctuation, 'root'), + (r'{', Punctuation, 'root'), (r'(\))([*+?]?)', popstate_kindtest_callback), (r'\*', Name, 'closekindtest'), (qname, Name, 'closekindtest'), @@ -2515,7 +2517,7 @@ class XQueryLexer(ExtendedRegexLexer): 'kindtestforpi': [ (r'\(:', Comment, 'comment'), (r'\)', Punctuation, '#pop'), - (ncname, bygroups(Name.Variable, Name.Variable)), + (ncname, Name.Variable), (stringdouble, String.Double), (stringsingle, String.Single), ], @@ -2529,8 +2531,8 @@ class XQueryLexer(ExtendedRegexLexer): 'xml_comment': [ (r'(-->)', popstate_xmlcomment_callback), (r'[^-]{1,2}', Literal), - (r'\u009|\u00A|\u00D|[\u0020-\u00D7FF]|[\u00E000-\u00FFFD]|' - r'[\u0010000-\u0010FFFF]', Literal), + (ur'\t|\r|\n|[\u0020-\U0000D7FF]|[\U0000E000-\U0000FFFD]|' + ur'[\U00010000-\U0010FFFF]', Literal), ], 'processing_instruction': [ (r'\s+', Text, 'processing_instruction_content'), @@ -2539,13 +2541,13 @@ class XQueryLexer(ExtendedRegexLexer): ], 'processing_instruction_content': [ (r'\?>', String.Doc, '#pop'), - (r'\u009|\u00A|\u00D|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' - r'[\u10000-\u10FFFF]', Literal), + (ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + ur'[\U00010000-\U0010FFFF]', Literal), ], 'cdata_section': [ (r']]>', String.Doc, '#pop'), - (r'\u009|\u00A|\u00D|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' - r'[\u10000-\u10FFFF]', Literal), + (ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + ur'[\U00010000-\U0010FFFF]', Literal), ], 'start_tag': [ include('whitespace'), @@ -2613,9 +2615,9 @@ class XQueryLexer(ExtendedRegexLexer): ], 'pragmacontents': [ (r'#\)', Punctuation, 'operator'), - (r'\u009|\u00A|\u00D|[\u0020-\u00D7FF]|[\u00E000-\u00FFFD]|' - r'[\u0010000-\u0010FFFF]', Literal), - (r'(\s*)', Text), + (ur'\t|\r|\n|[\u0020-\U0000D7FF]|[\U0000E000-\U0000FFFD]|' + ur'[\U00010000-\U0010FFFF]', Literal), + (r'(\s+)', Text), ], 'occurrenceindicator': [ include('whitespace'), @@ -2721,8 +2723,9 @@ class XQueryLexer(ExtendedRegexLexer): (r'(element)(\s+)(?=' +qname+ r')', bygroups(Keyword, Text), 'element_qname'), #PROCESSING_INSTRUCTION - (r'(processing-instruction)(\s+)' + ncname + r'(\s*)(\{)', - bygroups(Keyword, Text, Name.Variable, Text, Punctuation), 'operator'), + (r'(processing-instruction)(\s+)(' + ncname + r')(\s*)(\{)', + bygroups(Keyword, Text, Name.Variable, Text, Punctuation), + 'operator'), (r'(declare|define)(\s+)(function)', bygroups(Keyword, Text, Keyword)), @@ -2771,8 +2774,8 @@ class XQueryLexer(ExtendedRegexLexer): (r'//|/|\+|-|;|,|\(|\)', Punctuation), # STANDALONE QNAMES - (qname + r'(?=\s*[{])', Name.Variable, 'qname_braren'), - (qname + r'(?=\s*[(])', Name.Function, 'qname_braren'), + (qname + r'(?=\s*{)', Name.Variable, 'qname_braren'), + (qname + r'(?=\s*\()', Name.Function, 'qname_braren'), (qname, Name.Variable, 'operator'), ] } diff --git a/tests/examplefiles/antlr_throws b/tests/examplefiles/antlr_throws new file mode 100644 index 00000000..816d8914 --- /dev/null +++ b/tests/examplefiles/antlr_throws @@ -0,0 +1 @@ +public f throws a, b, c : x ; diff --git a/tests/examplefiles/function.mu b/tests/examplefiles/function.mu new file mode 100644 index 00000000..46bb259d --- /dev/null +++ b/tests/examplefiles/function.mu @@ -0,0 +1 @@ +a::b () |