diff options
author | Georg Brandl <georg@python.org> | 2016-02-14 17:10:00 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2016-02-14 17:10:00 +0100 |
commit | fff7caebc1befe71ab088a5d245fbb7708cfc0f2 (patch) | |
tree | 346052a8c49dd99ab7dc5abbdc603cc3717308c9 /pygments | |
parent | 2882c34a141f4f36ded54d1473c6d4f73684ffc7 (diff) | |
download | pygments-fff7caebc1befe71ab088a5d245fbb7708cfc0f2.tar.gz |
Fix most complaints from regexlint.
Diffstat (limited to 'pygments')
35 files changed, 479 insertions, 445 deletions
diff --git a/pygments/lexers/algebra.py b/pygments/lexers/algebra.py index fc54c3c3..79460ad4 100644 --- a/pygments/lexers/algebra.py +++ b/pygments/lexers/algebra.py @@ -104,9 +104,9 @@ class MathematicaLexer(RegexLexer): (r'#\d*', Name.Variable), (r'([a-zA-Z]+[a-zA-Z0-9]*)', Name), - (r'-?[0-9]+\.[0-9]*', Number.Float), - (r'-?[0-9]*\.[0-9]+', Number.Float), - (r'-?[0-9]+', Number.Integer), + (r'-?\d+\.\d*', Number.Float), + (r'-?\d*\.\d+', Number.Float), + (r'-?\d+', Number.Integer), (words(operators), Operator), (words(punctuation), Punctuation), diff --git a/pygments/lexers/ampl.py b/pygments/lexers/ampl.py index f57b486f..c3ca80d4 100644 --- a/pygments/lexers/ampl.py +++ b/pygments/lexers/ampl.py @@ -9,13 +9,10 @@ :license: BSD, see LICENSE for details. """ -import re - -from pygments.lexer import RegexLexer, bygroups, using, this +from pygments.lexer import RegexLexer, bygroups, using, this, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation - __all__ = ['AmplLexer'] @@ -30,27 +27,30 @@ class AmplLexer(RegexLexer): filenames = ['*.run'] tokens = { - 'root':[ + 'root': [ (r'\n', Text), (r'\s+', Text.Whitespace), (r'#.*?\n', Comment.Single), (r'/[*](.|\n)*?[*]/', Comment.Multiline), - (r'(call|cd|close|commands|data|delete|display|drop|end|environ|' - r'exit|expand|include|load|model|objective|option|problem|purge|' - r'quit|redeclare|reload|remove|reset|restore|shell|show|solexpand|' - r'solution|solve|update|unload|xref|' - r'coeff|coef|cover|obj|interval|' - r'default|from|to|to_come|net_in|net_out|dimen|dimension|' - r'check|complements|write|end|function|pipe|' - r'format|if|then|else|in|while|repeat|for)\b', Keyword.Reserved), - (r'(integer|binary|symbolic|ordered|circular|reversed|IN|INOUT|OUT|LOCAL)', + (words(( + 'call', 'cd', 'close', 'commands', 'data', 'delete', 'display', + 'drop', 'end', 'environ', 'exit', 'expand', 'include', 'load', + 'model', 'objective', 'option', 'problem', 'purge', 'quit', + 'redeclare', 'reload', 'remove', 'reset', 'restore', 'shell', + 'show', 'solexpand', 'solution', 'solve', 'update', 'unload', + 'xref', 'coeff', 'coef', 'cover', 'obj', 'interval', 'default', + 'from', 'to', 'to_come', 'net_in', 'net_out', 'dimen', + 'dimension', 'check', 'complements', 'write', 'function', + 'pipe', 'format', 'if', 'then', 'else', 'in', 'while', 'repeat', + 'for'), suffix=r'\b'), Keyword.Reserved), + (r'(integer|binary|symbolic|ordered|circular|reversed|INOUT|IN|OUT|LOCAL)', Keyword.Type), (r'\".*?\"', String.Double), (r'\'.*?\'', String.Single), (r'[()\[\]{},;:]+', Punctuation), - (r'\b(\w+)(\.)(astatus|init|init0|lb|lb0|lb1|lb2|lrc|' - r'lslack|rc|relax|slack|sstatus|status|ub|ub0|ub1|' - r'ub2|urc|uslack|val)', + (r'\b(\w+)(\.)(astatus|init0|init|lb0|lb1|lb2|lb|lrc|' + r'lslack|rc|relax|slack|sstatus|status|ub0|ub1|ub2|' + r'ub|urc|uslack|val)', bygroups(Name.Variable, Punctuation, Keyword.Reserved)), (r'(set|param|var|arc|minimize|maximize|subject to|s\.t\.|subj to|' r'node|table|suffix|read table|write table)(\s+)(\w+)', @@ -58,21 +58,26 @@ class AmplLexer(RegexLexer): (r'(param)(\s*)(:)(\s*)(\w+)(\s*)(:)(\s*)((\w|\s)+)', bygroups(Keyword.Declaration, Text, Punctuation, Text, Name.Variable, Text, Punctuation, Text, Name.Variable)), - (r'(let|fix|unfix)(\s*)(\{.*\}|)(\s*)(\w+)', + (r'(let|fix|unfix)(\s*)((?:\{.*\})?)(\s*)(\w+)', bygroups(Keyword.Declaration, Text, using(this), Text, Name.Variable)), - (r'\b(abs|acos|acosh|alias|' - r'asin|asinh|atan|atan2|atanh|ceil|ctime|cos|exp|floor|log|log10|' - r'max|min|precision|round|sin|sinh|sqrt|tan|tanh|time|trunc|Beta|' - r'Cauchy|Exponential|Gamma|Irand224|Normal|Normal01|Poisson|Uniform|Uniform01|' - r'num|num0|ichar|char|length|substr|sprintf|match|sub|gsub|print|printf' - r'next|nextw|prev|prevw|first|last|ord|ord0|card|arity|indexarity)\b', - Name.Builtin), + (words(( + 'abs', 'acos', 'acosh', 'alias', 'asin', 'asinh', 'atan', 'atan2', + 'atanh', 'ceil', 'ctime', 'cos', 'exp', 'floor', 'log', 'log10', + 'max', 'min', 'precision', 'round', 'sin', 'sinh', 'sqrt', 'tan', + 'tanh', 'time', 'trunc', 'Beta', 'Cauchy', 'Exponential', 'Gamma', + 'Irand224', 'Normal', 'Normal01', 'Poisson', 'Uniform', 'Uniform01', + 'num', 'num0', 'ichar', 'char', 'length', 'substr', 'sprintf', + 'match', 'sub', 'gsub', 'print', 'printf', 'next', 'nextw', 'prev', + 'prevw', 'first', 'last', 'ord', 'ord0', 'card', 'arity', + 'indexarity'), prefix=r'\b', suffix=r'\b'), Name.Builtin), (r'(\+|\-|\*|/|\*\*|=|<=|>=|==|\||\^|<|>|\!|\.\.|:=|\&|\!=|<<|>>)', Operator), - (r'(or|exists|forall|and|in|not|within|union|diff|' - r'difference|symdiff|inter|intersect|intersection|' - r'cross|setof|by|less|sum|prod|product|div|mod)', - Keyword.Reserved), #Operator.Name but not enough emphasized with Operator.Name + (words(( + 'or', 'exists', 'forall', 'and', 'in', 'not', 'within', 'union', + 'diff', 'difference', 'symdiff', 'inter', 'intersect', + 'intersection', 'cross', 'setof', 'by', 'less', 'sum', 'prod', + 'product', 'div', 'mod'), suffix=r'\b'), + Keyword.Reserved), # Operator.Name but not enough emphasized with that (r'(\d+\.(?!\.)\d*|\.(?!.)\d+)([eE][+-]?\d+)?', Number.Float), (r'\d+([eE][+-]?\d+)?', Number.Integer), (r'[+-]?Infinity', Number.Integer), diff --git a/pygments/lexers/business.py b/pygments/lexers/business.py index ea888245..43978690 100644 --- a/pygments/lexers/business.py +++ b/pygments/lexers/business.py @@ -57,9 +57,9 @@ class CobolLexer(RegexLexer): ], 'core': [ # Figurative constants - (r'(^|(?<=[^0-9a-z_\-]))(ALL\s+)?' + (r'(^|(?<=[^\w\-]))(ALL\s+)?' r'((ZEROES)|(HIGH-VALUE|LOW-VALUE|QUOTE|SPACE|ZERO)(S)?)' - r'\s*($|(?=[^0-9a-z_\-]))', + r'\s*($|(?=[^\w\-]))', Name.Constant), # Reserved words STATEMENTS and other bolds @@ -79,8 +79,8 @@ class CobolLexer(RegexLexer): 'RETURN', 'REWRITE', 'SCREEN', 'SD', 'SEARCH', 'SECTION', 'SET', 'SORT', 'START', 'STOP', 'STRING', 'SUBTRACT', 'SUPPRESS', 'TERMINATE', 'THEN', 'UNLOCK', 'UNSTRING', 'USE', 'VALIDATE', - 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^0-9a-z_\-]))', - suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^\w\-]))', + suffix=r'\s*($|(?=[^\w\-]))'), Keyword.Reserved), # Reserved words @@ -89,33 +89,33 @@ class CobolLexer(RegexLexer): 'ALPHABET', 'ALPHABETIC', 'ALPHABETIC-LOWER', 'ALPHABETIC-UPPER', 'ALPHANUMERIC', 'ALPHANUMERIC-EDITED', 'ALSO', 'ALTER', 'ALTERNATE' 'ANY', 'ARE', 'AREA', 'AREAS', 'ARGUMENT-NUMBER', 'ARGUMENT-VALUE', 'AS', - 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', 'AUTOTERMINATE', - 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL', + 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', + 'AUTOTERMINATE', 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL', 'BLANK', 'BLINK', 'BLOCK', 'BOTTOM', 'BY', 'BYTE-LENGTH', 'CHAINING', - 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', 'COLLATING', - 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', 'COMMIT', 'COMMON', - 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL', + 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', + 'COLLATING', 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', + 'COMMIT', 'COMMON', 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL', 'CONTROLS', 'CONVERTING', 'COPY', 'CORR', 'CORRESPONDING', 'COUNT', 'CRT', - 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', 'DEBUGGING', - 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED', + 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', + 'DEBUGGING', 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED', 'DELIMITER', 'DEPENDING', 'DESCENDING', 'DETAIL', 'DISK', 'DOWN', 'DUPLICATES', 'DYNAMIC', 'EBCDIC', 'ENTRY', 'ENVIRONMENT-NAME', 'ENVIRONMENT-VALUE', 'EOL', 'EOP', 'EOS', 'ERASE', 'ERROR', 'ESCAPE', 'EXCEPTION', - 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', - 'FILE-ID', 'FILLER', 'FINAL', 'FIRST', 'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', - 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', 'FUNCTION', - 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', + 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', 'FILE-ID', 'FILLER', 'FINAL', + 'FIRST', 'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', + 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', + 'FUNCTION', 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', 'HEADING', 'HIGHLIGHT', 'I-O', 'ID', 'IGNORE', 'IGNORING', 'IN', 'INDEX', 'INDEXED', 'INDICATE', - 'INITIAL', 'INITIALIZED', 'INPUT', - 'INTO', 'INTRINSIC', 'INVALID', 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL', + 'INITIAL', 'INITIALIZED', 'INPUT', 'INTO', 'INTRINSIC', 'INVALID', + 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL', 'LAST', 'LEADING', 'LEFT', 'LENGTH', 'LIMIT', 'LIMITS', 'LINAGE', 'LINAGE-COUNTER', 'LINE', 'LINES', 'LOCALE', 'LOCK', - 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', - 'MULTIPLE', 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', - 'NEGATIVE', 'NEXT', 'NO', 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', - 'NUMERIC-EDITED', 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY', + 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', 'MULTIPLE', + 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', 'NEGATIVE', 'NEXT', 'NO', + 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', 'NUMERIC-EDITED', + 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY', 'OPTIONAL', 'ORDER', 'ORGANIZATION', 'OTHER', 'OUTPUT', 'OVERFLOW', 'OVERLINE', 'PACKED-DECIMAL', 'PADDING', 'PAGE', 'PARAGRAPH', 'PLUS', 'POINTER', 'POSITION', 'POSITIVE', 'PRESENT', 'PREVIOUS', @@ -137,40 +137,42 @@ class CobolLexer(RegexLexer): 'UNSIGNED-INT', 'UNSIGNED-LONG', 'UNSIGNED-SHORT', 'UNTIL', 'UP', 'UPDATE', 'UPON', 'USAGE', 'USING', 'VALUE', 'VALUES', 'VARYING', 'WAIT', 'WHEN', 'WITH', 'WORDS', 'YYYYDDD', 'YYYYMMDD'), - prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'), Keyword.Pseudo), # inactive reserved words (words(( - 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', 'B-AND', - 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', 'CF', 'CH', 'CHAIN', 'CLASS-ID', - 'CLASSIFICATION', 'COMMUNICATION', 'CONDITION', 'DATA-POINTER', - 'DESTINATION', 'DISABLE', 'EC', 'EGI', 'EMI', 'ENABLE', 'END-RECEIVE', - 'ENTRY-CONVENTION', 'EO', 'ESI', 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', - 'FLOAT-BINARY-16', 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', - 'FLOAT-DECIMAL-16', 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', - 'FUNCTION-POINTER', 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', - 'INHERITS', 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE', + 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', + 'B-AND', 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', + 'CF', 'CH', 'CHAIN', 'CLASS-ID', 'CLASSIFICATION', 'COMMUNICATION', + 'CONDITION', 'DATA-POINTER', 'DESTINATION', 'DISABLE', 'EC', 'EGI', + 'EMI', 'ENABLE', 'END-RECEIVE', 'ENTRY-CONVENTION', 'EO', 'ESI', + 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', 'FLOAT-BINARY-16', + 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', 'FLOAT-DECIMAL-16', + 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', 'FUNCTION-POINTER', + 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', 'INHERITS', + 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE', 'LC_CTYPE', 'LC_MESSAGES', 'LC_MONETARY', 'LC_NUMERIC', 'LC_TIME', - 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', 'NORMAL', - 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', 'PAGE-COUNTER', 'PF', 'PH', - 'PROPERTY', 'PROTOTYPE', 'PURGE', 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', - 'RELATION', 'REPLACE', 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', - 'RF', 'RH', 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', 'STEP', - 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', 'SUPER', 'SYMBOL', - 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', 'TYPEDEF', 'UCS-4', 'UNIVERSAL', - 'USER-DEFAULT', 'UTF-16', 'UTF-8', 'VAL-STATUS', 'VALID', 'VALIDATE', - 'VALIDATE-STATUS'), - prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', + 'NORMAL', 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', + 'PAGE-COUNTER', 'PF', 'PH', 'PROPERTY', 'PROTOTYPE', 'PURGE', + 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', 'RELATION', 'REPLACE', + 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', 'RF', 'RH', + 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', + 'STEP', 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', + 'SUPER', 'SYMBOL', 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', + 'TYPEDEF', 'UCS-4', 'UNIVERSAL', 'USER-DEFAULT', 'UTF-16', 'UTF-8', + 'VAL-STATUS', 'VALID', 'VALIDATE', 'VALIDATE-STATUS'), + prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'), Error), # Data Types - (r'(^|(?<=[^0-9a-z_\-]))' + (r'(^|(?<=[^\w\-]))' r'(PIC\s+.+?(?=(\s|\.\s))|PICTURE\s+.+?(?=(\s|\.\s))|' r'(COMPUTATIONAL)(-[1-5X])?|(COMP)(-[1-5X])?|' r'BINARY-C-LONG|' r'BINARY-CHAR|BINARY-DOUBLE|BINARY-LONG|BINARY-SHORT|' - r'BINARY)\s*($|(?=[^0-9a-z_\-]))', Keyword.Type), + r'BINARY)\s*($|(?=[^\w\-]))', Keyword.Type), # Operators (r'(\*\*|\*|\+|-|/|<=|>=|<|>|==|/=|=)', Operator), @@ -180,7 +182,7 @@ class CobolLexer(RegexLexer): (r'([(),;:&%.])', Punctuation), # Intrinsics - (r'(^|(?<=[^0-9a-z_\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|' + (r'(^|(?<=[^\w\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|' r'CHAR|COMBINED-DATETIME|CONCATENATE|COS|CURRENT-DATE|' r'DATE-OF-INTEGER|DATE-TO-YYYYMMDD|DAY-OF-INTEGER|DAY-TO-YYYYDDD|' r'EXCEPTION-(?:FILE|LOCATION|STATEMENT|STATUS)|EXP10|EXP|E|' @@ -192,13 +194,13 @@ class CobolLexer(RegexLexer): r'STANDARD-DEVIATION|STORED-CHAR-LENGTH|SUBSTITUTE(?:-CASE)?|' r'SUM|TAN|TEST-DATE-YYYYMMDD|TEST-DAY-YYYYDDD|TRIM|' r'UPPER-CASE|VARIANCE|WHEN-COMPILED|YEAR-TO-YYYY)\s*' - r'($|(?=[^0-9a-z_\-]))', Name.Function), + r'($|(?=[^\w\-]))', Name.Function), # Booleans - (r'(^|(?<=[^0-9a-z_\-]))(true|false)\s*($|(?=[^0-9a-z_\-]))', Name.Builtin), + (r'(^|(?<=[^\w\-]))(true|false)\s*($|(?=[^\w\-]))', Name.Builtin), # Comparing Operators - (r'(^|(?<=[^0-9a-z_\-]))(equal|equals|ne|lt|le|gt|ge|' - r'greater|less|than|not|and|or)\s*($|(?=[^0-9a-z_\-]))', Operator.Word), + (r'(^|(?<=[^\w\-]))(equal|equals|ne|lt|le|gt|ge|' + r'greater|less|than|not|and|or)\s*($|(?=[^\w\-]))', Operator.Word), ], # \"[^\"\n]*\"|\'[^\'\n]*\' @@ -439,15 +441,15 @@ class OpenEdgeLexer(RegexLexer): filenames = ['*.p', '*.cls'] mimetypes = ['text/x-openedge', 'application/x-openedge'] - types = (r'(?i)(^|(?<=[^0-9a-z_\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|' + types = (r'(?i)(^|(?<=[^\w\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|' r'COM-HANDLE|DATE|DATETIME|DATETIME-TZ|' r'DECIMAL|DEC|DECI|DECIM|DECIMA|HANDLE|' r'INT64|INTEGER|INT|INTE|INTEG|INTEGE|' - r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^0-9a-z_\-]))') + r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^\w\-]))') keywords = words(OPENEDGEKEYWORDS, - prefix=r'(?i)(^|(?<=[^0-9a-z_\-]))', - suffix=r'\s*($|(?=[^0-9a-z_\-]))') + prefix=r'(?i)(^|(?<=[^\w\-]))', + suffix=r'\s*($|(?=[^\w\-]))') tokens = { 'root': [ diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 2f77158b..632871ba 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -124,7 +124,8 @@ class CFamilyLexer(RegexLexer): (r'\\', String), # stray backslash ], 'macro': [ - (r'(include)(' + _ws1 + ')([^\n]+)', bygroups(Comment.Preproc, Text, Comment.PreprocFile)), + (r'(include)(' + _ws1 + r')([^\n]+)', + bygroups(Comment.Preproc, Text, Comment.PreprocFile)), (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 77c7714d..9cc291e5 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -42,7 +42,7 @@ class IniLexer(RegexLexer): bygroups(Name.Attribute, Text, Operator, Text, String)), # standalone option, supported by some INI parsers (r'(.+?)$', Name.Attribute), - ] + ], } def analyse_text(text): @@ -600,7 +600,7 @@ class TerraformLexer(RegexLexer): (r'(".*")', bygroups(String.Double)), ], 'punctuation': [ - (r'[\[\]\(\),.]', Punctuation), + (r'[\[\](),.]', Punctuation), ], # Keep this seperate from punctuation - we sometimes want to use different # Tokens for { } @@ -631,9 +631,8 @@ class TermcapLexer(RegexLexer): .. versionadded:: 2.1 """ name = 'Termcap' - aliases = ['termcap',] - - filenames = ['termcap', 'termcap.src',] + aliases = ['termcap'] + filenames = ['termcap', 'termcap.src'] mimetypes = [] # NOTE: @@ -644,13 +643,13 @@ class TermcapLexer(RegexLexer): tokens = { 'root': [ (r'^#.*$', Comment), - (r'^[^\s#:\|]+', Name.Tag, 'names'), + (r'^[^\s#:|]+', Name.Tag, 'names'), ], 'names': [ (r'\n', Text, '#pop'), (r':', Punctuation, 'defs'), (r'\|', Punctuation), - (r'[^:\|]+', Name.Attribute), + (r'[^:|]+', Name.Attribute), ], 'defs': [ (r'\\\n[ \t]*', Text), @@ -678,9 +677,8 @@ class TerminfoLexer(RegexLexer): .. versionadded:: 2.1 """ name = 'Terminfo' - aliases = ['terminfo',] - - filenames = ['terminfo', 'terminfo.src',] + aliases = ['terminfo'] + filenames = ['terminfo', 'terminfo.src'] mimetypes = [] # NOTE: @@ -691,13 +689,13 @@ class TerminfoLexer(RegexLexer): tokens = { 'root': [ (r'^#.*$', Comment), - (r'^[^\s#,\|]+', Name.Tag, 'names'), + (r'^[^\s#,|]+', Name.Tag, 'names'), ], 'names': [ (r'\n', Text, '#pop'), (r'(,)([ \t]*)', bygroups(Punctuation, Text), 'defs'), (r'\|', Punctuation), - (r'[^,\|]+', Name.Attribute), + (r'[^,|]+', Name.Attribute), ], 'defs': [ (r'\n[ \t]+', Text), @@ -726,8 +724,8 @@ class PkgConfigLexer(RegexLexer): """ name = 'PkgConfig' - aliases = ['pkgconfig',] - filenames = ['*.pc',] + aliases = ['pkgconfig'] + filenames = ['*.pc'] mimetypes = [] tokens = { @@ -793,8 +791,8 @@ class PacmanConfLexer(RegexLexer): """ name = 'PacmanConf' - aliases = ['pacmanconf',] - filenames = ['pacman.conf',] + aliases = ['pacmanconf'] + filenames = ['pacman.conf'] mimetypes = [] tokens = { @@ -822,7 +820,7 @@ class PacmanConfLexer(RegexLexer): '%u', # url ), suffix=r'\b'), Name.Variable), - + # fallback (r'.', Text), ], diff --git a/pygments/lexers/csound.py b/pygments/lexers/csound.py index 51414073..95ee73d8 100644 --- a/pygments/lexers/csound.py +++ b/pygments/lexers/csound.py @@ -9,7 +9,7 @@ :license: BSD, see LICENSE for details. """ -import copy, re +import re from pygments.lexer import RegexLexer, bygroups, default, include, using, words from pygments.token import Comment, Keyword, Name, Number, Operator, Punctuation, \ @@ -21,7 +21,7 @@ from pygments.lexers.scripting import LuaLexer __all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer'] -newline = (r'((?:;|//).*)*(\n)', bygroups(Comment.Single, Text)) +newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text)) class CsoundLexer(RegexLexer): @@ -177,7 +177,7 @@ class CsoundOrchestraLexer(CsoundLexer): (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+', Number.Integer), (r'"', String, 'single-line string'), - (r'{{', String, 'multi-line string'), + (r'\{\{', String, 'multi-line string'), (r'[+\-*/%^!=&|<>#~¬]', Operator), (r'[](),?:[]', Punctuation), (words(( @@ -273,40 +273,40 @@ class CsoundOrchestraLexer(CsoundLexer): (r'[\\"~$%\^\n]', String) ], 'multi-line string': [ - (r'}}', String, '#pop'), - (r'[^\}]+|\}(?!\})', String) + (r'\}\}', String, '#pop'), + (r'[^}]+|\}(?!\})', String) ], 'scoreline opcode': [ include('whitespace or macro call'), - (r'{{', String, 'scoreline'), + (r'\{\{', String, 'scoreline'), default('#pop') ], 'scoreline': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(CsoundScoreLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer)) ], 'python opcode': [ include('whitespace or macro call'), - (r'{{', String, 'python'), + (r'\{\{', String, 'python'), default('#pop') ], 'python': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(PythonLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(PythonLexer)) ], 'lua opcode': [ include('whitespace or macro call'), (r'"', String, 'single-line string'), - (r'{{', String, 'lua'), + (r'\{\{', String, 'lua'), (r',', Punctuation), default('#pop') ], 'lua': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(LuaLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(LuaLexer)) ] } @@ -315,7 +315,7 @@ class CsoundDocumentLexer(RegexLexer): """ For `Csound <http://csound.github.io>`_ documents. - + .. versionadded:: 2.1 """ name = 'Csound Document' diff --git a/pygments/lexers/css.py b/pygments/lexers/css.py index b40201f4..6c585dfa 100644 --- a/pygments/lexers/css.py +++ b/pygments/lexers/css.py @@ -476,8 +476,8 @@ class ScssLexer(RegexLexer): (r'@[\w-]+', Keyword, 'selector'), (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'), # TODO: broken, and prone to infinite loops. - #(r'(?=[^;{}][;}])', Name.Attribute, 'attr'), - #(r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), + # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'), + # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), default('selector'), ], @@ -518,7 +518,7 @@ class LessCssLexer(CssLexer): inherit, ], 'content': [ - (r'{', Punctuation, '#push'), + (r'\{', Punctuation, '#push'), inherit, ], } diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index eac4b5e5..11b4573e 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -11,7 +11,7 @@ import re from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \ - using, this, default + using, this, default, words from pygments.token import Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other from pygments.util import get_choice_opt, iteritems @@ -375,8 +375,8 @@ class VbNetLexer(RegexLexer): filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) - uni_name = '[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + \ - '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', + uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \ + '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*' flags = re.MULTILINE | re.IGNORECASE @@ -394,25 +394,26 @@ class VbNetLexer(RegexLexer): (r'[(){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), - (r'(?<!\.)(AddHandler|Alias|' - r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|' - r'CDec|CDbl|CInt|CLng|CObj|Continue|CSByte|CShort|' - r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|' - r'Default|Delegate|DirectCast|Do|Each|Else|ElseIf|' - r'EndIf|Erase|Error|Event|Exit|False|Finally|For|' - r'Friend|Get|Global|GoSub|GoTo|Handles|If|' - r'Implements|Inherits|Interface|' - r'Let|Lib|Loop|Me|MustInherit|' - r'MustOverride|MyBase|MyClass|Narrowing|New|Next|' - r'Not|Nothing|NotInheritable|NotOverridable|Of|On|' - r'Operator|Option|Optional|Overloads|Overridable|' - r'Overrides|ParamArray|Partial|Private|Protected|' - r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|' - r'Return|Select|Set|Shadows|Shared|Single|' - r'Static|Step|Stop|SyncLock|Then|' - r'Throw|To|True|Try|TryCast|Wend|' - r'Using|When|While|Widening|With|WithEvents|' - r'WriteOnly)\b', Keyword), + (words(( + 'AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case', + 'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl', + 'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng', + 'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare', + 'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else', + 'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False', + 'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo', + 'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let', + 'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase', + 'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing', + 'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator', + 'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides', + 'ParamArray', 'Partial', 'Private', 'Protected', 'Public', + 'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume', + 'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single', + 'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To', + 'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While', + 'Widening', 'With', 'WithEvents', 'WriteOnly'), + prefix='(?<!\.)', suffix=r'\b'), Keyword), (r'(?<!\.)End\b', Keyword, 'end'), (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'), (r'(?<!\.)(Function|Sub|Property)(\s+)', diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py index 24fda2a2..41c9773f 100644 --- a/pygments/lexers/dsls.py +++ b/pygments/lexers/dsls.py @@ -111,8 +111,8 @@ class ThriftLexer(RegexLexer): include('keywords'), include('numbers'), (r'[&=]', Operator), - (r'[:;\,\{\}\(\)\<>\[\]]', Punctuation), - (r'[a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name), + (r'[:;,{}()<>\[\]]', Punctuation), + (r'[a-zA-Z_](\.\w|\w)*', Name), ], 'whitespace': [ (r'\n', Text.Whitespace), @@ -135,7 +135,7 @@ class ThriftLexer(RegexLexer): (r'[^\\\'\n]+', String.Single), ], 'namespace': [ - (r'[a-z\*](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name.Namespace, '#pop'), + (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'), default('#pop'), ], 'class': [ diff --git a/pygments/lexers/elm.py b/pygments/lexers/elm.py index 7df6346a..cd1fb98e 100644 --- a/pygments/lexers/elm.py +++ b/pygments/lexers/elm.py @@ -46,7 +46,7 @@ class ElmLexer(RegexLexer): 'root': [ # Comments - (r'{-', Comment.Multiline, 'comment'), + (r'\{-', Comment.Multiline, 'comment'), (r'--.*', Comment.Single), # Whitespace @@ -86,20 +86,20 @@ class ElmLexer(RegexLexer): (validName, Name.Variable), # Parens - (r'[,\(\)\[\]{}]', Punctuation), + (r'[,()\[\]{}]', Punctuation), ], 'comment': [ - (r'-(?!})', Comment.Multiline), - (r'{-', Comment.Multiline, 'comment'), + (r'-(?!\})', Comment.Multiline), + (r'\{-', Comment.Multiline, 'comment'), (r'[^-}]', Comment.Multiline), - (r'-}', Comment.Multiline, '#pop'), + (r'-\}', Comment.Multiline, '#pop'), ], 'doublequote': [ - (r'\\u[0-9a-fA-F]\{4}', String.Escape), - (r'\\[nrfvb\\\"]', String.Escape), + (r'\\u[0-9a-fA-F]{4}', String.Escape), + (r'\\[nrfvb\\"]', String.Escape), (r'[^"]', String), (r'"', String, '#pop'), ], diff --git a/pygments/lexers/erlang.py b/pygments/lexers/erlang.py index 7838b3c5..93ddd2c2 100644 --- a/pygments/lexers/erlang.py +++ b/pygments/lexers/erlang.py @@ -127,7 +127,7 @@ class ErlangLexer(RegexLexer): 'string': [ (escape_re, String.Escape), (r'"', String, '#pop'), - (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), + (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol), (r'[^"\\~]+', String), (r'~', String), ], @@ -240,11 +240,11 @@ class ElixirLexer(RegexLexer): KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') BUILTIN = ( 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', - 'quote', 'unquote', 'unquote_splicing', 'throw', 'super' + 'quote', 'unquote', 'unquote_splicing', 'throw', 'super', ) BUILTIN_DECLARATION = ( 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', - 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback' + 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback', ) BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') @@ -263,7 +263,7 @@ class ElixirLexer(RegexLexer): OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') PUNCTUATION = ( - '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']' + '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']', ) def get_tokens_unprocessed(self, text): diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py index 73ea4a4a..2b17615c 100644 --- a/pygments/lexers/esoteric.py +++ b/pygments/lexers/esoteric.py @@ -90,7 +90,7 @@ class CAmkESLexer(RegexLexer): filenames = ['*.camkes', '*.idl4'] tokens = { - 'root':[ + 'root': [ # C pre-processor directive (r'^\s*#.*\n', Comment.Preproc), @@ -99,7 +99,7 @@ class CAmkESLexer(RegexLexer): (r'/\*(.|\n)*?\*/', Comment), (r'//.*\n', Comment), - (r'[\[\(\){},\.;=\]]', Punctuation), + (r'[\[(){},.;=\]]', Punctuation), (words(('assembly', 'attribute', 'component', 'composition', 'configuration', 'connection', 'connector', 'consumes', diff --git a/pygments/lexers/felix.py b/pygments/lexers/felix.py index b7659769..9631bcc1 100644 --- a/pygments/lexers/felix.py +++ b/pygments/lexers/felix.py @@ -237,7 +237,7 @@ class FelixLexer(RegexLexer): ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), diff --git a/pygments/lexers/fortran.py b/pygments/lexers/fortran.py index 4c22139d..e2f95b11 100644 --- a/pygments/lexers/fortran.py +++ b/pygments/lexers/fortran.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, bygroups, include, words, using +from pygments.lexer import RegexLexer, bygroups, include, words, using, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic @@ -191,16 +191,15 @@ class FortranFixedLexer(RegexLexer): (r'(.{5})', Name.Label, 'cont-char'), (r'.*\n', using(FortranLexer)), ], - 'cont-char': [ (' ', Text, 'code'), ('0', Comment, 'code'), - ('.', Generic.Strong, 'code') + ('.', Generic.Strong, 'code'), ], - 'code': [ (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment, Text), 'root'), (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), - (r'', Text, 'root')] + default('root'), + ] } diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py index 95e68a33..ffc3a3a2 100644 --- a/pygments/lexers/haskell.py +++ b/pygments/lexers/haskell.py @@ -321,7 +321,7 @@ class AgdaLexer(RegexLexer): 'module': [ (r'\{-', Comment.Multiline, 'comment'), (r'[a-zA-Z][\w.]*', Name, '#pop'), - (r'[^a-zA-Z]+', Text) + (r'[\W0-9_]+', Text) ], 'comment': HaskellLexer.tokens['comment'], 'character': HaskellLexer.tokens['character'], diff --git a/pygments/lexers/idl.py b/pygments/lexers/idl.py index d745bcfd..a0b39492 100644 --- a/pygments/lexers/idl.py +++ b/pygments/lexers/idl.py @@ -258,12 +258,13 @@ class IDLLexer(RegexLexer): (r'\b(mod|lt|le|eq|ne|ge|gt|not|and|or|xor)\b', Operator), (r'"[^\"]*"', String.Double), (r"'[^\']*'", String.Single), - (r'\b[\+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([\+\-]?[0-9]+)?\b', Number.Float), - (r'\b\'[\+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex), - (r'\b\'[\+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct), - (r'\b[\+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long), - (r'\b[\+\-]?[0-9]+U?S?\b', Number.Integer), - (r'\b[\+\-]?[0-9]+B\b', Number), + (r'\b[+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([+\-]?[0-9]+)?\b', + Number.Float), + (r'\b\'[+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex), + (r'\b\'[+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct), + (r'\b[+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long), + (r'\b[+\-]?[0-9]+U?S?\b', Number.Integer), + (r'\b[+\-]?[0-9]+B\b', Number), (r'.', Text), ] } diff --git a/pygments/lexers/j.py b/pygments/lexers/j.py index 278374e5..f15595f8 100644 --- a/pygments/lexers/j.py +++ b/pygments/lexers/j.py @@ -48,7 +48,7 @@ class JLexer(RegexLexer): # Definitions (r'0\s+:\s*0|noun\s+define\s*$', Name.Entity, 'nounDefinition'), - (r'\b(([1-4]|13)\s+:\s*0)|((adverb|conjunction|dyad|monad|verb)\s+define)\b', + (r'(([1-4]|13)\s+:\s*0|(adverb|conjunction|dyad|monad|verb)\s+define)\b', Name.Function, 'explicitDefinition'), # Flow Control diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index 2a01cd42..5dca6832 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -97,13 +97,13 @@ class JavascriptLexer(RegexLexer): (r'`', String.Backtick, '#pop'), (r'\\\\', String.Backtick), (r'\\`', String.Backtick), - (r'\${', String.Interpol, 'interp-inside'), + (r'\$\{', String.Interpol, 'interp-inside'), (r'\$', String.Backtick), (r'[^`\\$]+', String.Backtick), ], 'interp-inside': [ # TODO: should this include single-line comments and allow nesting strings? - (r'}', String.Interpol, '#pop'), + (r'\}', String.Interpol, '#pop'), include('root'), ], # (\\\\|\\`|[^`])*`', String.Backtick), @@ -1245,32 +1245,32 @@ class EarlGreyLexer(RegexLexer): include('control'), (r'[^\S\n]+', Text), (r';;.*\n', Comment), - (r'[\[\]\{\}\:\(\)\,\;]', Punctuation), + (r'[\[\]{}:(),;]', Punctuation), (r'\\\n', Text), (r'\\', Text), include('errors'), (words(( 'with', 'where', 'when', 'and', 'not', 'or', 'in', 'as', 'of', 'is'), - prefix=r'(?<=\s|\[)', suffix=r'(?![\w\$\-])'), + prefix=r'(?<=\s|\[)', suffix=r'(?![\w$\-])'), Operator.Word), - (r'[\*@]?->', Name.Function), + (r'[*@]?->', Name.Function), (r'[+\-*/~^<>%&|?!@#.]*=', Operator.Word), (r'\.{2,3}', Operator.Word), # Range Operator (r'([+*/~^<>&|?!]+)|([#\-](?=\s))|@@+(?=\s)|=+', Operator), - (r'(?<![\w\$\-])(var|let)(?:[^\w\$])', Keyword.Declaration), + (r'(?<![\w$\-])(var|let)(?:[^\w$])', Keyword.Declaration), include('keywords'), include('builtins'), include('assignment'), (r'''(?x) - (?:()([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)| - (?<=[\s\{\[\(])(\.)([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)) + (?:()([a-zA-Z$_](?:[\w$\-]*[\w$])?)| + (?<=[\s{\[(])(\.)([a-zA-Z$_](?:[\w$\-]*[\w$])?)) (?=.*%)''', bygroups(Punctuation, Name.Tag, Punctuation, Name.Class.Start), 'dbs'), (r'[rR]?`', String.Backtick, 'bt'), (r'[rR]?```', String.Backtick, 'tbt'), - (r'(?<=[\s\[\{\(,;])\.([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)' - r'(?=[\s\]\}\),;])', String.Symbol), + (r'(?<=[\s\[{(,;])\.([a-zA-Z$_](?:[\w$\-]*[\w$])?)' + r'(?=[\s\]}),;])', String.Symbol), include('nested'), (r'(?:[rR]|[rR]\.[gmi]{1,3})?"', String, combined('stringescape', 'dqs')), (r'(?:[rR]|[rR]\.[gmi]{1,3})?\'', String, combined('stringescape', 'sqs')), @@ -1281,9 +1281,9 @@ class EarlGreyLexer(RegexLexer): include('numbers'), ], 'dbs': [ - (r'(\.)([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(?=[\[\.\s])', + (r'(\.)([a-zA-Z$_](?:[\w$\-]*[\w$])?)(?=[.\[\s])', bygroups(Punctuation, Name.Class.DBS)), - (r'(\[)([\^#][a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(\])', + (r'(\[)([\^#][a-zA-Z$_](?:[\w$\-]*[\w$])?)(\])', bygroups(Punctuation, Name.Entity.DBS, Punctuation)), (r'\s+', Text), (r'%', Operator.DBS, '#pop'), @@ -1293,29 +1293,29 @@ class EarlGreyLexer(RegexLexer): bygroups(Text.Whitespace, Text)), ], 'assignment': [ - (r'(\.)?([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)' + (r'(\.)?([a-zA-Z$_](?:[\w$\-]*[\w$])?)' r'(?=\s+[+\-*/~^<>%&|?!@#.]*\=\s)', bygroups(Punctuation, Name.Variable)) ], 'errors': [ (words(('Error', 'TypeError', 'ReferenceError'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'), Name.Exception), (r'''(?x) - (?<![\w\$]) - E\.[\w\$](?:[\w\$\-]*[\w\$])? - (?:\.[\w\$](?:[\w\$\-]*[\w\$])?)* - (?=[\(\{\[\?\!\s])''', + (?<![\w$]) + E\.[\w$](?:[\w$\-]*[\w$])? + (?:\.[\w$](?:[\w$\-]*[\w$])?)* + (?=[({\[?!\s])''', Name.Exception), ], 'control': [ (r'''(?x) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) (?!\n)\s+ (?!and|as|each\*|each|in|is|mod|of|or|when|where|with) - (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)''', + (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[\w$-]*[\w$])?)''', Keyword.Control), - (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(?!\n)\s+(?=[\'"\d\{\[\(])', + (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(?!\n)\s+(?=[\'"\d{\[(])', Keyword.Control), (r'''(?x) (?: @@ -1324,28 +1324,28 @@ class EarlGreyLexer(RegexLexer): (?<=with|each|with)| (?<=each\*|where) )(\s+) - ([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(:)''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''', bygroups(Text, Keyword.Control, Punctuation)), (r'''(?x) (?<![+\-*/~^<>%&|?!@#.])(\s+) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(:)''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''', bygroups(Text, Keyword.Control, Punctuation)), ], 'nested': [ (r'''(?x) - (?<=[a-zA-Z$0-9_\]\}\)])(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) + (?<=[\w$\]})])(\.) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) (?=\s+with(?:\s|\n))''', bygroups(Punctuation, Name.Function)), (r'''(?x) (?<!\s)(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) - (?=[\}\]\)\.,;:\s])''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?) + (?=[}\]).,;:\s])''', bygroups(Punctuation, Name.Field)), (r'''(?x) - (?<=[a-zA-Z$0-9_\]\}\)])(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) - (?=[\[\{\(:])''', + (?<=[\w$\]})])(\.) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) + (?=[\[{(:])''', bygroups(Punctuation, Name.Function)), ], 'keywords': [ @@ -1354,15 +1354,15 @@ class EarlGreyLexer(RegexLexer): 'continue', 'elif', 'expr-value', 'if', 'match', 'return', 'yield', 'pass', 'else', 'require', 'var', 'let', 'async', 'method', 'gen'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'), Keyword.Pseudo), (words(('this', 'self', '@'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$])'), Keyword.Constant), (words(( 'Function', 'Object', 'Array', 'String', 'Number', 'Boolean', 'ErrorFactory', 'ENode', 'Promise'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$])'), Keyword.Type), ], 'builtins': [ @@ -1373,20 +1373,20 @@ class EarlGreyLexer(RegexLexer): 'getChecker', 'get-checker', 'getProperty', 'get-property', 'getProjector', 'get-projector', 'consume', 'take', 'promisify', 'spawn', 'constructor'), - prefix=r'(?<![\w\-#\.])', suffix=r'(?![\w\-\.])'), + prefix=r'(?<![\w\-#.])', suffix=r'(?![\w\-.])'), Name.Builtin), (words(( 'true', 'false', 'null', 'undefined'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'), Name.Constant), ], 'name': [ - (r'@([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)', Name.Variable.Instance), - (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(\+\+|\-\-)?', + (r'@([a-zA-Z$_](?:[\w$-]*[\w$])?)', Name.Variable.Instance), + (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(\+\+|\-\-)?', bygroups(Name.Symbol, Operator.Word)) ], 'tuple': [ - (r'#[a-zA-Z_][a-zA-Z_\-0-9]*(?=[\s\{\(,;\n])', Name.Namespace) + (r'#[a-zA-Z_][\w\-]*(?=[\s{(,;])', Name.Namespace) ], 'interpoling_string': [ (r'\}', String.Interpol, '#pop'), @@ -1426,7 +1426,7 @@ class EarlGreyLexer(RegexLexer): (r'```', String.Backtick, '#pop'), (r'\n', String.Backtick), (r'\^=?', String.Escape), - (r'[^\`]+', String.Backtick), + (r'[^`]+', String.Backtick), ], 'numbers': [ (r'\d+\.(?!\.)\d*([eE][+-]?[0-9]+)?', Number.Float), @@ -1434,7 +1434,7 @@ class EarlGreyLexer(RegexLexer): (r'8r[0-7]+', Number.Oct), (r'2r[01]+', Number.Bin), (r'16r[a-fA-F0-9]+', Number.Hex), - (r'([3-79]|[1-2][0-9]|3[0-6])r[a-zA-Z\d]+(\.[a-zA-Z\d]+)?', Number.Radix), + (r'([3-79]|[12][0-9]|3[0-6])r[a-zA-Z\d]+(\.[a-zA-Z\d]+)?', Number.Radix), (r'\d+', Number.Integer) ], } diff --git a/pygments/lexers/julia.py b/pygments/lexers/julia.py index d0aa6d35..9f84b8d9 100644 --- a/pygments/lexers/julia.py +++ b/pygments/lexers/julia.py @@ -11,13 +11,16 @@ import re -from pygments.lexer import Lexer, RegexLexer, bygroups, combined, do_insertions +from pygments.lexer import Lexer, RegexLexer, bygroups, combined, \ + do_insertions, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic from pygments.util import shebang_matches, unirange __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] +line_re = re.compile('.*?\n') + class JuliaLexer(RegexLexer): """ @@ -32,13 +35,26 @@ class JuliaLexer(RegexLexer): flags = re.MULTILINE | re.UNICODE - builtins = [ + builtins = ( 'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple', 'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype', 'typemin', 'typemax', 'realmin', 'realmax', 'sizeof', 'eps', 'promote_type', 'method_exists', 'applicable', 'invoke', 'dlopen', 'dlsym', 'system', 'error', 'throw', 'assert', 'new', 'Inf', 'Nan', 'pi', 'im', - ] + ) + + keywords = ( + 'begin', 'while', 'for', 'in', 'return', 'break', 'continue', + 'macro', 'quote', 'let', 'if', 'elseif', 'else', 'try', 'catch', 'end', + 'bitstype', 'ccall', 'do', 'using', 'module', 'import', 'export', + 'importall', 'baremodule', 'immutable', + ) + + types = ( + 'Bool', 'Int', 'Int8', 'Int16', 'Int32', 'Int64', 'Uint', 'Uint8', 'Uint16', + 'Uint32', 'Uint64', 'Float32', 'Float64', 'Complex64', 'Complex128', 'Any', + 'Nothing', 'None', + ) tokens = { 'root': [ @@ -46,34 +62,29 @@ class JuliaLexer(RegexLexer): (r'[^\S\n]+', Text), (r'#=', Comment.Multiline, "blockcomment"), (r'#.*$', Comment), - (r'[]{}:(),;[@]', Punctuation), + (r'[\[\]{}:(),;@]', Punctuation), (r'\\\n', Text), (r'\\', Text), # keywords - (r'(begin|while|for|in|return|break|continue|' - r'macro|quote|let|if|elseif|else|try|catch|end|' - r'bitstype|ccall|do|using|module|import|export|' - r'importall|baremodule|immutable)\b', Keyword), (r'(local|global|const)\b', Keyword.Declaration), - (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64' - r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b', - Keyword.Type), + (words(keywords, suffix=r'\b'), Keyword), + (words(types, suffix=r'\b'), Keyword.Type), # functions (r'(function)((?:\s|\\\s)+)', - bygroups(Keyword, Name.Function), 'funcname'), + bygroups(Keyword, Name.Function), 'funcname'), # types (r'(type|typealias|abstract|immutable)((?:\s|\\\s)+)', - bygroups(Keyword, Name.Class), 'typename'), + bygroups(Keyword, Name.Class), 'typename'), # operators (r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator), (r'\.\*|\.\^|\.\\|\.\/|\\', Operator), # builtins - ('(' + '|'.join(builtins) + r')\b', Name.Builtin), + (words(builtins, suffix=r'\b'), Name.Builtin), # backticks (r'`(?s).*?`', String.Backtick), @@ -116,12 +127,12 @@ class JuliaLexer(RegexLexer): ], 'typename': [ - ('[a-zA-Z_]\w*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' - r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) + r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape), ], "blockcomment": [ (r'[^=#]', Comment.Multiline), @@ -138,7 +149,7 @@ class JuliaLexer(RegexLexer): (r'\$[a-zA-Z_]+', String.Interpol), (r'\$\(', String.Interpol, 'in-intp'), # @printf and @sprintf formats - (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[diouxXeEfFgGcrs%]', + (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^$%"\\]+', String), # unhandled special signs @@ -155,9 +166,6 @@ class JuliaLexer(RegexLexer): return shebang_matches(text, r'julia') -line_re = re.compile('.*?\n') - - class JuliaConsoleLexer(Lexer): """ For Julia console sessions. Modeled after MatlabSessionLexer. diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 41fc0fdb..af7f8105 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -564,14 +564,14 @@ class IokeLexer(RegexLexer): ], 'slashRegexp': [ - (r'(?<!\\)/[oxpniums]*', String.Regex, '#pop'), + (r'(?<!\\)/[im-psux]*', String.Regex, '#pop'), include('interpolatableText'), (r'\\/', String.Regex), (r'[^/]', String.Regex) ], 'squareRegexp': [ - (r'(?<!\\)][oxpniums]*', String.Regex, '#pop'), + (r'(?<!\\)][im-psux]*', String.Regex, '#pop'), include('interpolatableText'), (r'\\]', String.Regex), (r'[^\]]', String.Regex) diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py index 84720fab..6d591e10 100644 --- a/pygments/lexers/lisp.py +++ b/pygments/lexers/lisp.py @@ -2135,49 +2135,52 @@ class ShenLexer(RegexLexer): filenames = ['*.shen'] mimetypes = ['text/x-shen', 'application/x-shen'] - DECLARATIONS = re.findall(r'\S+', """ - datatype define defmacro defprolog defcc synonyms declare package - type function - """) - - SPECIAL_FORMS = re.findall(r'\S+', """ - lambda get let if cases cond put time freeze value load $ - protect or and not do output prolog? trap-error error - make-string /. set @p @s @v - """) - - BUILTINS = re.findall(r'\S+', """ - == = * + - / < > >= <= <-address <-vector abort absvector - absvector? address-> adjoin append arity assoc bind boolean? - bound? call cd close cn compile concat cons cons? cut destroy - difference element? empty? enable-type-theory error-to-string - eval eval-kl exception explode external fail fail-if file - findall fix fst fwhen gensym get-time hash hd hdstr hdv head - identical implementation in include include-all-but inferences - input input+ integer? intern intersection is kill language - length limit lineread loaded macro macroexpand map mapcan - maxinferences mode n->string nl nth null number? occurrences - occurs-check open os out port porters pos pr preclude - preclude-all-but print profile profile-results ps quit read - read+ read-byte read-file read-file-as-bytelist - read-file-as-string read-from-string release remove return - reverse run save set simple-error snd specialise spy step - stinput stoutput str string->n string->symbol string? subst - symbol? systemf tail tc tc? thaw tl tlstr tlv track tuple? - undefmacro unify unify! union unprofile unspecialise untrack - variable? vector vector-> vector? verified version warn when - write-byte write-to-file y-or-n? - """) - - BUILTINS_ANYWHERE = re.findall(r'\S+', """ - where skip >> _ ! <e> <!> - """) + DECLARATIONS = ( + 'datatype', 'define', 'defmacro', 'defprolog', 'defcc', + 'synonyms', 'declare', 'package', 'type', 'function', + ) + + SPECIAL_FORMS = ( + 'lambda', 'get', 'let', 'if', 'cases', 'cond', 'put', 'time', 'freeze', + 'value', 'load', '$', 'protect', 'or', 'and', 'not', 'do', 'output', + 'prolog?', 'trap-error', 'error', 'make-string', '/.', 'set', '@p', + '@s', '@v', + ) + + BUILTINS = ( + '==', '=', '*', '+', '-', '/', '<', '>', '>=', '<=', '<-address', + '<-vector', 'abort', 'absvector', 'absvector?', 'address->', 'adjoin', + 'append', 'arity', 'assoc', 'bind', 'boolean?', 'bound?', 'call', 'cd', + 'close', 'cn', 'compile', 'concat', 'cons', 'cons?', 'cut', 'destroy', + 'difference', 'element?', 'empty?', 'enable-type-theory', + 'error-to-string', 'eval', 'eval-kl', 'exception', 'explode', 'external', + 'fail', 'fail-if', 'file', 'findall', 'fix', 'fst', 'fwhen', 'gensym', + 'get-time', 'hash', 'hd', 'hdstr', 'hdv', 'head', 'identical', + 'implementation', 'in', 'include', 'include-all-but', 'inferences', + 'input', 'input+', 'integer?', 'intern', 'intersection', 'is', 'kill', + 'language', 'length', 'limit', 'lineread', 'loaded', 'macro', 'macroexpand', + 'map', 'mapcan', 'maxinferences', 'mode', 'n->string', 'nl', 'nth', 'null', + 'number?', 'occurrences', 'occurs-check', 'open', 'os', 'out', 'port', + 'porters', 'pos', 'pr', 'preclude', 'preclude-all-but', 'print', 'profile', + 'profile-results', 'ps', 'quit', 'read', 'read+', 'read-byte', 'read-file', + 'read-file-as-bytelist', 'read-file-as-string', 'read-from-string', + 'release', 'remove', 'return', 'reverse', 'run', 'save', 'set', + 'simple-error', 'snd', 'specialise', 'spy', 'step', 'stinput', 'stoutput', + 'str', 'string->n', 'string->symbol', 'string?', 'subst', 'symbol?', + 'systemf', 'tail', 'tc', 'tc?', 'thaw', 'tl', 'tlstr', 'tlv', 'track', + 'tuple?', 'undefmacro', 'unify', 'unify!', 'union', 'unprofile', + 'unspecialise', 'untrack', 'variable?', 'vector', 'vector->', 'vector?', + 'verified', 'version', 'warn', 'when', 'write-byte', 'write-to-file', + 'y-or-n?', + ) + + BUILTINS_ANYWHERE = ('where', 'skip', '>>', '_', '!', '<e>', '<!>') MAPPINGS = dict((s, Keyword) for s in DECLARATIONS) MAPPINGS.update((s, Name.Builtin) for s in BUILTINS) MAPPINGS.update((s, Keyword) for s in SPECIAL_FORMS) - valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:_-]' + valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:-]' valid_name = '%s+' % valid_symbol_chars symbol_name = r'[a-z!$%%*+,<=>?/.\'@&#_-]%s*' % valid_symbol_chars variable = r'[A-Z]%s*' % valid_symbol_chars @@ -2313,7 +2316,7 @@ class CPSALexer(SchemeLexer): # valid names for identifiers # well, names can only not consist fully of numbers # but this should be good enough for now - valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+' + valid_name = r'[\w!$%&*+,/:<=>?@^~|-]+' tokens = { 'root': [ @@ -2334,7 +2337,7 @@ class CPSALexer(SchemeLexer): # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), - (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), + (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), # constants (r'(#t|#f)', Name.Constant), diff --git a/pygments/lexers/markup.py b/pygments/lexers/markup.py index aac8d27e..02146597 100644 --- a/pygments/lexers/markup.py +++ b/pygments/lexers/markup.py @@ -204,7 +204,7 @@ class RstLexer(RegexLexer): bygroups(Text, Operator, using(this, state='inline'))), # Sourcecode directives (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' - r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', + r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', _handle_sourcecode), # A directive (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', @@ -230,7 +230,7 @@ class RstLexer(RegexLexer): (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', bygroups(using(this, state='inline'), using(this, state='inline'))), # Code blocks - (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)', + (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)', bygroups(String.Escape, Text, String, String, Text, String)), include('inline'), ], diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py index a5fcbf78..01771f55 100644 --- a/pygments/lexers/modula2.py +++ b/pygments/lexers/modula2.py @@ -290,7 +290,7 @@ class Modula2Lexer(RegexLexer): ], 'unigraph_punctuation': [ # Common Punctuation - (r'[\(\)\[\]{},.:;\|]', Punctuation), + (r'[()\[\]{},.:;|]', Punctuation), # Case Label Separator Synonym (r'!', Punctuation), # ISO # Blueprint Punctuation diff --git a/pygments/lexers/oberon.py b/pygments/lexers/oberon.py index db18259d..51dfdab6 100644 --- a/pygments/lexers/oberon.py +++ b/pygments/lexers/oberon.py @@ -47,11 +47,11 @@ class ComponentPascalLexer(RegexLexer): (r'\s+', Text), # whitespace ], 'comments': [ - (r'\(\*([^\$].*?)\*\)', Comment.Multiline), + (r'\(\*([^$].*?)\*\)', Comment.Multiline), # TODO: nested comments (* (* ... *) ... (* ... *) *) not supported! ], 'punctuation': [ - (r'[\(\)\[\]\{\},.:;\|]', Punctuation), + (r'[()\[\]{},.:;|]', Punctuation), ], 'numliterals': [ (r'[0-9A-F]+X\b', Number.Hex), # char code @@ -83,7 +83,7 @@ class ComponentPascalLexer(RegexLexer): (r'\$', Operator), ], 'identifiers': [ - (r'([a-zA-Z_\$][\w\$]*)', Name), + (r'([a-zA-Z_$][\w$]*)', Name), ], 'builtins': [ (words(( diff --git a/pygments/lexers/parasail.py b/pygments/lexers/parasail.py index 878f7d26..812e2923 100644 --- a/pygments/lexers/parasail.py +++ b/pygments/lexers/parasail.py @@ -60,7 +60,7 @@ class ParaSailLexer(RegexLexer): (r'[a-zA-Z]\w*', Name), # Operators and Punctuation (r'(<==|==>|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|' - r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\||\|=|/=|\+|-|\*|/|' + r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\|=|\||/=|\+|-|\*|/|' r'\.\.|<\.\.|\.\.<|<\.\.<)', Operator), (r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)', diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py index 776c38b8..9255216d 100644 --- a/pygments/lexers/praat.py +++ b/pygments/lexers/praat.py @@ -27,21 +27,21 @@ class PraatLexer(RegexLexer): aliases = ['praat'] filenames = ['*.praat', '*.proc', '*.psc'] - keywords = [ + keywords = ( 'if', 'then', 'else', 'elsif', 'elif', 'endif', 'fi', 'for', 'from', 'to', 'endfor', 'endproc', 'while', 'endwhile', 'repeat', 'until', 'select', 'plus', 'minus', 'demo', 'assert', 'stopwatch', 'nocheck', 'nowarn', 'noprogress', 'editor', 'endeditor', 'clearinfo', - ] + ) - functions_string = [ + functions_string = ( 'backslashTrigraphsToUnicode', 'chooseDirectory', 'chooseReadFile', 'chooseWriteFile', 'date', 'demoKey', 'do', 'environment', 'extractLine', 'extractWord', 'fixed', 'info', 'left', 'mid', 'percent', 'readFile', 'replace', 'replace_regex', 'right', 'selected', 'string', 'unicodeToBackslashTrigraphs', - ] + ) - functions_numeric = [ + functions_numeric = ( 'abs', 'appendFile', 'appendFileLine', 'appendInfo', 'appendInfoLine', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'barkToHertz', 'beginPause', 'beginSendPraat', 'besselI', 'besselK', 'beta', 'beta2', @@ -67,13 +67,13 @@ class PraatLexer(RegexLexer): 'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP', 'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine', 'writeInfo', 'writeInfoLine', - ] + ) - functions_array = [ + functions_array = ( 'linear', 'randomGauss', 'randomInteger', 'randomUniform', 'zero', - ] + ) - objects = [ + objects = ( 'Activation', 'AffineTransform', 'AmplitudeTier', 'Art', 'Artword', 'Autosegment', 'BarkFilter', 'BarkSpectrogram', 'CCA', 'Categories', 'Cepstrogram', 'Cepstrum', 'Cepstrumc', 'ChebyshevSeries', 'ClassificationTable', @@ -100,17 +100,17 @@ class PraatLexer(RegexLexer): 'Strings', 'StringsIndex', 'Table', 'TableOfReal', 'TextGrid', 'TextInterval', 'TextPoint', 'TextTier', 'Tier', 'Transition', 'VocalTract', 'VocalTractTier', 'Weight', 'WordList', - ] + ) - variables_numeric = [ + variables_numeric = ( 'macintosh', 'windows', 'unix', 'praatVersion', 'pi', 'e', 'undefined', - ] + ) - variables_string = [ + variables_string = ( 'praatVersion', 'tab', 'shellDirectory', 'homeDirectory', 'preferencesDirectory', 'newline', 'temporaryDirectory', 'defaultDirectory', - ] + ) tokens = { 'root': [ @@ -151,7 +151,7 @@ class PraatLexer(RegexLexer): (r"'(?=.*')", String.Interpol, 'string_interpolated'), (r'\.{3}', Keyword, ('#pop', 'old_arguments')), (r':', Keyword, ('#pop', 'comma_list')), - (r'[\s\n]', Text, '#pop'), + (r'\s', Text, '#pop'), ], 'procedure_call': [ (r'\s+', Text), @@ -230,7 +230,7 @@ class PraatLexer(RegexLexer): bygroups(Name.Builtin, Name.Builtin, String.Interpol), ('object_attributes', 'string_interpolated')), - (r'\.?_?[a-z][a-zA-Z0-9_.]*(\$|#)?', Text), + (r'\.?_?[a-z][\w.]*(\$|#)?', Text), (r'[\[\]]', Punctuation, 'comma_list'), (r"'(?=.*')", String.Interpol, 'string_interpolated'), ], @@ -239,7 +239,7 @@ class PraatLexer(RegexLexer): (r'\b(and|or|not|div|mod)\b', Operator.Word), ], 'string_interpolated': [ - (r'\.?[_a-z][a-zA-Z0-9_.]*[\$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', + (r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', String.Interpol), (r"'", String.Interpol, '#pop'), ], diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index f483071b..7601afa8 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -39,7 +39,7 @@ class PythonLexer(RegexLexer): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), @@ -51,8 +51,10 @@ class PythonLexer(RegexLexer): tokens = { 'root': [ (r'\n', Text), - (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), - (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', + bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", + bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), @@ -72,14 +74,22 @@ class PythonLexer(RegexLexer): include('magicfuncs'), include('magicvars'), include('backtick'), - ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), - ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), - ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), - ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), - ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), - ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), - ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), - ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), + ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', + bygroups(String.Affix, String.Double), 'tdqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", + bygroups(String.Affix, String.Single), 'tsqs'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(")', + bygroups(String.Affix, String.Double), 'dqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(')", + bygroups(String.Affix, String.Single), 'sqs'), + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), + combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), + combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), + combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), + combined('stringescape', 'sqs')), include('name'), include('numbers'), ], @@ -252,16 +262,16 @@ class Python3Lexer(RegexLexer): return [ # the old style '%s' % (...) string formatting (still valid in Py3) (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), # the new style '{}'.format(...) string formatting (r'\{' - '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name - '(\![sra])?' # conversion - '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' + '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + '(\![sra])?' # conversion + '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' '\}', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"%\{\n]+', ttype), + (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%|(\{{1,2})', ttype) @@ -700,7 +710,7 @@ class CythonLexer(RegexLexer): ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), @@ -771,18 +781,20 @@ class DgLexer(RegexLexer): (words(( 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', - 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super', - 'tuple', 'tuple\'', 'type'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), + 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', + 'super', 'tuple', 'tuple\'', 'type'), + prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), Name.Builtin), (words(( '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile', 'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate', - 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst', 'getattr', - 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', 'input', - 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', 'locals', - 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', 'print', 'repr', - 'reversed', 'round', 'setattr', 'scanl1?', 'snd', 'sorted', 'sum', 'tail', - 'take', 'takewhile', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), + 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst', + 'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', + 'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', + 'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', + 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl1?', 'snd', + 'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'), + prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), Name.Builtin), (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])", Name.Builtin.Pseudo), @@ -808,7 +820,7 @@ class DgLexer(RegexLexer): ], 'string': [ (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), diff --git a/pygments/lexers/qvt.py b/pygments/lexers/qvt.py index 5bc61310..f30e4887 100644 --- a/pygments/lexers/qvt.py +++ b/pygments/lexers/qvt.py @@ -9,7 +9,8 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer, bygroups, include, combined +from pygments.lexer import RegexLexer, bygroups, include, combined, default, \ + words from pygments.token import Text, Comment, Operator, Keyword, Punctuation, \ Name, String, Number @@ -50,23 +51,26 @@ class QVToLexer(RegexLexer): bygroups(Comment, Comment, Comment.Preproc, Comment)), # Uncomment the following if you want to distinguish between # '/*' and '/**', à la javadoc - #(r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline), + # (r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'\\\n', Text), (r'(and|not|or|xor|##?)\b', Operator.Word), - (r'([:]{1-2}=|[-+]=)\b', Operator.Word), - (r'(@|<<|>>)\b', Keyword), # stereotypes - (r'!=|<>|=|==|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator), + (r'(:{1,2}=|[-+]=)\b', Operator.Word), + (r'(@|<<|>>)\b', Keyword), # stereotypes + (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator), (r'[]{}:(),;[]', Punctuation), (r'(true|false|unlimited|null)\b', Keyword.Constant), (r'(this|self|result)\b', Name.Builtin.Pseudo), (r'(var)\b', Keyword.Declaration), (r'(from|import)\b', Keyword.Namespace, 'fromimport'), - (r'(metamodel|class|exception|primitive|enum|transformation|library)(\s+)([a-zA-Z0-9_]+)', + (r'(metamodel|class|exception|primitive|enum|transformation|' + r'library)(\s+)(\w+)', bygroups(Keyword.Word, Text, Name.Class)), - (r'(exception)(\s+)([a-zA-Z0-9_]+)', bygroups(Keyword.Word, Text, Name.Exception)), + (r'(exception)(\s+)(\w+)', + bygroups(Keyword.Word, Text, Name.Exception)), (r'(main)\b', Name.Function), - (r'(mapping|helper|query)(\s+)', bygroups(Keyword.Declaration, Text), 'operation'), + (r'(mapping|helper|query)(\s+)', + bygroups(Keyword.Declaration, Text), 'operation'), (r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'), (r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b', Keyword.Type), @@ -75,46 +79,45 @@ class QVToLexer(RegexLexer): ("'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), - # (r'([a-zA-Z_][a-zA-Z0-9_]*)(::)([a-zA-Z_][a-zA-Z0-9_]*)', + # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)', # bygroups(Text, Text, Text)), - ], + ], 'fromimport': [ (r'(?:[ \t]|\\\n)+', Text), - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), - (r'', Text, '#pop'), - ], + (r'[a-zA-Z_][\w.]*', Name.Namespace), + default('#pop'), + ], 'operation': [ (r'::', Text), - (r'(.*::)([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*(\()', bygroups(Text,Name.Function, Text), '#pop') - ], + (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()', + bygroups(Text, Name.Function, Text, Punctuation), '#pop') + ], 'assert': [ (r'(warning|error|fatal)\b', Keyword, '#pop'), - (r'', Text, '#pop') # all else: go back - ], + default('#pop'), # all else: go back + ], 'keywords': [ - (r'(abstract|access|any|assert|' - r'blackbox|break|case|collect|collectNested|' - r'collectOne|collectselect|collectselectOne|composes|' - r'compute|configuration|constructor|continue|datatype|' - r'default|derived|disjuncts|do|elif|else|end|' - r'endif|except|exists|extends|' - r'forAll|forEach|forOne|from|if|' - r'implies|in|inherits|init|inout|' - r'intermediate|invresolve|invresolveIn|invresolveone|' - r'invresolveoneIn|isUnique|iterate|late|let|' - r'literal|log|map|merges|' - r'modeltype|new|object|one|' - r'ordered|out|package|population|' - r'property|raise|readonly|references|refines|' - r'reject|resolve|resolveIn|resolveone|resolveoneIn|' - r'return|select|selectOne|sortedBy|static|switch|' - r'tag|then|try|typedef|' - r'unlimited|uses|when|where|while|with|' - r'xcollect|xmap|xselect)\b', Keyword), + (words(( + 'abstract', 'access', 'any', 'assert', 'blackbox', 'break', + 'case', 'collect', 'collectNested', 'collectOne', 'collectselect', + 'collectselectOne', 'composes', 'compute', 'configuration', + 'constructor', 'continue', 'datatype', 'default', 'derived', + 'disjuncts', 'do', 'elif', 'else', 'end', 'endif', 'except', + 'exists', 'extends', 'forAll', 'forEach', 'forOne', 'from', 'if', + 'implies', 'in', 'inherits', 'init', 'inout', 'intermediate', + 'invresolve', 'invresolveIn', 'invresolveone', 'invresolveoneIn', + 'isUnique', 'iterate', 'late', 'let', 'literal', 'log', 'map', + 'merges', 'modeltype', 'new', 'object', 'one', 'ordered', 'out', + 'package', 'population', 'property', 'raise', 'readonly', + 'references', 'refines', 'reject', 'resolve', 'resolveIn', + 'resolveone', 'resolveoneIn', 'return', 'select', 'selectOne', + 'sortedBy', 'static', 'switch', 'tag', 'then', 'try', 'typedef', + 'unlimited', 'uses', 'when', 'where', 'while', 'with', 'xcollect', + 'xmap', 'xselect'), suffix=r'\b'), Keyword), ], # There is no need to distinguish between String.Single and @@ -127,18 +130,18 @@ class QVToLexer(RegexLexer): 'stringescape': [ (r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape) ], - 'dqs': [ # double-quoted string + 'dqs': [ # double-quoted string (r'"', String, '#pop'), (r'\\\\|\\"', String.Escape), include('strings') ], - 'sqs': [ # single-quoted string + 'sqs': [ # single-quoted string (r"'", String, '#pop'), (r"\\\\|\\'", String.Escape), include('strings') ], 'name': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], # numbers: excerpt taken from the python lexer 'numbers': [ @@ -146,5 +149,4 @@ class QVToLexer(RegexLexer): (r'\d+[eE][+-]?[0-9]+', Number.Float), (r'\d+', Number.Integer) ], - } - + } diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index 103b4ad0..6dd6e8b9 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -42,8 +42,7 @@ class SparqlLexer(RegexLexer): u'\u2c00-\u2fef' u'\u3001-\ud7ff' u'\uf900-\ufdcf' - u'\ufdf0-\ufffd' - u'\U00010000-\U000effff') + u'\ufdf0-\ufffd') PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_') @@ -56,7 +55,7 @@ class SparqlLexer(RegexLexer): HEX_GRP = '0-9A-Fa-f' - PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&""()*+,;=/?#@%' + PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%' # terminal productions :: @@ -191,7 +190,7 @@ class TurtleLexer(RegexLexer): flags = re.IGNORECASE patterns = { - 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range + 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)' } @@ -258,8 +257,7 @@ class TurtleLexer(RegexLexer): (r'.', String, '#pop'), ], 'end-of-string': [ - - (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)', + (r'(@)([a-z]+(:?-[a-z0-9]+)*)', bygroups(Operator, Generic.Emph), '#pop:2'), (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'), diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py index 4dd9594b..ac0f7533 100644 --- a/pygments/lexers/scripting.py +++ b/pygments/lexers/scripting.py @@ -1020,11 +1020,11 @@ class EasytrieveLexer(RegexLexer): (r"'(''|[^'])*'", String), (r'\s+', Whitespace), # Everything else just belongs to a name - (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name), ], 'after_declaration': [ (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function), - ('', Whitespace, '#pop') + default('#pop'), ], 'after_macro_argument': [ (r'\*.*\n', Comment.Single, '#pop'), @@ -1032,7 +1032,7 @@ class EasytrieveLexer(RegexLexer): (_OPERATORS_PATTERN, Operator, '#pop'), (r"'(''|[^'])*'", String, '#pop'), # Everything else just belongs to a name - (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name), ], } _COMMENT_LINE_REGEX = re.compile(r'^\s*\*') @@ -1122,7 +1122,8 @@ class EasytrieveLexer(RegexLexer): class JclLexer(RegexLexer): """ - `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_ + `Job Control Language (JCL) + <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_ is a scripting language used on mainframe platforms to instruct the system on how to run a batch job or start a subsystem. It is somewhat comparable to MS DOS batch and Unix shell scripts. @@ -1145,10 +1146,10 @@ class JclLexer(RegexLexer): ], 'statement': [ (r'\s*\n', Whitespace, '#pop'), - (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)', + (r'([a-z]\w*)(\s+)(exec|job)(\s*)', bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace), 'option'), - (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'), + (r'[a-z]\w*', Name.Variable, 'statement_command'), (r'\s+', Whitespace, 'statement_command'), ], 'statement_command': [ @@ -1167,10 +1168,10 @@ class JclLexer(RegexLexer): (r'\*', Name.Builtin), (r'[\[\](){}<>;,]', Punctuation), (r'[-+*/=&%]', Operator), - (r'[a-z_][a-z_0-9]*', Name), - (r'[0-9]+\.[0-9]*', Number.Float), - (r'\.[0-9]+', Number.Float), - (r'[0-9]+', Number.Integer), + (r'[a-z_]\w*', Name), + (r'\d+\.\d*', Number.Float), + (r'\.\d+', Number.Float), + (r'\d+', Number.Integer), (r"'", String, 'option_string'), (r'[ \t]+', Whitespace, 'option_comment'), (r'\.', Punctuation), diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py index d3e4c460..cef147b8 100644 --- a/pygments/lexers/supercollider.py +++ b/pygments/lexers/supercollider.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, include, words +from pygments.lexer import RegexLexer, include, words, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation @@ -43,7 +43,7 @@ class SuperColliderLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop'), ], 'badregex': [ (r'\n', Text, '#pop') @@ -79,8 +79,8 @@ class SuperColliderLexer(RegexLexer): 'thisFunctionDef', 'thisFunction', 'thisMethod', 'thisProcess', 'thisThread', 'this'), suffix=r'\b'), Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), - (r'\\?[$a-zA-Z_][a-zA-Z0-9_]*', String.Symbol), + (r'[$a-zA-Z_]\w*', Name.Other), + (r'\\?[$a-zA-Z_]\w*', String.Symbol), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), diff --git a/pygments/lexers/testing.py b/pygments/lexers/testing.py index 0bdebe74..be8b6f71 100644 --- a/pygments/lexers/testing.py +++ b/pygments/lexers/testing.py @@ -147,7 +147,7 @@ class TAPLexer(RegexLexer): (r'^TAP version \d+\n', Name.Namespace), # Specify a plan with a plan line. - (r'^1..\d+', Keyword.Declaration, 'plan'), + (r'^1\.\.\d+', Keyword.Declaration, 'plan'), # A test failure (r'^(not ok)([^\S\n]*)(\d*)', diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py index 60a101cc..f8c7d0a9 100644 --- a/pygments/lexers/theorem.py +++ b/pygments/lexers/theorem.py @@ -390,20 +390,23 @@ class LeanLexer(RegexLexer): flags = re.MULTILINE | re.UNICODE - keywords1 = ('import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', 'renaming', - 'inline', 'hiding', 'exposing', 'parameter', 'parameters', 'conjecture', - 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', 'print', 'theorem', - 'axiom', 'inductive', 'structure', 'universe', 'alias', 'help', - 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', 'calc_subst', 'calc_refl', - 'infix', 'infixl', 'infixr', 'notation', 'eval', 'check', 'exit', 'coercion', 'end', - 'private', 'using', 'namespace', 'including', 'instance', 'section', 'context', - 'protected', 'expose', 'export', 'set_option', 'add_rewrite', 'extends', - 'open', 'example', 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible' + keywords1 = ( + 'import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', + 'renaming', 'inline', 'hiding', 'exposing', 'parameter', 'parameters', + 'conjecture', 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', + 'print', 'theorem', 'axiom', 'inductive', 'structure', 'universe', 'alias', + 'help', 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', + 'calc_subst', 'calc_refl', 'infix', 'infixl', 'infixr', 'notation', 'eval', + 'check', 'exit', 'coercion', 'end', 'private', 'using', 'namespace', + 'including', 'instance', 'section', 'context', 'protected', 'expose', + 'export', 'set_option', 'add_rewrite', 'extends', 'open', 'example', + 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible', ) keywords2 = ( - 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', 'take', - 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', 'proof', 'qed', 'calc', 'match' + 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', + 'take', 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', + 'proof', 'qed', 'calc', 'match', ) keywords3 = ( @@ -414,10 +417,10 @@ class LeanLexer(RegexLexer): operators = ( '!=', '#', '&', '&&', '*', '+', '-', '/', '@', '!', '`', '-.', '->', '.', '..', '...', '::', ':>', ';', ';;', '<', - '<-', '=', '==', '>', '_', '`', '|', '||', '~', '=>', '<=', '>=', + '<-', '=', '==', '>', '_', '|', '||', '~', '=>', '<=', '>=', '/\\', '\\/', u'∀', u'Π', u'λ', u'↔', u'∧', u'∨', u'≠', u'≤', u'≥', - u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', u'⌟', u'≡', - u'⟨', u'⟩' + u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', + u'⌟', u'≡', u'⟨', u'⟩', ) punctuation = ('(', ')', ':', '{', '}', '[', ']', u'⦃', u'⦄', ':=', ',') diff --git a/pygments/lexers/typoscript.py b/pygments/lexers/typoscript.py index 25bfef9c..407847ed 100644 --- a/pygments/lexers/typoscript.py +++ b/pygments/lexers/typoscript.py @@ -44,11 +44,11 @@ class TypoScriptCssDataLexer(RegexLexer): # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} - (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)', + (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace @@ -58,8 +58,8 @@ class TypoScriptCssDataLexer(RegexLexer): (r'(?<!(#|\'|"))(?:#(?!(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))[^\n#]+|//[^\n]*)', Comment), # other - (r'[<>,:=\.\*%+\|]', String), - (r'[\w"_\-!\/&;\(\)\{\}]+', String), + (r'[<>,:=.*%+|]', String), + (r'[\w"\-!/&;(){}]+', String), ] } @@ -79,22 +79,22 @@ class TypoScriptHtmlDataLexer(RegexLexer): # INCLUDE_TYPOSCRIPT (r'(INCLUDE_TYPOSCRIPT)', Name.Class), # Language label or extension resource FILE:... or LLL:... or EXT:... - (r'(EXT|FILE|LLL):[^\}\n"]*', String), + (r'(EXT|FILE|LLL):[^}\n"]*', String), # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} - (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)', + (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace (r'\s+', Text), # other - (r'[<>,:=\.\*%+\|]', String), - (r'[\w"_\-!\/&;\(\)\{\}#]+', String), + (r'[<>,:=.*%+|]', String), + (r'[\w"\-!/&;(){}#]+', String), ] } @@ -138,38 +138,38 @@ class TypoScriptLexer(RegexLexer): r'version)([^\]]*)(\])', bygroups(String.Symbol, Name.Constant, Text, String.Symbol)), # Functions - (r'(?=[\w\-_])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|' + (r'(?=[\w\-])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|' r'filelink|if|imageLinkWrap|imgResource|makelinks|numRows|numberFormat|' r'parseFunc|replacement|round|select|split|stdWrap|strPad|tableStyle|' - r'tags|textStyle|typolink)(?![\w\-_])', Name.Function), + r'tags|textStyle|typolink)(?![\w\-])', Name.Function), # Toplevel objects and _* (r'(?:(=?\s*<?\s+|^\s*))(cObj|field|config|content|constants|FEData|' r'file|frameset|includeLibs|lib|page|plugin|register|resources|sitemap|' - r'sitetitle|styles|temp|tt_[^:\.\n\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|' - r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-_])', + r'sitetitle|styles|temp|tt_[^:.\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|' + r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-])', bygroups(Operator, Name.Builtin)), # Content objects - (r'(?=[\w\-_])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|' + (r'(?=[\w\-])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|' r'CTABLE|EDITPANEL|FILE|FILES|FLUIDTEMPLATE|FORM|HMENU|HRULER|HTML|' r'IMAGE|IMGTEXT|IMG_RESOURCE|LOAD_REGISTER|MEDIA|MULTIMEDIA|OTABLE|' r'PAGE|QTOBJECT|RECORDS|RESTORE_REGISTER|SEARCHRESULT|SVG|SWFOBJECT|' - r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-_])', Name.Class), + r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-])', Name.Class), # Menu states - (r'(?=[\w\-_])(ACT|ACTIFSUB|ACTIFSUBRO|ACTRO|CUR|CURIFSUB|CURIFSUBRO|' - r'CURRO|IFSUB|IFSUBRO|NO|SPC|USERDEF1|USERDEF1RO|USERDEF2|USERDEF2RO|' - r'USR|USRRO)', Name.Class), + (r'(?=[\w\-])(ACTIFSUBRO|ACTIFSUB|ACTRO|ACT|CURIFSUBRO|CURIFSUB|CURRO|' + r'CUR|IFSUBRO|IFSUB|NO|SPC|USERDEF1RO|USERDEF1|USERDEF2RO|USERDEF2|' + r'USRRO|USR)', Name.Class), # Menu objects - (r'(?=[\w\-_])(GMENU|GMENU_FOLDOUT|GMENU_LAYERS|IMGMENU|IMGMENUITEM|' - r'JSMENU|JSMENUITEM|TMENU|TMENUITEM|TMENU_LAYERS)', Name.Class), + (r'(?=[\w\-])(GMENU_FOLDOUT|GMENU_LAYERS|GMENU|IMGMENUITEM|IMGMENU|' + r'JSMENUITEM|JSMENU|TMENUITEM|TMENU_LAYERS|TMENU)', Name.Class), # PHP objects - (r'(?=[\w\-_])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class), - (r'(?=[\w\-_])(userFunc)(?![\w\-_])', Name.Function), + (r'(?=[\w\-])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class), + (r'(?=[\w\-])(userFunc)(?![\w\-])', Name.Function), ], 'whitespace': [ (r'\s+', Text), ], 'html':[ - (r'<[^\s][^\n>]*>', using(TypoScriptHtmlDataLexer)), + (r'<\S[^\n>]*>', using(TypoScriptHtmlDataLexer)), (r'&[^;\n]*;', String), (r'(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))', bygroups(Name.Class, Text, String.Symbol, using(TypoScriptCssDataLexer))), @@ -182,28 +182,28 @@ class TypoScriptLexer(RegexLexer): ], 'label': [ # Language label or extension resource FILE:... or LLL:... or EXT:... - (r'(EXT|FILE|LLL):[^\}\n"]*', String), + (r'(EXT|FILE|LLL):[^}\n"]*', String), # Path to a resource - (r'(?![^\w\-_])([\w\-_]+(?:/[\w\-_]+)+/?)([^\s]*\n)', + (r'(?![^\w\-])([\w\-]+(?:/[\w\-]+)+/?)(\S*\n)', bygroups(String, String)), ], 'punctuation': [ - (r'[,\.]', Punctuation), + (r'[,.]', Punctuation), ], 'operator': [ - (r'[<>,:=\.\*%+\|]', Operator), + (r'[<>,:=.*%+|]', Operator), ], 'structure': [ # Brackets and braces - (r'[\{\}\(\)\[\]\\\\]', String.Symbol), + (r'[{}()\[\]\\]', String.Symbol), ], 'constant': [ # Constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # Constant: {register:somevalue} - (r'(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})', + (r'(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})', bygroups(String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol)), # constant # Hex color: #ff0077 @@ -216,7 +216,7 @@ class TypoScriptLexer(RegexLexer): (r'(\s*#\s*\n)', Comment), ], 'other': [ - (r'[\w"\-_!\/&;]+', Text), + (r'[\w"\-!/&;]+', Text), ], } diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 90d8d292..437913e9 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -45,7 +45,7 @@ class VCLLexer(RegexLexer): include('comments'), (r'(\.\w+)(\s*=\s*)([^;]*)(;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'}', Punctuation, '#pop'), + (r'\}', Punctuation, '#pop'), ], 'acl': [ include('whitespace'), @@ -53,18 +53,18 @@ class VCLLexer(RegexLexer): (r'[!/]+', Operator), (r';', Punctuation), (r'\d+', Number), - (r'}', Punctuation, '#pop'), + (r'\}', Punctuation, '#pop'), ], 'backend': [ include('whitespace'), (r'(\.probe)(\s*=\s*)(\w+)(;)', bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)), - (r'(\.probe)(\s*=\s*)({)', + (r'(\.probe)(\s*=\s*)(\{)', bygroups(Name.Attribute, Operator, Punctuation), 'probe'), (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'{', Punctuation, '#push'), - (r'}', Punctuation, '#pop'), + (r'\{', Punctuation, '#push'), + (r'\}', Punctuation, '#pop'), ], 'statements': [ (r'(\d\.)?\d+[sdwhmy]', Literal.Date), @@ -91,9 +91,9 @@ class VCLLexer(RegexLexer): 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', - 'server.ip', 'bereq.uncacheable', 'now'), suffix=r'(\b|$)'), + 'server.ip', 'bereq.uncacheable', 'now'), suffix=r'\b'), Name.Variable), - (r'[!%&+*-,/<.}{>=|~]+', Operator), + (r'[!%&+*\-,/<.}{>=|~]+', Operator), (r'[();]', Punctuation), (r'[,]+', Punctuation), @@ -105,15 +105,15 @@ class VCLLexer(RegexLexer): (r'storage\.\w+\.\w+\b', Name.Variable), (words(('true', 'false')), Name.Builtin), (r'\d+\b', Number), - (r'(backend)(\s+\w+)(\s*{)', + (r'(backend)(\s+\w+)(\s*\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(probe\s)(\s*\w+\s)({)', + (r'(probe\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'), - (r'(acl\s)(\s*\w+\s)({)', + (r'(acl\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'), (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved, Name.Constant, Punctuation)), - (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', + (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)', bygroups(Keyword, Name.Function, Punctuation)), (r'([a-zA-Z_]\w*)' r'(\.)' @@ -139,12 +139,12 @@ class VCLLexer(RegexLexer): ], 'multistring': [ (r'[^"}]', String), - (r'"}', String, '#pop'), + (r'"\}', String, '#pop'), (r'["}]', String), ], 'whitespace': [ (r'L?"', String, 'string'), - (r'{"', String, 'multistring'), + (r'\{"', String, 'multistring'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation @@ -176,7 +176,7 @@ class VCLSnippetLexer(VCLLexer): r'storage)($|\.\*)', Name.Variable), ], 'snippetspost': [ - (r'(backend)(\b|$)', Keyword.Reserved), + (r'(backend)\b', Keyword.Reserved), ], 'root': [ include('snippetspre'), |