diff options
author | leushenko <alex.gilding@talktalk.net> | 2013-05-06 15:25:15 +0100 |
---|---|---|
committer | leushenko <alex.gilding@talktalk.net> | 2013-05-06 15:25:15 +0100 |
commit | ecac9a75d6350e60c4afe1109df5a257560f10d5 (patch) | |
tree | 21f80291b0e6e19948a79a747299eebc09eb4451 | |
parent | 392a48daf85bd2f47e834dfddeb9233a0f6fa2af (diff) | |
parent | f65740e54d0d22a4208425947bf9f42d21e33194 (diff) | |
download | pygments-ecac9a75d6350e60c4afe1109df5a257560f10d5.tar.gz |
Merged birkenfeld/pygments-main into default
-rw-r--r-- | pygments/lexers/_mapping.py | 3 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 364 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 2 | ||||
-rw-r--r-- | pygments/lexers/other.py | 4 | ||||
-rw-r--r-- | pygments/lexers/shell.py | 28 | ||||
-rw-r--r-- | pygments/lexers/text.py | 6 | ||||
-rw-r--r-- | pygments/lexers/web.py | 2 | ||||
-rw-r--r-- | tests/examplefiles/Get-CommandDefinitionHtml.ps1 | 66 | ||||
-rw-r--r-- | tests/examplefiles/RoleQ.pm6 | 23 | ||||
-rw-r--r-- | tests/examplefiles/grammar-test.p6 | 22 | ||||
-rw-r--r-- | tests/examplefiles/test.p6 | 252 | ||||
-rw-r--r-- | tests/examplefiles/type.lisp | 16 |
12 files changed, 768 insertions, 20 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 4b1cdf57..03bb99c3 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -210,6 +210,7 @@ LEXERS = { 'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)), 'OpaLexer': ('pygments.lexers.functional', 'Opa', ('opa',), ('*.opa',), ('text/x-opa',)), 'OpenEdgeLexer': ('pygments.lexers.other', 'OpenEdge ABL', ('openedge', 'abl', 'progress'), ('*.p', '*.cls'), ('text/x-openedge', 'application/x-openedge')), + 'Perl6Lexer': ('pygments.lexers.agile', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6'), ('text/x-perl6', 'application/x-perl6')), 'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')), 'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]', '*.inc'), ('text/x-php',)), 'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)), @@ -217,7 +218,7 @@ LEXERS = { 'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)), 'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)), 'PovrayLexer': ('pygments.lexers.other', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)), - 'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'posh', 'ps1'), ('*.ps1',), ('text/x-powershell',)), + 'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'posh', 'ps1', 'psm1'), ('*.ps1','*.psm1'), ('text/x-powershell',)), 'PrologLexer': ('pygments.lexers.compiled', 'Prolog', ('prolog',), ('*.prolog', '*.pro', '*.pl'), ('text/x-prolog',)), 'PropertiesLexer': ('pygments.lexers.text', 'Properties', ('properties',), ('*.properties',), ('text/x-java-properties',)), 'ProtoBufLexer': ('pygments.lexers.other', 'Protocol Buffer', ('protobuf',), ('*.proto',), ()), diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 3c1525d0..078f0181 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ - LexerContext, include, combined, do_insertions, bygroups, using + LexerContext, include, combined, do_insertions, bygroups, using, this from pygments.token import Error, Text, Other, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches @@ -23,7 +23,7 @@ __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer', 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', - 'FancyLexer', 'DgLexer'] + 'FancyLexer', 'DgLexer', 'Perl6Lexer'] # b/w compatibility from pygments.lexers.functional import SchemeLexer @@ -1922,3 +1922,363 @@ class DgLexer(RegexLexer): (r"'''", String, '#pop') ], } + +class Perl6Lexer(ExtendedRegexLexer): + """ + For `Perl 6 <http://www.perl6.org>`_ source code. + """ + + name = 'Perl6' + aliases = ['perl6', 'pl6'] + filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', + '*.6pm', '*.p6m', '*.pm6'] + mimetypes = ['text/x-perl6', 'application/x-perl6'] + flags = re.MULTILINE | re.DOTALL | re.UNICODE + + PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" + + PERL6_KEYWORDS = ( + 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT', + 'KEEP', 'LAST', 'LEAVE', 'NEXT', 'POST', 'PRE', 'START', 'TEMP', + 'UNDO', 'as', 'assoc', 'async', 'augment', 'binary', 'break', 'but', + 'cached', 'category', 'class', 'constant', 'contend', 'continue', + 'copy', 'deep', 'default', 'defequiv', 'defer', 'die', 'do', 'else', + 'elsif', 'enum', 'equiv', 'exit', 'export', 'fail', 'fatal', 'for', + 'gather', 'given', 'goto', 'grammar', 'handles', 'has', 'if', 'inline', + 'irs', 'is', 'last', 'leave', 'let', 'lift', 'loop', 'looser', 'macro', + 'make', 'maybe', 'method', 'module', 'multi', 'my', 'next', 'of', + 'ofs', 'only', 'oo', 'ors', 'our', 'package', 'parsed', 'prec', + 'proto', 'readonly', 'redo', 'ref', 'regex', 'reparsed', 'repeat', + 'require', 'required', 'return', 'returns', 'role', 'rule', 'rw', + 'self', 'slang', 'state', 'sub', 'submethod', 'subset', 'supersede', + 'take', 'temp', 'tighter', 'token', 'trusts', 'try', 'unary', + 'unless', 'until', 'use', 'warn', 'when', 'where', 'while', 'will', + ) + + PERL6_BUILTINS = ( + 'ACCEPTS', 'HOW', 'REJECTS', 'VAR', 'WHAT', 'WHENCE', 'WHERE', 'WHICH', + 'WHO', 'abs', 'acos', 'acosec', 'acosech', 'acosh', 'acotan', 'acotanh', + 'all', 'any', 'approx', 'arity', 'asec', 'asech', 'asin', 'asinh' + 'assuming', 'atan', 'atan2', 'atanh', 'attr', 'bless', 'body', 'by' + 'bytes', 'caller', 'callsame', 'callwith', 'can', 'capitalize', 'cat', + 'ceiling', 'chars', 'chmod', 'chomp', 'chop', 'chr', 'chroot', + 'circumfix', 'cis', 'classify', 'clone', 'close', 'cmp_ok', 'codes', + 'comb', 'connect', 'contains', 'context', 'cos', 'cosec', 'cosech', + 'cosh', 'cotan', 'cotanh', 'count', 'defined', 'delete', 'diag', + 'dies_ok', 'does', 'e', 'each', 'eager', 'elems', 'end', 'eof', 'eval', + 'eval_dies_ok', 'eval_elsewhere', 'eval_lives_ok', 'evalfile', 'exists', + 'exp', 'first', 'flip', 'floor', 'flunk', 'flush', 'fmt', 'force_todo', + 'fork', 'from', 'getc', 'gethost', 'getlogin', 'getpeername', 'getpw', + 'gmtime', 'graphs', 'grep', 'hints', 'hyper', 'im', 'index', 'infix', + 'invert', 'is_approx', 'is_deeply', 'isa', 'isa_ok', 'isnt', 'iterator', + 'join', 'key', 'keys', 'kill', 'kv', 'lastcall', 'lazy', 'lc', 'lcfirst', + 'like', 'lines', 'link', 'lives_ok', 'localtime', 'log', 'log10', 'map', + 'max', 'min', 'minmax', 'name', 'new', 'nextsame', 'nextwith', 'nfc', + 'nfd', 'nfkc', 'nfkd', 'nok_error', 'nonce', 'none', 'normalize', 'not', + 'nothing', 'ok', 'once', 'one', 'open', 'opendir', 'operator', 'ord', + 'p5chomp', 'p5chop', 'pack', 'pair', 'pairs', 'pass', 'perl', 'pi', + 'pick', 'plan', 'plan_ok', 'polar', 'pop', 'pos', 'postcircumfix', + 'postfix', 'pred', 'prefix', 'print', 'printf', 'push', 'quasi', + 'quotemeta', 'rand', 're', 'read', 'readdir', 'readline', 'reduce', + 'reverse', 'rewind', 'rewinddir', 'rindex', 'roots', 'round', + 'roundrobin', 'run', 'runinstead', 'sameaccent', 'samecase', 'say', + 'sec', 'sech', 'sech', 'seek', 'shape', 'shift', 'sign', 'signature', + 'sin', 'sinh', 'skip', 'skip_rest', 'sleep', 'slurp', 'sort', 'splice', + 'split', 'sprintf', 'sqrt', 'srand', 'strand', 'subst', 'substr', 'succ', + 'sum', 'symlink', 'tan', 'tanh', 'throws_ok', 'time', 'times', 'to', + 'todo', 'trim', 'trim_end', 'trim_start', 'true', 'truncate', 'uc', + 'ucfirst', 'undef', 'undefine', 'uniq', 'unlike', 'unlink', 'unpack', + 'unpolar', 'unshift', 'unwrap', 'use_ok', 'value', 'values', 'vec', + 'version_lt', 'void', 'wait', 'want', 'wrap', 'write', 'zip', + ) + + PERL6_BUILTIN_CLASSES = ( + 'Abstraction', 'Any', 'AnyChar', 'Array', 'Associative', 'Bag', 'Bit', + 'Blob', 'Block', 'Bool', 'Buf', 'Byte', 'Callable', 'Capture', 'Char', 'Class', + 'Code', 'Codepoint', 'Comparator', 'Complex', 'Decreasing', 'Exception', + 'Failure', 'False', 'Grammar', 'Grapheme', 'Hash', 'IO', 'Increasing', + 'Int', 'Junction', 'KeyBag', 'KeyExtractor', 'KeyHash', 'KeySet', + 'KitchenSink', 'List', 'Macro', 'Mapping', 'Match', 'Matcher', 'Method', + 'Module', 'Num', 'Object', 'Ordered', 'Ordering', 'OrderingPair', + 'Package', 'Pair', 'Positional', 'Proxy', 'Range', 'Rat', 'Regex', + 'Role', 'Routine', 'Scalar', 'Seq', 'Set', 'Signature', 'Str', 'StrLen', + 'StrPos', 'Sub', 'Submethod', 'True', 'UInt', 'Undef', 'Version', 'Void', + 'Whatever', 'bit', 'bool', 'buf', 'buf1', 'buf16', 'buf2', 'buf32', + 'buf4', 'buf64', 'buf8', 'complex', 'int', 'int1', 'int16', 'int2', + 'int32', 'int4', 'int64', 'int8', 'num', 'rat', 'rat1', 'rat16', 'rat2', + 'rat32', 'rat4', 'rat64', 'rat8', 'uint', 'uint1', 'uint16', 'uint2', + 'uint32', 'uint4', 'uint64', 'uint8', 'utf16', 'utf32', 'utf8', + ) + + PERL6_OPERATORS = ( + 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div', + 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm', + 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx', + '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^', + '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&', + 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^', + '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^', + '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv', + '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so', + 'not', '<==', '==>', '<<==', '==>>', + ) + + # Perl 6 has a *lot* of possible bracketing characters + # this list was lifted from STD.pm6 (https://github.com/perl6/std) + PERL6_BRACKETS = { + u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', u'\u007b' : u'\u007d', + u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', + u'\u2018' : u'\u2019', u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d', + u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', u'\u2045' : u'\u2046', + u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', + u'\u220a' : u'\u220d', u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd', + u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', u'\u2266' : u'\u2267', + u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', u'\u226e' : u'\u226f', u'\u2270' : u'\u2271', + u'\u2272' : u'\u2273', u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279', + u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', u'\u2280' : u'\u2281', + u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', + u'\u228a' : u'\u228b', u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8', + u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', u'\u22a9' : u'\u2ae3', + u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', + u'\u22b6' : u'\u22b7', u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1', + u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', u'\u22dc' : u'\u22dd', + u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', + u'\u22e6' : u'\u22e7', u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed', + u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', u'\u22f4' : u'\u22fc', + u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', + u'\u2329' : u'\u232a', u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b', + u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', u'\u2772' : u'\u2773', + u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', + u'\u27dd' : u'\u27de', u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7', + u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', u'\u2985' : u'\u2986', + u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', + u'\u298f' : u'\u2990', u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996', + u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', u'\u29cf' : u'\u29d0', + u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', + u'\u29f8' : u'\u29f9', u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e', + u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', u'\u2a79' : u'\u2a7a', + u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', + u'\u2a8b' : u'\u2a8c', u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96', + u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', u'\u2aa1' : u'\u2aa2', + u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad', + u'\u2aaf' : u'\u2ab0', u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe', + u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', u'\u2ac5' : u'\u2ac6', + u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', + u'\u2ad5' : u'\u2ad6', u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa', + u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', u'\u2e0c' : u'\u2e0d', + u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', + u'\u300c' : u'\u300d', u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015', + u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', u'\u301d' : u'\u301e', + u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', + u'\ufe39' : u'\ufe3a', u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40', + u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', u'\ufe59' : u'\ufe5a', + u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', + u'\uff3b' : u'\uff3d', u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63', + } + + def _build_word_match(words, boundary_regex_fragment = None, prefix = '', suffix = ''): + if boundary_regex_fragment is None: + return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + suffix + r')\b' + else: + return r'(?<!' + boundary_regex_fragment + ')' + prefix + '(' + \ + r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + '(?!' + boundary_regex_fragment + ')' + + def brackets_callback(token_class): + def callback(lexer, match, context): + groups = match.groupdict() + opening_chars = groups['delimiter'] + n_chars = len(opening_chars) + adverbs = groups.get('adverbs') + + closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0]) + text = context.text + + if closer is None: # it's not a mirrored character, which means we + # just need to look for the next occurrence + + end_pos = text.find(opening_chars, match.start('delimiter') + n_chars) + else: # we need to look for the corresponding closing character, + # keep nesting in mind + closing_chars = closer * n_chars + nesting_level = 1 + + search_pos = match.start('delimiter') + + while nesting_level > 0: + next_open_pos = text.find(opening_chars, search_pos + n_chars) + next_close_pos = text.find(closing_chars, search_pos + n_chars) + + if next_close_pos == -1: + next_close_pos = len(text) + nesting_level = 0 + elif next_open_pos != -1 and next_open_pos < next_close_pos: + nesting_level += 1 + search_pos = next_open_pos + else: # next_close_pos < next_open_pos + nesting_level -= 1 + search_pos = next_close_pos + + end_pos = next_close_pos + + if adverbs is not None and re.search(r':to\b', adverbs): + heredoc_terminator = text[match.start('delimiter') + n_chars : end_pos] + end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[ match.end('delimiter') : ], re.MULTILINE) + + if end_heredoc: + end_pos = match.end('delimiter') + end_heredoc.end() + else: + end_pos = len(text) + + yield match.start(), token_class, text[match.start() : end_pos + n_chars] + context.pos = end_pos + n_chars + + return callback + + def opening_brace_callback(lexer, match, context): + stack = context.stack + + yield match.start(), Text, context.text[match.start() : match.end()] + context.pos = match.end() + + # if we encounter an opening brace and we're one level + # below a token state, it means we need to increment + # the nesting level for braces so we know later when + # we should return to the token rules. + if len(stack) > 2 and stack[-2] == 'token': + context.perl6_token_nesting_level += 1 + + def closing_brace_callback(lexer, match, context): + stack = context.stack + + yield match.start(), Text, context.text[match.start() : match.end()] + context.pos = match.end() + + # if we encounter a free closing brace and we're one level + # below a token state, it means we need to check the nesting + # level to see if we need to return to the token state. + if len(stack) > 2 and stack[-2] == 'token': + context.perl6_token_nesting_level -= 1 + if context.perl6_token_nesting_level == 0: + stack.pop() + + def embedded_perl6_callback(lexer, match, context): + context.perl6_token_nesting_level = 1 + yield match.start(), Text, context.text[match.start() : match.end()] + context.pos = match.end() + context.stack.append('root') + + # If you're modifying these rules, be careful if you need to process '{' or '}' characters. + # We have special logic for processing these characters (due to the fact that you can nest + # Perl 6 code in regex blocks), so if you need to process one of them, make sure you also + # process the corresponding one! + tokens = { + 'common' : [ + (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)), + (r'#[^\n]*$', Comment.Singleline), + (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline), + (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline), + (r'^=.*?\n\s*?\n', Comment.Multiline), + (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'), + (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + ')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'), + # deal with a special case in the Perl 6 grammar (role q { ... }) + (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)), + (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword), + (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix = '(?::[UD])?'), Name.Builtin), + (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin), + # copied from PerlLexer + (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable), + (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), + (r'::\?\w+', Name.Variable.Global), + (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), + (r'\$(?:<.*?>)+', Name.Variable), + (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String)), + # copied from PerlLexer + (r'0_?[0-7]+(_[0-7]+)*', Number.Oct), + (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex), + (r'0b[01]+(_[01]+)*', Number.Bin), + (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float), + (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float), + (r'\d+(_\d+)*', Number.Integer), + (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), + (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), + (r'm\w+(?=\()', Name), + (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String.Regex)), + (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), + (r'<[^\s=].*?\S>', String), + (_build_word_match(PERL6_OPERATORS), Operator), + (r'[0-9a-zA-Z_]' + PERL6_IDENTIFIER_RANGE + '*', Name), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + ], + 'root' : [ + include('common'), + (r'\{', opening_brace_callback), + (r'\}', closing_brace_callback), + (r'.+?', Text), + ], + 'pre-token' : [ + include('common'), + (r'\{', Text, ('#pop', 'token')), + (r'.+?', Text), + ], + 'token-sym-brackets' : [ + (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')), + (r'', Name, ('#pop', 'pre-token')), + ], + 'token': [ + (r'}', Text, '#pop'), + (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)), + # make sure that quotes in character classes aren't treated as strings + (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex), + # make sure that '#' characters in quotes aren't treated as comments + (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex), + (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex), + (r'#.*?$', Comment.Singleline), + (r'\{', embedded_perl6_callback), + ('.+?', String.Regex), + ], + } + + def analyse_text(text): + def strip_pod(lines): + in_pod = False + stripped_lines = [] + + for line in lines: + if re.match(r'^=(?:end|cut)', line): + in_pod = False + elif re.match(r'^=\w+', line): + in_pod = True + elif not in_pod: + stripped_lines.append(line) + + return stripped_lines + + lines = text.splitlines() + lines = strip_pod(lines) + text = '\n'.join(lines) + + if shebang_matches(text, r'perl6|rakudo|niecza'): + return True + + if 'use v6' in text: + return 0.91 # 0.01 greater than Perl says for 'my $' + if re.search(r'[$@%]\*[A-Z]+', text): # Perl 6-style globals ($*OS) + return 0.91 + if re.search(r'[$@%]\?[A-Z]+', text): # Perl 6 compiler variables ($?PACKAGE) + return 0.91 + if re.search(r'[$@%][!.][A-Za-z0-9_-]+', text): # Perl 6 member variables + return 0.91 + + for line in text.splitlines(): + if re.match(r'\s*(?:my|our)?\s*module', line): # module declarations + return 0.91 + if re.match(r'\s*(?:my|our)?\s*role', line): # role declarations + return 0.91 + if re.match(r'\s*(?:my|our)?\s*class\b', line): # class declarations + return 0.91 + return False + + def __init__(self, **options): + super(Perl6Lexer, self).__init__(**options) + self.encoding = options.get('encoding', 'utf-8') diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 889e7ec6..613be987 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -808,6 +808,8 @@ class CommonLispLexer(RegexLexer): (r'"(\\.|\\\n|[^"\\])*"', String), # quoting (r":" + symbol, String.Symbol), + (r"::" + symbol, String.Symbol), + (r":#" + symbol, String.Symbol), (r"'" + symbol, String.Symbol), (r"'", Operator), (r"`", Operator), diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 8491d19d..803212b0 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -2839,8 +2839,8 @@ class BroLexer(RegexLexer): (r'\\\n', Text), # Keywords (r'(add|alarm|break|case|const|continue|delete|do|else|enum|event' - r'|export|for|function|if|global|local|module|next' - r'|of|print|redef|return|schedule|type|when|while)\b', Keyword), + r'|export|for|function|if|global|hook|local|module|next' + r'|of|print|redef|return|schedule|switch|type|when|while)\b', Keyword), (r'(addr|any|bool|count|counter|double|file|int|interval|net' r'|pattern|port|record|set|string|subnet|table|time|timer' r'|vector)\b', Keyword.Type), diff --git a/pygments/lexers/shell.py b/pygments/lexers/shell.py index b95faf93..d117fd86 100644 --- a/pygments/lexers/shell.py +++ b/pygments/lexers/shell.py @@ -329,8 +329,8 @@ class PowerShellLexer(RegexLexer): *New in Pygments 1.5.* """ name = 'PowerShell' - aliases = ['powershell', 'posh', 'ps1'] - filenames = ['*.ps1'] + aliases = ['powershell', 'posh', 'ps1', 'psm1'] + filenames = ['*.ps1','*.psm1'] mimetypes = ['text/x-powershell'] flags = re.DOTALL | re.IGNORECASE | re.MULTILINE @@ -342,7 +342,7 @@ class PowerShellLexer(RegexLexer): 'dynamicparam do default continue cmdletbinding break begin alias \\? ' '% #script #private #local #global mandatory parametersetname position ' 'valuefrompipeline valuefrompipelinebypropertyname ' - 'valuefromremainingarguments helpmessage try catch').split() + 'valuefromremainingarguments helpmessage try catch throw').split() operators = ( 'and as band bnot bor bxor casesensitive ccontains ceq cge cgt cle ' @@ -368,12 +368,16 @@ class PowerShellLexer(RegexLexer): tokens = { 'root': [ + # we need to count pairs of parentheses for correct highlight + # of '$(...)' blocks in strings + (r'\(', Punctuation, '#push'), + (r'\)', Punctuation, '#pop'), (r'\s+', Text), (r'^(\s*#[#\s]*)(\.(?:%s))([^\n]*$)' % '|'.join(commenthelp), bygroups(Comment, String.Doc, Comment)), (r'#[^\n]*?$', Comment), (r'(<|<)#', Comment.Multiline, 'multline'), - (r'@"\n.*?\n"@', String.Heredoc), + (r'@"\n', String.Heredoc, 'heredoc-double'), (r"@'\n.*?\n'@", String.Heredoc), # escaped syntax (r'`[\'"$@-]', Punctuation), @@ -387,7 +391,7 @@ class PowerShellLexer(RegexLexer): (r'\[[a-z_\[][a-z0-9_. `,\[\]]*\]', Name.Constant), # .net [type]s (r'-[a-z_][a-z0-9_]*', Name), (r'\w+', Name), - (r'[.,{}\[\]$()=+*/\\&%!~?^`|<>-]', Punctuation), + (r'[.,;@{}\[\]$()=+*/\\&%!~?^`|<>-]|::', Punctuation), ], 'multline': [ (r'[^#&.]+', Comment.Multiline), @@ -396,15 +400,17 @@ class PowerShellLexer(RegexLexer): (r'[#&.]', Comment.Multiline), ], 'string': [ + (r"`[0abfnrtv'\"\$]", String.Escape), (r'[^$`"]+', String.Double), - (r'\$\(', String.Interpol, 'interpol'), - (r'`"|""', String.Double), + (r'\$\(', Punctuation, 'root'), + (r'""', String.Double), (r'[`$]', String.Double), (r'"', String.Double, '#pop'), ], - 'interpol': [ - (r'[^$)]+', String.Interpol), - (r'\$\(', String.Interpol, '#push'), - (r'\)', String.Interpol, '#pop'), + 'heredoc-double': [ + (r'\n"@', String.Heredoc, '#pop'), + (r'\$\(', Punctuation, 'root'), + (r'[^@\n]+"]', String.Heredoc), + (r".", String.Heredoc), ] } diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 5e340893..b47c49d2 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -1709,12 +1709,12 @@ class HttpLexer(RegexLexer): tokens = { 'root': [ - (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE)( +)([^ ]+)( +)' - r'(HTTPS?)(/)(1\.[01])(\r?\n|$)', + (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' + r'(HTTP)(/)(1\.[01])(\r?\n|$)', bygroups(Name.Function, Text, Name.Namespace, Text, Keyword.Reserved, Operator, Number, Text), 'headers'), - (r'(HTTPS?)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', + (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text, Name.Exception, Text), 'headers'), diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index dc8c7c5f..383bf6ad 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -67,7 +67,7 @@ class JavascriptLexer(RegexLexer): (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|' - r'throw|try|catch|finally|new|delete|typeof|instanceof|void|' + r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|' r'this)\b', Keyword, 'slashstartsregex'), (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|' diff --git a/tests/examplefiles/Get-CommandDefinitionHtml.ps1 b/tests/examplefiles/Get-CommandDefinitionHtml.ps1 new file mode 100644 index 00000000..b181955f --- /dev/null +++ b/tests/examplefiles/Get-CommandDefinitionHtml.ps1 @@ -0,0 +1,66 @@ +
+function Get-CommandDefinitionHtml {
+
+ # this tells powershell to allow advanced features,
+ # like the [validatenotnullorempty()] attribute below.
+ [CmdletBinding()]
+ param(
+ [ValidateNotNullOrEmpty()]
+ [string]$name
+ )
+
+ $command = get-command $name
+
+ # Look mom! I'm a cmdlet!
+ $PSCmdlet.WriteVerbose("Dumping HTML for " + $command)
+
+@"
+ <html>
+ <head>
+ <title>$($command.name)</title>
+ </head>
+ <body>
+ <table border="1">
+$(
+ $command.parametersets | % {
+@"
+
+ <tr>
+ <td>$($_.name)</td>
+ <td>
+ <table border="1">
+ <tr>
+ <th colspan="8">Parameters</th>
+
+$(
+ $count = 0
+ $_.parameters | % {
+ if (0 -eq ($count % 8)) {
+@'
+ </tr>
+ <tr>
+'@
+ }
+@"
+ <td>$($_.name)</td>
+"@
+ $count++
+ }
+)
+ </tr>
+ </table>
+ </td>
+ </tr>
+"@
+ }
+)
+ </table>
+ </body>
+ </html>
+"@
+}
+
+Get-CommandDefinitionHtml get-item > out.html
+
+# show in browser
+invoke-item out.html
diff --git a/tests/examplefiles/RoleQ.pm6 b/tests/examplefiles/RoleQ.pm6 new file mode 100644 index 00000000..9b66bde4 --- /dev/null +++ b/tests/examplefiles/RoleQ.pm6 @@ -0,0 +1,23 @@ +role q { + token stopper { \' } + + token escape:sym<\\> { <sym> <item=.backslash> } + + token backslash:sym<qq> { <?before 'q'> <quote=.LANG('MAIN','quote')> } + token backslash:sym<\\> { <text=.sym> } + token backslash:sym<stopper> { <text=.stopper> } + + token backslash:sym<miscq> { {} . } + + method tweak_q($v) { self.panic("Too late for :q") } + method tweak_qq($v) { self.panic("Too late for :qq") } +} + +role qq does b1 does c1 does s1 does a1 does h1 does f1 { + token stopper { \" } + token backslash:sym<unrec> { {} (\w) { self.throw_unrecog_backslash_seq: $/[0].Str } } + token backslash:sym<misc> { \W } + + method tweak_q($v) { self.panic("Too late for :q") } + method tweak_qq($v) { self.panic("Too late for :qq") } +} diff --git a/tests/examplefiles/grammar-test.p6 b/tests/examplefiles/grammar-test.p6 new file mode 100644 index 00000000..28107f3e --- /dev/null +++ b/tests/examplefiles/grammar-test.p6 @@ -0,0 +1,22 @@ +token pod_formatting_code { + $<code>=<[A..Z]> + '<' { $*POD_IN_FORMATTINGCODE := 1 } + $<content>=[ <!before '>'> <pod_string_character> ]+ + '>' { $*POD_IN_FORMATTINGCODE := 0 } +} + +token pod_string { + <pod_string_character>+ +} + +token something:sym«<» { + <!> +} + +token name { + <!> +} + +token comment:sym<#> { + '#' {} \N* +} diff --git a/tests/examplefiles/test.p6 b/tests/examplefiles/test.p6 new file mode 100644 index 00000000..3d12b56c --- /dev/null +++ b/tests/examplefiles/test.p6 @@ -0,0 +1,252 @@ +#!/usr/bin/env perl6 + +use v6; + +my $string = 'I look like a # comment!'; + +if $string eq 'foo' { + say 'hello'; +} + +regex http-verb { + 'GET' + | 'POST' + | 'PUT' + | 'DELETE' + | 'TRACE' + | 'OPTIONS' + | 'HEAD' +} + +# a sample comment + +say 'Hello from Perl 6!' + + +#`{ +multi-line comment! +} + +say 'here'; + +#`( +multi-line comment! +) + +say 'here'; + +#`{{{ +I'm a special comment! +}}} + +say 'there'; + +#`{{ +I'm { even } specialer! +}} + +say 'there'; + +#`{{ +does {{nesting}} work? +}} + +#`«< +trying mixed delimiters +» + +my $string = qq<Hooray, arbitrary delimiter!>; +my $string = qq«Hooray, arbitrary delimiter!»; +my $string = q <now with whitespace!>; +my $string = qq<<more strings>>; + +my %hash := Hash.new; + +=begin pod + +Here's some POD! Wooo + +=end pod + +=for Testing + This is POD (see? role isn't highlighted) + +say('this is not!'); + +=table + Of role things + +say('not in your table'); +#= A single line declarator "block" (with a keyword like role) +#| Another single line declarator "block" (with a keyword like role) +#={ + A declarator block (with a keyword like role) + } +#|{ + Another declarator block (with a keyword like role) + } +#= { A single line declarator "block" with a brace (with a keyword like role) +#=« + More declarator blocks! (with a keyword like role) + » +#|« + More declarator blocks! (with a keyword like role) + » + +say 'Moar code!'; + +my $don't = 16; + +sub don't($x) { + !$x +} + +say don't 'foo'; + +my %hash = ( + :foo(1), +); + +say %hash<foo>; +say %hash<<foo>>; +say %hash«foo»; + +say %*hash<foo>; +say %*hash<<foo>>; +say %*hash«foo»; + +say $<todo>; +say $<todo>; + +for (@A Z @B) -> $a, $b { + say $a + $b; +} + +Q:PIR { + .loadlib "somelib" +} + +my $longstring = q/ + lots + of + text +/; + +my $heredoc = q:to/END_SQL/; +SELECT * FROM Users +WHERE first_name = 'Rob' +END_SQL +my $hello; + +# Fun with regexen + +if 'food' ~~ /foo/ { + say 'match!' +} + +my $re = /foo/; +my $re2 = m/ foo /; +my $re3 = m:i/ FOO /; + +call-a-sub(/ foo /); +call-a-sub(/ foo \/ bar /); + +my $re4 = rx/something | something-else/; +my $result = ms/regexy stuff/; +my $sub0 = s/regexy stuff/more stuff/; +my $sub = ss/regexy stuff/more stuff/; +my $trans = tr/regexy stuff/more stuff/; + +my @values = <a b c d>; +call-sub(<a b c d>); +call-sub <a b c d>; + +my $result = $a < $b; + +for <a b c d> -> $letter { + say $letter; +} + +sub test-sub { + say @_; + say $!; + say $/; + say $0; + say $1; + say @*ARGS; + say $*ARGFILES; + say &?BLOCK; + say ::?CLASS; + say $?CLASS; + say @=COMMENT; + say %?CONFIG; + say $*CWD; + say $=data; + say %?DEEPMAGIC; + say $?DISTRO; + say $*DISTRO; + say $*EGID; + say %*ENV; + say $*ERR; + say $*EUID; + say $*EXECUTABLE_NAME; + say $?FILE; + say $?GRAMMAR; + say $*GID; + say $*IN; + say @*INC; + say %?LANG; + say $*LANG; + say $?LINE; + say %*META-ARGS; + say $?MODULE; + say %*OPTS; + say %*OPT; + say $?KERNEL; + say $*KERNEL; + say $*OUT; + say $?PACKAGE; + say $?PERL; + say $*PERL; + say $*PID; + say %=pod; + say $*PROGRAM_NAME; + say %*PROTOCOLS; + say ::?ROLE; + say $?ROLE; + say &?ROUTINE; + say $?SCOPE; + say $*TZ; + say $*UID; + say $?USAGE; + say $?VM; + say $?XVM; +} + +say <a b c>; + +my $perl5_re = m:P5/ fo{2} /; +my $re5 = rx«something | something-else»; + +my $M := %*COMPILING<%?OPTIONS><M>; + +say $M; + +sub regex-name { ... } +my $pair = role-name => 'foo'; +$pair = rolesque => 'foo'; + +my sub something(Str:D $value) { ... } + +my $s = q«< +some +string +stuff +»; + +my $regex = m«< some chars »; +# after + +say $/<foo><bar>; + +roleq; diff --git a/tests/examplefiles/type.lisp b/tests/examplefiles/type.lisp index 9c769379..c02c29df 100644 --- a/tests/examplefiles/type.lisp +++ b/tests/examplefiles/type.lisp @@ -1200,3 +1200,19 @@ Henry Baker: (unless (clos::funcallable-instance-p #'clos::class-name) (fmakunbound 'clos::class-name)) + + +(keywordp :junk) + T + +(keywordp ::junk) + T + +(symbol-name ::junk) + "JUNK" + +(symbol-name :#junk) + "#JUNK" + +(symbol-name :#.junk) + "#.JUNK" |