diff options
author | Georg Brandl <georg@python.org> | 2013-05-19 09:27:18 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2013-05-19 09:27:18 +0200 |
commit | 586d2e27922212bfeb2c949a424f66c7a4a36983 (patch) | |
tree | 0ec93d2bf994f7adc70bc5e9acfdf60ca23a7f39 /pygments | |
parent | ee21fc5d24c38f2d2ace98f87089971d6a6a8fe1 (diff) | |
parent | 8d9ce74cd983b2a55159f48b213825b86cbcd158 (diff) | |
download | pygments-586d2e27922212bfeb2c949a424f66c7a4a36983.tar.gz |
merge with https://bitbucket.org/bd808/pygments-main/ (EBNF lexer), pull request #193
Diffstat (limited to 'pygments')
-rw-r--r-- | pygments/lexers/__init__.py | 11 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 42 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 371 | ||||
-rw-r--r-- | pygments/lexers/asm.py | 2 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 180 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 101 | ||||
-rw-r--r-- | pygments/lexers/other.py | 14 | ||||
-rw-r--r-- | pygments/lexers/shell.py | 39 | ||||
-rw-r--r-- | pygments/lexers/templates.py | 4 | ||||
-rw-r--r-- | pygments/lexers/text.py | 16 | ||||
-rw-r--r-- | pygments/lexers/web.py | 6 | ||||
-rw-r--r-- | pygments/modeline.py | 40 |
12 files changed, 722 insertions, 104 deletions
diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py index 9af6ce68..dbfe4351 100644 --- a/pygments/lexers/__init__.py +++ b/pygments/lexers/__init__.py @@ -15,6 +15,7 @@ import fnmatch from os.path import basename from pygments.lexers._mapping import LEXERS +from pygments.modeline import get_filetype_from_buffer from pygments.plugin import find_plugin_lexers from pygments.util import ClassNotFound, bytes @@ -197,6 +198,16 @@ def guess_lexer(_text, **options): """ Guess a lexer by strong distinctions in the text (eg, shebang). """ + + # try to get a vim modeline first + ft = get_filetype_from_buffer(_text) + + if ft is not None: + try: + return get_lexer_by_name(ft, **options) + except ClassNotFound: + pass + best_lexer = [0.0, None] for lexer in _iter_lexerclasses(): rv = lexer.analyse_text(_text) diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index d491c03b..6a33a032 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -32,14 +32,15 @@ LEXERS = { 'AspectJLexer': ('pygments.lexers.jvm', 'AspectJ', ('aspectj',), ('*.aj',), ('text/x-aspectj',)), 'AsymptoteLexer': ('pygments.lexers.other', 'Asymptote', ('asy', 'asymptote'), ('*.asy',), ('text/x-asymptote',)), 'AutoItLexer': ('pygments.lexers.other', 'AutoIt', ('autoit', 'Autoit'), ('*.au3',), ('text/x-autoit',)), - 'AutohotkeyLexer': ('pygments.lexers.other', 'autohotkey', ('ahk',), ('*.ahk', '*.ahkl'), ('text/x-autohotkey',)), + 'AutohotkeyLexer': ('pygments.lexers.other', 'autohotkey', ('ahk', 'autohotkey'), ('*.ahk', '*.ahkl'), ('text/x-autohotkey',)), 'AwkLexer': ('pygments.lexers.other', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ('application/x-awk',)), 'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)), 'BaseMakefileLexer': ('pygments.lexers.text', 'Base Makefile', ('basemake',), (), ()), 'BashLexer': ('pygments.lexers.shell', 'Bash', ('bash', 'sh', 'ksh'), ('*.sh', '*.ksh', '*.bash', '*.ebuild', 
'*.eclass', '.bashrc', 'bashrc', '.bash_*', 'bash_*'), ('application/x-sh', 'application/x-shellscript')), 'BashSessionLexer': ('pygments.lexers.shell', 'Bash Session', ('console',), ('*.sh-session',), ('application/x-shell-session',)), - 'BatchLexer': ('pygments.lexers.shell', 'Batchfile', ('bat',), ('*.bat', '*.cmd'), ('application/x-dos-batch',)), + 'BatchLexer': ('pygments.lexers.shell', 'Batchfile', ('bat', 'dosbatch', 'winbatch'), ('*.bat', '*.cmd'), ('application/x-dos-batch',)), 'BefungeLexer': ('pygments.lexers.other', 'Befunge', ('befunge',), ('*.befunge',), ('application/x-befunge',)), + 'BlitzBasicLexer': ('pygments.lexers.compiled', 'BlitzBasic', ('blitzbasic', 'b3d', 'bplus'), ('*.bb', '*.decls'), ('text/x-bb',)), 'BlitzMaxLexer': ('pygments.lexers.compiled', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)), 'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), 'BrainfuckLexer': ('pygments.lexers.other', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), @@ -54,7 +55,7 @@ LEXERS = { 'CbmBasicV2Lexer': ('pygments.lexers.other', 'CBM BASIC V2', ('cbmbas',), ('*.bas',), ()), 'CeylonLexer': ('pygments.lexers.jvm', 'Ceylon', ('ceylon',), ('*.ceylon',), ('text/x-ceylon',)), 'Cfengine3Lexer': ('pygments.lexers.other', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()), - 'CheetahHtmlLexer': ('pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire'), (), ('text/html+cheetah', 'text/html+spitfire')), + 'CheetahHtmlLexer': ('pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire', 'htmlcheetah'), (), ('text/html+cheetah', 'text/html+spitfire')), 'CheetahJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Cheetah', ('js+cheetah', 'javascript+cheetah', 'js+spitfire', 'javascript+spitfire'), (), ('application/x-javascript+cheetah', 'text/x-javascript+cheetah', 'text/javascript+cheetah', 'application/x-javascript+spitfire', 
'text/x-javascript+spitfire', 'text/javascript+spitfire')), 'CheetahLexer': ('pygments.lexers.templates', 'Cheetah', ('cheetah', 'spitfire'), ('*.tmpl', '*.spt'), ('application/x-cheetah', 'application/x-spitfire')), 'CheetahXmlLexer': ('pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')), @@ -62,10 +63,10 @@ LEXERS = { 'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')), 'CobolFreeformatLexer': ('pygments.lexers.compiled', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()), 'CobolLexer': ('pygments.lexers.compiled', 'COBOL', ('cobol',), ('*.cob', '*.COB', '*.cpy', '*.CPY'), ('text/x-cobol',)), - 'CoffeeScriptLexer': ('pygments.lexers.web', 'CoffeeScript', ('coffee-script', 'coffeescript'), ('*.coffee',), ('text/coffeescript',)), + 'CoffeeScriptLexer': ('pygments.lexers.web', 'CoffeeScript', ('coffee-script', 'coffeescript', 'coffee'), ('*.coffee',), ('text/coffeescript',)), 'ColdfusionHtmlLexer': ('pygments.lexers.templates', 'Coldfusion HTML', ('cfm',), ('*.cfm', '*.cfml', '*.cfc'), ('application/x-coldfusion',)), 'ColdfusionLexer': ('pygments.lexers.templates', 'cfstatement', ('cfs',), (), ()), - 'CommonLispLexer': ('pygments.lexers.functional', 'Common Lisp', ('common-lisp', 'cl'), ('*.cl', '*.lisp', '*.el'), ('text/x-common-lisp',)), + 'CommonLispLexer': ('pygments.lexers.functional', 'Common Lisp', ('common-lisp', 'cl', 'lisp'), ('*.cl', '*.lisp', '*.el'), ('text/x-common-lisp',)), 'CoqLexer': ('pygments.lexers.functional', 'Coq', ('coq',), ('*.v',), ('text/x-coq',)), 'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx', '*.C', '*.H', '*.cp', '*.CPP'), ('text/x-c++hdr', 'text/x-c++src')), 'CppObjdumpLexer': ('pygments.lexers.asm', 'cpp-objdump', ('cpp-objdump', 'c++-objdumb', 'cxx-objdump'), ('*.cpp-objdump', 
'*.c++-objdump', '*.cxx-objdump'), ('text/x-cpp-objdump',)), @@ -77,12 +78,12 @@ LEXERS = { 'CssPhpLexer': ('pygments.lexers.templates', 'CSS+PHP', ('css+php',), (), ('text/css+php',)), 'CssSmartyLexer': ('pygments.lexers.templates', 'CSS+Smarty', ('css+smarty',), (), ('text/css+smarty',)), 'CudaLexer': ('pygments.lexers.compiled', 'CUDA', ('cuda', 'cu'), ('*.cu', '*.cuh'), ('text/x-cuda',)), - 'CythonLexer': ('pygments.lexers.compiled', 'Cython', ('cython', 'pyx'), ('*.pyx', '*.pxd', '*.pxi'), ('text/x-cython', 'application/x-cython')), + 'CythonLexer': ('pygments.lexers.compiled', 'Cython', ('cython', 'pyx', 'pyrex'), ('*.pyx', '*.pxd', '*.pxi'), ('text/x-cython', 'application/x-cython')), 'DLexer': ('pygments.lexers.compiled', 'D', ('d',), ('*.d', '*.di'), ('text/x-dsrc',)), 'DObjdumpLexer': ('pygments.lexers.asm', 'd-objdump', ('d-objdump',), ('*.d-objdump',), ('text/x-d-objdump',)), 'DarcsPatchLexer': ('pygments.lexers.text', 'Darcs Patch', ('dpatch',), ('*.dpatch', '*.darcspatch'), ()), 'DartLexer': ('pygments.lexers.web', 'Dart', ('dart',), ('*.dart',), ('text/x-dart',)), - 'DebianControlLexer': ('pygments.lexers.text', 'Debian Control file', ('control',), ('control',), ()), + 'DebianControlLexer': ('pygments.lexers.text', 'Debian Control file', ('control', 'debcontrol'), ('control',), ()), 'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',), ('text/x-pascal',)), 'DgLexer': ('pygments.lexers.agile', 'dg', ('dg',), ('*.dg',), ('text/x-dg',)), 'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')), @@ -111,7 +112,7 @@ LEXERS = { 'FortranLexer': ('pygments.lexers.compiled', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)), 'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('Clipper', 'XBase'), ('*.PRG', '*.prg'), ()), 'GLShaderLexer': ('pygments.lexers.compiled', 'GLSL', ('glsl',), ('*.vert', '*.frag', 
'*.geo'), ('text/x-glslsrc',)), - 'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas',), ('*.s', '*.S'), ('text/x-gas',)), + 'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas', 'asm'), ('*.s', '*.S'), ('text/x-gas',)), 'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')), 'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ('application/x-genshi-text', 'text/x-genshi')), 'GettextLexer': ('pygments.lexers.text', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')), @@ -125,8 +126,8 @@ LEXERS = { 'GroovyLexer': ('pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy',), ('text/x-groovy',)), 'HamlLexer': ('pygments.lexers.web', 'Haml', ('haml', 'HAML'), ('*.haml',), ('text/x-haml',)), 'HaskellLexer': ('pygments.lexers.functional', 'Haskell', ('haskell', 'hs'), ('*.hs',), ('text/x-haskell',)), - 'HaxeLexer': ('pygments.lexers.web', 'haXe', ('hx', 'haXe'), ('*.hx',), ('text/haxe',)), - 'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja'), (), ('text/html+django', 'text/html+jinja')), + 'HaxeLexer': ('pygments.lexers.web', 'haXe', ('hx', 'haXe', 'haxe'), ('*.hx',), ('text/haxe',)), + 'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja', 'htmldjango'), (), ('text/html+django', 'text/html+jinja')), 'HtmlGenshiLexer': ('pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), (), ('text/html+genshi',)), 'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')), 'HtmlPhpLexer': ('pygments.lexers.templates', 'HTML+PHP', ('html+php',), ('*.phtml',), ('application/x-php', 'application/x-httpd-php', 'application/x-httpd-php3', 'application/x-httpd-php4', 'application/x-httpd-php5')), @@ -135,7 +136,7 
@@ LEXERS = { 'HxmlLexer': ('pygments.lexers.text', 'Hxml', ('haxeml', 'hxml'), ('*.hxml',), ()), 'HybrisLexer': ('pygments.lexers.other', 'Hybris', ('hybris', 'hy'), ('*.hy', '*.hyb'), ('text/x-hybris', 'application/x-hybris')), 'IDLLexer': ('pygments.lexers.math', 'IDL', ('idl',), ('*.pro',), ('text/idl',)), - 'IniLexer': ('pygments.lexers.text', 'INI', ('ini', 'cfg'), ('*.ini', '*.cfg'), ('text/x-ini',)), + 'IniLexer': ('pygments.lexers.text', 'INI', ('ini', 'cfg', 'dosini'), ('*.ini', '*.cfg'), ('text/x-ini',)), 'IoLexer': ('pygments.lexers.agile', 'Io', ('io',), ('*.io',), ('text/x-iosrc',)), 'IokeLexer': ('pygments.lexers.jvm', 'Ioke', ('ioke', 'ik'), ('*.ik',), ('text/x-iokesrc',)), 'IrcLogsLexer': ('pygments.lexers.text', 'IRC logs', ('irc',), ('*.weechatlog',), ('text/x-irclog',)), @@ -161,13 +162,13 @@ LEXERS = { 'LassoLexer': ('pygments.lexers.web', 'Lasso', ('lasso', 'lassoscript'), ('*.lasso', '*.lasso[89]'), ('text/x-lasso',)), 'LassoXmlLexer': ('pygments.lexers.templates', 'XML+Lasso', ('xml+lasso',), (), ('application/xml+lasso',)), 'LighttpdConfLexer': ('pygments.lexers.text', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)), - 'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell'), ('*.lhs',), ('text/x-literate-haskell',)), + 'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell', 'lhaskell'), ('*.lhs',), ('text/x-literate-haskell',)), 'LiveScriptLexer': ('pygments.lexers.web', 'LiveScript', ('live-script', 'livescript'), ('*.ls',), ('text/livescript',)), 'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)), 'LogosLexer': ('pygments.lexers.compiled', 'Logos', ('logos',), ('*.x', '*.xi', '*.xm', '*.xmi'), ('text/x-logos',)), 'LogtalkLexer': ('pygments.lexers.other', 'Logtalk', ('logtalk',), ('*.lgt',), ('text/x-logtalk',)), 'LuaLexer': ('pygments.lexers.agile', 'Lua', ('lua',), ('*.lua', 
'*.wlua'), ('text/x-lua', 'application/x-lua')), - 'MOOCodeLexer': ('pygments.lexers.other', 'MOOCode', ('moocode',), ('*.moo',), ('text/x-moocode',)), + 'MOOCodeLexer': ('pygments.lexers.other', 'MOOCode', ('moocode', 'moo'), ('*.moo',), ('text/x-moocode',)), 'MakefileLexer': ('pygments.lexers.text', 'Makefile', ('make', 'makefile', 'mf', 'bsdmake'), ('*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'), ('text/x-makefile',)), 'MakoCssLexer': ('pygments.lexers.templates', 'CSS+Mako', ('css+mako',), (), ('text/css+mako',)), 'MakoHtmlLexer': ('pygments.lexers.templates', 'HTML+Mako', ('html+mako',), (), ('text/html+mako',)), @@ -196,6 +197,7 @@ LEXERS = { 'NSISLexer': ('pygments.lexers.other', 'NSIS', ('nsis', 'nsi', 'nsh'), ('*.nsi', '*.nsh'), ('text/x-nsis',)), 'NasmLexer': ('pygments.lexers.asm', 'NASM', ('nasm',), ('*.asm', '*.ASM'), ('text/x-nasm',)), 'NemerleLexer': ('pygments.lexers.dotnet', 'Nemerle', ('nemerle',), ('*.n',), ('text/x-nemerle',)), + 'NesCLexer': ('pygments.lexers.compiled', 'nesC', ('nesc',), ('*.nc',), ('text/x-nescsrc',)), 'NewLispLexer': ('pygments.lexers.functional', 'NewLisp', ('newlisp',), ('*.lsp', '*.nl'), ('text/x-newlisp', 'application/x-newlisp')), 'NewspeakLexer': ('pygments.lexers.other', 'Newspeak', ('newspeak',), ('*.ns2',), ('text/x-newspeak',)), 'NginxConfLexer': ('pygments.lexers.text', 'Nginx configuration file', ('nginx',), (), ('text/x-nginx-conf',)), @@ -210,17 +212,18 @@ LEXERS = { 'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)), 'OpaLexer': ('pygments.lexers.functional', 'Opa', ('opa',), ('*.opa',), ('text/x-opa',)), 'OpenEdgeLexer': ('pygments.lexers.other', 'OpenEdge ABL', ('openedge', 'abl', 'progress'), ('*.p', '*.cls'), ('text/x-openedge', 'application/x-openedge')), + 'Perl6Lexer': ('pygments.lexers.agile', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6'), ('text/x-perl6', 'application/x-perl6')), 
'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')), 'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]', '*.inc'), ('text/x-php',)), 'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)), - 'PostScriptLexer': ('pygments.lexers.other', 'PostScript', ('postscript',), ('*.ps', '*.eps'), ('application/postscript',)), + 'PostScriptLexer': ('pygments.lexers.other', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)), 'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)), 'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)), 'PovrayLexer': ('pygments.lexers.other', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)), - 'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'posh', 'ps1'), ('*.ps1',), ('text/x-powershell',)), + 'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)), 'PrologLexer': ('pygments.lexers.compiled', 'Prolog', ('prolog',), ('*.prolog', '*.pro', '*.pl'), ('text/x-prolog',)), - 'PropertiesLexer': ('pygments.lexers.text', 'Properties', ('properties',), ('*.properties',), ('text/x-java-properties',)), - 'ProtoBufLexer': ('pygments.lexers.other', 'Protocol Buffer', ('protobuf',), ('*.proto',), ()), + 'PropertiesLexer': ('pygments.lexers.text', 'Properties', ('properties', 'jproperties'), ('*.properties',), ('text/x-java-properties',)), + 'ProtoBufLexer': ('pygments.lexers.other', 'Protocol Buffer', ('protobuf', 'proto'), ('*.proto',), ()), 'PuppetLexer': ('pygments.lexers.other', 'Puppet', ('puppet',), ('*.pp',), ()), 'PyPyLogLexer': ('pygments.lexers.text', 'PyPy Log', 
('pypylog', 'pypy'), ('*.pypylog',), ('application/x-pypylog',)), 'Python3Lexer': ('pygments.lexers.agile', 'Python 3', ('python3', 'py3'), (), ('text/x-python3', 'application/x-python3')), @@ -261,16 +264,17 @@ LEXERS = { 'ScssLexer': ('pygments.lexers.web', 'SCSS', ('scss',), ('*.scss',), ('text/x-scss',)), 'ShellSessionLexer': ('pygments.lexers.shell', 'Shell Session', ('shell-session',), ('*.shell-session',), ('application/x-sh-session',)), 'SmaliLexer': ('pygments.lexers.dalvik', 'Smali', ('smali',), ('*.smali',), ('text/smali',)), - 'SmalltalkLexer': ('pygments.lexers.other', 'Smalltalk', ('smalltalk', 'squeak'), ('*.st',), ('text/x-smalltalk',)), + 'SmalltalkLexer': ('pygments.lexers.other', 'Smalltalk', ('smalltalk', 'squeak', 'st'), ('*.st',), ('text/x-smalltalk',)), 'SmartyLexer': ('pygments.lexers.templates', 'Smarty', ('smarty',), ('*.tpl',), ('application/x-smarty',)), 'SnobolLexer': ('pygments.lexers.other', 'Snobol', ('snobol',), ('*.snobol',), ('text/x-snobol',)), 'SourcePawnLexer': ('pygments.lexers.other', 'SourcePawn', ('sp',), ('*.sp',), ('text/x-sourcepawn',)), - 'SourcesListLexer': ('pygments.lexers.text', 'Debian Sourcelist', ('sourceslist', 'sources.list'), ('sources.list',), ()), + 'SourcesListLexer': ('pygments.lexers.text', 'Debian Sourcelist', ('sourceslist', 'sources.list', 'debsources'), ('sources.list',), ()), 'SqlLexer': ('pygments.lexers.sql', 'SQL', ('sql',), ('*.sql',), ('text/x-sql',)), 'SqliteConsoleLexer': ('pygments.lexers.sql', 'sqlite3con', ('sqlite3',), ('*.sqlite3-console',), ('text/x-sqlite3-console',)), 'SquidConfLexer': ('pygments.lexers.text', 'SquidConf', ('squidconf', 'squid.conf', 'squid'), ('squid.conf',), ('text/x-squidconf',)), 'SspLexer': ('pygments.lexers.templates', 'Scalate Server Page', ('ssp',), ('*.ssp',), ('application/x-ssp',)), 'StanLexer': ('pygments.lexers.math', 'Stan', ('stan',), ('*.stan',), ()), + 'SwigLexer': ('pygments.lexers.compiled', 'SWIG', ('Swig', 'swig'), ('*.swg', '*.i'), 
('text/swig',)), 'SystemVerilogLexer': ('pygments.lexers.hdl', 'systemverilog', ('systemverilog', 'sv'), ('*.sv', '*.svh'), ('text/x-systemverilog',)), 'TclLexer': ('pygments.lexers.agile', 'Tcl', ('tcl',), ('*.tcl',), ('text/x-tcl', 'text/x-script.tcl', 'application/x-tcl')), 'TcshLexer': ('pygments.lexers.shell', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)), diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 3c1525d0..896a3deb 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ - LexerContext, include, combined, do_insertions, bygroups, using + LexerContext, include, combined, do_insertions, bygroups, using, this from pygments.token import Error, Text, Other, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches @@ -23,7 +23,7 @@ __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer', 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', - 'FancyLexer', 'DgLexer'] + 'FancyLexer', 'DgLexer', 'Perl6Lexer'] # b/w compatibility from pygments.lexers.functional import SchemeLexer @@ -428,10 +428,13 @@ class Python3TracebackLexer(RegexLexer): r'exception occurred:\n\n', Generic.Traceback), (r'^The above exception was the direct cause of the ' r'following exception:\n\n', Generic.Traceback), + (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), + (r'^( File )("[^"]+")(, line )(\d+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(Python3Lexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', @@ -528,7 +531,7 @@ 
class RubyLexer(ExtendedRegexLexer): (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), - (r'([a-zA-Z_][a-zA-Z0-9]*)(:)', + (r'([a-zA-Z_][a-zA-Z0-9]*)(:)(?!:)', bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9 (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), @@ -1922,3 +1925,365 @@ class DgLexer(RegexLexer): (r"'''", String, '#pop') ], } + +class Perl6Lexer(ExtendedRegexLexer): + """ + For `Perl 6 <http://www.perl6.org>`_ source code. + + *New in Pygments 1.7.* + """ + + name = 'Perl6' + aliases = ['perl6', 'pl6'] + filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', + '*.6pm', '*.p6m', '*.pm6'] + mimetypes = ['text/x-perl6', 'application/x-perl6'] + flags = re.MULTILINE | re.DOTALL | re.UNICODE + + PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" + + PERL6_KEYWORDS = ( + 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT', + 'KEEP', 'LAST', 'LEAVE', 'NEXT', 'POST', 'PRE', 'START', 'TEMP', + 'UNDO', 'as', 'assoc', 'async', 'augment', 'binary', 'break', 'but', + 'cached', 'category', 'class', 'constant', 'contend', 'continue', + 'copy', 'deep', 'default', 'defequiv', 'defer', 'die', 'do', 'else', + 'elsif', 'enum', 'equiv', 'exit', 'export', 'fail', 'fatal', 'for', + 'gather', 'given', 'goto', 'grammar', 'handles', 'has', 'if', 'inline', + 'irs', 'is', 'last', 'leave', 'let', 'lift', 'loop', 'looser', 'macro', + 'make', 'maybe', 'method', 'module', 'multi', 'my', 'next', 'of', + 'ofs', 'only', 'oo', 'ors', 'our', 'package', 'parsed', 'prec', + 'proto', 'readonly', 'redo', 'ref', 'regex', 'reparsed', 'repeat', + 'require', 'required', 'return', 'returns', 'role', 'rule', 'rw', + 'self', 'slang', 'state', 'sub', 'submethod', 'subset', 'supersede', + 'take', 'temp', 'tighter', 'token', 'trusts', 'try', 'unary', + 'unless', 'until', 'use', 'warn', 'when', 'where', 'while', 'will', + ) + + PERL6_BUILTINS = ( + 'ACCEPTS', 
'HOW', 'REJECTS', 'VAR', 'WHAT', 'WHENCE', 'WHERE', 'WHICH', + 'WHO', 'abs', 'acos', 'acosec', 'acosech', 'acosh', 'acotan', 'acotanh', + 'all', 'any', 'approx', 'arity', 'asec', 'asech', 'asin', 'asinh' + 'assuming', 'atan', 'atan2', 'atanh', 'attr', 'bless', 'body', 'by' + 'bytes', 'caller', 'callsame', 'callwith', 'can', 'capitalize', 'cat', + 'ceiling', 'chars', 'chmod', 'chomp', 'chop', 'chr', 'chroot', + 'circumfix', 'cis', 'classify', 'clone', 'close', 'cmp_ok', 'codes', + 'comb', 'connect', 'contains', 'context', 'cos', 'cosec', 'cosech', + 'cosh', 'cotan', 'cotanh', 'count', 'defined', 'delete', 'diag', + 'dies_ok', 'does', 'e', 'each', 'eager', 'elems', 'end', 'eof', 'eval', + 'eval_dies_ok', 'eval_elsewhere', 'eval_lives_ok', 'evalfile', 'exists', + 'exp', 'first', 'flip', 'floor', 'flunk', 'flush', 'fmt', 'force_todo', + 'fork', 'from', 'getc', 'gethost', 'getlogin', 'getpeername', 'getpw', + 'gmtime', 'graphs', 'grep', 'hints', 'hyper', 'im', 'index', 'infix', + 'invert', 'is_approx', 'is_deeply', 'isa', 'isa_ok', 'isnt', 'iterator', + 'join', 'key', 'keys', 'kill', 'kv', 'lastcall', 'lazy', 'lc', 'lcfirst', + 'like', 'lines', 'link', 'lives_ok', 'localtime', 'log', 'log10', 'map', + 'max', 'min', 'minmax', 'name', 'new', 'nextsame', 'nextwith', 'nfc', + 'nfd', 'nfkc', 'nfkd', 'nok_error', 'nonce', 'none', 'normalize', 'not', + 'nothing', 'ok', 'once', 'one', 'open', 'opendir', 'operator', 'ord', + 'p5chomp', 'p5chop', 'pack', 'pair', 'pairs', 'pass', 'perl', 'pi', + 'pick', 'plan', 'plan_ok', 'polar', 'pop', 'pos', 'postcircumfix', + 'postfix', 'pred', 'prefix', 'print', 'printf', 'push', 'quasi', + 'quotemeta', 'rand', 're', 'read', 'readdir', 'readline', 'reduce', + 'reverse', 'rewind', 'rewinddir', 'rindex', 'roots', 'round', + 'roundrobin', 'run', 'runinstead', 'sameaccent', 'samecase', 'say', + 'sec', 'sech', 'sech', 'seek', 'shape', 'shift', 'sign', 'signature', + 'sin', 'sinh', 'skip', 'skip_rest', 'sleep', 'slurp', 'sort', 'splice', + 'split', 
'sprintf', 'sqrt', 'srand', 'strand', 'subst', 'substr', 'succ', + 'sum', 'symlink', 'tan', 'tanh', 'throws_ok', 'time', 'times', 'to', + 'todo', 'trim', 'trim_end', 'trim_start', 'true', 'truncate', 'uc', + 'ucfirst', 'undef', 'undefine', 'uniq', 'unlike', 'unlink', 'unpack', + 'unpolar', 'unshift', 'unwrap', 'use_ok', 'value', 'values', 'vec', + 'version_lt', 'void', 'wait', 'want', 'wrap', 'write', 'zip', + ) + + PERL6_BUILTIN_CLASSES = ( + 'Abstraction', 'Any', 'AnyChar', 'Array', 'Associative', 'Bag', 'Bit', + 'Blob', 'Block', 'Bool', 'Buf', 'Byte', 'Callable', 'Capture', 'Char', 'Class', + 'Code', 'Codepoint', 'Comparator', 'Complex', 'Decreasing', 'Exception', + 'Failure', 'False', 'Grammar', 'Grapheme', 'Hash', 'IO', 'Increasing', + 'Int', 'Junction', 'KeyBag', 'KeyExtractor', 'KeyHash', 'KeySet', + 'KitchenSink', 'List', 'Macro', 'Mapping', 'Match', 'Matcher', 'Method', + 'Module', 'Num', 'Object', 'Ordered', 'Ordering', 'OrderingPair', + 'Package', 'Pair', 'Positional', 'Proxy', 'Range', 'Rat', 'Regex', + 'Role', 'Routine', 'Scalar', 'Seq', 'Set', 'Signature', 'Str', 'StrLen', + 'StrPos', 'Sub', 'Submethod', 'True', 'UInt', 'Undef', 'Version', 'Void', + 'Whatever', 'bit', 'bool', 'buf', 'buf1', 'buf16', 'buf2', 'buf32', + 'buf4', 'buf64', 'buf8', 'complex', 'int', 'int1', 'int16', 'int2', + 'int32', 'int4', 'int64', 'int8', 'num', 'rat', 'rat1', 'rat16', 'rat2', + 'rat32', 'rat4', 'rat64', 'rat8', 'uint', 'uint1', 'uint16', 'uint2', + 'uint32', 'uint4', 'uint64', 'uint8', 'utf16', 'utf32', 'utf8', + ) + + PERL6_OPERATORS = ( + 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div', + 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm', + 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx', + '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^', + '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&', + 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^', + '~', '&', '^', 
'but', 'does', '<=>', '..', '..^', '^..', '^..^', + '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv', + '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so', + 'not', '<==', '==>', '<<==', '==>>', + ) + + # Perl 6 has a *lot* of possible bracketing characters + # this list was lifted from STD.pm6 (https://github.com/perl6/std) + PERL6_BRACKETS = { + u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', u'\u007b' : u'\u007d', + u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c', + u'\u2018' : u'\u2019', u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d', + u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', u'\u2045' : u'\u2046', + u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', u'\u2208' : u'\u220b', u'\u2209' : u'\u220c', + u'\u220a' : u'\u220d', u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd', + u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', u'\u2266' : u'\u2267', + u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', u'\u226e' : u'\u226f', u'\u2270' : u'\u2271', + u'\u2272' : u'\u2273', u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279', + u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', u'\u2280' : u'\u2281', + u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', u'\u2286' : u'\u2287', u'\u2288' : u'\u2289', + u'\u228a' : u'\u228b', u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8', + u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', u'\u22a9' : u'\u2ae3', + u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5', + u'\u22b6' : u'\u22b7', u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1', + u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', u'\u22dc' : u'\u22dd', + u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5', + u'\u22e6' : u'\u22e7', 
u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed', + u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', u'\u22f4' : u'\u22fc', + u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', u'\u2308' : u'\u2309', u'\u230a' : u'\u230b', + u'\u2329' : u'\u232a', u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b', + u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', u'\u2772' : u'\u2773', + u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6', + u'\u27dd' : u'\u27de', u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7', + u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', u'\u2985' : u'\u2986', + u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', u'\u298b' : u'\u298c', u'\u298d' : u'\u298e', + u'\u298f' : u'\u2990', u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996', + u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', u'\u29cf' : u'\u29d0', + u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db', + u'\u29f8' : u'\u29f9', u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e', + u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', u'\u2a79' : u'\u2a7a', + u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84', + u'\u2a8b' : u'\u2a8c', u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96', + u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', u'\u2aa1' : u'\u2aa2', + u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad', + u'\u2aaf' : u'\u2ab0', u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe', + u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', u'\u2ac5' : u'\u2ac6', + u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4', + u'\u2ad5' : u'\u2ad6', u'\u2aec' : u'\u2aed', 
u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa', + u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', u'\u2e0c' : u'\u2e0d', + u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', u'\u3008' : u'\u3009', u'\u300a' : u'\u300b', + u'\u300c' : u'\u300d', u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015', + u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', u'\u301d' : u'\u301e', + u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38', + u'\ufe39' : u'\ufe3a', u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40', + u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', u'\ufe59' : u'\ufe5a', + u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e', + u'\uff3b' : u'\uff3d', u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63', + } + + def _build_word_match(words, boundary_regex_fragment = None, prefix = '', suffix = ''): + if boundary_regex_fragment is None: + return r'\b(' + prefix + r'|'.join([ re.escape(x) for x in words]) + suffix + r')\b' + else: + return r'(?<!' + boundary_regex_fragment + ')' + prefix + '(' + \ + r'|'.join([ re.escape(x) for x in words]) + r')' + suffix + '(?!' 
def brackets_callback(token_class):
    """
    Build a lexer callback that consumes one complete delimited construct.

    The returned callback expects ``match`` to carry a named group
    ``delimiter`` (one or more repetitions of the opening character) and,
    optionally, a named group ``adverbs``.  Everything from the start of the
    match through the closing delimiter is yielded as a single token of type
    ``token_class`` and ``context.pos`` is moved just past it.

    If no closing delimiter exists, the rest of the text is consumed.
    """
    def callback(lexer, match, context):
        groups = match.groupdict()
        opening_chars = groups['delimiter']
        n_chars = len(opening_chars)
        adverbs = groups.get('adverbs')

        closer = Perl6Lexer.PERL6_BRACKETS.get(opening_chars[0])
        text = context.text

        if closer is None:
            # Not a mirrored character: the construct ends at the next
            # occurrence of the very same delimiter.
            end_pos = text.find(opening_chars,
                                match.start('delimiter') + n_chars)
            if end_pos < 0:
                # Unterminated construct.  Without this clamp, end_pos
                # stays -1, an empty token is emitted and context.pos is
                # moved *backwards* (to 0 for a one-character delimiter),
                # so the lexer never terminates.
                end_pos = len(text)
        else:
            # Mirrored character: look for the matching closer while
            # keeping track of nested opener/closer pairs.
            closing_chars = closer * n_chars
            nesting_level = 1

            search_pos = match.start('delimiter')

            while nesting_level > 0:
                next_open_pos = text.find(opening_chars, search_pos + n_chars)
                next_close_pos = text.find(closing_chars, search_pos + n_chars)

                if next_close_pos == -1:
                    # unterminated: swallow the rest of the text
                    next_close_pos = len(text)
                    nesting_level = 0
                elif next_open_pos != -1 and next_open_pos < next_close_pos:
                    nesting_level += 1
                    search_pos = next_open_pos
                else:  # next_close_pos < next_open_pos
                    nesting_level -= 1
                    search_pos = next_close_pos

            end_pos = next_close_pos

        if adverbs is not None and re.search(r':to\b', adverbs):
            # Heredoc (":to" adverb): the delimited text is the terminator,
            # and the construct runs until a line consisting only of it.
            heredoc_terminator = text[match.start('delimiter') + n_chars : end_pos]
            end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$',
                                    text[match.end('delimiter'):], re.MULTILINE)

            if end_heredoc:
                end_pos = match.end('delimiter') + end_heredoc.end()
            else:
                end_pos = len(text)

        yield match.start(), token_class, text[match.start() : end_pos + n_chars]
        context.pos = end_pos + n_chars

    return callback
braces so we know later when + # we should return to the token rules. + if len(stack) > 2 and stack[-2] == 'token': + context.perl6_token_nesting_level += 1 + + def closing_brace_callback(lexer, match, context): + stack = context.stack + + yield match.start(), Text, context.text[match.start() : match.end()] + context.pos = match.end() + + # if we encounter a free closing brace and we're one level + # below a token state, it means we need to check the nesting + # level to see if we need to return to the token state. + if len(stack) > 2 and stack[-2] == 'token': + context.perl6_token_nesting_level -= 1 + if context.perl6_token_nesting_level == 0: + stack.pop() + + def embedded_perl6_callback(lexer, match, context): + context.perl6_token_nesting_level = 1 + yield match.start(), Text, context.text[match.start() : match.end()] + context.pos = match.end() + context.stack.append('root') + + # If you're modifying these rules, be careful if you need to process '{' or '}' characters. + # We have special logic for processing these characters (due to the fact that you can nest + # Perl 6 code in regex blocks), so if you need to process one of them, make sure you also + # process the corresponding one! + tokens = { + 'common' : [ + (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)), + (r'#[^\n]*$', Comment.Singleline), + (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline), + (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline), + (r'^=.*?\n\s*?\n', Comment.Multiline), + (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)', bygroups(Keyword, Name), 'token-sym-brackets'), + (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + ')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?', bygroups(Keyword, Name), 'pre-token'), + # deal with a special case in the Perl 6 grammar (role q { ... 
}) + (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)), + (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword), + (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix = '(?::[UD])?'), Name.Builtin), + (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin), + # copied from PerlLexer + (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable), + (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), + (r'::\?\w+', Name.Variable.Global), + (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global), + (r'\$(?:<.*?>)+', Name.Variable), + (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String)), + # copied from PerlLexer + (r'0_?[0-7]+(_[0-7]+)*', Number.Oct), + (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex), + (r'0b[01]+(_[01]+)*', Number.Bin), + (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?', Number.Float), + (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float), + (r'\d+(_\d+)*', Number.Integer), + (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), + (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), + (r'm\w+(?=\()', Name), + (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String.Regex)), + (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), + (r'<[^\s=].*?\S>', String), + (_build_word_match(PERL6_OPERATORS), Operator), + (r'[0-9a-zA-Z_]' + PERL6_IDENTIFIER_RANGE + '*', Name), + (r"'(\\\\|\\[^\\]|[^'\\])*'", String), + (r'"(\\\\|\\[^\\]|[^"\\])*"', String), + ], + 'root' : [ + include('common'), + (r'\{', opening_brace_callback), + (r'\}', closing_brace_callback), + (r'.+?', Text), + ], + 'pre-token' : [ + include('common'), + (r'\{', Text, ('#pop', 'token')), + (r'.+?', Text), + ], + 'token-sym-brackets' : [ + 
(r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')), + (r'', Name, ('#pop', 'pre-token')), + ], + 'token': [ + (r'}', Text, '#pop'), + (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)), + # make sure that quotes in character classes aren't treated as strings + (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex), + # make sure that '#' characters in quotes aren't treated as comments + (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex), + (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex), + (r'#.*?$', Comment.Singleline), + (r'\{', embedded_perl6_callback), + ('.+?', String.Regex), + ], + } + + def analyse_text(text): + def strip_pod(lines): + in_pod = False + stripped_lines = [] + + for line in lines: + if re.match(r'^=(?:end|cut)', line): + in_pod = False + elif re.match(r'^=\w+', line): + in_pod = True + elif not in_pod: + stripped_lines.append(line) + + return stripped_lines + + lines = text.splitlines() + lines = strip_pod(lines) + text = '\n'.join(lines) + + if shebang_matches(text, r'perl6|rakudo|niecza'): + return True + + if 'use v6' in text: + return 0.91 # 0.01 greater than Perl says for 'my $' + if re.search(r'[$@%]\*[A-Z]+', text): # Perl 6-style globals ($*OS) + return 0.91 + if re.search(r'[$@%]\?[A-Z]+', text): # Perl 6 compiler variables ($?PACKAGE) + return 0.91 + if re.search(r'[$@%][!.][A-Za-z0-9_-]+', text): # Perl 6 member variables + return 0.91 + + for line in text.splitlines(): + if re.match(r'\s*(?:my|our)?\s*module', line): # module declarations + return 0.91 + if re.match(r'\s*(?:my|our)?\s*role', line): # role declarations + return 0.91 + if re.match(r'\s*(?:my|our)?\s*class\b', line): # class declarations + return 0.91 + return False + + def __init__(self, **options): + super(Perl6Lexer, self).__init__(**options) + self.encoding = options.get('encoding', 'utf-8') diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index 
f080327b..3f67862c 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -25,7 +25,7 @@ class GasLexer(RegexLexer): For Gas (AT&T) assembly code. """ name = 'GAS' - aliases = ['gas'] + aliases = ['gas', 'asm'] filenames = ['*.s', '*.S'] mimetypes = ['text/x-gas'] diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index d44ab6f6..c3b0909d 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -23,11 +23,12 @@ from pygments.scanner import Scanner from pygments.lexers.functional import OcamlLexer from pygments.lexers.jvm import JavaLexer, ScalaLexer -__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer', 'DylanLexer', - 'ObjectiveCLexer', 'ObjectiveCppLexer', 'FortranLexer', 'GLShaderLexer', - 'PrologLexer', 'CythonLexer', 'ValaLexer', 'OocLexer', 'GoLexer', - 'FelixLexer', 'AdaLexer', 'Modula2Lexer', 'BlitzMaxLexer', - 'NimrodLexer', 'FantomLexer', 'RustLexer', 'CudaLexer', 'MonkeyLexer', +__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer', + 'NesCLexer', 'DylanLexer', 'ObjectiveCLexer', 'ObjectiveCppLexer', + 'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer', + 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer', + 'Modula2Lexer', 'BlitzMaxLexer', 'BlitzBasicLexer', 'NimrodLexer', + 'FantomLexer', 'RustLexer', 'CudaLexer', 'MonkeyLexer', 'SwigLexer', 'DylanLidLexer', 'DylanConsoleLexer', 'CobolLexer', 'CobolFreeformatLexer', 'LogosLexer', 'ClayLexer'] @@ -231,6 +232,63 @@ class CppLexer(CFamilyLexer): return 0.1 +class SwigLexer(CppLexer): + """ + For `SWIG <http://www.swig.org/>`_ source code. 
+ + *New in Pygments 1.7.* + """ + name = 'SWIG' + aliases = ['Swig', 'swig'] + filenames = ['*.swg', '*.i'] + mimetypes = ['text/swig'] + priority = 0.04 # Lower than C/C++ and Objective C/C++ + + tokens = { + 'statements': [ + (r'(%[a-z_][a-z0-9_]*)', Name.Function), # SWIG directives + ('\$\**\&?[a-zA-Z0-9_]+', Name), # Special variables + (r'##*[a-zA-Z_][a-zA-Z0-9_]*', Comment.Preproc), # Stringification / additional preprocessor directives + inherit, + ], + } + + # This is a far from complete set of SWIG directives + swig_directives = ( + # Most common directives + '%apply', '%define', '%director', '%enddef', '%exception', '%extend', + '%feature', '%fragment', '%ignore', '%immutable', '%import', '%include', + '%inline', '%insert', '%module', '%newobject', '%nspace', '%pragma', + '%rename', '%shared_ptr', '%template', '%typecheck', '%typemap', + # Less common directives + '%arg', '%attribute', '%bang', '%begin', '%callback', '%catches', '%clear', + '%constant', '%copyctor', '%csconst', '%csconstvalue', '%csenum', + '%csmethodmodifiers', '%csnothrowexception', '%default', '%defaultctor', + '%defaultdtor', '%defined', '%delete', '%delobject', '%descriptor', + '%exceptionclass', '%exceptionvar', '%extend_smart_pointer', '%fragments', + '%header', '%ifcplusplus', '%ignorewarn', '%implicit', '%implicitconv', + '%init', '%javaconst', '%javaconstvalue', '%javaenum', '%javaexception', + '%javamethodmodifiers', '%kwargs', '%luacode', '%mutable', '%naturalvar', + '%nestedworkaround', '%perlcode', '%pythonabc', '%pythonappend', + '%pythoncallback', '%pythoncode', '%pythondynamic', '%pythonmaybecall', + '%pythonnondynamic', '%pythonprepend', '%refobject', '%shadow', '%sizeof', + '%trackobjects', '%types', '%unrefobject', '%varargs', '%warn', '%warnfilter') + + def analyse_text(text): + rv = 0.1 # Same as C/C++ + # Search for SWIG directives, which are conventionally at the beginning of + # a line. 
The probability of them being within a line is low, so let another + # lexer win in this case. + matches = re.findall(r'^\s*(%[a-z_][a-z0-9_]*)', text, re.M) + for m in matches: + if m in SwigLexer.swig_directives: + rv = 0.98 + break + else: + rv = 0.91 # Fraction higher than MatlabLexer + return rv + + class ECLexer(CLexer): """ For eC source code with preprocessor directives. @@ -266,6 +324,32 @@ class ECLexer(CLexer): } +class NesCLexer(CLexer): + """ + For `nesC <https://github.com/tinyos/nesc>`_ source code with preprocessor + directives. + + *New in Pygments 1.7.* + """ + name = 'nesC' + aliases = ['nesc'] + filenames = ['*.nc'] + mimetypes = ['text/x-nescsrc'] + + tokens = { + 'statements': [ + (r'(abstract|as|async|atomic|call|command|component|components|' + r'configuration|event|extends|generic|implementation|includes|' + r'interface|module|new|norace|post|provides|signal|task|uses)\b', + Keyword), + (r'(nx_struct|nx_union|nx_int8_t|nx_int16_t|nx_int32_t|nx_int64_t|' + r'nx_uint8_t|nx_uint16_t|nx_uint32_t|nx_uint64_t)\b', + Keyword.Type), + inherit, + ], + } + + class ClayLexer(RegexLexer): """ For `Clay <http://claylabs.com/clay/>`_ source. @@ -1267,6 +1351,8 @@ def objective(baselexer): ('#pop', 'oc_classname')), (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_forward_classname')), + # @ can also prefix other expressions like @{...} or @(...) + (r'@', Punctuation), inherit, ], 'oc_classname' : [ @@ -1573,7 +1659,7 @@ class CythonLexer(RegexLexer): """ name = 'Cython' - aliases = ['cython', 'pyx'] + aliases = ['cython', 'pyx', 'pyrex'] filenames = ['*.pyx', '*.pxd', '*.pxi'] mimetypes = ['text/x-cython', 'application/x-cython'] @@ -2632,6 +2718,88 @@ class BlitzMaxLexer(RegexLexer): } +class BlitzBasicLexer(RegexLexer): + """ + For `BlitzBasic <http://blitzbasic.com>`_ source code. 
+ + *New in Pygments 1.7.* + """ + + name = 'BlitzBasic' + aliases = ['blitzbasic', 'b3d', 'bplus'] + filenames = ['*.bb', '*.decls'] + mimetypes = ['text/x-bb'] + + bb_vopwords = (r'\b(Shl|Shr|Sar|Mod|Or|And|Not|' + r'Abs|Sgn|Handle|Int|Float|Str|' + r'First|Last|Before|After)\b') + bb_sktypes = r'@{1,2}|[#$%]' + bb_name = r'[a-z][a-z0-9_]*' + bb_var = (r'(%s)(?:([ \t]*)(%s)|([ \t]*)([.])([ \t]*)(?:(%s)))?') % \ + (bb_name, bb_sktypes, bb_name) + + flags = re.MULTILINE | re.IGNORECASE + tokens = { + 'root': [ + # Text + (r'[ \t]+', Text), + # Comments + (r";.*?\n", Comment.Single), + # Data types + ('"', String.Double, 'string'), + # Numbers + (r'[0-9]+\.[0-9]*(?!\.)', Number.Float), + (r'\.[0-9]+(?!\.)', Number.Float), + (r'[0-9]+', Number.Integer), + (r'\$[0-9a-f]+', Number.Hex), + (r'\%[10]+', Number), # Binary + # Other + (r'(?:%s|([+\-*/~=<>^]))' % (bb_vopwords), Operator), + (r'[(),:\[\]\\]', Punctuation), + (r'\.([ \t]*)(%s)' % bb_name, Name.Label), + # Identifiers + (r'\b(New)\b([ \t]+)(%s)' % (bb_name), + bygroups(Keyword.Reserved, Text, Name.Class)), + (r'\b(Gosub|Goto)\b([ \t]+)(%s)' % (bb_name), + bygroups(Keyword.Reserved, Text, Name.Label)), + (r'\b(Object)\b([ \t]*)([.])([ \t]*)(%s)\b' % (bb_name), + bygroups(Operator, Text, Punctuation, Text, Name.Class)), + (r'\b%s\b([ \t]*)(\()' % bb_var, + bygroups(Name.Function, Text, Keyword.Type,Text, Punctuation, + Text, Name.Class, Text, Punctuation)), + (r'\b(Function)\b([ \t]+)%s' % bb_var, + bygroups(Keyword.Reserved, Text, Name.Function, Text, Keyword.Type, + Text, Punctuation, Text, Name.Class)), + (r'\b(Type)([ \t]+)(%s)' % (bb_name), + bygroups(Keyword.Reserved, Text, Name.Class)), + # Keywords + (r'\b(Pi|True|False|Null)\b', Keyword.Constant), + (r'\b(Local|Global|Const|Field|Dim)\b', Keyword.Declaration), + (r'\b(End|Return|Exit|' + r'Chr|Len|Asc|' + r'New|Delete|Insert|' + r'Include|' + r'Function|' + r'Type|' + r'If|Then|Else|ElseIf|EndIf|' + r'For|To|Next|Step|Each|' + r'While|Wend|' + 
r'Repeat|Until|Forever|' + r'Select|Case|Default|' + r'Goto|Gosub|Data|Read|Restore)\b', Keyword.Reserved), + # Final resolve (for variable names and such) +# (r'(%s)' % (bb_name), Name.Variable), + (bb_var, bygroups(Name.Variable, Text, Keyword.Type, + Text, Punctuation, Text, Name.Class)), + ], + 'string': [ + (r'""', String.Double), + (r'"C?', String.Double, '#pop'), + (r'[^"]+', String.Double), + ], + } + + class NimrodLexer(RegexLexer): """ For `Nimrod <http://nimrod-code.org/>`_ source code. diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 889e7ec6..77fe4723 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -719,7 +719,7 @@ class CommonLispLexer(RegexLexer): *New in Pygments 0.9.* """ name = 'Common Lisp' - aliases = ['common-lisp', 'cl'] + aliases = ['common-lisp', 'cl', 'lisp'] filenames = ['*.cl', '*.lisp', '*.el'] # use for Elisp too mimetypes = ['text/x-common-lisp'] @@ -808,6 +808,8 @@ class CommonLispLexer(RegexLexer): (r'"(\\.|\\\n|[^"\\])*"', String), # quoting (r":" + symbol, String.Symbol), + (r"::" + symbol, String.Symbol), + (r":#" + symbol, String.Symbol), (r"'" + symbol, String.Symbol), (r"'", Operator), (r"`", Operator), @@ -1026,7 +1028,7 @@ class LiterateHaskellLexer(Lexer): *New in Pygments 0.9.* """ name = 'Literate Haskell' - aliases = ['lhs', 'literate-haskell'] + aliases = ['lhs', 'literate-haskell', 'lhaskell'] filenames = ['*.lhs'] mimetypes = ['text/x-literate-haskell'] @@ -2400,7 +2402,7 @@ class ElixirConsoleLexer(Lexer): class KokaLexer(RegexLexer): """ - Lexer for the `Koka <http://research.microsoft.com/en-us/projects/koka/>`_ + Lexer for the `Koka <http://koka.codeplex.com>`_ language. 
*New in Pygments 1.6.* @@ -2412,7 +2414,7 @@ class KokaLexer(RegexLexer): mimetypes = ['text/x-koka'] keywords = [ - 'infix', 'infixr', 'infixl', 'prefix', 'postfix', + 'infix', 'infixr', 'infixl', 'type', 'cotype', 'rectype', 'alias', 'struct', 'con', 'fun', 'function', 'val', 'var', @@ -2451,7 +2453,12 @@ class KokaLexer(RegexLexer): sboundary = '(?!'+symbols+')' # name boundary: a keyword should not be followed by any of these - boundary = '(?![a-zA-Z0-9_\\-])' + boundary = '(?![\w/])' + + # koka token abstractions + tokenType = Name.Attribute + tokenTypeDef = Name.Class + tokenConstructor = Generic.Emph # main lexer tokens = { @@ -2459,41 +2466,51 @@ class KokaLexer(RegexLexer): include('whitespace'), # go into type mode - (r'::?' + sboundary, Keyword.Type, 'type'), - (r'alias' + boundary, Keyword, 'alias-type'), - (r'struct' + boundary, Keyword, 'struct-type'), - (r'(%s)' % '|'.join(typeStartKeywords) + boundary, Keyword, 'type'), + (r'::?' + sboundary, tokenType, 'type'), + (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef), + 'alias-type'), + (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef), + 'struct-type'), + ((r'(%s)' % '|'.join(typeStartKeywords)) + + r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef), + 'type'), # special sequences of tokens (we use ?: for non-capturing group as # required by 'bygroups') - (r'(module)(\s*)((?:interface)?)(\s*)' - r'((?:[a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*\.)*' - r'[a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*)', - bygroups(Keyword, Text, Keyword, Text, Name.Namespace)), - (r'(import)(\s+)((?:[a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*\.)*[a-z]' - r'(?:[a-zA-Z0-9_]|\-[a-zA-Z])*)(\s*)((?:as)?)' - r'((?:[A-Z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*)?)', - bygroups(Keyword, Text, Name.Namespace, Text, Keyword, - Name.Namespace)), + (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)', + bygroups(Keyword, Text, Keyword, Name.Namespace)), + (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)' + 
r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)' + r'((?:[a-z]\w*/)*[a-z]\w*))?', + bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text, + Keyword, Name.Namespace)), + + (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))' + r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))', + bygroups(Keyword, Text, Name.Function)), + (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?' + r'([a-z]\w*|\((?:' + symbols + r'|/)\))', + bygroups(Keyword, Text, Keyword, Name.Function)), # keywords (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type), (r'(%s)' % '|'.join(keywords) + boundary, Keyword), (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo), - (r'::|:=|\->|[=\.:]' + sboundary, Keyword), - (r'\-' + sboundary, Generic.Strong), + (r'::?|:=|\->|[=\.]' + sboundary, Keyword), # names - (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?=\.)', Name.Namespace), - (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?!\.)', Name.Class), - (r'[a-z]([a-zA-Z0-9_]|\-[a-zA-Z])*', Name), - (r'_([a-zA-Z0-9_]|\-[a-zA-Z])*', Name.Variable), + (r'((?:[a-z]\w*/)*)([A-Z]\w*)', + bygroups(Name.Namespace, tokenConstructor)), + (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)), + (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))', + bygroups(Name.Namespace, Name)), + (r'_\w*', Name.Variable), # literal string (r'@"', String.Double, 'litstring'), # operators - (symbols, Operator), + (symbols + "|/(?![\*/])", Operator), (r'`', Operator), (r'[\{\}\(\)\[\];,]', Punctuation), @@ -2520,17 +2537,17 @@ class KokaLexer(RegexLexer): # type started by colon 'type': [ - (r'[\(\[<]', Keyword.Type, 'type-nested'), + (r'[\(\[<]', tokenType, 'type-nested'), include('type-content') ], # type nested in brackets: can contain parameters, comma etc. 
'type-nested': [ - (r'[\)\]>]', Keyword.Type, '#pop'), - (r'[\(\[<]', Keyword.Type, 'type-nested'), - (r',', Keyword.Type), - (r'([a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*)(\s*)(:)(?!:)', - bygroups(Name.Variable,Text,Keyword.Type)), # parameter name + (r'[\)\]>]', tokenType, '#pop'), + (r'[\(\[<]', tokenType, 'type-nested'), + (r',', tokenType), + (r'([a-z]\w*)(\s*)(:)(?!:)', + bygroups(Name, Text, tokenType)), # parameter name include('type-content') ], @@ -2539,23 +2556,23 @@ class KokaLexer(RegexLexer): include('whitespace'), # keywords - (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type), + (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword), (r'(?=((%s)' % '|'.join(keywords) + boundary + '))', Keyword, '#pop'), # need to match because names overlap... # kinds - (r'[EPH]' + boundary, Keyword.Type), - (r'[*!]', Keyword.Type), + (r'[EPHVX]' + boundary, tokenType), # type names - (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?=\.)', Name.Namespace), - (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?!\.)', Name.Class), - (r'[a-z][0-9]*(?![a-zA-Z_\-])', Keyword.Type), # Generic.Emph - (r'_([a-zA-Z0-9_]|\-[a-zA-Z])*', Keyword.Type), # Generic.Emph - (r'[a-z]([a-zA-Z0-9_]|\-[a-zA-Z])*', Keyword.Type), + (r'[a-z][0-9]*(?![\w/])', tokenType ), + (r'_\w*', tokenType.Variable), # Generic.Emph + (r'((?:[a-z]\w*/)*)([A-Z]\w*)', + bygroups(Name.Namespace, tokenType)), + (r'((?:[a-z]\w*/)*)([a-z]\w+)', + bygroups(Name.Namespace, tokenType)), # type keyword operators - (r'::|\->|[\.:|]', Keyword.Type), + (r'::|\->|[\.:|]', tokenType), #catchall (r'', Text, '#pop') @@ -2563,6 +2580,7 @@ class KokaLexer(RegexLexer): # comments and literals 'whitespace': [ + (r'\n\s*#.*$', Comment.Preproc), (r'\s+', Text), (r'/\*', Comment.Multiline, 'comment'), (r'//.*$', Comment.Single) @@ -2589,11 +2607,10 @@ class KokaLexer(RegexLexer): (r'[\'\n]', String.Char, '#pop'), ], 'escape-sequence': [ - (r'\\[abfnrtv0\\\"\'\?]', String.Escape), + (r'\\[nrt\\\"\']', String.Escape), (r'\\x[0-9a-fA-F]{2}', 
String.Escape), (r'\\u[0-9a-fA-F]{4}', String.Escape), # Yes, \U literals are 6 hex digits. (r'\\U[0-9a-fA-F]{6}', String.Escape) ] } - diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 8491d19d..675fa101 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -245,7 +245,7 @@ class MOOCodeLexer(RegexLexer): """ name = 'MOOCode' filenames = ['*.moo'] - aliases = ['moocode'] + aliases = ['moocode', 'moo'] mimetypes = ['text/x-moocode'] tokens = { @@ -289,7 +289,7 @@ class SmalltalkLexer(RegexLexer): """ name = 'Smalltalk' filenames = ['*.st'] - aliases = ['smalltalk', 'squeak'] + aliases = ['smalltalk', 'squeak', 'st'] mimetypes = ['text/x-smalltalk'] tokens = { @@ -1979,7 +1979,7 @@ class PostScriptLexer(RegexLexer): *New in Pygments 1.4.* """ name = 'PostScript' - aliases = ['postscript'] + aliases = ['postscript', 'postscr'] filenames = ['*.ps', '*.eps'] mimetypes = ['application/postscript'] @@ -2067,7 +2067,7 @@ class AutohotkeyLexer(RegexLexer): *New in Pygments 1.4.* """ name = 'autohotkey' - aliases = ['ahk'] + aliases = ['ahk', 'autohotkey'] filenames = ['*.ahk', '*.ahkl'] mimetypes = ['text/x-autohotkey'] @@ -2352,7 +2352,7 @@ class ProtoBufLexer(RegexLexer): """ name = 'Protocol Buffer' - aliases = ['protobuf'] + aliases = ['protobuf', 'proto'] filenames = ['*.proto'] tokens = { @@ -2839,8 +2839,8 @@ class BroLexer(RegexLexer): (r'\\\n', Text), # Keywords (r'(add|alarm|break|case|const|continue|delete|do|else|enum|event' - r'|export|for|function|if|global|local|module|next' - r'|of|print|redef|return|schedule|type|when|while)\b', Keyword), + r'|export|for|function|if|global|hook|local|module|next' + r'|of|print|redef|return|schedule|switch|type|when|while)\b', Keyword), (r'(addr|any|bool|count|counter|double|file|int|interval|net' r'|pattern|port|record|set|string|subnet|table|time|timer' r'|vector)\b', Keyword.Type), diff --git a/pygments/lexers/shell.py b/pygments/lexers/shell.py index b95faf93..78c5c996 100644 --- 
a/pygments/lexers/shell.py +++ b/pygments/lexers/shell.py @@ -67,9 +67,11 @@ class BashLexer(RegexLexer): 'data': [ (r'(?s)\$?"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double), (r"(?s)\$?'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single), - (r';', Text), + (r';', Punctuation), + (r'&', Punctuation), + (r'\|', Punctuation), (r'\s+', Text), - (r'[^=\s\[\]{}()$"\'`\\<]+', Text), + (r'[^=\s\[\]{}()$"\'`\\<&|;]+', Text), (r'\d+(?= |\Z)', Number), (r'\$#?(\w+|.)', Name.Variable), (r'<', Text), @@ -206,7 +208,7 @@ class BatchLexer(RegexLexer): *New in Pygments 0.7.* """ name = 'Batchfile' - aliases = ['bat'] + aliases = ['bat', 'dosbatch', 'winbatch'] filenames = ['*.bat', '*.cmd'] mimetypes = ['application/x-dos-batch'] @@ -329,8 +331,8 @@ class PowerShellLexer(RegexLexer): *New in Pygments 1.5.* """ name = 'PowerShell' - aliases = ['powershell', 'posh', 'ps1'] - filenames = ['*.ps1'] + aliases = ['powershell', 'posh', 'ps1', 'psm1'] + filenames = ['*.ps1','*.psm1'] mimetypes = ['text/x-powershell'] flags = re.DOTALL | re.IGNORECASE | re.MULTILINE @@ -342,7 +344,7 @@ class PowerShellLexer(RegexLexer): 'dynamicparam do default continue cmdletbinding break begin alias \\? 
' '% #script #private #local #global mandatory parametersetname position ' 'valuefrompipeline valuefrompipelinebypropertyname ' - 'valuefromremainingarguments helpmessage try catch').split() + 'valuefromremainingarguments helpmessage try catch throw').split() operators = ( 'and as band bnot bor bxor casesensitive ccontains ceq cge cgt cle ' @@ -368,12 +370,15 @@ class PowerShellLexer(RegexLexer): tokens = { 'root': [ + # we need to count pairs of parentheses for correct highlight + # of '$(...)' blocks in strings + (r'\(', Punctuation, 'child'), (r'\s+', Text), (r'^(\s*#[#\s]*)(\.(?:%s))([^\n]*$)' % '|'.join(commenthelp), bygroups(Comment, String.Doc, Comment)), (r'#[^\n]*?$', Comment), (r'(<|<)#', Comment.Multiline, 'multline'), - (r'@"\n.*?\n"@', String.Heredoc), + (r'@"\n', String.Heredoc, 'heredoc-double'), (r"@'\n.*?\n'@", String.Heredoc), # escaped syntax (r'`[\'"$@-]', Punctuation), @@ -387,7 +392,11 @@ class PowerShellLexer(RegexLexer): (r'\[[a-z_\[][a-z0-9_. `,\[\]]*\]', Name.Constant), # .net [type]s (r'-[a-z_][a-z0-9_]*', Name), (r'\w+', Name), - (r'[.,{}\[\]$()=+*/\\&%!~?^`|<>-]', Punctuation), + (r'[.,;@{}\[\]$()=+*/\\&%!~?^`|<>-]|::', Punctuation), + ], + 'child': [ + (r'\)', Punctuation, '#pop'), + include('root'), ], 'multline': [ (r'[^#&.]+', Comment.Multiline), @@ -396,15 +405,17 @@ class PowerShellLexer(RegexLexer): (r'[#&.]', Comment.Multiline), ], 'string': [ + (r"`[0abfnrtv'\"\$]", String.Escape), (r'[^$`"]+', String.Double), - (r'\$\(', String.Interpol, 'interpol'), - (r'`"|""', String.Double), + (r'\$\(', Punctuation, 'child'), + (r'""', String.Double), (r'[`$]', String.Double), (r'"', String.Double, '#pop'), ], - 'interpol': [ - (r'[^$)]+', String.Interpol), - (r'\$\(', String.Interpol, '#push'), - (r'\)', String.Interpol, '#pop'), + 'heredoc-double': [ + (r'\n"@', String.Heredoc, '#pop'), + (r'\$\(', Punctuation, 'child'), + (r'[^@\n]+"]', String.Heredoc), + (r".", String.Heredoc), ] } diff --git a/pygments/lexers/templates.py 
b/pygments/lexers/templates.py index ff4a0453..63fc5f37 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -766,7 +766,7 @@ class CheetahHtmlLexer(DelegatingLexer): """ name = 'HTML+Cheetah' - aliases = ['html+cheetah', 'html+spitfire'] + aliases = ['html+cheetah', 'html+spitfire', 'htmlcheetah'] mimetypes = ['text/html+cheetah', 'text/html+spitfire'] def __init__(self, **options): @@ -1258,7 +1258,7 @@ class HtmlDjangoLexer(DelegatingLexer): """ name = 'HTML+Django/Jinja' - aliases = ['html+django', 'html+jinja'] + aliases = ['html+django', 'html+jinja', 'htmldjango'] alias_filenames = ['*.html', '*.htm', '*.xhtml'] mimetypes = ['text/html+django', 'text/html+jinja'] diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 47c575c0..f3feec83 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -34,7 +34,7 @@ class IniLexer(RegexLexer): """ name = 'INI' - aliases = ['ini', 'cfg'] + aliases = ['ini', 'cfg', 'dosini'] filenames = ['*.ini', '*.cfg'] mimetypes = ['text/x-ini'] @@ -106,7 +106,7 @@ class PropertiesLexer(RegexLexer): """ name = 'Properties' - aliases = ['properties'] + aliases = ['properties', 'jproperties'] filenames = ['*.properties'] mimetypes = ['text/x-java-properties'] @@ -128,7 +128,7 @@ class SourcesListLexer(RegexLexer): """ name = 'Debian Sourcelist' - aliases = ['sourceslist', 'sources.list'] + aliases = ['sourceslist', 'sources.list', 'debsources'] filenames = ['sources.list'] mimetype = ['application/x-debian-sourceslist'] @@ -1053,7 +1053,7 @@ class DebianControlLexer(RegexLexer): *New in Pygments 0.9.* """ name = 'Debian Control file' - aliases = ['control'] + aliases = ['control', 'debcontrol'] filenames = ['control'] tokens = { @@ -1709,12 +1709,12 @@ class HttpLexer(RegexLexer): tokens = { 'root': [ - (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE)( +)([^ ]+)( +)' - r'(HTTPS?)(/)(1\.[01])(\r?\n|$)', + (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' + 
r'(HTTP)(/)(1\.[01])(\r?\n|$)', bygroups(Name.Function, Text, Name.Namespace, Text, Keyword.Reserved, Operator, Number, Text), 'headers'), - (r'(HTTPS?)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', + (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text, Name.Exception, Text), 'headers'), @@ -1848,6 +1848,8 @@ class EbnfLexer(RegexLexer): Lexer for `ISO/IEC 14977 EBNF <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_ grammars. + + *New in Pygments 1.7.* """ name = 'EBNF' diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index dc8c7c5f..10fb7672 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -67,7 +67,7 @@ class JavascriptLexer(RegexLexer): (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|' - r'throw|try|catch|finally|new|delete|typeof|instanceof|void|' + r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|' r'this)\b', Keyword, 'slashstartsregex'), (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|' @@ -1133,7 +1133,7 @@ class HaxeLexer(RegexLexer): """ name = 'haXe' - aliases = ['hx', 'haXe'] + aliases = ['hx', 'haXe', 'haxe'] filenames = ['*.hx'] mimetypes = ['text/haxe'] @@ -1797,7 +1797,7 @@ class CoffeeScriptLexer(RegexLexer): """ name = 'CoffeeScript' - aliases = ['coffee-script', 'coffeescript'] + aliases = ['coffee-script', 'coffeescript', 'coffee'] filenames = ['*.coffee'] mimetypes = ['text/coffeescript'] diff --git a/pygments/modeline.py b/pygments/modeline.py new file mode 100644 index 00000000..cba1cab2 --- /dev/null +++ b/pygments/modeline.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +""" + pygments.modeline + ~~~~~~~~~~~~~~~~~ + + A simple modeline parser (based on pymodeline). 
+ + :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +__all__ = ['get_filetype_from_buffer'] + +modeline_re = re.compile(r''' + (?: vi | vim | ex ) (?: [<=>]? \d* )? : + .* (?: ft | filetype | syn | syntax ) = ( [^:\s]+ ) +''', re.VERBOSE) + +def get_filetype_from_line(l): + m = modeline_re.search(l) + if m: + return m.group(1) + +def get_filetype_from_buffer(buf, max_lines=5): + """ + Scan the buffer for modelines and return filetype if one is found. + """ + lines = buf.splitlines() + for l in lines[-1:-max_lines-1:-1]: + ret = get_filetype_from_line(l) + if ret: + return ret + for l in lines[max_lines:0:-1]: + ret = get_filetype_from_line(l) + if ret: + return ret + + return None |