author     Georg Brandl <georg@python.org>           2014-09-19 23:02:18 +0200
committer  Georg Brandl <georg@python.org>           2014-09-19 23:02:18 +0200
commit     731527e9183d17d0f9eaf35bc0dd263ba84ae5c1 (patch)
tree       e80c9918f3efb9862c4fce6c45fb8790e97534c1 /pygments
parent     9cb037001991ce4ef3444820d695ddd202dd3b26 (diff)
download   pygments-731527e9183d17d0f9eaf35bc0dd263ba84ae5c1.tar.gz
split up text lexers
Diffstat (limited to 'pygments')
-rw-r--r--  pygments/lexers/_mapping.py    |   56
-rw-r--r--  pygments/lexers/agile.py       |    2
-rw-r--r--  pygments/lexers/asm.py         |   22
-rw-r--r--  pygments/lexers/compiled.py    |    2
-rw-r--r--  pygments/lexers/configs.py     |  373
-rw-r--r--  pygments/lexers/console.py     |   79
-rw-r--r--  pygments/lexers/data.py        |  427
-rw-r--r--  pygments/lexers/diff.py        |  106
-rw-r--r--  pygments/lexers/dsls.py        |   45
-rw-r--r--  pygments/lexers/haskell.py     |    2
-rw-r--r--  pygments/lexers/installers.py  |  110
-rw-r--r--  pygments/lexers/markup.py      |  379
-rw-r--r--  pygments/lexers/math.py        |    2
-rw-r--r--  pygments/lexers/misc/make.py   |  199
-rw-r--r--  pygments/lexers/parsers.py     |  224
-rw-r--r--  pygments/lexers/templates.py   |   19
-rw-r--r--  pygments/lexers/text.py        | 2059
-rw-r--r--  pygments/lexers/textedit.py    |  103
-rw-r--r--  pygments/lexers/textfmts.py    |  277
-rw-r--r--  pygments/lexers/web.py         |    4
20 files changed, 2295 insertions(+), 2195 deletions(-)
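The mechanics behind the module moves below: pygments/lexers/_mapping.py is an auto-generated table that maps each lexer class to the module defining it, and pygments.lexers imports those modules lazily on first lookup. Moving a class therefore only requires rewriting its mapping entry; aliases, filename globs, and MIME types keep working unchanged. A minimal sanity check, assuming a source checkout with this commit applied (module paths such as pygments.lexers.misc.make are specific to this layout and were reshuffled again later):

    from pygments.lexers import get_lexer_by_name

    # Lookup goes through the LEXERS table in _mapping.py, so each alias
    # below should resolve to the new module recorded by this commit,
    # e.g. yaml -> pygments.lexers.data, diff -> pygments.lexers.diff.
    for alias in ('yaml', 'diff', 'ini', 'nginx', 'make'):
        lexer = get_lexer_by_name(alias)
        print(alias, '->', type(lexer).__module__)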
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 928f8a3f..f0848dd4 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -33,15 +33,15 @@ LEXERS = { 'AntlrPerlLexer': ('pygments.lexers.parsers', 'ANTLR With Perl Target', ('antlr-perl',), ('*.G', '*.g'), ()), 'AntlrPythonLexer': ('pygments.lexers.parsers', 'ANTLR With Python Target', ('antlr-python',), ('*.G', '*.g'), ()), 'AntlrRubyLexer': ('pygments.lexers.parsers', 'ANTLR With Ruby Target', ('antlr-ruby', 'antlr-rb'), ('*.G', '*.g'), ()), - 'ApacheConfLexer': ('pygments.lexers.text', 'ApacheConf', ('apacheconf', 'aconf', 'apache'), ('.htaccess', 'apache.conf', 'apache2.conf'), ('text/x-apacheconf',)), + 'ApacheConfLexer': ('pygments.lexers.configs', 'ApacheConf', ('apacheconf', 'aconf', 'apache'), ('.htaccess', 'apache.conf', 'apache2.conf'), ('text/x-apacheconf',)), 'AppleScriptLexer': ('pygments.lexers.scripting', 'AppleScript', ('applescript',), ('*.applescript',), ()), 'AspectJLexer': ('pygments.lexers.jvm', 'AspectJ', ('aspectj',), ('*.aj',), ('text/x-aspectj',)), 'AsymptoteLexer': ('pygments.lexers.graphics', 'Asymptote', ('asy', 'asymptote'), ('*.asy',), ('text/x-asymptote',)), 'AutoItLexer': ('pygments.lexers.automation', 'AutoIt', ('autoit',), ('*.au3',), ('text/x-autoit',)), 'AutohotkeyLexer': ('pygments.lexers.automation', 'autohotkey', ('ahk', 'autohotkey'), ('*.ahk', '*.ahkl'), ('text/x-autohotkey',)), 'AwkLexer': ('pygments.lexers.textedit', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ('application/x-awk',)), - 'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)), - 'BaseMakefileLexer': ('pygments.lexers.text', 'Base Makefile', ('basemake',), (), ()), + 'BBCodeLexer': ('pygments.lexers.markup', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)), + 'BaseMakefileLexer': ('pygments.lexers.misc.make', 'Base Makefile', ('basemake',), (), ()), 'BashLexer': ('pygments.lexers.shell', 'Bash', ('bash', 'sh', 'ksh'), ('*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass', '.bashrc', 'bashrc', '.bash_*', 'bash_*', 'PKGBUILD'), ('application/x-sh', 'application/x-shellscript')), 'BashSessionLexer': ('pygments.lexers.shell', 'Bash Session', ('console',), ('*.sh-session',), ('application/x-shell-session',)), 'BatchLexer': ('pygments.lexers.shell', 'Batchfile', ('bat', 'batch', 'dosbatch', 'winbatch'), ('*.bat', '*.cmd'), ('application/x-dos-batch',)), @@ -53,7 +53,7 @@ LEXERS = { 'BroLexer': ('pygments.lexers.dsls', 'Bro', ('bro',), ('*.bro',), ()), 'BugsLexer': ('pygments.lexers.math', 'BUGS', ('bugs', 'winbugs', 'openbugs'), ('*.bug',), ()), 'CLexer': ('pygments.lexers.c_like.c_cpp', 'C', ('c',), ('*.c', '*.h', '*.idc'), ('text/x-chdr', 'text/x-csrc')), - 'CMakeLexer': ('pygments.lexers.text', 'CMake', ('cmake',), ('*.cmake', 'CMakeLists.txt'), ('text/x-cmake',)), + 'CMakeLexer': ('pygments.lexers.misc.make', 'CMake', ('cmake',), ('*.cmake', 'CMakeLists.txt'), ('text/x-cmake',)), 'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)), 'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()), 'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)), @@ -94,14 +94,13 @@ LEXERS = { 'CythonLexer': ('pygments.lexers.python', 'Cython', ('cython', 'pyx', 'pyrex'), ('*.pyx', '*.pxd', '*.pxi'), ('text/x-cython', 'application/x-cython')), 'DLexer': ('pygments.lexers.c_like.d', 'D', ('d',), 
('*.d', '*.di'), ('text/x-dsrc',)), 'DObjdumpLexer': ('pygments.lexers.asm', 'd-objdump', ('d-objdump',), ('*.d-objdump',), ('text/x-d-objdump',)), - 'DarcsPatchLexer': ('pygments.lexers.text', 'Darcs Patch', ('dpatch',), ('*.dpatch', '*.darcspatch'), ()), + 'DarcsPatchLexer': ('pygments.lexers.diff', 'Darcs Patch', ('dpatch',), ('*.dpatch', '*.darcspatch'), ()), 'DartLexer': ('pygments.lexers.web', 'Dart', ('dart',), ('*.dart',), ('text/x-dart',)), - 'DebianControlLexer': ('pygments.lexers.text', 'Debian Control file', ('control', 'debcontrol'), ('control',), ()), 'DelphiLexer': ('pygments.lexers.pascal', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',), ('text/x-pascal',)), 'DgLexer': ('pygments.lexers.python', 'dg', ('dg',), ('*.dg',), ('text/x-dg',)), - 'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')), + 'DiffLexer': ('pygments.lexers.diff', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')), 'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')), - 'DockerLexer': ('pygments.lexers.text', 'Docker', ('docker', 'dockerfile'), ('Dockerfile', '*.docker'), ('text/x-dockerfile-config',)), + 'DockerLexer': ('pygments.lexers.configs', 'Docker', ('docker', 'dockerfile'), ('Dockerfile', '*.docker'), ('text/x-dockerfile-config',)), 'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)), 'DuelLexer': ('pygments.lexers.web', 'Duel', ('duel', 'jbst', 'jsonml+bst'), ('*.duel', '*.jbst'), ('text/x-duel', 'text/x-jbst')), 'DylanConsoleLexer': ('pygments.lexers.misc.dylan', 'Dylan session', ('dylan-console', 'dylan-repl'), ('*.dylan-console',), ('text/x-dylan-console',)), @@ -109,7 +108,7 @@ LEXERS = { 'DylanLidLexer': ('pygments.lexers.misc.dylan', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)), 'ECLLexer': ('pygments.lexers.misc.ecl', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)), 'ECLexer': ('pygments.lexers.c_like.other', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')), - 'EbnfLexer': ('pygments.lexers.text', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)), + 'EbnfLexer': ('pygments.lexers.parsers', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)), 'EiffelLexer': ('pygments.lexers.eiffel', 'Eiffel', ('eiffel',), ('*.e',), ('text/x-eiffel',)), 'ElixirConsoleLexer': ('pygments.lexers.misc.erlang', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)), 'ElixirLexer': ('pygments.lexers.misc.erlang', 'Elixir', ('elixir', 'ex', 'exs'), ('*.ex', '*.exs'), ('text/x-elixir',)), @@ -131,7 +130,7 @@ LEXERS = { 'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas', 'asm'), ('*.s', '*.S'), ('text/x-gas',)), 'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')), 'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ('application/x-genshi-text', 'text/x-genshi')), - 'GettextLexer': ('pygments.lexers.text', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')), + 'GettextLexer': ('pygments.lexers.textfmts', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')), 'GherkinLexer': ('pygments.lexers.testing', 'Gherkin', ('cucumber', 'gherkin'), 
('*.feature',), ('text/x-gherkin',)), 'GnuplotLexer': ('pygments.lexers.graphics', 'Gnuplot', ('gnuplot',), ('*.plot', '*.plt'), ('text/x-gnuplot',)), 'GoLexer': ('pygments.lexers.c_like.go', 'Go', ('go',), ('*.go',), ('text/x-gosrc',)), @@ -139,7 +138,7 @@ LEXERS = { 'GoodDataCLLexer': ('pygments.lexers.business', 'GoodData-CL', ('gooddata-cl',), ('*.gdc',), ('text/x-gooddata-cl',)), 'GosuLexer': ('pygments.lexers.jvm', 'Gosu', ('gosu',), ('*.gs', '*.gsx', '*.gsp', '*.vark'), ('text/x-gosu',)), 'GosuTemplateLexer': ('pygments.lexers.jvm', 'Gosu Template', ('gst',), ('*.gst',), ('text/x-gosu-template',)), - 'GroffLexer': ('pygments.lexers.text', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')), + 'GroffLexer': ('pygments.lexers.markup', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')), 'GroovyLexer': ('pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy',), ('text/x-groovy',)), 'HamlLexer': ('pygments.lexers.web', 'Haml', ('haml',), ('*.haml',), ('text/x-haml',)), 'HandlebarsHtmlLexer': ('pygments.lexers.templates', 'HTML+Handlebars', ('html+handlebars',), ('*.handlebars', '*.hbs'), ('text/html+handlebars', 'text/x-handlebars-template')), @@ -151,8 +150,8 @@ LEXERS = { 'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')), 'HtmlPhpLexer': ('pygments.lexers.templates', 'HTML+PHP', ('html+php',), ('*.phtml',), ('application/x-php', 'application/x-httpd-php', 'application/x-httpd-php3', 'application/x-httpd-php4', 'application/x-httpd-php5')), 'HtmlSmartyLexer': ('pygments.lexers.templates', 'HTML+Smarty', ('html+smarty',), (), ('text/html+smarty',)), - 'HttpLexer': ('pygments.lexers.text', 'HTTP', ('http',), (), ()), - 'HxmlLexer': ('pygments.lexers.text', 'Hxml', ('haxeml', 'hxml'), ('*.hxml',), ()), + 'HttpLexer': ('pygments.lexers.textfmts', 'HTTP', ('http',), (), ()), + 'HxmlLexer': ('pygments.lexers.dsls', 'Hxml', ('haxeml', 'hxml'), ('*.hxml',), ()), 'HyLexer': ('pygments.lexers.lisp', 'Hy', ('hylang',), ('*.hy',), ('text/x-hy', 'application/x-hy')), 'HybrisLexer': ('pygments.lexers.scripting', 'Hybris', ('hybris', 'hy'), ('*.hy', '*.hyb'), ('text/x-hybris', 'application/x-hybris')), 'IDLLexer': ('pygments.lexers.math', 'IDL', ('idl',), ('*.pro',), ('text/idl',)), @@ -161,10 +160,10 @@ LEXERS = { 'Inform6Lexer': ('pygments.lexers.inform', 'Inform 6', ('inform6', 'i6'), ('*.inf',), ()), 'Inform6TemplateLexer': ('pygments.lexers.inform', 'Inform 6 template', ('i6t',), ('*.i6t',), ()), 'Inform7Lexer': ('pygments.lexers.inform', 'Inform 7', ('inform7', 'i7'), ('*.ni', '*.i7x'), ()), - 'IniLexer': ('pygments.lexers.text', 'INI', ('ini', 'cfg', 'dosini'), ('*.ini', '*.cfg'), ('text/x-ini',)), + 'IniLexer': ('pygments.lexers.configs', 'INI', ('ini', 'cfg', 'dosini'), ('*.ini', '*.cfg'), ('text/x-ini',)), 'IoLexer': ('pygments.lexers.misc.iolang', 'Io', ('io',), ('*.io',), ('text/x-iosrc',)), 'IokeLexer': ('pygments.lexers.jvm', 'Ioke', ('ioke', 'ik'), ('*.ik',), ('text/x-iokesrc',)), - 'IrcLogsLexer': ('pygments.lexers.text', 'IRC logs', ('irc',), ('*.weechatlog',), ('text/x-irclog',)), + 'IrcLogsLexer': ('pygments.lexers.textfmts', 'IRC logs', ('irc',), ('*.weechatlog',), ('text/x-irclog',)), 'JadeLexer': ('pygments.lexers.web', 'Jade', ('jade',), ('*.jade',), ('text/x-jade',)), 'JagsLexer': ('pygments.lexers.math', 'JAGS', ('jags',), ('*.jag', '*.bug'), ()), 'JasminLexer': ('pygments.lexers.jvm', 
'Jasmin', ('jasmin', 'jasminxt'), ('*.j',), ()), @@ -189,7 +188,7 @@ LEXERS = { 'LassoJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Lasso', ('js+lasso', 'javascript+lasso'), (), ('application/x-javascript+lasso', 'text/x-javascript+lasso', 'text/javascript+lasso')), 'LassoLexer': ('pygments.lexers.web', 'Lasso', ('lasso', 'lassoscript'), ('*.lasso', '*.lasso[89]'), ('text/x-lasso',)), 'LassoXmlLexer': ('pygments.lexers.templates', 'XML+Lasso', ('xml+lasso',), (), ('application/xml+lasso',)), - 'LighttpdConfLexer': ('pygments.lexers.text', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)), + 'LighttpdConfLexer': ('pygments.lexers.configs', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)), 'LimboLexer': ('pygments.lexers.inferno', 'Limbo', ('limbo',), ('*.b',), ('text/limbo',)), 'LiquidLexer': ('pygments.lexers.templates', 'liquid', ('liquid',), ('*.liquid',), ()), 'LiterateAgdaLexer': ('pygments.lexers.haskell', 'Literate Agda', ('lagda', 'literate-agda'), ('*.lagda',), ('text/x-literate-agda',)), @@ -202,7 +201,7 @@ LEXERS = { 'LogtalkLexer': ('pygments.lexers.prolog', 'Logtalk', ('logtalk',), ('*.lgt', '*.logtalk'), ('text/x-logtalk',)), 'LuaLexer': ('pygments.lexers.scripting', 'Lua', ('lua',), ('*.lua', '*.wlua'), ('text/x-lua', 'application/x-lua')), 'MOOCodeLexer': ('pygments.lexers.scripting', 'MOOCode', ('moocode', 'moo'), ('*.moo',), ('text/x-moocode',)), - 'MakefileLexer': ('pygments.lexers.text', 'Makefile', ('make', 'makefile', 'mf', 'bsdmake'), ('*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'), ('text/x-makefile',)), + 'MakefileLexer': ('pygments.lexers.misc.make', 'Makefile', ('make', 'makefile', 'mf', 'bsdmake'), ('*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'), ('text/x-makefile',)), 'MakoCssLexer': ('pygments.lexers.templates', 'CSS+Mako', ('css+mako',), (), ('text/css+mako',)), 'MakoHtmlLexer': ('pygments.lexers.templates', 'HTML+Mako', ('html+mako',), (), ('text/html+mako',)), 'MakoJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Mako', ('js+mako', 'javascript+mako'), (), ('application/x-javascript+mako', 'text/x-javascript+mako', 'text/javascript+mako')), @@ -217,7 +216,7 @@ LEXERS = { 'MiniDLexer': ('pygments.lexers.c_like.d', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)), 'ModelicaLexer': ('pygments.lexers.modeling', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)), 'Modula2Lexer': ('pygments.lexers.pascal', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), - 'MoinWikiLexer': ('pygments.lexers.text', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), + 'MoinWikiLexer': ('pygments.lexers.markup', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), 'MonkeyLexer': ('pygments.lexers.misc.basic', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)), 'MoonScriptLexer': ('pygments.lexers.scripting', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')), 'MqlLexer': ('pygments.lexers.c_like.other', 'MQL', ('mql', 'mq4', 'mq5', 'mql4', 'mql5'), ('*.mq4', '*.mq5', '*.mqh'), ('text/x-mql',)), @@ -237,7 +236,7 @@ LEXERS = { 'NesCLexer': ('pygments.lexers.c_like.other', 'nesC', ('nesc',), ('*.nc',), ('text/x-nescsrc',)), 'NewLispLexer': ('pygments.lexers.lisp', 'NewLisp', ('newlisp',), ('*.lsp', '*.nl'), ('text/x-newlisp', 'application/x-newlisp')), 'NewspeakLexer': 
('pygments.lexers.misc.smalltalk', 'Newspeak', ('newspeak',), ('*.ns2',), ('text/x-newspeak',)), - 'NginxConfLexer': ('pygments.lexers.text', 'Nginx configuration file', ('nginx',), (), ('text/x-nginx-conf',)), + 'NginxConfLexer': ('pygments.lexers.configs', 'Nginx configuration file', ('nginx',), (), ('text/x-nginx-conf',)), 'NimrodLexer': ('pygments.lexers.misc.nimrod', 'Nimrod', ('nimrod', 'nim'), ('*.nim', '*.nimrod'), ('text/x-nimrod',)), 'NitLexer': ('pygments.lexers.misc.nit', 'Nit', ('nit',), ('*.nit',), ()), 'NixLexer': ('pygments.lexers.misc.nix', 'Nix', ('nixos', 'nix'), ('*.nix',), ('text/x-nix',)), @@ -265,10 +264,10 @@ LEXERS = { 'PovrayLexer': ('pygments.lexers.graphics', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)), 'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)), 'PrologLexer': ('pygments.lexers.prolog', 'Prolog', ('prolog',), ('*.prolog', '*.pro', '*.pl'), ('text/x-prolog',)), - 'PropertiesLexer': ('pygments.lexers.text', 'Properties', ('properties', 'jproperties'), ('*.properties',), ('text/x-java-properties',)), + 'PropertiesLexer': ('pygments.lexers.configs', 'Properties', ('properties', 'jproperties'), ('*.properties',), ('text/x-java-properties',)), 'ProtoBufLexer': ('pygments.lexers.dsls', 'Protocol Buffer', ('protobuf', 'proto'), ('*.proto',), ()), 'PuppetLexer': ('pygments.lexers.dsls', 'Puppet', ('puppet',), ('*.pp',), ()), - 'PyPyLogLexer': ('pygments.lexers.text', 'PyPy Log', ('pypylog', 'pypy'), ('*.pypylog',), ('application/x-pypylog',)), + 'PyPyLogLexer': ('pygments.lexers.console', 'PyPy Log', ('pypylog', 'pypy'), ('*.pypylog',), ('application/x-pypylog',)), 'Python3Lexer': ('pygments.lexers.python', 'Python 3', ('python3', 'py3'), (), ('text/x-python3', 'application/x-python3')), 'Python3TracebackLexer': ('pygments.lexers.python', 'Python 3.0 Traceback', ('py3tb',), ('*.py3tb',), ('text/x-python3-traceback',)), 'PythonConsoleLexer': ('pygments.lexers.python', 'Python console session', ('pycon',), (), ('text/x-python-doctest',)), @@ -292,13 +291,13 @@ LEXERS = { 'RebolLexer': ('pygments.lexers.misc.rebol', 'REBOL', ('rebol',), ('*.r', '*.r3', '*.reb'), ('text/x-rebol',)), 'RedLexer': ('pygments.lexers.misc.rebol', 'Red', ('red', 'red/system'), ('*.red', '*.reds'), ('text/x-red', 'text/x-red-system')), 'RedcodeLexer': ('pygments.lexers.esoteric', 'Redcode', ('redcode',), ('*.cw',), ()), - 'RegeditLexer': ('pygments.lexers.text', 'reg', ('registry',), ('*.reg',), ('text/x-windows-registry',)), + 'RegeditLexer': ('pygments.lexers.configs', 'reg', ('registry',), ('*.reg',), ('text/x-windows-registry',)), 'RexxLexer': ('pygments.lexers.scripting', 'Rexx', ('rexx', 'arexx'), ('*.rexx', '*.rex', '*.rx', '*.arexx'), ('text/x-rexx',)), 'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)), 'RobotFrameworkLexer': ('pygments.lexers.robotframework', 'RobotFramework', ('robotframework',), ('*.txt', '*.robot'), ('text/x-robotframework',)), 'RqlLexer': ('pygments.lexers.sql', 'RQL', ('rql',), ('*.rql',), ('text/x-rql',)), 'RslLexer': ('pygments.lexers.dsls', 'RSL', ('rsl',), ('*.rsl',), ('text/rsl',)), - 'RstLexer': ('pygments.lexers.text', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')), + 'RstLexer': ('pygments.lexers.markup', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 
'text/prs.fallenstein.rst')), 'RubyConsoleLexer': ('pygments.lexers.ruby', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)), 'RubyLexer': ('pygments.lexers.ruby', 'Ruby', ('rb', 'ruby', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')), 'RustLexer': ('pygments.lexers.c_like.rust', 'Rust', ('rust',), ('*.rs',), ('text/x-rustsrc',)), @@ -317,11 +316,10 @@ LEXERS = { 'SmartyLexer': ('pygments.lexers.templates', 'Smarty', ('smarty',), ('*.tpl',), ('application/x-smarty',)), 'SnobolLexer': ('pygments.lexers.misc.snobol', 'Snobol', ('snobol',), ('*.snobol',), ('text/x-snobol',)), 'SourcePawnLexer': ('pygments.lexers.misc.pawn', 'SourcePawn', ('sp',), ('*.sp',), ('text/x-sourcepawn',)), - 'SourcesListLexer': ('pygments.lexers.text', 'Debian Sourcelist', ('sourceslist', 'sources.list', 'debsources'), ('sources.list',), ()), 'SparqlLexer': ('pygments.lexers.rdf', 'SPARQL', ('sparql',), ('*.rq', '*.sparql'), ('application/sparql-query',)), 'SqlLexer': ('pygments.lexers.sql', 'SQL', ('sql',), ('*.sql',), ('text/x-sql',)), 'SqliteConsoleLexer': ('pygments.lexers.sql', 'sqlite3con', ('sqlite3',), ('*.sqlite3-console',), ('text/x-sqlite3-console',)), - 'SquidConfLexer': ('pygments.lexers.text', 'SquidConf', ('squidconf', 'squid.conf', 'squid'), ('squid.conf',), ('text/x-squidconf',)), + 'SquidConfLexer': ('pygments.lexers.configs', 'SquidConf', ('squidconf', 'squid.conf', 'squid'), ('squid.conf',), ('text/x-squidconf',)), 'SspLexer': ('pygments.lexers.templates', 'Scalate Server Page', ('ssp',), ('*.ssp',), ('application/x-ssp',)), 'StanLexer': ('pygments.lexers.math', 'Stan', ('stan',), ('*.stan',), ()), 'SwiftLexer': ('pygments.lexers.c_like.objective', 'Swift', ('swift',), ('*.swift',), ('text/x-swift',)), @@ -330,9 +328,9 @@ LEXERS = { 'TclLexer': ('pygments.lexers.misc.tcl', 'Tcl', ('tcl',), ('*.tcl',), ('text/x-tcl', 'text/x-script.tcl', 'application/x-tcl')), 'TcshLexer': ('pygments.lexers.shell', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)), 'TeaTemplateLexer': ('pygments.lexers.templates', 'Tea', ('tea',), ('*.tea',), ('text/x-tea',)), - 'TexLexer': ('pygments.lexers.text', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 'text/x-latex')), + 'TexLexer': ('pygments.lexers.markup', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 'text/x-latex')), 'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)), - 'TodotxtLexer': ('pygments.lexers.text', 'Todotxt', ('todotxt',), ('todo.txt', '*.todotxt'), ('text/x-todo',)), + 'TodotxtLexer': ('pygments.lexers.textfmts', 'Todotxt', ('todotxt',), ('todo.txt', '*.todotxt'), ('text/x-todo',)), 'TreetopLexer': ('pygments.lexers.parsers', 'Treetop', ('treetop',), ('*.treetop', '*.tt'), ()), 'TypeScriptLexer': ('pygments.lexers.web', 'TypeScript', ('ts',), ('*.ts',), ('text/x-typescript',)), 'UrbiscriptLexer': ('pygments.lexers.misc.urbi', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)), @@ -346,7 +344,7 @@ LEXERS = { 'VelocityXmlLexer': ('pygments.lexers.templates', 'XML+Velocity', ('xml+velocity',), (), ('application/xml+velocity',)), 'VerilogLexer': ('pygments.lexers.hdl', 'verilog', ('verilog', 'v'), ('*.v',), ('text/x-verilog',)), 'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)), - 'VimLexer': ('pygments.lexers.text', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', 
'_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)), + 'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)), 'XQueryLexer': ('pygments.lexers.web', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')), 'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')), 'XmlErbLexer': ('pygments.lexers.templates', 'XML+Ruby', ('xml+erb', 'xml+ruby'), (), ('application/xml+ruby',)), @@ -356,7 +354,7 @@ LEXERS = { 'XsltLexer': ('pygments.lexers.web', 'XSLT', ('xslt',), ('*.xsl', '*.xslt', '*.xpl'), ('application/xsl+xml', 'application/xslt+xml')), 'XtendLexer': ('pygments.lexers.jvm', 'Xtend', ('xtend',), ('*.xtend',), ('text/x-xtend',)), 'YamlJinjaLexer': ('pygments.lexers.templates', 'YAML+Jinja', ('yaml+jinja', 'salt', 'sls'), ('*.sls',), ('text/x-yaml+jinja', 'text/x-sls')), - 'YamlLexer': ('pygments.lexers.text', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)), + 'YamlLexer': ('pygments.lexers.data', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)), 'ZephirLexer': ('pygments.lexers.web', 'Zephir', ('zephir',), ('*.zep',), ()), } diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index c90d3847..c467548c 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -9,7 +9,7 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexers.functional import SchemeLexer +from pygments.lexers.lisp import SchemeLexer from pygments.lexers.jvm import IokeLexer, ClojureLexer from pygments.lexers.python import PythonLexer, PythonConsoleLexer, \ PythonTracebackLexer, Python3Lexer, Python3TracebackLexer, DgLexer diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index f995264c..0f7673f9 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -12,12 +12,14 @@ import re from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer -from pygments.lexers.compiled import DLexer, CppLexer, CLexer +from pygments.lexers.c_like.c_cpp import CppLexer, CLexer +from pygments.lexers.c_like.d import DLexer from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ - Other, Keyword, Operator + Other, Keyword, Operator -__all__ = ['GasLexer', 'ObjdumpLexer','DObjdumpLexer', 'CppObjdumpLexer', - 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer', 'NasmObjdumpLexer', 'Ca65Lexer'] +__all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer', + 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer', 'NasmObjdumpLexer', + 'Ca65Lexer'] class GasLexer(RegexLexer): @@ -154,11 +156,9 @@ class ObjdumpLexer(RegexLexer): filenames = ['*.objdump'] mimetypes = ['text/x-objdump'] - tokens = _objdump_lexer_tokens(GasLexer) - class DObjdumpLexer(DelegatingLexer): """ For the output of 'objdump -Sr on compiled D files' @@ -220,11 +220,11 @@ class LlvmLexer(RegexLexer): include('keyword'), - (r'%' + identifier, Name.Variable),#Name.Identifier.Local), - (r'@' + identifier, Name.Variable.Global),#Name.Identifier.Global), - (r'%\d+', Name.Variable.Anonymous),#Name.Identifier.Anonymous), - (r'@\d+', Name.Variable.Global),#Name.Identifier.Anonymous), - (r'#\d+', Name.Variable.Global),#Name.Identifier.Global), + (r'%' + identifier, Name.Variable), + (r'@' + identifier, Name.Variable.Global), + (r'%\d+', Name.Variable.Anonymous), + (r'@\d+', Name.Variable.Global), + 
(r'#\d+', Name.Variable.Global), (r'!' + identifier, Name.Variable), (r'!\d+', Name.Variable.Anonymous), (r'c?' + string, String), diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index 8edd6cb5..755a0155 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -9,7 +9,6 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexers.functional import OcamlLexer from pygments.lexers.jvm import JavaLexer, ScalaLexer from pygments.lexers.c_like.c_cpp import CLexer, CppLexer from pygments.lexers.c_like.d import DLexer @@ -24,6 +23,7 @@ from pygments.lexers.fortran import FortranLexer from pygments.lexers.prolog import PrologLexer from pygments.lexers.python import CythonLexer from pygments.lexers.graphics import GLShaderLexer +from pygments.lexers.misc.ml import OcamlLexer from pygments.lexers.misc.basic import BlitzBasicLexer, BlitzMaxLexer, \ MonkeyLexer from pygments.lexers.misc.dylan import DylanLexer, DylanLidLexer, \ diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 44408199..6540615a 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -9,11 +9,108 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer, default, words, bygroups +import re + +from pygments.lexer import RegexLexer, default, words, bygroups, include, using from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation + Number, Punctuation, Whitespace +from pygments.lexers.shell import BashLexer + +__all__ = ['IniLexer', 'RegeditLexer', 'PropertiesLexer', 'KconfigLexer', + 'Cfengine3Lexer', 'ApacheConfLexer', 'SquidConfLexer', + 'NginxConfLexer', 'LighttpdConfLexer', 'DockerLexer'] + + +class IniLexer(RegexLexer): + """ + Lexer for configuration files in INI style. + """ + + name = 'INI' + aliases = ['ini', 'cfg', 'dosini'] + filenames = ['*.ini', '*.cfg'] + mimetypes = ['text/x-ini'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r'[;#].*', Comment.Single), + (r'\[.*?\]$', Keyword), + (r'(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)', + bygroups(Name.Attribute, Text, Operator, Text, String)) + ] + } + + def analyse_text(text): + npos = text.find('\n') + if npos < 3: + return False + return text[0] == '[' and text[npos-1] == ']' + + +class RegeditLexer(RegexLexer): + """ + Lexer for `Windows Registry + <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced + by regedit. + + .. versionadded:: 1.6 + """ + + name = 'reg' + aliases = ['registry'] + filenames = ['*.reg'] + mimetypes = ['text/x-windows-registry'] + + tokens = { + 'root': [ + (r'Windows Registry Editor.*', Text), + (r'\s+', Text), + (r'[;#].*', Comment.Single), + (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$', + bygroups(Keyword, Operator, Name.Builtin, Keyword)), + # String keys, which obey somewhat normal escaping + (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)', + bygroups(Name.Attribute, Text, Operator, Text), + 'value'), + # Bare keys (includes @) + (r'(.*?)([ \t]*)(=)([ \t]*)', + bygroups(Name.Attribute, Text, Operator, Text), + 'value'), + ], + 'value': [ + (r'-', Operator, '#pop'), # delete value + (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)', + bygroups(Name.Variable, Punctuation, Number), '#pop'), + # As far as I know, .reg files do not support line continuation. 
+ (r'.*', String, '#pop'), + ] + } -__all__ = ['KconfigLexer', 'Cfengine3Lexer'] + def analyse_text(text): + return text.startswith('Windows Registry Editor') + + +class PropertiesLexer(RegexLexer): + """ + Lexer for configuration files in Java's properties format. + + .. versionadded:: 1.4 + """ + + name = 'Properties' + aliases = ['properties', 'jproperties'] + filenames = ['*.properties'] + mimetypes = ['text/x-java-properties'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r'(?:[;#]|//).*$', Comment), + (r'(.*?)([ \t]*)([=:])([ \t]*)(.*(?:(?<=\\)\n.*)*)', + bygroups(Name.Attribute, Text, Operator, Text, String)), + ], + } def _rx_indent(level): @@ -164,3 +261,273 @@ class Cfengine3Lexer(RegexLexer): (r'\s+', Text), ], } + + +class ApacheConfLexer(RegexLexer): + """ + Lexer for configuration files following the Apache config file + format. + + .. versionadded:: 0.6 + """ + + name = 'ApacheConf' + aliases = ['apacheconf', 'aconf', 'apache'] + filenames = ['.htaccess', 'apache.conf', 'apache2.conf'] + mimetypes = ['text/x-apacheconf'] + flags = re.MULTILINE | re.IGNORECASE + + tokens = { + 'root': [ + (r'\s+', Text), + (r'(#.*?)$', Comment), + (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)', + bygroups(Name.Tag, Text, String, Name.Tag)), + (r'([a-z]\w*)(\s+)', + bygroups(Name.Builtin, Text), 'value'), + (r'\.+', Text), + ], + 'value': [ + (r'$', Text, '#pop'), + (r'[^\S\n]+', Text), + (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), + (r'\d+', Number), + (r'/([a-z0-9][\w./-]+)', String.Other), + (r'(on|off|none|any|all|double|email|dns|min|minimal|' + r'os|productonly|full|emerg|alert|crit|error|warn|' + r'notice|info|debug|registry|script|inetd|standalone|' + r'user|group)\b', Keyword), + (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double), + (r'[^\s"]+', Text) + ] + } + + +class SquidConfLexer(RegexLexer): + """ + Lexer for `squid <http://www.squid-cache.org/>`_ configuration files. + + .. 
versionadded:: 0.9 + """ + + name = 'SquidConf' + aliases = ['squidconf', 'squid.conf', 'squid'] + filenames = ['squid.conf'] + mimetypes = ['text/x-squidconf'] + flags = re.IGNORECASE + + keywords = ( + "access_log", "acl", "always_direct", "announce_host", + "announce_period", "announce_port", "announce_to", "anonymize_headers", + "append_domain", "as_whois_server", "auth_param_basic", + "authenticate_children", "authenticate_program", "authenticate_ttl", + "broken_posts", "buffered_logs", "cache_access_log", "cache_announce", + "cache_dir", "cache_dns_program", "cache_effective_group", + "cache_effective_user", "cache_host", "cache_host_acl", + "cache_host_domain", "cache_log", "cache_mem", "cache_mem_high", + "cache_mem_low", "cache_mgr", "cachemgr_passwd", "cache_peer", + "cache_peer_access", "cahce_replacement_policy", "cache_stoplist", + "cache_stoplist_pattern", "cache_store_log", "cache_swap", + "cache_swap_high", "cache_swap_log", "cache_swap_low", "client_db", + "client_lifetime", "client_netmask", "connect_timeout", "coredump_dir", + "dead_peer_timeout", "debug_options", "delay_access", "delay_class", + "delay_initial_bucket_level", "delay_parameters", "delay_pools", + "deny_info", "dns_children", "dns_defnames", "dns_nameservers", + "dns_testnames", "emulate_httpd_log", "err_html_text", + "fake_user_agent", "firewall_ip", "forwarded_for", "forward_snmpd_port", + "fqdncache_size", "ftpget_options", "ftpget_program", "ftp_list_width", + "ftp_passive", "ftp_user", "half_closed_clients", "header_access", + "header_replace", "hierarchy_stoplist", "high_response_time_warning", + "high_page_fault_warning", "hosts_file", "htcp_port", "http_access", + "http_anonymizer", "httpd_accel", "httpd_accel_host", + "httpd_accel_port", "httpd_accel_uses_host_header", + "httpd_accel_with_proxy", "http_port", "http_reply_access", + "icp_access", "icp_hit_stale", "icp_port", "icp_query_timeout", + "ident_lookup", "ident_lookup_access", "ident_timeout", + "incoming_http_average", "incoming_icp_average", "inside_firewall", + "ipcache_high", "ipcache_low", "ipcache_size", "local_domain", + "local_ip", "logfile_rotate", "log_fqdn", "log_icp_queries", + "log_mime_hdrs", "maximum_object_size", "maximum_single_addr_tries", + "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr", + "mcast_miss_encode_key", "mcast_miss_port", "memory_pools", + "memory_pools_limit", "memory_replacement_policy", "mime_table", + "min_http_poll_cnt", "min_icp_poll_cnt", "minimum_direct_hops", + "minimum_object_size", "minimum_retry_timeout", "miss_access", + "negative_dns_ttl", "negative_ttl", "neighbor_timeout", + "neighbor_type_domain", "netdb_high", "netdb_low", "netdb_ping_period", + "netdb_ping_rate", "never_direct", "no_cache", "passthrough_proxy", + "pconn_timeout", "pid_filename", "pinger_program", "positive_dns_ttl", + "prefer_direct", "proxy_auth", "proxy_auth_realm", "query_icmp", + "quick_abort", "quick_abort", "quick_abort_max", "quick_abort_min", + "quick_abort_pct", "range_offset_limit", "read_timeout", + "redirect_children", "redirect_program", + "redirect_rewrites_host_header", "reference_age", "reference_age", + "refresh_pattern", "reload_into_ims", "request_body_max_size", + "request_size", "request_timeout", "shutdown_lifetime", + "single_parent_bypass", "siteselect_timeout", "snmp_access", + "snmp_incoming_address", "snmp_port", "source_ping", "ssl_proxy", + "store_avg_object_size", "store_objects_per_bucket", + "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs", + "tcp_incoming_address", 
"tcp_outgoing_address", "tcp_recv_bufsize", + "test_reachability", "udp_hit_obj", "udp_hit_obj_size", + "udp_incoming_address", "udp_outgoing_address", "unique_hostname", + "unlinkd_program", "uri_whitespace", "useragent_log", + "visible_hostname", "wais_relay", "wais_relay_host", "wais_relay_port", + ) + + opts = ( + "proxy-only", "weight", "ttl", "no-query", "default", "round-robin", + "multicast-responder", "on", "off", "all", "deny", "allow", "via", + "parent", "no-digest", "heap", "lru", "realm", "children", "q1", "q2", + "credentialsttl", "none", "disable", "offline_toggle", "diskd", + ) + + actions = ( + "shutdown", "info", "parameter", "server_list", "client_list", + r'squid.conf', + ) + + actions_stats = ( + "objects", "vm_objects", "utilization", "ipcache", "fqdncache", "dns", + "redirector", "io", "reply_headers", "filedescriptors", "netdb", + ) + + actions_log = ("status", "enable", "disable", "clear") + + acls = ( + "url_regex", "urlpath_regex", "referer_regex", "port", "proto", + "req_mime_type", "rep_mime_type", "method", "browser", "user", "src", + "dst", "time", "dstdomain", "ident", "snmp_community", + ) + + ip_re = ( + r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|' + r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|' + r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|' + r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}' + r'(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|' + r'(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|' + r'[1-9]?\d)){3}))' + ) + + tokens = { + 'root': [ + (r'\s+', Whitespace), + (r'#', Comment, 'comment'), + (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword), + (words(opts, prefix=r'\b', suffix=r'\b'), Name.Constant), + # Actions + (words(actions, prefix=r'\b', suffix=r'\b'), String), + (words(actions_stats, prefix=r'stats/', suffix=r'\b'), String), + (words(actions_log, prefix=r'log/', suffix=r'='), String), + (words(acls, prefix=r'\b', suffix=r'\b'), Keyword), + (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float), + (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number), + (r'\S+', Text), + ], + 'comment': [ + (r'\s*TAG:.*', String.Escape, '#pop'), + (r'.*', Comment, '#pop'), + ], + } + + +class NginxConfLexer(RegexLexer): + """ + Lexer for `Nginx <http://nginx.net/>`_ configuration files. + + .. versionadded:: 0.11 + """ + name = 'Nginx configuration file' + aliases = ['nginx'] + filenames = [] + mimetypes = ['text/x-nginx-conf'] + + tokens = { + 'root': [ + (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)), + (r'[^\s;#]+', Keyword, 'stmt'), + include('base'), + ], + 'block': [ + (r'}', Punctuation, '#pop:2'), + (r'[^\s;#]+', Keyword.Namespace, 'stmt'), + include('base'), + ], + 'stmt': [ + (r'{', Punctuation, 'block'), + (r';', Punctuation, '#pop'), + include('base'), + ], + 'base': [ + (r'#.*\n', Comment.Single), + (r'on|off', Name.Constant), + (r'\$[^\s;#()]+', Name.Variable), + (r'([a-z0-9.-]+)(:)([0-9]+)', + bygroups(Name, Punctuation, Number.Integer)), + (r'[a-z-]+/[a-z-+]+', String), # mimetype + # (r'[a-zA-Z._-]+', Keyword), + (r'[0-9]+[km]?\b', Number.Integer), + (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)), + (r'[:=~]', Punctuation), + (r'[^\s;#{}$]+', String), # catch all + (r'/[^\s;#]*', Name), # pathname + (r'\s+', Text), + (r'[$;]', Text), # leftover characters + ], + } + + +class LighttpdConfLexer(RegexLexer): + """ + Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files. + + .. 
versionadded:: 0.11 + """ + name = 'Lighttpd configuration file' + aliases = ['lighty', 'lighttpd'] + filenames = [] + mimetypes = ['text/x-lighttpd-conf'] + + tokens = { + 'root': [ + (r'#.*\n', Comment.Single), + (r'/\S*', Name), # pathname + (r'[a-zA-Z._-]+', Keyword), + (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), + (r'[0-9]+', Number), + (r'=>|=~|\+=|==|=|\+', Operator), + (r'\$[A-Z]+', Name.Builtin), + (r'[(){}\[\],]', Punctuation), + (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double), + (r'\s+', Text), + ], + + } + + +class DockerLexer(RegexLexer): + """ + Lexer for `Docker <http://docker.io>`_ configuration files. + + .. versionadded:: 2.0 + """ + name = 'Docker' + aliases = ['docker', 'dockerfile'] + filenames = ['Dockerfile', '*.docker'] + mimetypes = ['text/x-dockerfile-config'] + + _keywords = (r'(?:FROM|MAINTAINER|RUN|CMD|EXPOSE|ENV|ADD|ENTRYPOINT|' + r'VOLUME|WORKDIR)') + + flags = re.IGNORECASE | re.MULTILINE + + tokens = { + 'root': [ + (r'^(ONBUILD)(\s+)(%s)\b' % (_keywords,), + bygroups(Name.Keyword, Whitespace, Keyword)), + (_keywords + r'\b', Keyword), + (r'#.*', Comment), + (r'.+', using(BashLexer)), + ], + } diff --git a/pygments/lexers/console.py b/pygments/lexers/console.py index f259ab9f..334e7195 100644 --- a/pygments/lexers/console.py +++ b/pygments/lexers/console.py @@ -9,10 +9,11 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer -from pygments.token import Generic, Comment, String, Text +from pygments.lexer import RegexLexer, include, bygroups +from pygments.token import Generic, Comment, String, Text, Keyword, Name, \ + Punctuation, Number -__all__ = ['VCTreeStatusLexer'] +__all__ = ['VCTreeStatusLexer', 'PyPyLogLexer'] class VCTreeStatusLexer(RegexLexer): @@ -39,3 +40,75 @@ class VCTreeStatusLexer(RegexLexer): (r'.*\n', Text) ] } + + +class PyPyLogLexer(RegexLexer): + """ + Lexer for PyPy log files. + + .. 
versionadded:: 1.5 + """ + name = "PyPy Log" + aliases = ["pypylog", "pypy"] + filenames = ["*.pypylog"] + mimetypes = ['application/x-pypylog'] + + tokens = { + "root": [ + (r"\[\w+\] {jit-log-.*?$", Keyword, "jit-log"), + (r"\[\w+\] {jit-backend-counts$", Keyword, "jit-backend-counts"), + include("extra-stuff"), + ], + "jit-log": [ + (r"\[\w+\] jit-log-.*?}$", Keyword, "#pop"), + (r"^\+\d+: ", Comment), + (r"--end of the loop--", Comment), + (r"[ifp]\d+", Name), + (r"ptr\d+", Name), + (r"(\()(\w+(?:\.\w+)?)(\))", + bygroups(Punctuation, Name.Builtin, Punctuation)), + (r"[\[\]=,()]", Punctuation), + (r"(\d+\.\d+|inf|-inf)", Number.Float), + (r"-?\d+", Number.Integer), + (r"'.*'", String), + (r"(None|descr|ConstClass|ConstPtr|TargetToken)", Name), + (r"<.*?>+", Name.Builtin), + (r"(label|debug_merge_point|jump|finish)", Name.Class), + (r"(int_add_ovf|int_add|int_sub_ovf|int_sub|int_mul_ovf|int_mul|" + r"int_floordiv|int_mod|int_lshift|int_rshift|int_and|int_or|" + r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|" + r"int_is_true|" + r"uint_floordiv|uint_ge|uint_lt|" + r"float_add|float_sub|float_mul|float_truediv|float_neg|" + r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|" + r"ptr_eq|ptr_ne|instance_ptr_eq|instance_ptr_ne|" + r"cast_int_to_float|cast_float_to_int|" + r"force_token|quasiimmut_field|same_as|virtual_ref_finish|" + r"virtual_ref|mark_opaque_ptr|" + r"call_may_force|call_assembler|call_loopinvariant|" + r"call_release_gil|call_pure|call|" + r"new_with_vtable|new_array|newstr|newunicode|new|" + r"arraylen_gc|" + r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|" + r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|" + r"getfield_gc|getinteriorfield_gc|setinteriorfield_gc|" + r"getfield_raw|setfield_gc|setfield_raw|" + r"strgetitem|strsetitem|strlen|copystrcontent|" + r"unicodegetitem|unicodesetitem|unicodelen|" + r"guard_true|guard_false|guard_value|guard_isnull|" + r"guard_nonnull_class|guard_nonnull|guard_class|guard_no_overflow|" + r"guard_not_forced|guard_no_exception|guard_not_invalidated)", + Name.Builtin), + include("extra-stuff"), + ], + "jit-backend-counts": [ + (r"\[\w+\] jit-backend-counts}$", Keyword, "#pop"), + (r":", Punctuation), + (r"\d+", Number), + include("extra-stuff"), + ], + "extra-stuff": [ + (r"\s+", Text), + (r"#.*?$", Comment), + ], + } diff --git a/pygments/lexers/data.py b/pygments/lexers/data.py new file mode 100644 index 00000000..d1d33a1e --- /dev/null +++ b/pygments/lexers/data.py @@ -0,0 +1,427 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.data + ~~~~~~~~~~~~~~~~~~~~ + + Lexers for data file format. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import ExtendedRegexLexer, LexerContext, include, bygroups +from pygments.token import Text, Comment, Keyword, Name, String, Number, \ + Punctuation, Literal + +__all__ = ['YamlLexer'] + + +class YamlLexerContext(LexerContext): + """Indentation context for the YAML lexer.""" + + def __init__(self, *args, **kwds): + super(YamlLexerContext, self).__init__(*args, **kwds) + self.indent_stack = [] + self.indent = -1 + self.next_indent = 0 + self.block_scalar_indent = None + + +class YamlLexer(ExtendedRegexLexer): + """ + Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization + language. + + .. 
versionadded:: 0.11 + """ + + name = 'YAML' + aliases = ['yaml'] + filenames = ['*.yaml', '*.yml'] + mimetypes = ['text/x-yaml'] + + def something(token_class): + """Do not produce empty tokens.""" + def callback(lexer, match, context): + text = match.group() + if not text: + return + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def reset_indent(token_class): + """Reset the indentation levels.""" + def callback(lexer, match, context): + text = match.group() + context.indent_stack = [] + context.indent = -1 + context.next_indent = 0 + context.block_scalar_indent = None + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def save_indent(token_class, start=False): + """Save a possible indentation level.""" + def callback(lexer, match, context): + text = match.group() + extra = '' + if start: + context.next_indent = len(text) + if context.next_indent < context.indent: + while context.next_indent < context.indent: + context.indent = context.indent_stack.pop() + if context.next_indent > context.indent: + extra = text[context.indent:] + text = text[:context.indent] + else: + context.next_indent += len(text) + if text: + yield match.start(), token_class, text + if extra: + yield match.start()+len(text), token_class.Error, extra + context.pos = match.end() + return callback + + def set_indent(token_class, implicit=False): + """Set the previously saved indentation level.""" + def callback(lexer, match, context): + text = match.group() + if context.indent < context.next_indent: + context.indent_stack.append(context.indent) + context.indent = context.next_indent + if not implicit: + context.next_indent += len(text) + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def set_block_scalar_indent(token_class): + """Set an explicit indentation level for a block scalar.""" + def callback(lexer, match, context): + text = match.group() + context.block_scalar_indent = None + if not text: + return + increment = match.group(1) + if increment: + current_indent = max(context.indent, 0) + increment = int(increment) + context.block_scalar_indent = current_indent + increment + if text: + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def parse_block_scalar_empty_line(indent_token_class, content_token_class): + """Process an empty line in a block scalar.""" + def callback(lexer, match, context): + text = match.group() + if (context.block_scalar_indent is None or + len(text) <= context.block_scalar_indent): + if text: + yield match.start(), indent_token_class, text + else: + indentation = text[:context.block_scalar_indent] + content = text[context.block_scalar_indent:] + yield match.start(), indent_token_class, indentation + yield (match.start()+context.block_scalar_indent, + content_token_class, content) + context.pos = match.end() + return callback + + def parse_block_scalar_indent(token_class): + """Process indentation spaces in a block scalar.""" + def callback(lexer, match, context): + text = match.group() + if context.block_scalar_indent is None: + if len(text) <= max(context.indent, 0): + context.stack.pop() + context.stack.pop() + return + context.block_scalar_indent = len(text) + else: + if len(text) < context.block_scalar_indent: + context.stack.pop() + context.stack.pop() + return + if text: + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def parse_plain_scalar_indent(token_class): + """Process 
indentation spaces in a plain scalar.""" + def callback(lexer, match, context): + text = match.group() + if len(text) <= context.indent: + context.stack.pop() + context.stack.pop() + return + if text: + yield match.start(), token_class, text + context.pos = match.end() + return callback + + tokens = { + # the root rules + 'root': [ + # ignored whitespaces + (r'[ ]+(?=#|$)', Text), + # line breaks + (r'\n+', Text), + # a comment + (r'#[^\n]*', Comment.Single), + # the '%YAML' directive + (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'), + # the %TAG directive + (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'), + # document start and document end indicators + (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace), + 'block-line'), + # indentation spaces + (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True), + ('block-line', 'indentation')), + ], + + # trailing whitespaces after directives or a block scalar indicator + 'ignored-line': [ + # ignored whitespaces + (r'[ ]+(?=#|$)', Text), + # a comment + (r'#[^\n]*', Comment.Single), + # line break + (r'\n', Text, '#pop:2'), + ], + + # the %YAML directive + 'yaml-directive': [ + # the version number + (r'([ ]+)([0-9]+\.[0-9]+)', + bygroups(Text, Number), 'ignored-line'), + ], + + # the %YAG directive + 'tag-directive': [ + # a tag handle and the corresponding prefix + (r'([ ]+)(!|![0-9A-Za-z_-]*!)' + r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)', + bygroups(Text, Keyword.Type, Text, Keyword.Type), + 'ignored-line'), + ], + + # block scalar indicators and indentation spaces + 'indentation': [ + # trailing whitespaces are ignored + (r'[ ]*$', something(Text), '#pop:2'), + # whitespaces preceeding block collection indicators + (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)), + # block collection indicators + (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)), + # the beginning a block line + (r'[ ]*', save_indent(Text), '#pop'), + ], + + # an indented line in the block context + 'block-line': [ + # the line end + (r'[ ]*(?=#|$)', something(Text), '#pop'), + # whitespaces separating tokens + (r'[ ]+', Text), + # tags, anchors and aliases, + include('descriptors'), + # block collections and scalars + include('block-nodes'), + # flow collections and quoted scalars + include('flow-nodes'), + # a plain scalar + (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])', + something(Name.Variable), + 'plain-scalar-in-block-context'), + ], + + # tags, anchors, aliases + 'descriptors': [ + # a full-form tag + (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type), + # a tag in the form '!', '!suffix' or '!handle!suffix' + (r'!(?:[0-9A-Za-z_-]+)?' 
+ r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type), + # an anchor + (r'&[0-9A-Za-z_-]+', Name.Label), + # an alias + (r'\*[0-9A-Za-z_-]+', Name.Variable), + ], + + # block collections and scalars + 'block-nodes': [ + # implicit key + (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)), + # literal and folded scalars + (r'[|>]', Punctuation.Indicator, + ('block-scalar-content', 'block-scalar-header')), + ], + + # flow collections and quoted scalars + 'flow-nodes': [ + # a flow sequence + (r'\[', Punctuation.Indicator, 'flow-sequence'), + # a flow mapping + (r'\{', Punctuation.Indicator, 'flow-mapping'), + # a single-quoted scalar + (r'\'', String, 'single-quoted-scalar'), + # a double-quoted scalar + (r'\"', String, 'double-quoted-scalar'), + ], + + # the content of a flow collection + 'flow-collection': [ + # whitespaces + (r'[ ]+', Text), + # line breaks + (r'\n+', Text), + # a comment + (r'#[^\n]*', Comment.Single), + # simple indicators + (r'[?:,]', Punctuation.Indicator), + # tags, anchors and aliases + include('descriptors'), + # nested collections and quoted scalars + include('flow-nodes'), + # a plain scalar + (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])', + something(Name.Variable), + 'plain-scalar-in-flow-context'), + ], + + # a flow sequence indicated by '[' and ']' + 'flow-sequence': [ + # include flow collection rules + include('flow-collection'), + # the closing indicator + (r'\]', Punctuation.Indicator, '#pop'), + ], + + # a flow mapping indicated by '{' and '}' + 'flow-mapping': [ + # include flow collection rules + include('flow-collection'), + # the closing indicator + (r'\}', Punctuation.Indicator, '#pop'), + ], + + # block scalar lines + 'block-scalar-content': [ + # line break + (r'\n', Text), + # empty line + (r'^[ ]+$', + parse_block_scalar_empty_line(Text, Name.Constant)), + # indentation spaces (we may leave the state here) + (r'^[ ]*', parse_block_scalar_indent(Text)), + # line content + (r'[^\n\r\f\v]+', Name.Constant), + ], + + # the content of a literal or folded scalar + 'block-scalar-header': [ + # indentation indicator followed by chomping flag + (r'([1-9])?[+-]?(?=[ ]|$)', + set_block_scalar_indent(Punctuation.Indicator), + 'ignored-line'), + # chomping flag followed by indentation indicator + (r'[+-]?([1-9])?(?=[ ]|$)', + set_block_scalar_indent(Punctuation.Indicator), + 'ignored-line'), + ], + + # ignored and regular whitespaces in quoted scalars + 'quoted-scalar-whitespaces': [ + # leading and trailing whitespaces are ignored + (r'^[ ]+', Text), + (r'[ ]+$', Text), + # line breaks are ignored + (r'\n+', Text), + # other whitespaces are a part of the value + (r'[ ]+', Name.Variable), + ], + + # single-quoted scalars + 'single-quoted-scalar': [ + # include whitespace and line break rules + include('quoted-scalar-whitespaces'), + # escaping of the quote character + (r'\'\'', String.Escape), + # regular non-whitespace characters + (r'[^ \t\n\r\f\v\']+', String), + # the closing quote + (r'\'', String, '#pop'), + ], + + # double-quoted scalars + 'double-quoted-scalar': [ + # include whitespace and line break rules + include('quoted-scalar-whitespaces'), + # escaping of special characters + (r'\\[0abt\tn\nvfre "\\N_LP]', String), + # escape codes + (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})', + String.Escape), + # regular non-whitespace characters + (r'[^ \t\n\r\f\v\"\\]+', String), + # the closing quote + (r'"', String, '#pop'), + ], + + # the beginning of a new line while scanning a plain scalar + 
'plain-scalar-in-block-context-new-line': [ + # empty lines + (r'^[ ]+$', Text), + # line breaks + (r'\n+', Text), + # document start and document end indicators + (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'), + # indentation spaces (we may leave the block line state here) + (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'), + ], + + # a plain scalar in the block context + 'plain-scalar-in-block-context': [ + # the scalar ends with the ':' indicator + (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'), + # the scalar ends with whitespaces followed by a comment + (r'[ ]+(?=#)', Text, '#pop'), + # trailing whitespaces are ignored + (r'[ ]+$', Text), + # line breaks are ignored + (r'\n+', Text, 'plain-scalar-in-block-context-new-line'), + # other whitespaces are a part of the value + (r'[ ]+', Literal.Scalar.Plain), + # regular non-whitespace characters + (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain), + ], + + # a plain scalar is the flow context + 'plain-scalar-in-flow-context': [ + # the scalar ends with an indicator character + (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'), + # the scalar ends with a comment + (r'[ ]+(?=#)', Text, '#pop'), + # leading and trailing whitespaces are ignored + (r'^[ ]+', Text), + (r'[ ]+$', Text), + # line breaks are ignored + (r'\n+', Text), + # other whitespaces are a part of the value + (r'[ ]+', Name.Variable), + # regular non-whitespace characters + (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable), + ], + + } + + def get_tokens_unprocessed(self, text=None, context=None): + if context is None: + context = YamlLexerContext(text, 0) + return super(YamlLexer, self).get_tokens_unprocessed(text, context) diff --git a/pygments/lexers/diff.py b/pygments/lexers/diff.py new file mode 100644 index 00000000..fe6435c5 --- /dev/null +++ b/pygments/lexers/diff.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.diff + ~~~~~~~~~~~~~~~~~~~~ + + Lexers for diff/patch formats. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups +from pygments.token import Text, Comment, Operator, Keyword, Name, Generic, \ + Literal + +__all__ = ['DiffLexer', 'DarcsPatchLexer'] + + +class DiffLexer(RegexLexer): + """ + Lexer for unified or context-style diffs or patches. + """ + + name = 'Diff' + aliases = ['diff', 'udiff'] + filenames = ['*.diff', '*.patch'] + mimetypes = ['text/x-diff', 'text/x-patch'] + + tokens = { + 'root': [ + (r' .*\n', Text), + (r'\+.*\n', Generic.Inserted), + (r'-.*\n', Generic.Deleted), + (r'!.*\n', Generic.Strong), + (r'@.*\n', Generic.Subheading), + (r'([Ii]ndex|diff).*\n', Generic.Heading), + (r'=.*\n', Generic.Heading), + (r'.*\n', Text), + ] + } + + def analyse_text(text): + if text[:7] == 'Index: ': + return True + if text[:5] == 'diff ': + return True + if text[:4] == '--- ': + return 0.9 + + +class DarcsPatchLexer(RegexLexer): + """ + DarcsPatchLexer is a lexer for the various versions of the darcs patch + format. Examples of this format are derived by commands such as + ``darcs annotate --patch`` and ``darcs send``. + + .. 
+class DarcsPatchLexer(RegexLexer):
+    """
+    DarcsPatchLexer is a lexer for the various versions of the darcs patch
+    format.  Examples of this format are derived by commands such as
+    ``darcs annotate --patch`` and ``darcs send``.
+
+    .. versionadded:: 0.10
+    """
+
+    name = 'Darcs Patch'
+    aliases = ['dpatch']
+    filenames = ['*.dpatch', '*.darcspatch']
+
+    DPATCH_KEYWORDS = ('hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
+                       'replace')
+
+    tokens = {
+        'root': [
+            (r'<', Operator),
+            (r'>', Operator),
+            (r'{', Operator),
+            (r'}', Operator),
+            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
+             bygroups(Operator, Keyword, Name, Text, Name, Operator,
+                      Literal.Date, Text, Operator)),
+            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
+             bygroups(Operator, Keyword, Name, Text, Name, Operator,
+                      Literal.Date, Text), 'comment'),
+            (r'New patches:', Generic.Heading),
+            (r'Context:', Generic.Heading),
+            (r'Patch bundle hash:', Generic.Heading),
+            (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
+             bygroups(Text, Keyword, Text)),
+            (r'\+', Generic.Inserted, "insert"),
+            (r'-', Generic.Deleted, "delete"),
+            (r'.*\n', Text),
+        ],
+        'comment': [
+            (r'[^\]].*\n', Comment),
+            (r'\]', Operator, "#pop"),
+        ],
+        'specialText': [  # darcs add [_CODE_] special operators for clarity
+            (r'\n', Text, "#pop"),  # line-based
+            (r'\[_[^_]*_]', Operator),
+        ],
+        'insert': [
+            include('specialText'),
+            (r'\[', Generic.Inserted),
+            (r'[^\n\[]+', Generic.Inserted),
+        ],
+        'delete': [
+            include('specialText'),
+            (r'\[', Generic.Deleted),
+            (r'[^\n\[]+', Generic.Deleted),
+        ],
+    }
diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py
index bfbc860e..db3badea 100644
--- a/pygments/lexers/dsls.py
+++ b/pygments/lexers/dsls.py
@@ -13,10 +13,10 @@ import re
 
 from pygments.lexer import RegexLexer, bygroups, words, include
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Number, Punctuation, Literal
+    Number, Punctuation, Literal, Generic, Whitespace
 
 __all__ = ['ProtoBufLexer', 'BroLexer', 'PuppetLexer', 'RslLexer',
-           'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer']
+           'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer', 'HxmlLexer']
 
 
 class ProtoBufLexer(RegexLexer):
@@ -504,3 +504,44 @@ class PanLexer(RegexLexer):
             include('root'),
         ],
     }
+
+
+class HxmlLexer(RegexLexer):
+    """
+    Lexer for `haXe build <http://haxe.org/doc/compiler>`_ files.
+
+    .. versionadded:: 1.6
+    """
+    name = 'Hxml'
+    aliases = ['haxeml', 'hxml']
+    filenames = ['*.hxml']
+
+    tokens = {
+        'root': [
+            # Separator
+            (r'(--)(next)', bygroups(Punctuation, Generic.Heading)),
+            # Compiler switches with one dash
+            (r'(-)(prompt|debug|v)', bygroups(Punctuation, Keyword.Keyword)),
+            # Compiler switches with two dashes
+            (r'(--)(neko-source|flash-strict|flash-use-stage|no-opt|no-traces|'
+             r'no-inline|times|no-output)', bygroups(Punctuation, Keyword)),
+            # Targets and other options that take an argument
+            (r'(-)(cpp|js|neko|x|as3|swf9?|swf-lib|php|xml|main|lib|D|resource|'
+             r'cp|cmd)( +)(.+)',
+             bygroups(Punctuation, Keyword, Whitespace, String)),
+            # Options that take only numerical arguments
+            (r'(-)(swf-version)( +)(\d+)',
+             bygroups(Punctuation, Keyword, Whitespace, Number.Integer)),
+            # An option that defines the size, the fps and the background
+            # color of a flash movie
+            (r'(-)(swf-header)( +)(\d+)(:)(\d+)(:)(\d+)(:)([A-Fa-f0-9]{6})',
+             bygroups(Punctuation, Keyword, Whitespace, Number.Integer,
+                      Punctuation, Number.Integer, Punctuation, Number.Integer,
+                      Punctuation, Number.Hex)),
+            # Options with two dashes that take arguments
+            (r'(--)(js-namespace|php-front|php-lib|remap|gen-hx-classes)( +)'
+             r'(.+)', bygroups(Punctuation, Keyword, Whitespace, String)),
+            # Single line comment, multiline ones are not allowed.
+            (r'#.*', Comment.Single)
+        ]
+    }
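As a hedged usage sketch for the new lexer (assuming the ``hxml`` alias is registered in _mapping.py as shown earlier; the build file is invented):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers import get_lexer_by_name

    build = ('# two compilation passes in one build file\n'
             '-main Main\n'
             '-cp src\n'
             '-js bin/main.js\n'
             '--next\n'
             '-swf-version 10\n')
    print(highlight(build, get_lexer_by_name('hxml'), TerminalFormatter()))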
diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py
index 27593986..a6aa55f6 100644
--- a/pygments/lexers/haskell.py
+++ b/pygments/lexers/haskell.py
@@ -503,7 +503,7 @@ class LiterateLexer(Lexer):
                 insertions.append((len(code), [(0, Text, line)]))
             else:
                 # latex-style
-                from pygments.lexers.text import TexLexer
+                from pygments.lexers.markup import TexLexer
                 lxlexer = TexLexer(**self.options)
                 codelines = 0
                 latex = ''
diff --git a/pygments/lexers/installers.py b/pygments/lexers/installers.py
index 758e8fbc..b91613cd 100644
--- a/pygments/lexers/installers.py
+++ b/pygments/lexers/installers.py
@@ -13,7 +13,7 @@ import re
 
 from pygments.lexer import RegexLexer, include, bygroups, using, this
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Punctuation, Generic
+    Punctuation, Generic, Number, Whitespace
 
 __all__ = ['NSISLexer', 'RPMSpecLexer']
@@ -213,3 +213,111 @@ class RPMSpecLexer(RegexLexer):
             (r'%\{[a-zA-Z]\w+\}', Keyword.Constant),
         ]
     }
+
+
+class SourcesListLexer(RegexLexer):
+    """
+    Lexer that highlights Debian sources.list files.
+
+    .. versionadded:: 0.7
+    """
+
+    name = 'Debian Sourcelist'
+    aliases = ['sourceslist', 'sources.list', 'debsources']
+    filenames = ['sources.list']
+    mimetypes = ['application/x-debian-sourceslist']
+
+    tokens = {
+        'root': [
+            (r'\s+', Text),
+            (r'#.*?$', Comment),
+            (r'^(deb(?:-src)?)(\s+)',
+             bygroups(Keyword, Text), 'distribution')
+        ],
+        'distribution': [
+            (r'#.*?$', Comment, '#pop'),
+            (r'\$\(ARCH\)', Name.Variable),
+            (r'[^\s$[]+', String),
+            (r'\[', String.Other, 'escaped-distribution'),
+            (r'\$', String),
+            (r'\s+', Text, 'components')
+        ],
+        'escaped-distribution': [
+            (r'\]', String.Other, '#pop'),
+            (r'\$\(ARCH\)', Name.Variable),
+            (r'[^\]$]+', String.Other),
+            (r'\$', String.Other)
+        ],
+        'components': [
+            (r'#.*?$', Comment, '#pop:2'),
+            (r'$', Text, '#pop:2'),
+            (r'\s+', Text),
+            (r'\S+', Keyword.Pseudo),
+        ]
+    }
+
+    def analyse_text(text):
+        for line in text.split('\n'):
+            line = line.strip()
+            if not (line.startswith('#') or line.startswith('deb ') or
+                    line.startswith('deb-src ') or not line):
+                return False
+        return True
+
+
+class DebianControlLexer(RegexLexer):
+    """
+    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.
+
+    ..
versionadded:: 0.9 + """ + name = 'Debian Control file' + aliases = ['control', 'debcontrol'] + filenames = ['control'] + + tokens = { + 'root': [ + (r'^(Description)', Keyword, 'description'), + (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'), + (r'^((Build-)?Depends)', Keyword, 'depends'), + (r'^((?:Python-)?Version)(:\s*)(\S+)$', + bygroups(Keyword, Text, Number)), + (r'^((?:Installed-)?Size)(:\s*)(\S+)$', + bygroups(Keyword, Text, Number)), + (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$', + bygroups(Keyword, Text, Number)), + (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$', + bygroups(Keyword, Whitespace, String)), + ], + 'maintainer': [ + (r'<[^>]+>', Generic.Strong), + (r'<[^>]+>$', Generic.Strong, '#pop'), + (r',\n?', Text), + (r'.', Text), + ], + 'description': [ + (r'(.*)(Homepage)(: )(\S+)', + bygroups(Text, String, Name, Name.Class)), + (r':.*\n', Generic.Strong), + (r' .*\n', Text), + ('', Text, '#pop'), + ], + 'depends': [ + (r':\s*', Text), + (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)), + (r'\(', Text, 'depend_vers'), + (r',', Text), + (r'\|', Operator), + (r'[\s]+', Text), + (r'[}\)]\s*$', Text, '#pop'), + (r'}', Text), + (r'[^,]$', Name.Function, '#pop'), + (r'([\+\.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)), + (r'\[.*?\]', Name.Entity), + ], + 'depend_vers': [ + (r'\),', Text, '#pop'), + (r'\)[^,]', Text, '#pop:2'), + (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number)) + ] + } diff --git a/pygments/lexers/markup.py b/pygments/lexers/markup.py new file mode 100644 index 00000000..df269790 --- /dev/null +++ b/pygments/lexers/markup.py @@ -0,0 +1,379 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.markup + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for markup languages. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include, bygroups, using, this, \ + do_insertions, default +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Generic +from pygments.util import get_bool_opt, ClassNotFound + +__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer'] + + +class BBCodeLexer(RegexLexer): + """ + A lexer that highlights BBCode(-like) syntax. + + .. versionadded:: 0.6 + """ + + name = 'BBCode' + aliases = ['bbcode'] + mimetypes = ['text/x-bbcode'] + + tokens = { + 'root': [ + (r'[^[]+', Text), + # tag/end tag begin + (r'\[/?\w+', Keyword, 'tag'), + # stray bracket + (r'\[', Text), + ], + 'tag': [ + (r'\s+', Text), + # attribute with value + (r'(\w+)(=)("?[^\s"\]]+"?)', + bygroups(Name.Attribute, Operator, String)), + # tag argument (a la [color=green]) + (r'(=)("?[^\s"\]]+"?)', + bygroups(Operator, String)), + # tag end + (r'\]', Keyword, '#pop'), + ], + } + + +class MoinWikiLexer(RegexLexer): + """ + For MoinMoin (and Trac) Wiki markup. + + .. 
versionadded:: 0.7 + """ + + name = 'MoinMoin/Trac Wiki markup' + aliases = ['trac-wiki', 'moin'] + filenames = [] + mimetypes = ['text/x-trac-wiki'] + flags = re.MULTILINE | re.IGNORECASE + + tokens = { + 'root': [ + (r'^#.*$', Comment), + (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next + # Titles + (r'^(=+)([^=]+)(=+)(\s*#.+)?$', + bygroups(Generic.Heading, using(this), Generic.Heading, String)), + # Literal code blocks, with optional shebang + (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), + (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting + # Lists + (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), + (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), + # Other Formatting + (r'\[\[\w+.*?\]\]', Keyword), # Macro + (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', + bygroups(Keyword, String, Keyword)), # Link + (r'^----+$', Keyword), # Horizontal rules + (r'[^\n\'\[{!_~^,|]+', Text), + (r'\n', Text), + (r'.', Text), + ], + 'codeblock': [ + (r'}}}', Name.Builtin, '#pop'), + # these blocks are allowed to be nested in Trac, but not MoinMoin + (r'{{{', Text, '#push'), + (r'[^{}]+', Comment.Preproc), # slurp boring text + (r'.', Comment.Preproc), # allow loose { or } + ], + } + + +class RstLexer(RegexLexer): + """ + For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup. + + .. versionadded:: 0.7 + + Additional options accepted: + + `handlecodeblocks` + Highlight the contents of ``.. sourcecode:: language``, + ``.. code:: language`` and ``.. code-block:: language`` + directives with a lexer for the given language (default: + ``True``). + + .. versionadded:: 0.8 + """ + name = 'reStructuredText' + aliases = ['rst', 'rest', 'restructuredtext'] + filenames = ['*.rst', '*.rest'] + mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] + flags = re.MULTILINE + + def _handle_sourcecode(self, match): + from pygments.lexers import get_lexer_by_name + + # section header + yield match.start(1), Punctuation, match.group(1) + yield match.start(2), Text, match.group(2) + yield match.start(3), Operator.Word, match.group(3) + yield match.start(4), Punctuation, match.group(4) + yield match.start(5), Text, match.group(5) + yield match.start(6), Keyword, match.group(6) + yield match.start(7), Text, match.group(7) + + # lookup lexer if wanted and existing + lexer = None + if self.handlecodeblocks: + try: + lexer = get_lexer_by_name(match.group(6).strip()) + except ClassNotFound: + pass + indention = match.group(8) + indention_size = len(indention) + code = (indention + match.group(9) + match.group(10) + match.group(11)) + + # no lexer for this language. handle it like it was a code block + if lexer is None: + yield match.start(8), String, code + return + + # highlight the lines with the lexer. + ins = [] + codelines = code.splitlines(True) + code = '' + for line in codelines: + if len(line) > indention_size: + ins.append((len(code), [(0, Text, line[:indention_size])])) + code += line[indention_size:] + else: + code += line + for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)): + yield item + + # from docutils.parsers.rst.states + closers = u'\'")]}>\u2019\u201d\xbb!?' 
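+    # closers (above) and unicode_delimiters (below) are interpolated into
+    # end_string_suffix, a lookahead-only pattern: it matches the empty
+    # string at a line end or just before whitespace, '\x00', or one of
+    # these punctuation/closing characters, i.e. wherever inline markup
+    # may legally terminate.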
+ unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0' + end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' + % (re.escape(unicode_delimiters), + re.escape(closers))) + + tokens = { + 'root': [ + # Heading with overline + (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' + r'(.+)(\n)(\1)(\n)', + bygroups(Generic.Heading, Text, Generic.Heading, + Text, Generic.Heading, Text)), + # Plain heading + (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' + r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', + bygroups(Generic.Heading, Text, Generic.Heading, Text)), + # Bulleted lists + (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + # Numbered lists + (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + # Numbered, but keep words at BOL from becoming lists + (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', + bygroups(Text, Number, using(this, state='inline'))), + (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', + bygroups(Text, Number, using(this, state='inline'))), + # Line blocks + (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', + bygroups(Text, Operator, using(this, state='inline'))), + # Sourcecode directives + (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' + r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', + _handle_sourcecode), + # A directive + (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, + using(this, state='inline'))), + # A reference target + (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', + bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A footnote/citation target + (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', + bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A substitution def + (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, + Punctuation, Text, using(this, state='inline'))), + # Comments + (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), + # Field list + (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)), + (r'^( *)(:.*?:)([ \t]+)(.*?)$', + bygroups(Text, Name.Class, Text, Name.Function)), + # Definition list + (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', + bygroups(using(this, state='inline'), using(this, state='inline'))), + # Code blocks + (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)', + bygroups(String.Escape, Text, String, String, Text, String)), + include('inline'), + ], + 'inline': [ + (r'\\.', Text), # escape + (r'``', String, 'literal'), # code + (r'(`.+?)(<.+?>)(`__?)', # reference with inline target + bygroups(String, String.Interpol, String)), + (r'`.+?`__?', String), # reference + (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', + bygroups(Name.Variable, Name.Attribute)), # role + (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', + bygroups(Name.Attribute, Name.Variable)), # role (content first) + (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis + (r'\*.+?\*', Generic.Emph), # Emphasis + (r'\[.*?\]_', String), # Footnote or citation + (r'<.+?>', Name.Tag), # Hyperlink + (r'[^\\\n\[*`:]+', Text), + (r'.', Text), + ], + 'literal': [ + (r'[^`]+', String), + (r'``' + end_string_suffix, String, '#pop'), + (r'`', String), + ] + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, 
**options) + + def analyse_text(text): + if text[:2] == '..' and text[2:3] != '.': + return 0.3 + p1 = text.find("\n") + p2 = text.find("\n", p1 + 1) + if (p2 > -1 and # has two lines + p1 * 2 + 1 == p2 and # they are the same length + text[p1+1] in '-=' and # the next line both starts and ends with + text[p1+1] == text[p2-1]): # ...a sufficiently high header + return 0.5 + + +class TexLexer(RegexLexer): + """ + Lexer for the TeX and LaTeX typesetting languages. + """ + + name = 'TeX' + aliases = ['tex', 'latex'] + filenames = ['*.tex', '*.aux', '*.toc'] + mimetypes = ['text/x-tex', 'text/x-latex'] + + tokens = { + 'general': [ + (r'%.*?\n', Comment), + (r'[{}]', Name.Builtin), + (r'[&_^]', Name.Builtin), + ], + 'root': [ + (r'\\\[', String.Backtick, 'displaymath'), + (r'\\\(', String, 'inlinemath'), + (r'\$\$', String.Backtick, 'displaymath'), + (r'\$', String, 'inlinemath'), + (r'\\([a-zA-Z]+|.)', Keyword, 'command'), + include('general'), + (r'[^\\$%&_^{}]+', Text), + ], + 'math': [ + (r'\\([a-zA-Z]+|.)', Name.Variable), + include('general'), + (r'[0-9]+', Number), + (r'[-=!+*/()\[\]]', Operator), + (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), + ], + 'inlinemath': [ + (r'\\\)', String, '#pop'), + (r'\$', String, '#pop'), + include('math'), + ], + 'displaymath': [ + (r'\\\]', String, '#pop'), + (r'\$\$', String, '#pop'), + (r'\$', Name.Builtin), + include('math'), + ], + 'command': [ + (r'\[.*?\]', Name.Attribute), + (r'\*', Keyword), + default('#pop'), + ], + } + + def analyse_text(text): + for start in ("\\documentclass", "\\input", "\\documentstyle", + "\\relax"): + if text[:len(start)] == start: + return True + + +class GroffLexer(RegexLexer): + """ + Lexer for the (g)roff typesetting language, supporting groff + extensions. Mainly useful for highlighting manpage sources. + + .. versionadded:: 0.6 + """ + + name = 'Groff' + aliases = ['groff', 'nroff', 'man'] + filenames = ['*.[1234567]', '*.man'] + mimetypes = ['application/x-troff', 'text/troff'] + + tokens = { + 'root': [ + (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), + (r'\.', Punctuation, 'request'), + # Regular characters, slurp till we find a backslash or newline + (r'[^\\\n]*', Text, 'textline'), + ], + 'textline': [ + include('escapes'), + (r'[^\\\n]+', Text), + (r'\n', Text, '#pop'), + ], + 'escapes': [ + # groff has many ways to write escapes. 
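+            # e.g. \" opens a comment, \fB or \fI switches fonts, \(aq is a
+            # two-character glyph name, and \*[name] or \n[reg] interpolate
+            # a string or number register via the bracketed form below.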
+ (r'\\"[^\n]*', Comment), + (r'\\[fn]\w', String.Escape), + (r'\\\(.{2}', String.Escape), + (r'\\.\[.*\]', String.Escape), + (r'\\.', String.Escape), + (r'\\\n', Text, 'request'), + ], + 'request': [ + (r'\n', Text, '#pop'), + include('escapes'), + (r'"[^\n"]+"', String.Double), + (r'\d+', Number), + (r'\S+', String), + (r'\s+', Text), + ], + } + + def analyse_text(text): + if text[:1] != '.': + return False + if text[:3] == '.\\"': + return True + if text[:4] == '.TH ': + return True + if text[1:3].isalnum() and text[3].isspace(): + return 0.9 diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py index c51403e2..27e4ad3a 100644 --- a/pygments/lexers/math.py +++ b/pygments/lexers/math.py @@ -19,7 +19,7 @@ from pygments.lexer import Lexer, RegexLexer, bygroups, include, \ from pygments.token import Comment, String, Punctuation, Keyword, Name, \ Operator, Number, Text, Generic -from pygments.lexers.agile import PythonLexer +from pygments.lexers.python import PythonLexer from pygments.lexers import _scilab_builtins from pygments.lexers import _stan_builtins diff --git a/pygments/lexers/misc/make.py b/pygments/lexers/misc/make.py new file mode 100644 index 00000000..c585640f --- /dev/null +++ b/pygments/lexers/misc/make.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.misc.make + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Makefiles and similar. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import Lexer, RegexLexer, include, bygroups, \ + do_insertions, using +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Punctuation +from pygments.lexers.shell import BashLexer + +__all__ = ['MakefileLexer', 'BaseMakefileLexer', 'CMakeLexer'] + + +class MakefileLexer(Lexer): + """ + Lexer for BSD and GNU make extensions (lenient enough to handle both in + the same file even). + + *Rewritten in Pygments 0.10.* + """ + + name = 'Makefile' + aliases = ['make', 'makefile', 'mf', 'bsdmake'] + filenames = ['*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'] + mimetypes = ['text/x-makefile'] + + r_special = re.compile( + r'^(?:' + # BSD Make + r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|' + # GNU Make + r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)') + r_comment = re.compile(r'^\s*@?#') + + def get_tokens_unprocessed(self, text): + ins = [] + lines = text.splitlines(True) + done = '' + lex = BaseMakefileLexer(**self.options) + backslashflag = False + for line in lines: + if self.r_special.match(line) or backslashflag: + ins.append((len(done), [(0, Comment.Preproc, line)])) + backslashflag = line.strip().endswith('\\') + elif self.r_comment.match(line): + ins.append((len(done), [(0, Comment, line)])) + else: + done += line + for item in do_insertions(ins, lex.get_tokens_unprocessed(done)): + yield item + + def analyse_text(text): + # Many makefiles have $(BIG_CAPS) style variables + if re.search(r'\$\([A-Z_]+\)', text): + return 0.1 + + +class BaseMakefileLexer(RegexLexer): + """ + Lexer for simple Makefiles (no preprocessing). + + .. 
versionadded:: 0.10 + """ + + name = 'Base Makefile' + aliases = ['basemake'] + filenames = [] + mimetypes = [] + + tokens = { + 'root': [ + # recipes (need to allow spaces because of expandtabs) + (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)), + # special variables + (r'\$[<@$+%?|*]', Keyword), + (r'\s+', Text), + (r'#.*?\n', Comment), + (r'(export)(\s+)(?=[\w${}\t -]+\n)', + bygroups(Keyword, Text), 'export'), + (r'export\s+', Keyword), + # assignment + (r'([\w${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', + bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), + # strings + (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), + (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), + # targets + (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), + 'block-header'), + # expansions + (r'\$\(', Keyword, 'expansion'), + ], + 'expansion': [ + (r'[^$a-zA-Z_)]+', Text), + (r'[a-zA-Z_]+', Name.Variable), + (r'\$', Keyword), + (r'\(', Keyword, '#push'), + (r'\)', Keyword, '#pop'), + ], + 'export': [ + (r'[\w${}-]+', Name.Variable), + (r'\n', Text, '#pop'), + (r'\s+', Text), + ], + 'block-header': [ + (r'[,|]', Punctuation), + (r'#.*?\n', Comment, '#pop'), + (r'\\\n', Text), # line continuation + (r'\$\(', Keyword, 'expansion'), + (r'[a-zA-Z_]+', Name), + (r'\n', Text, '#pop'), + (r'.', Text), + ], + } + + +class CMakeLexer(RegexLexer): + """ + Lexer for `CMake <http://cmake.org/Wiki/CMake>`_ files. + + .. versionadded:: 1.2 + """ + name = 'CMake' + aliases = ['cmake'] + filenames = ['*.cmake', 'CMakeLists.txt'] + mimetypes = ['text/x-cmake'] + + tokens = { + 'root': [ + # (r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|' + # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|' + # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|' + # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|' + # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|' + # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|' + # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|' + # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|' + # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|' + # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|' + # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|' + # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|' + # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|' + # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|' + # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|' + # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|' + # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|' + # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|' + # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|' + # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|' + # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|' + # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|' + # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|' + # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|' + # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|' + # r'COUNTARGS)\b', Name.Builtin, 'args'), + (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Builtin, Text, + Punctuation), 'args'), + include('keywords'), + include('ws') + ], + 'args': [ + (r'\(', Punctuation, '#push'), + (r'\)', Punctuation, '#pop'), + (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)), + (r'(\$<)(.+?)(>)', bygroups(Operator, Name.Variable, Operator)), + (r'(?s)".*?"', String.Double), + (r'\\\S+', String), + (r'[^\)$"# \t\n]+', 
String), + (r'\n', Text), # explicitly legal + include('keywords'), + include('ws') + ], + 'string': [ + + ], + 'keywords': [ + (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|' + r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword), + ], + 'ws': [ + (r'[ \t]+', Text), + (r'#.*\n', Comment), + ] + } + + def analyse_text(text): + exp = r'^ *CMAKE_MINIMUM_REQUIRED *\( *VERSION *\d(\.\d)* *( FATAL_ERROR)? *\) *$' + if re.search(exp, text, flags=re.MULTILINE | re.IGNORECASE): + return 0.8 + return 0.0 diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py index 4c23c760..992f204a 100644 --- a/pygments/lexers/parsers.py +++ b/pygments/lexers/parsers.py @@ -14,22 +14,24 @@ import re from pygments.lexer import RegexLexer, DelegatingLexer, \ include, bygroups, using from pygments.token import Punctuation, Other, Text, Comment, Operator, \ - Keyword, Name, String, Number, Whitespace -from pygments.lexers.compiled import JavaLexer, CLexer, CppLexer, \ - ObjectiveCLexer, DLexer + Keyword, Name, String, Number, Whitespace +from pygments.lexers.jvm import JavaLexer +from pygments.lexers.c_like.c_cpp import CLexer, CppLexer +from pygments.lexers.c_like.objective import ObjectiveCLexer +from pygments.lexers.c_like.d import DLexer from pygments.lexers.dotnet import CSharpLexer -from pygments.lexers.agile import RubyLexer, PythonLexer, PerlLexer -from pygments.lexers.web import ActionScriptLexer - +from pygments.lexers.ruby import RubyLexer +from pygments.lexers.python import PythonLexer +from pygments.lexers.perl import PerlLexer __all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer', 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer', 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer', 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer', - #'AntlrCLexer', + # 'AntlrCLexer', 'AntlrCSharpLexer', 'AntlrObjectiveCLexer', - 'AntlrJavaLexer', "AntlrActionScriptLexer", - 'TreetopLexer'] + 'AntlrJavaLexer', 'AntlrActionScriptLexer', + 'TreetopLexer', 'EbnfLexer'] class RagelLexer(RegexLexer): @@ -63,29 +65,29 @@ class RagelLexer(RegexLexer): (r'[+-]?[0-9]+', Number.Integer), ], 'literals': [ - (r'"(\\\\|\\"|[^"])*"', String), # double quote string - (r"'(\\\\|\\'|[^'])*'", String), # single quote string - (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals - (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions + (r'"(\\\\|\\"|[^"])*"', String), # double quote string + (r"'(\\\\|\\'|[^'])*'", String), # single quote string + (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals + (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions ], 'identifiers': [ (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable), ], 'operators': [ - (r',', Operator), # Join - (r'\||&|--?', Operator), # Union, Intersection and Subtraction - (r'\.|<:|:>>?', Operator), # Concatention - (r':', Operator), # Label - (r'->', Operator), # Epsilon Transition - (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions - (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions - (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions - (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions - (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions - (r'>|@|\$|%', Operator), # Transition Actions and Priorities - (r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator), # Repetition - (r'!|\^', Operator), # Negation - (r'\(|\)', Operator), # Grouping + (r',', Operator), # Join + (r'\||&|--?', Operator), # Union, Intersection and Subtraction + (r'\.|<:|:>>?', 
Operator),                      # Concatenation
+            (r':', Operator),                           # Label
+            (r'->', Operator),                          # Epsilon Transition
+            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),    # EOF Actions
+            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),    # Global Error Actions
+            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
+            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),     # To-State Actions
+            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
+            (r'>|@|\$|%', Operator),                    # Transition Actions and Priorities
+            (r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator),    # Repetition
+            (r'!|\^', Operator),                        # Negation
+            (r'\(|\)', Operator),                       # Grouping
         ],
         'root': [
             include('literals'),
@@ -100,16 +102,16 @@ class RagelLexer(RegexLexer):
             (r';', Punctuation),
         ],
         'host': [
-            (r'(' + r'|'.join((    # keep host code in largest possible chunks
-                r'[^{}\'"/#]+',    # exclude unsafe characters
-                r'[^\\]\\[{}]',    # allow escaped { or }
+            (r'(' + r'|'.join((  # keep host code in largest possible chunks
+                r'[^{}\'"/#]+',  # exclude unsafe characters
+                r'[^\\]\\[{}]',  # allow escaped { or }
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r'//.*$\n?',           # single line comment
-                r'/\*(.|\n)*?\*/',     # multi-line javadoc-style comment
-                r'\#.*$\n?',           # ruby comment
+                r'"(\\\\|\\"|[^"])*"',  # double quote string
+                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'//.*$\n?',            # single line comment
+                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
+                r'\#.*$\n?',            # ruby comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
@@ -141,17 +143,17 @@ class RagelEmbeddedLexer(RegexLexer):
 
     tokens = {
         'root': [
-            (r'(' + r'|'.join((    # keep host code in largest possible chunks
-                r'[^%\'"/#]+',     # exclude unsafe characters
-                r'%(?=[^%]|$)',    # a single % sign is okay, just not 2 of them
+            (r'(' + r'|'.join((  # keep host code in largest possible chunks
+                r'[^%\'"/#]+',   # exclude unsafe characters
+                r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r'/\*(.|\n)*?\*/',     # multi-line javadoc-style comment
-                r'//.*$\n?',           # single line comment
-                r'\#.*$\n?',           # ruby/ragel comment
-                r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+                r'"(\\\\|\\"|[^"])*"',  # double quote string
+                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
+                r'//.*$\n?',            # single line comment
+                r'\#.*$\n?',            # ruby/ragel comment
+                r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression
 
                 # / is safe now that we've handled regex and javadoc comments
                 r'/',
@@ -168,12 +170,12 @@ class RagelEmbeddedLexer(RegexLexer):
             (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
         ],
         'multi-line-fsm': [
-            (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
+            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                 r'(' + r'|'.join((
-                    r'[^}\'"\[/#]',   # exclude unsafe characters
-                    r'}(?=[^%]|$)',   # } is okay as long as it's not followed by %
-                    r'}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
-                    r'[^\\]\\[{}]',   # ...and } is okay if it's escaped
+                    r'[^}\'"\[/#]',  # exclude unsafe characters
+                    r'}(?=[^%]|$)',  # } is okay as long as it's not followed by %
+                    r'}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
+ r'[^\\]\\[{}]', # ...and } is okay if it's escaped # allow / if it's preceded with one of these symbols # (ragel EOF actions) @@ -189,15 +191,15 @@ class RagelEmbeddedLexer(RegexLexer): # We want to match as many of these as we can in one block. # Not sure if we need the + sign here, # does it help performance? - )) + r')+', + )) + r')+', # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment - r'//.*$\n?', # single line comment - r'\#.*$\n?', # ruby/ragel comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'//.*$\n?', # single line comment + r'\#.*$\n?', # ruby/ragel comment )) + r')+', using(RagelLexer)), (r'}%%', Punctuation, '#pop'), @@ -221,7 +223,7 @@ class RagelRubyLexer(DelegatingLexer): def __init__(self, **options): super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer, - **options) + **options) def analyse_text(text): return '@LANG: ruby' in text @@ -336,9 +338,9 @@ class AntlrLexer(RegexLexer): aliases = ['antlr'] filenames = [] - _id = r'[A-Za-z][A-Za-z_0-9]*' - _TOKEN_REF = r'[A-Z][A-Za-z_0-9]*' - _RULE_REF = r'[a-z][A-Za-z_0-9]*' + _id = r'[A-Za-z][A-Za-z_0-9]*' + _TOKEN_REF = r'[A-Z][A-Za-z_0-9]*' + _RULE_REF = r'[a-z][A-Za-z_0-9]*' _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\'' _INT = r'[0-9]+' @@ -372,7 +374,7 @@ class AntlrLexer(RegexLexer): bygroups(Name.Label, Whitespace, Punctuation, Whitespace, Name.Label, Whitespace, Punctuation), 'action'), # rule - (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \ + (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', bygroups(Keyword, Whitespace, Name.Label, Punctuation), ('rule-alts', 'rule-prelims')), ], @@ -395,14 +397,14 @@ class AntlrLexer(RegexLexer): (r'(throws)(\s+)(' + _id + ')', bygroups(Keyword, Whitespace, Name.Label)), (r'(,)(\s*)(' + _id + ')', - bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws + bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws # optionsSpec (r'options\b', Keyword, 'options'), # ruleScopeSpec - scope followed by target language code or name of action # TODO finish implementing other possibilities for scope # L173 ANTLRv3.g from ANTLR book (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation), - 'action'), + 'action'), (r'(scope)(\s+)(' + _id + ')(\s*)(;)', bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)), # ruleAction @@ -450,20 +452,20 @@ class AntlrLexer(RegexLexer): include('comments'), (r'{', Punctuation), (r'(' + _id + r')(\s*)(=)(\s*)(' + - '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)', + '|'.join((_id, _STRING_LITERAL, _INT, '\*')) + ')(\s*)(;)', bygroups(Name.Variable, Whitespace, Punctuation, Whitespace, Text, Whitespace, Punctuation)), (r'}', Punctuation, '#pop'), ], 'action': [ - (r'(' + r'|'.join(( # keep host code in largest possible chunks - r'[^\${}\'"/\\]+', # exclude unsafe characters + (r'(' + r'|'.join(( # keep host code in largest possible chunks + r'[^\${}\'"/\\]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r'//.*$\n?', # single line comment - 
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. @@ -483,14 +485,14 @@ class AntlrLexer(RegexLexer): (r'}', Punctuation, '#pop'), ], 'nested-arg-action': [ - (r'(' + r'|'.join(( # keep host code in largest possible chunks. - r'[^\$\[\]\'"/]+', # exclude unsafe characters + (r'(' + r'|'.join(( # keep host code in largest possible chunks. + r'[^\$\[\]\'"/]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r'//.*$\n?', # single line comment - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. @@ -520,7 +522,7 @@ class AntlrLexer(RegexLexer): # so just assume they're C++. No idea how to make Objective C work in the # future. -#class AntlrCLexer(DelegatingLexer): +# class AntlrCLexer(DelegatingLexer): # """ # ANTLR with C Target # @@ -537,6 +539,7 @@ class AntlrLexer(RegexLexer): # def analyse_text(text): # return re.match(r'^\s*language\s*=\s*C\s*;', text) + class AntlrCppLexer(DelegatingLexer): """ `ANTLR`_ with CPP Target @@ -553,7 +556,7 @@ class AntlrCppLexer(DelegatingLexer): def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*C\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*C\s*;', text, re.M) class AntlrObjectiveCLexer(DelegatingLexer): @@ -573,7 +576,7 @@ class AntlrObjectiveCLexer(DelegatingLexer): def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*ObjC\s*;', text) + re.search(r'^\s*language\s*=\s*ObjC\s*;', text) class AntlrCSharpLexer(DelegatingLexer): @@ -593,7 +596,7 @@ class AntlrCSharpLexer(DelegatingLexer): def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M) class AntlrPythonLexer(DelegatingLexer): @@ -613,7 +616,7 @@ class AntlrPythonLexer(DelegatingLexer): def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M) class AntlrJavaLexer(DelegatingLexer): @@ -653,7 +656,7 @@ class AntlrRubyLexer(DelegatingLexer): def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M) class AntlrPerlLexer(DelegatingLexer): @@ -673,7 +676,7 @@ class AntlrPerlLexer(DelegatingLexer): def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M) class AntlrActionScriptLexer(DelegatingLexer): @@ -688,12 +691,14 @@ class AntlrActionScriptLexer(DelegatingLexer): filenames = ['*.G', '*.g'] def __init__(self, **options): + from pygments.lexers.web import ActionScriptLexer 
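+        # imported here instead of at module level, presumably to avoid
+        # a circular import between the parsers and web lexer modules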
super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer, AntlrLexer, **options) def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M) + class TreetopBaseLexer(RegexLexer): """ @@ -763,6 +768,7 @@ class TreetopBaseLexer(RegexLexer): ], } + class TreetopLexer(DelegatingLexer): """ A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars. @@ -776,3 +782,53 @@ class TreetopLexer(DelegatingLexer): def __init__(self, **options): super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options) + + +class EbnfLexer(RegexLexer): + """ + Lexer for `ISO/IEC 14977 EBNF + <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_ + grammars. + + .. versionadded:: 2.0 + """ + + name = 'EBNF' + aliases = ['ebnf'] + filenames = ['*.ebnf'] + mimetypes = ['text/x-ebnf'] + + tokens = { + 'root': [ + include('whitespace'), + include('comment_start'), + include('identifier'), + (r'=', Operator, 'production'), + ], + 'production': [ + include('whitespace'), + include('comment_start'), + include('identifier'), + (r'"[^"]*"', String.Double), + (r"'[^']*'", String.Single), + (r'(\?[^?]*\?)', Name.Entity), + (r'[\[\]{}(),|]', Punctuation), + (r'-', Operator), + (r';', Punctuation, '#pop'), + ], + 'whitespace': [ + (r'\s+', Text), + ], + 'comment_start': [ + (r'\(\*', Comment.Multiline, 'comment'), + ], + 'comment': [ + (r'[^*)]', Comment.Multiline), + include('comment_start'), + (r'\*\)', Comment.Multiline, '#pop'), + (r'[*)]', Comment.Multiline), + ], + 'identifier': [ + (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword), + ], + } diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index 8d91d9d0..d8b7a503 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -12,16 +12,15 @@ import re from pygments.lexers.web import \ - PhpLexer, HtmlLexer, XmlLexer, JavascriptLexer, CssLexer, LassoLexer -from pygments.lexers.agile import PythonLexer, PerlLexer -from pygments.lexers.compiled import JavaLexer -from pygments.lexers.jvm import TeaLangLexer -from pygments.lexers.text import YamlLexer + PhpLexer, HtmlLexer, XmlLexer, JavascriptLexer, CssLexer, LassoLexer +from pygments.lexers.python import PythonLexer +from pygments.lexers.perl import PerlLexer +from pygments.lexers.jvm import JavaLexer, TeaLangLexer +from pygments.lexers.data import YamlLexer from pygments.lexer import Lexer, DelegatingLexer, RegexLexer, bygroups, \ - include, using, this, default, combined -from pygments.token import Error, Punctuation, \ - Text, Comment, Operator, Keyword, Name, String, Number, Other, Token, \ - Whitespace + include, using, this, default, combined +from pygments.token import Error, Punctuation, Whitespace, \ + Text, Comment, Operator, Keyword, Name, String, Number, Other, Token from pygments.util import html_doctype_matches, looks_like_xml __all__ = ['HtmlPhpLexer', 'XmlPhpLexer', 'CssPhpLexer', @@ -63,7 +62,7 @@ class ErbLexer(Lexer): _block_re = re.compile(r'(<%%|%%>|<%=|<%#|<%-|<%|-%>|%>|^%[^%].*?$)', re.M) def __init__(self, **options): - from pygments.lexers.agile import RubyLexer + from pygments.lexers.ruby import RubyLexer self.ruby_lexer = RubyLexer(**options) Lexer.__init__(self, **options) diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 8de3ded7..d4aeaeeb 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -9,2047 +9,18 @@ :license: BSD, see LICENSE for details. 
""" -import re -from bisect import bisect - -from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \ - bygroups, include, using, this, do_insertions, default -from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \ - Generic, Operator, Number, Whitespace, Literal -from pygments.util import get_bool_opt, ClassNotFound -from pygments.lexers.agile import PythonLexer -from pygments.lexers.other import BashLexer - -__all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer', - 'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer', - 'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer', - 'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer', - 'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer', - 'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer', - 'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer', 'EbnfLexer', - 'TodotxtLexer', 'DockerLexer'] - - -class IniLexer(RegexLexer): - """ - Lexer for configuration files in INI style. - """ - - name = 'INI' - aliases = ['ini', 'cfg', 'dosini'] - filenames = ['*.ini', '*.cfg'] - mimetypes = ['text/x-ini'] - - tokens = { - 'root': [ - (r'\s+', Text), - (r'[;#].*', Comment.Single), - (r'\[.*?\]$', Keyword), - (r'(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)', - bygroups(Name.Attribute, Text, Operator, Text, String)) - ] - } - - def analyse_text(text): - npos = text.find('\n') - if npos < 3: - return False - return text[0] == '[' and text[npos-1] == ']' - - -class RegeditLexer(RegexLexer): - """ - Lexer for `Windows Registry - <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced - by regedit. - - .. versionadded:: 1.6 - """ - - name = 'reg' - aliases = ['registry'] - filenames = ['*.reg'] - mimetypes = ['text/x-windows-registry'] - - tokens = { - 'root': [ - (r'Windows Registry Editor.*', Text), - (r'\s+', Text), - (r'[;#].*', Comment.Single), - (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$', - bygroups(Keyword, Operator, Name.Builtin, Keyword)), - # String keys, which obey somewhat normal escaping - (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)', - bygroups(Name.Attribute, Text, Operator, Text), - 'value'), - # Bare keys (includes @) - (r'(.*?)([ \t]*)(=)([ \t]*)', - bygroups(Name.Attribute, Text, Operator, Text), - 'value'), - ], - 'value': [ - (r'-', Operator, '#pop'), # delete value - (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)', - bygroups(Name.Variable, Punctuation, Number), '#pop'), - # As far as I know, .reg files do not support line continuation. - (r'.*', String, '#pop'), - ] - } - - def analyse_text(text): - return text.startswith('Windows Registry Editor') - - -class PropertiesLexer(RegexLexer): - """ - Lexer for configuration files in Java's properties format. - - .. versionadded:: 1.4 - """ - - name = 'Properties' - aliases = ['properties', 'jproperties'] - filenames = ['*.properties'] - mimetypes = ['text/x-java-properties'] - - tokens = { - 'root': [ - (r'\s+', Text), - (r'(?:[;#]|//).*$', Comment), - (r'(.*?)([ \t]*)([=:])([ \t]*)(.*(?:(?<=\\)\n.*)*)', - bygroups(Name.Attribute, Text, Operator, Text, String)), - ], - } - - -class SourcesListLexer(RegexLexer): - """ - Lexer that highlights debian sources.list files. - - .. 
versionadded:: 0.7 - """ - - name = 'Debian Sourcelist' - aliases = ['sourceslist', 'sources.list', 'debsources'] - filenames = ['sources.list'] - mimetype = ['application/x-debian-sourceslist'] - - tokens = { - 'root': [ - (r'\s+', Text), - (r'#.*?$', Comment), - (r'^(deb(?:-src)?)(\s+)', - bygroups(Keyword, Text), 'distribution') - ], - 'distribution': [ - (r'#.*?$', Comment, '#pop'), - (r'\$\(ARCH\)', Name.Variable), - (r'[^\s$[]+', String), - (r'\[', String.Other, 'escaped-distribution'), - (r'\$', String), - (r'\s+', Text, 'components') - ], - 'escaped-distribution': [ - (r'\]', String.Other, '#pop'), - (r'\$\(ARCH\)', Name.Variable), - (r'[^\]$]+', String.Other), - (r'\$', String.Other) - ], - 'components': [ - (r'#.*?$', Comment, '#pop:2'), - (r'$', Text, '#pop:2'), - (r'\s+', Text), - (r'\S+', Keyword.Pseudo), - ] - } - - def analyse_text(text): - for line in text.split('\n'): - line = line.strip() - if not (line.startswith('#') or line.startswith('deb ') or - line.startswith('deb-src ') or not line): - return False - return True - - -class MakefileLexer(Lexer): - """ - Lexer for BSD and GNU make extensions (lenient enough to handle both in - the same file even). - - *Rewritten in Pygments 0.10.* - """ - - name = 'Makefile' - aliases = ['make', 'makefile', 'mf', 'bsdmake'] - filenames = ['*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'] - mimetypes = ['text/x-makefile'] - - r_special = re.compile(r'^(?:' - # BSD Make - r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|' - # GNU Make - r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)') - r_comment = re.compile(r'^\s*@?#') - - def get_tokens_unprocessed(self, text): - ins = [] - lines = text.splitlines(True) - done = '' - lex = BaseMakefileLexer(**self.options) - backslashflag = False - for line in lines: - if self.r_special.match(line) or backslashflag: - ins.append((len(done), [(0, Comment.Preproc, line)])) - backslashflag = line.strip().endswith('\\') - elif self.r_comment.match(line): - ins.append((len(done), [(0, Comment, line)])) - else: - done += line - for item in do_insertions(ins, lex.get_tokens_unprocessed(done)): - yield item - - def analyse_text(text): - # Many makefiles have $(BIG_CAPS) style variables - if re.search(r'\$\([A-Z_]+\)', text): - return 0.1 - - -class BaseMakefileLexer(RegexLexer): - """ - Lexer for simple Makefiles (no preprocessing). - - .. 
versionadded:: 0.10 - """ - - name = 'Base Makefile' - aliases = ['basemake'] - filenames = [] - mimetypes = [] - - tokens = { - 'root': [ - # recipes (need to allow spaces because of expandtabs) - (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)), - # special variables - (r'\$[<@$+%?|*]', Keyword), - (r'\s+', Text), - (r'#.*?\n', Comment), - (r'(export)(\s+)(?=[\w${}\t -]+\n)', - bygroups(Keyword, Text), 'export'), - (r'export\s+', Keyword), - # assignment - (r'([\w${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', - bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), - # strings - (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), - (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), - # targets - (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), - 'block-header'), - # expansions - (r'\$\(', Keyword, 'expansion'), - ], - 'expansion': [ - (r'[^$a-zA-Z_)]+', Text), - (r'[a-zA-Z_]+', Name.Variable), - (r'\$', Keyword), - (r'\(', Keyword, '#push'), - (r'\)', Keyword, '#pop'), - ], - 'export': [ - (r'[\w${}-]+', Name.Variable), - (r'\n', Text, '#pop'), - (r'\s+', Text), - ], - 'block-header': [ - (r'[,|]', Punctuation), - (r'#.*?\n', Comment, '#pop'), - (r'\\\n', Text), # line continuation - (r'\$\(', Keyword, 'expansion'), - (r'[a-zA-Z_]+', Name), - (r'\n', Text, '#pop'), - (r'.', Text), - ], - } - - -class DiffLexer(RegexLexer): - """ - Lexer for unified or context-style diffs or patches. - """ - - name = 'Diff' - aliases = ['diff', 'udiff'] - filenames = ['*.diff', '*.patch'] - mimetypes = ['text/x-diff', 'text/x-patch'] - - tokens = { - 'root': [ - (r' .*\n', Text), - (r'\+.*\n', Generic.Inserted), - (r'-.*\n', Generic.Deleted), - (r'!.*\n', Generic.Strong), - (r'@.*\n', Generic.Subheading), - (r'([Ii]ndex|diff).*\n', Generic.Heading), - (r'=.*\n', Generic.Heading), - (r'.*\n', Text), - ] - } - - def analyse_text(text): - if text[:7] == 'Index: ': - return True - if text[:5] == 'diff ': - return True - if text[:4] == '--- ': - return 0.9 - - -DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move', - 'replace'] - -class DarcsPatchLexer(RegexLexer): - """ - DarcsPatchLexer is a lexer for the various versions of the darcs patch - format. Examples of this format are derived by commands such as - ``darcs annotate --patch`` and ``darcs send``. - - .. 
versionadded:: 0.10 - """ - name = 'Darcs Patch' - aliases = ['dpatch'] - filenames = ['*.dpatch', '*.darcspatch'] - - tokens = { - 'root': [ - (r'<', Operator), - (r'>', Operator), - (r'{', Operator), - (r'}', Operator), - (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])', - bygroups(Operator, Keyword, Name, Text, Name, Operator, - Literal.Date, Text, Operator)), - (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)', - bygroups(Operator, Keyword, Name, Text, Name, Operator, - Literal.Date, Text), 'comment'), - (r'New patches:', Generic.Heading), - (r'Context:', Generic.Heading), - (r'Patch bundle hash:', Generic.Heading), - (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS), - bygroups(Text, Keyword, Text)), - (r'\+', Generic.Inserted, "insert"), - (r'-', Generic.Deleted, "delete"), - (r'.*\n', Text), - ], - 'comment': [ - (r'[^\]].*\n', Comment), - (r'\]', Operator, "#pop"), - ], - 'specialText': [ # darcs add [_CODE_] special operators for clarity - (r'\n', Text, "#pop"), # line-based - (r'\[_[^_]*_]', Operator), - ], - 'insert': [ - include('specialText'), - (r'\[', Generic.Inserted), - (r'[^\n\[]+', Generic.Inserted), - ], - 'delete': [ - include('specialText'), - (r'\[', Generic.Deleted), - (r'[^\n\[]+', Generic.Deleted), - ], - } - - -class IrcLogsLexer(RegexLexer): - """ - Lexer for IRC logs in *irssi*, *xchat* or *weechat* style. - """ - - name = 'IRC logs' - aliases = ['irc'] - filenames = ['*.weechatlog'] - mimetypes = ['text/x-irclog'] - - flags = re.VERBOSE | re.MULTILINE - timestamp = r""" - ( - # irssi / xchat and others - (?: \[|\()? # Opening bracket or paren for the timestamp - (?: # Timestamp - (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits - [T ])? # Date/time separator: T or space - (?: \d?\d [:.]?)+ # Time as :/.-separated groups of 1 or 2 digits - ) - (?: \]|\))?\s+ # Closing bracket or paren for the timestamp - | - # weechat - \d{4}\s\w{3}\s\d{2}\s # Date - \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace - | - # xchat - \w{3}\s\d{2}\s # Date - \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace - )? - """ - tokens = { - 'root': [ - # log start/end - (r'^\*\*\*\*(.*)\*\*\*\*$', Comment), - # hack - ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)), - # normal msgs - ("^" + timestamp + r""" - (\s*<.*?>\s*) # Nick """, - bygroups(Comment.Preproc, Name.Tag), 'msg'), - # /me msgs - ("^" + timestamp + r""" - (\s*[*]\s+) # Star - (\S+\s+.*?\n) # Nick + rest of message """, - bygroups(Comment.Preproc, Keyword, Generic.Inserted)), - # join/part msgs - ("^" + timestamp + r""" - (\s*(?:\*{3}|<?-[!@=P]?->?)\s*) # Star(s) or symbols - (\S+\s+) # Nick + Space - (.*?\n) # Rest of message """, - bygroups(Comment.Preproc, Keyword, String, Comment)), - (r"^.*?\n", Text), - ], - 'msg': [ - (r"\S+:(?!//)", Name.Attribute), # Prefix - (r".*\n", Text, '#pop'), - ], - } - - -class BBCodeLexer(RegexLexer): - """ - A lexer that highlights BBCode(-like) syntax. - - .. versionadded:: 0.6 - """ - - name = 'BBCode' - aliases = ['bbcode'] - mimetypes = ['text/x-bbcode'] - - tokens = { - 'root': [ - (r'[^[]+', Text), - # tag/end tag begin - (r'\[/?\w+', Keyword, 'tag'), - # stray bracket - (r'\[', Text), - ], - 'tag': [ - (r'\s+', Text), - # attribute with value - (r'(\w+)(=)("?[^\s"\]]+"?)', - bygroups(Name.Attribute, Operator, String)), - # tag argument (a la [color=green]) - (r'(=)("?[^\s"\]]+"?)', - bygroups(Operator, String)), - # tag end - (r'\]', Keyword, '#pop'), - ], - } - - -class TexLexer(RegexLexer): - """ - Lexer for the TeX and LaTeX typesetting languages. 
- """ - - name = 'TeX' - aliases = ['tex', 'latex'] - filenames = ['*.tex', '*.aux', '*.toc'] - mimetypes = ['text/x-tex', 'text/x-latex'] - - tokens = { - 'general': [ - (r'%.*?\n', Comment), - (r'[{}]', Name.Builtin), - (r'[&_^]', Name.Builtin), - ], - 'root': [ - (r'\\\[', String.Backtick, 'displaymath'), - (r'\\\(', String, 'inlinemath'), - (r'\$\$', String.Backtick, 'displaymath'), - (r'\$', String, 'inlinemath'), - (r'\\([a-zA-Z]+|.)', Keyword, 'command'), - include('general'), - (r'[^\\$%&_^{}]+', Text), - ], - 'math': [ - (r'\\([a-zA-Z]+|.)', Name.Variable), - include('general'), - (r'[0-9]+', Number), - (r'[-=!+*/()\[\]]', Operator), - (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), - ], - 'inlinemath': [ - (r'\\\)', String, '#pop'), - (r'\$', String, '#pop'), - include('math'), - ], - 'displaymath': [ - (r'\\\]', String, '#pop'), - (r'\$\$', String, '#pop'), - (r'\$', Name.Builtin), - include('math'), - ], - 'command': [ - (r'\[.*?\]', Name.Attribute), - (r'\*', Keyword), - default('#pop'), - ], - } - - def analyse_text(text): - for start in ("\\documentclass", "\\input", "\\documentstyle", - "\\relax"): - if text[:len(start)] == start: - return True - - -class GroffLexer(RegexLexer): - """ - Lexer for the (g)roff typesetting language, supporting groff - extensions. Mainly useful for highlighting manpage sources. - - .. versionadded:: 0.6 - """ - - name = 'Groff' - aliases = ['groff', 'nroff', 'man'] - filenames = ['*.[1234567]', '*.man'] - mimetypes = ['application/x-troff', 'text/troff'] - - tokens = { - 'root': [ - (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), - (r'\.', Punctuation, 'request'), - # Regular characters, slurp till we find a backslash or newline - (r'[^\\\n]*', Text, 'textline'), - ], - 'textline': [ - include('escapes'), - (r'[^\\\n]+', Text), - (r'\n', Text, '#pop'), - ], - 'escapes': [ - # groff has many ways to write escapes. - (r'\\"[^\n]*', Comment), - (r'\\[fn]\w', String.Escape), - (r'\\\(.{2}', String.Escape), - (r'\\.\[.*\]', String.Escape), - (r'\\.', String.Escape), - (r'\\\n', Text, 'request'), - ], - 'request': [ - (r'\n', Text, '#pop'), - include('escapes'), - (r'"[^\n"]+"', String.Double), - (r'\d+', Number), - (r'\S+', String), - (r'\s+', Text), - ], - } - - def analyse_text(text): - if text[:1] != '.': - return False - if text[:3] == '.\\"': - return True - if text[:4] == '.TH ': - return True - if text[1:3].isalnum() and text[3].isspace(): - return 0.9 - - -class ApacheConfLexer(RegexLexer): - """ - Lexer for configuration files following the Apache config file - format. - - .. 
versionadded:: 0.6 - """ - - name = 'ApacheConf' - aliases = ['apacheconf', 'aconf', 'apache'] - filenames = ['.htaccess', 'apache.conf', 'apache2.conf'] - mimetypes = ['text/x-apacheconf'] - flags = re.MULTILINE | re.IGNORECASE - - tokens = { - 'root': [ - (r'\s+', Text), - (r'(#.*?)$', Comment), - (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)', - bygroups(Name.Tag, Text, String, Name.Tag)), - (r'([a-z]\w*)(\s+)', - bygroups(Name.Builtin, Text), 'value'), - (r'\.+', Text), - ], - 'value': [ - (r'$', Text, '#pop'), - (r'[^\S\n]+', Text), - (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), - (r'\d+', Number), - (r'/([a-z0-9][\w./-]+)', String.Other), - (r'(on|off|none|any|all|double|email|dns|min|minimal|' - r'os|productonly|full|emerg|alert|crit|error|warn|' - r'notice|info|debug|registry|script|inetd|standalone|' - r'user|group)\b', Keyword), - (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double), - (r'[^\s"]+', Text) - ] - } - - -class MoinWikiLexer(RegexLexer): - """ - For MoinMoin (and Trac) Wiki markup. - - .. versionadded:: 0.7 - """ - - name = 'MoinMoin/Trac Wiki markup' - aliases = ['trac-wiki', 'moin'] - filenames = [] - mimetypes = ['text/x-trac-wiki'] - flags = re.MULTILINE | re.IGNORECASE - - tokens = { - 'root': [ - (r'^#.*$', Comment), - (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next - # Titles - (r'^(=+)([^=]+)(=+)(\s*#.+)?$', - bygroups(Generic.Heading, using(this), Generic.Heading, String)), - # Literal code blocks, with optional shebang - (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), - (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting - # Lists - (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), - (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), - # Other Formatting - (r'\[\[\w+.*?\]\]', Keyword), # Macro - (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', - bygroups(Keyword, String, Keyword)), # Link - (r'^----+$', Keyword), # Horizontal rules - (r'[^\n\'\[{!_~^,|]+', Text), - (r'\n', Text), - (r'.', Text), - ], - 'codeblock': [ - (r'}}}', Name.Builtin, '#pop'), - # these blocks are allowed to be nested in Trac, but not MoinMoin - (r'{{{', Text, '#push'), - (r'[^{}]+', Comment.Preproc), # slurp boring text - (r'.', Comment.Preproc), # allow loose { or } - ], - } - - -class RstLexer(RegexLexer): - """ - For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup. - - .. versionadded:: 0.7 - - Additional options accepted: - - `handlecodeblocks` - Highlight the contents of ``.. sourcecode:: language``, - ``.. code:: language`` and ``.. code-block:: language`` - directives with a lexer for the given language (default: - ``True``). - - .. 
versionadded:: 0.8 - """ - name = 'reStructuredText' - aliases = ['rst', 'rest', 'restructuredtext'] - filenames = ['*.rst', '*.rest'] - mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] - flags = re.MULTILINE - - def _handle_sourcecode(self, match): - from pygments.lexers import get_lexer_by_name - - # section header - yield match.start(1), Punctuation, match.group(1) - yield match.start(2), Text, match.group(2) - yield match.start(3), Operator.Word, match.group(3) - yield match.start(4), Punctuation, match.group(4) - yield match.start(5), Text, match.group(5) - yield match.start(6), Keyword, match.group(6) - yield match.start(7), Text, match.group(7) - - # lookup lexer if wanted and existing - lexer = None - if self.handlecodeblocks: - try: - lexer = get_lexer_by_name(match.group(6).strip()) - except ClassNotFound: - pass - indention = match.group(8) - indention_size = len(indention) - code = (indention + match.group(9) + match.group(10) + match.group(11)) - - # no lexer for this language. handle it like it was a code block - if lexer is None: - yield match.start(8), String, code - return - - # highlight the lines with the lexer. - ins = [] - codelines = code.splitlines(True) - code = '' - for line in codelines: - if len(line) > indention_size: - ins.append((len(code), [(0, Text, line[:indention_size])])) - code += line[indention_size:] - else: - code += line - for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)): - yield item - - # from docutils.parsers.rst.states - closers = u'\'")]}>\u2019\u201d\xbb!?' - unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0' - end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' - % (re.escape(unicode_delimiters), - re.escape(closers))) - - tokens = { - 'root': [ - # Heading with overline - (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' - r'(.+)(\n)(\1)(\n)', - bygroups(Generic.Heading, Text, Generic.Heading, - Text, Generic.Heading, Text)), - # Plain heading - (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' - r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', - bygroups(Generic.Heading, Text, Generic.Heading, Text)), - # Bulleted lists - (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', - bygroups(Text, Number, using(this, state='inline'))), - # Numbered lists - (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', - bygroups(Text, Number, using(this, state='inline'))), - (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', - bygroups(Text, Number, using(this, state='inline'))), - # Numbered, but keep words at BOL from becoming lists - (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', - bygroups(Text, Number, using(this, state='inline'))), - (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', - bygroups(Text, Number, using(this, state='inline'))), - # Line blocks - (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', - bygroups(Text, Operator, using(this, state='inline'))), - # Sourcecode directives - (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' - r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', - _handle_sourcecode), - # A directive - (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', - bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, - using(this, state='inline'))), - # A reference target - (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', - bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), - # A footnote/citation target - (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', - bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), - # A substitution def - (r'^( 
*\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', - bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, - Punctuation, Text, using(this, state='inline'))), - # Comments - (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), - # Field list - (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)), - (r'^( *)(:.*?:)([ \t]+)(.*?)$', - bygroups(Text, Name.Class, Text, Name.Function)), - # Definition list - (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', - bygroups(using(this, state='inline'), using(this, state='inline'))), - # Code blocks - (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)', - bygroups(String.Escape, Text, String, String, Text, String)), - include('inline'), - ], - 'inline': [ - (r'\\.', Text), # escape - (r'``', String, 'literal'), # code - (r'(`.+?)(<.+?>)(`__?)', # reference with inline target - bygroups(String, String.Interpol, String)), - (r'`.+?`__?', String), # reference - (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', - bygroups(Name.Variable, Name.Attribute)), # role - (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', - bygroups(Name.Attribute, Name.Variable)), # role (content first) - (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis - (r'\*.+?\*', Generic.Emph), # Emphasis - (r'\[.*?\]_', String), # Footnote or citation - (r'<.+?>', Name.Tag), # Hyperlink - (r'[^\\\n\[*`:]+', Text), - (r'.', Text), - ], - 'literal': [ - (r'[^`]+', String), - (r'``' + end_string_suffix, String, '#pop'), - (r'`', String), - ] - } - - def __init__(self, **options): - self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) - RegexLexer.__init__(self, **options) - - def analyse_text(text): - if text[:2] == '..' and text[2:3] != '.': - return 0.3 - p1 = text.find("\n") - p2 = text.find("\n", p1 + 1) - if (p2 > -1 and # has two lines - p1 * 2 + 1 == p2 and # they are the same length - text[p1+1] in '-=' and # the next line both starts and ends with - text[p1+1] == text[p2-1]): # ...a sufficiently high header - return 0.5 - - -class VimLexer(RegexLexer): - """ - Lexer for VimL script files. - - .. versionadded:: 0.8 - """ - name = 'VimL' - aliases = ['vim'] - filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc', - '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'] - mimetypes = ['text/x-vim'] - flags = re.MULTILINE - - _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?' - - tokens = { - 'root': [ - (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)', - bygroups(using(this), Keyword, Text, Operator, Text, Text, - using(PythonLexer), Text)), - (r'^([ \t:]*)(' + _python + r')([ \t])(.*)', - bygroups(using(this), Keyword, Text, using(PythonLexer))), - - (r'^\s*".*', Comment), - - (r'[ \t]+', Text), - # TODO: regexes can have other delims - (r'/(\\\\|\\/|[^\n/])*/', String.Regex), - (r'"(\\\\|\\"|[^\n"])*"', String.Double), - (r"'(''|[^\n'])*'", String.Single), - - # Who decided that doublequote was a good comment character?? - (r'(?<=\s)"[^\-:.%#=*].*', Comment), - (r'-?\d+', Number), - (r'#[0-9a-f]{6}', Number.Hex), - (r'^:', Punctuation), - (r'[()<>+=!|,~-]', Punctuation), # Inexact list. Looks decent. 
- (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b', - Keyword), - (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin), - (r'\b\w+\b', Name.Other), # These are postprocessed below - (r'.', Text), - ], - } - def __init__(self, **options): - from pygments.lexers._vimbuiltins import command, option, auto - self._cmd = command - self._opt = option - self._aut = auto - - RegexLexer.__init__(self, **options) - - def is_in(self, w, mapping): - r""" - It's kind of difficult to decide if something might be a keyword - in VimL because it allows you to abbreviate them. In fact, - 'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are - valid ways to call it so rather than making really awful regexps - like:: - - \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b - - we match `\b\w+\b` and then call is_in() on those tokens. See - `scripts/get_vimkw.py` for how the lists are extracted. - """ - p = bisect(mapping, (w,)) - if p > 0: - if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \ - mapping[p-1][1][:len(w)] == w: return True - if p < len(mapping): - return mapping[p][0] == w[:len(mapping[p][0])] and \ - mapping[p][1][:len(w)] == w - return False - - def get_tokens_unprocessed(self, text): - # TODO: builtins are only subsequent tokens on lines - # and 'keywords' only happen at the beginning except - # for :au ones - for index, token, value in \ - RegexLexer.get_tokens_unprocessed(self, text): - if token is Name.Other: - if self.is_in(value, self._cmd): - yield index, Keyword, value - elif self.is_in(value, self._opt) or \ - self.is_in(value, self._aut): - yield index, Name.Builtin, value - else: - yield index, Text, value - else: - yield index, token, value - - -class GettextLexer(RegexLexer): - """ - Lexer for Gettext catalog files. - - .. versionadded:: 0.9 - """ - name = 'Gettext Catalog' - aliases = ['pot', 'po'] - filenames = ['*.pot', '*.po'] - mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext'] - - tokens = { - 'root': [ - (r'^#,\s.*?$', Keyword.Type), - (r'^#:\s.*?$', Keyword.Declaration), - #(r'^#$', Comment), - (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single), - (r'^(")([A-Za-z-]+:)(.*")$', - bygroups(String, Name.Property, String)), - (r'^".*"$', String), - (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$', - bygroups(Name.Variable, Text, String)), - (r'^(msgstr\[)(\d)(\])(\s+)(".*")$', - bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)), - ] - } - - -class SquidConfLexer(RegexLexer): - """ - Lexer for `squid <http://www.squid-cache.org/>`_ configuration files. - - .. 
versionadded:: 0.9 - """ - - name = 'SquidConf' - aliases = ['squidconf', 'squid.conf', 'squid'] - filenames = ['squid.conf'] - mimetypes = ['text/x-squidconf'] - flags = re.IGNORECASE - - keywords = [ - "access_log", "acl", "always_direct", "announce_host", - "announce_period", "announce_port", "announce_to", "anonymize_headers", - "append_domain", "as_whois_server", "auth_param_basic", - "authenticate_children", "authenticate_program", "authenticate_ttl", - "broken_posts", "buffered_logs", "cache_access_log", "cache_announce", - "cache_dir", "cache_dns_program", "cache_effective_group", - "cache_effective_user", "cache_host", "cache_host_acl", - "cache_host_domain", "cache_log", "cache_mem", "cache_mem_high", - "cache_mem_low", "cache_mgr", "cachemgr_passwd", "cache_peer", - "cache_peer_access", "cache_replacement_policy", "cache_stoplist", - "cache_stoplist_pattern", "cache_store_log", "cache_swap", - "cache_swap_high", "cache_swap_log", "cache_swap_low", "client_db", - "client_lifetime", "client_netmask", "connect_timeout", "coredump_dir", - "dead_peer_timeout", "debug_options", "delay_access", "delay_class", - "delay_initial_bucket_level", "delay_parameters", "delay_pools", - "deny_info", "dns_children", "dns_defnames", "dns_nameservers", - "dns_testnames", "emulate_httpd_log", "err_html_text", - "fake_user_agent", "firewall_ip", "forwarded_for", "forward_snmpd_port", - "fqdncache_size", "ftpget_options", "ftpget_program", "ftp_list_width", - "ftp_passive", "ftp_user", "half_closed_clients", "header_access", - "header_replace", "hierarchy_stoplist", "high_response_time_warning", - "high_page_fault_warning", "hosts_file", "htcp_port", "http_access", - "http_anonymizer", "httpd_accel", "httpd_accel_host", - "httpd_accel_port", "httpd_accel_uses_host_header", - "httpd_accel_with_proxy", "http_port", "http_reply_access", - "icp_access", "icp_hit_stale", "icp_port", "icp_query_timeout", - "ident_lookup", "ident_lookup_access", "ident_timeout", - "incoming_http_average", "incoming_icp_average", "inside_firewall", - "ipcache_high", "ipcache_low", "ipcache_size", "local_domain", - "local_ip", "logfile_rotate", "log_fqdn", "log_icp_queries", - "log_mime_hdrs", "maximum_object_size", "maximum_single_addr_tries", - "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr", - "mcast_miss_encode_key", "mcast_miss_port", "memory_pools", - "memory_pools_limit", "memory_replacement_policy", "mime_table", - "min_http_poll_cnt", "min_icp_poll_cnt", "minimum_direct_hops", - "minimum_object_size", "minimum_retry_timeout", "miss_access", - "negative_dns_ttl", "negative_ttl", "neighbor_timeout", - "neighbor_type_domain", "netdb_high", "netdb_low", "netdb_ping_period", - "netdb_ping_rate", "never_direct", "no_cache", "passthrough_proxy", - "pconn_timeout", "pid_filename", "pinger_program", "positive_dns_ttl", - "prefer_direct", "proxy_auth", "proxy_auth_realm", "query_icmp", - "quick_abort", "quick_abort_max", "quick_abort_min", - "quick_abort_pct", "range_offset_limit", "read_timeout", - "redirect_children", "redirect_program", - "redirect_rewrites_host_header", "reference_age", - "refresh_pattern", "reload_into_ims", "request_body_max_size", - "request_size", "request_timeout", "shutdown_lifetime", - "single_parent_bypass", "siteselect_timeout", "snmp_access", - "snmp_incoming_address", "snmp_port", "source_ping", "ssl_proxy", - "store_avg_object_size", "store_objects_per_bucket", - "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs", - "tcp_incoming_address",
"tcp_outgoing_address", "tcp_recv_bufsize", - "test_reachability", "udp_hit_obj", "udp_hit_obj_size", - "udp_incoming_address", "udp_outgoing_address", "unique_hostname", - "unlinkd_program", "uri_whitespace", "useragent_log", - "visible_hostname", "wais_relay", "wais_relay_host", "wais_relay_port", - ] - - opts = [ - "proxy-only", "weight", "ttl", "no-query", "default", "round-robin", - "multicast-responder", "on", "off", "all", "deny", "allow", "via", - "parent", "no-digest", "heap", "lru", "realm", "children", "q1", "q2", - "credentialsttl", "none", "disable", "offline_toggle", "diskd", - ] - - actions = [ - "shutdown", "info", "parameter", "server_list", "client_list", - r'squid\.conf', - ] - - actions_stats = [ - "objects", "vm_objects", "utilization", "ipcache", "fqdncache", "dns", - "redirector", "io", "reply_headers", "filedescriptors", "netdb", - ] - - actions_log = ["status", "enable", "disable", "clear"] - - acls = [ - "url_regex", "urlpath_regex", "referer_regex", "port", "proto", - "req_mime_type", "rep_mime_type", "method", "browser", "user", "src", - "dst", "time", "dstdomain", "ident", "snmp_community", - ] - - ip_re = ( - r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|' - r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|' - r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|' - r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}' - r'(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|' - r'(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|' - r'[1-9]?\d)){3}))' - ) - - def makelistre(list): - return r'\b(?:' + '|'.join(list) + r')\b' - - tokens = { - 'root': [ - (r'\s+', Whitespace), - (r'#', Comment, 'comment'), - (makelistre(keywords), Keyword), - (makelistre(opts), Name.Constant), - # Actions - (makelistre(actions), String), - (r'stats/'+makelistre(actions), String), - (r'log/'+makelistre(actions)+r'=', String), - (makelistre(acls), Keyword), - (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float), - (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number), - (r'\S+', Text), - ], - 'comment': [ - (r'\s*TAG:.*', String.Escape, '#pop'), - (r'.*', Comment, '#pop'), - ], - } - - -class DebianControlLexer(RegexLexer): - """ - Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs. - - .. 
versionadded:: 0.9 - """ - name = 'Debian Control file' - aliases = ['control', 'debcontrol'] - filenames = ['control'] - - tokens = { - 'root': [ - (r'^(Description)', Keyword, 'description'), - (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'), - (r'^((Build-)?Depends)', Keyword, 'depends'), - (r'^((?:Python-)?Version)(:\s*)(\S+)$', - bygroups(Keyword, Text, Number)), - (r'^((?:Installed-)?Size)(:\s*)(\S+)$', - bygroups(Keyword, Text, Number)), - (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$', - bygroups(Keyword, Text, Number)), - (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$', - bygroups(Keyword, Whitespace, String)), - ], - 'maintainer': [ - (r'<[^>]+>', Generic.Strong), - (r'<[^>]+>$', Generic.Strong, '#pop'), - (r',\n?', Text), - (r'.', Text), - ], - 'description': [ - (r'(.*)(Homepage)(: )(\S+)', - bygroups(Text, String, Name, Name.Class)), - (r':.*\n', Generic.Strong), - (r' .*\n', Text), - ('', Text, '#pop'), - ], - 'depends': [ - (r':\s*', Text), - (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)), - (r'\(', Text, 'depend_vers'), - (r',', Text), - (r'\|', Operator), - (r'[\s]+', Text), - (r'[}\)]\s*$', Text, '#pop'), - (r'}', Text), - (r'[^,]$', Name.Function, '#pop'), - (r'([\+\.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)), - (r'\[.*?\]', Name.Entity), - ], - 'depend_vers': [ - (r'\),', Text, '#pop'), - (r'\)[^,]', Text, '#pop:2'), - (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number)) - ] - } - - -class YamlLexerContext(LexerContext): - """Indentation context for the YAML lexer.""" - - def __init__(self, *args, **kwds): - super(YamlLexerContext, self).__init__(*args, **kwds) - self.indent_stack = [] - self.indent = -1 - self.next_indent = 0 - self.block_scalar_indent = None - - -class YamlLexer(ExtendedRegexLexer): - """ - Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization - language. - - .. 
versionadded:: 0.11 - """ - - name = 'YAML' - aliases = ['yaml'] - filenames = ['*.yaml', '*.yml'] - mimetypes = ['text/x-yaml'] - - - def something(token_class): - """Do not produce empty tokens.""" - def callback(lexer, match, context): - text = match.group() - if not text: - return - yield match.start(), token_class, text - context.pos = match.end() - return callback - - def reset_indent(token_class): - """Reset the indentation levels.""" - def callback(lexer, match, context): - text = match.group() - context.indent_stack = [] - context.indent = -1 - context.next_indent = 0 - context.block_scalar_indent = None - yield match.start(), token_class, text - context.pos = match.end() - return callback - - def save_indent(token_class, start=False): - """Save a possible indentation level.""" - def callback(lexer, match, context): - text = match.group() - extra = '' - if start: - context.next_indent = len(text) - if context.next_indent < context.indent: - while context.next_indent < context.indent: - context.indent = context.indent_stack.pop() - if context.next_indent > context.indent: - extra = text[context.indent:] - text = text[:context.indent] - else: - context.next_indent += len(text) - if text: - yield match.start(), token_class, text - if extra: - yield match.start()+len(text), token_class.Error, extra - context.pos = match.end() - return callback - - def set_indent(token_class, implicit=False): - """Set the previously saved indentation level.""" - def callback(lexer, match, context): - text = match.group() - if context.indent < context.next_indent: - context.indent_stack.append(context.indent) - context.indent = context.next_indent - if not implicit: - context.next_indent += len(text) - yield match.start(), token_class, text - context.pos = match.end() - return callback - - def set_block_scalar_indent(token_class): - """Set an explicit indentation level for a block scalar.""" - def callback(lexer, match, context): - text = match.group() - context.block_scalar_indent = None - if not text: - return - increment = match.group(1) - if increment: - current_indent = max(context.indent, 0) - increment = int(increment) - context.block_scalar_indent = current_indent + increment - if text: - yield match.start(), token_class, text - context.pos = match.end() - return callback - - def parse_block_scalar_empty_line(indent_token_class, content_token_class): - """Process an empty line in a block scalar.""" - def callback(lexer, match, context): - text = match.group() - if (context.block_scalar_indent is None or - len(text) <= context.block_scalar_indent): - if text: - yield match.start(), indent_token_class, text - else: - indentation = text[:context.block_scalar_indent] - content = text[context.block_scalar_indent:] - yield match.start(), indent_token_class, indentation - yield (match.start()+context.block_scalar_indent, - content_token_class, content) - context.pos = match.end() - return callback - - def parse_block_scalar_indent(token_class): - """Process indentation spaces in a block scalar.""" - def callback(lexer, match, context): - text = match.group() - if context.block_scalar_indent is None: - if len(text) <= max(context.indent, 0): - context.stack.pop() - context.stack.pop() - return - context.block_scalar_indent = len(text) - else: - if len(text) < context.block_scalar_indent: - context.stack.pop() - context.stack.pop() - return - if text: - yield match.start(), token_class, text - context.pos = match.end() - return callback - - def parse_plain_scalar_indent(token_class): - """Process 
indentation spaces in a plain scalar.""" - def callback(lexer, match, context): - text = match.group() - if len(text) <= context.indent: - context.stack.pop() - context.stack.pop() - return - if text: - yield match.start(), token_class, text - context.pos = match.end() - return callback - - - - tokens = { - # the root rules - 'root': [ - # ignored whitespaces - (r'[ ]+(?=#|$)', Text), - # line breaks - (r'\n+', Text), - # a comment - (r'#[^\n]*', Comment.Single), - # the '%YAML' directive - (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'), - # the %TAG directive - (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'), - # document start and document end indicators - (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace), - 'block-line'), - # indentation spaces - (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True), - ('block-line', 'indentation')), - ], - - # trailing whitespaces after directives or a block scalar indicator - 'ignored-line': [ - # ignored whitespaces - (r'[ ]+(?=#|$)', Text), - # a comment - (r'#[^\n]*', Comment.Single), - # line break - (r'\n', Text, '#pop:2'), - ], - - # the %YAML directive - 'yaml-directive': [ - # the version number - (r'([ ]+)([0-9]+\.[0-9]+)', - bygroups(Text, Number), 'ignored-line'), - ], - - # the %TAG directive - 'tag-directive': [ - # a tag handle and the corresponding prefix - (r'([ ]+)(!|![0-9A-Za-z_-]*!)' - r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)', - bygroups(Text, Keyword.Type, Text, Keyword.Type), - 'ignored-line'), - ], - - # block scalar indicators and indentation spaces - 'indentation': [ - # trailing whitespaces are ignored - (r'[ ]*$', something(Text), '#pop:2'), - # whitespaces preceding block collection indicators - (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)), - # block collection indicators - (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)), - # the beginning of a block line - (r'[ ]*', save_indent(Text), '#pop'), - ], - - # an indented line in the block context - 'block-line': [ - # the line end - (r'[ ]*(?=#|$)', something(Text), '#pop'), - # whitespaces separating tokens - (r'[ ]+', Text), - # tags, anchors and aliases - include('descriptors'), - # block collections and scalars - include('block-nodes'), - # flow collections and quoted scalars - include('flow-nodes'), - # a plain scalar - (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])', - something(Name.Variable), - 'plain-scalar-in-block-context'), - ], - - # tags, anchors, aliases - 'descriptors': [ - # a full-form tag - (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type), - # a tag in the form '!', '!suffix' or '!handle!suffix' - (r'!(?:[0-9A-Za-z_-]+)?'
- r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type), - # an anchor - (r'&[0-9A-Za-z_-]+', Name.Label), - # an alias - (r'\*[0-9A-Za-z_-]+', Name.Variable), - ], - - # block collections and scalars - 'block-nodes': [ - # implicit key - (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)), - # literal and folded scalars - (r'[|>]', Punctuation.Indicator, - ('block-scalar-content', 'block-scalar-header')), - ], - - # flow collections and quoted scalars - 'flow-nodes': [ - # a flow sequence - (r'\[', Punctuation.Indicator, 'flow-sequence'), - # a flow mapping - (r'\{', Punctuation.Indicator, 'flow-mapping'), - # a single-quoted scalar - (r'\'', String, 'single-quoted-scalar'), - # a double-quoted scalar - (r'\"', String, 'double-quoted-scalar'), - ], - - # the content of a flow collection - 'flow-collection': [ - # whitespaces - (r'[ ]+', Text), - # line breaks - (r'\n+', Text), - # a comment - (r'#[^\n]*', Comment.Single), - # simple indicators - (r'[?:,]', Punctuation.Indicator), - # tags, anchors and aliases - include('descriptors'), - # nested collections and quoted scalars - include('flow-nodes'), - # a plain scalar - (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])', - something(Name.Variable), - 'plain-scalar-in-flow-context'), - ], - - # a flow sequence indicated by '[' and ']' - 'flow-sequence': [ - # include flow collection rules - include('flow-collection'), - # the closing indicator - (r'\]', Punctuation.Indicator, '#pop'), - ], - - # a flow mapping indicated by '{' and '}' - 'flow-mapping': [ - # include flow collection rules - include('flow-collection'), - # the closing indicator - (r'\}', Punctuation.Indicator, '#pop'), - ], - - # block scalar lines - 'block-scalar-content': [ - # line break - (r'\n', Text), - # empty line - (r'^[ ]+$', - parse_block_scalar_empty_line(Text, Name.Constant)), - # indentation spaces (we may leave the state here) - (r'^[ ]*', parse_block_scalar_indent(Text)), - # line content - (r'[^\n\r\f\v]+', Name.Constant), - ], - - # the content of a literal or folded scalar - 'block-scalar-header': [ - # indentation indicator followed by chomping flag - (r'([1-9])?[+-]?(?=[ ]|$)', - set_block_scalar_indent(Punctuation.Indicator), - 'ignored-line'), - # chomping flag followed by indentation indicator - (r'[+-]?([1-9])?(?=[ ]|$)', - set_block_scalar_indent(Punctuation.Indicator), - 'ignored-line'), - ], - - # ignored and regular whitespaces in quoted scalars - 'quoted-scalar-whitespaces': [ - # leading and trailing whitespaces are ignored - (r'^[ ]+', Text), - (r'[ ]+$', Text), - # line breaks are ignored - (r'\n+', Text), - # other whitespaces are a part of the value - (r'[ ]+', Name.Variable), - ], - - # single-quoted scalars - 'single-quoted-scalar': [ - # include whitespace and line break rules - include('quoted-scalar-whitespaces'), - # escaping of the quote character - (r'\'\'', String.Escape), - # regular non-whitespace characters - (r'[^ \t\n\r\f\v\']+', String), - # the closing quote - (r'\'', String, '#pop'), - ], - - # double-quoted scalars - 'double-quoted-scalar': [ - # include whitespace and line break rules - include('quoted-scalar-whitespaces'), - # escaping of special characters - (r'\\[0abt\tn\nvfre "\\N_LP]', String), - # escape codes - (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})', - String.Escape), - # regular non-whitespace characters - (r'[^ \t\n\r\f\v\"\\]+', String), - # the closing quote - (r'"', String, '#pop'), - ], - - # the beginning of a new line while scanning a plain scalar - 
'plain-scalar-in-block-context-new-line': [ - # empty lines - (r'^[ ]+$', Text), - # line breaks - (r'\n+', Text), - # document start and document end indicators - (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'), - # indentation spaces (we may leave the block line state here) - (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'), - ], - - # a plain scalar in the block context - 'plain-scalar-in-block-context': [ - # the scalar ends with the ':' indicator - (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'), - # the scalar ends with whitespaces followed by a comment - (r'[ ]+(?=#)', Text, '#pop'), - # trailing whitespaces are ignored - (r'[ ]+$', Text), - # line breaks are ignored - (r'\n+', Text, 'plain-scalar-in-block-context-new-line'), - # other whitespaces are a part of the value - (r'[ ]+', Literal.Scalar.Plain), - # regular non-whitespace characters - (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain), - ], - - # a plain scalar in the flow context - 'plain-scalar-in-flow-context': [ - # the scalar ends with an indicator character - (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'), - # the scalar ends with a comment - (r'[ ]+(?=#)', Text, '#pop'), - # leading and trailing whitespaces are ignored - (r'^[ ]+', Text), - (r'[ ]+$', Text), - # line breaks are ignored - (r'\n+', Text), - # other whitespaces are a part of the value - (r'[ ]+', Name.Variable), - # regular non-whitespace characters - (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable), - ], - - } - - def get_tokens_unprocessed(self, text=None, context=None): - if context is None: - context = YamlLexerContext(text, 0) - return super(YamlLexer, self).get_tokens_unprocessed(text, context) - - -class LighttpdConfLexer(RegexLexer): - """ - Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files. - - .. versionadded:: 0.11 - """ - name = 'Lighttpd configuration file' - aliases = ['lighty', 'lighttpd'] - filenames = [] - mimetypes = ['text/x-lighttpd-conf'] - - tokens = { - 'root': [ - (r'#.*\n', Comment.Single), - (r'/\S*', Name), # pathname - (r'[a-zA-Z._-]+', Keyword), - (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), - (r'[0-9]+', Number), - (r'=>|=~|\+=|==|=|\+', Operator), - (r'\$[A-Z]+', Name.Builtin), - (r'[(){}\[\],]', Punctuation), - (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double), - (r'\s+', Text), - ], - - } - - -class NginxConfLexer(RegexLexer): - """ - Lexer for `Nginx <http://nginx.net/>`_ configuration files. - - ..
versionadded:: 0.11 - """ - name = 'Nginx configuration file' - aliases = ['nginx'] - filenames = [] - mimetypes = ['text/x-nginx-conf'] - - tokens = { - 'root': [ - (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)), - (r'[^\s;#]+', Keyword, 'stmt'), - include('base'), - ], - 'block': [ - (r'}', Punctuation, '#pop:2'), - (r'[^\s;#]+', Keyword.Namespace, 'stmt'), - include('base'), - ], - 'stmt': [ - (r'{', Punctuation, 'block'), - (r';', Punctuation, '#pop'), - include('base'), - ], - 'base': [ - (r'#.*\n', Comment.Single), - (r'on|off', Name.Constant), - (r'\$[^\s;#()]+', Name.Variable), - (r'([a-z0-9.-]+)(:)([0-9]+)', - bygroups(Name, Punctuation, Number.Integer)), - (r'[a-z-]+/[a-z-+]+', String), # mimetype - #(r'[a-zA-Z._-]+', Keyword), - (r'[0-9]+[km]?\b', Number.Integer), - (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)), - (r'[:=~]', Punctuation), - (r'[^\s;#{}$]+', String), # catch all - (r'/[^\s;#]*', Name), # pathname - (r'\s+', Text), - (r'[$;]', Text), # leftover characters - ], - } - - -class CMakeLexer(RegexLexer): - """ - Lexer for `CMake <http://cmake.org/Wiki/CMake>`_ files. - - .. versionadded:: 1.2 - """ - name = 'CMake' - aliases = ['cmake'] - filenames = ['*.cmake', 'CMakeLists.txt'] - mimetypes = ['text/x-cmake'] - - tokens = { - 'root': [ - #(r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|' - # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|' - # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|' - # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|' - # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|' - # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|' - # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|' - # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|' - # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|' - # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|' - # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|' - # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|' - # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|' - # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|' - # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|' - # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|' - # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|' - # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|' - # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|' - # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|' - # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|' - # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|' - # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|' - # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|' - # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|' - # r'COUNTARGS)\b', Name.Builtin, 'args'), - (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Builtin, Text, - Punctuation), 'args'), - include('keywords'), - include('ws') - ], - 'args': [ - (r'\(', Punctuation, '#push'), - (r'\)', Punctuation, '#pop'), - (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)), - (r'(\$<)(.+?)(>)', bygroups(Operator, Name.Variable, Operator)), - (r'(?s)".*?"', String.Double), - (r'\\\S+', String), - (r'[^\)$"# \t\n]+', String), - (r'\n', Text), # explicitly legal - include('keywords'), - include('ws') - ], - 'string': [ - - ], - 'keywords': [ - (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|' - r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword), - ], - 'ws': [ - (r'[ \t]+', Text), - (r'#.*\n', 
Comment), - ] - } - - def analyse_text(text): - exp = r'^ *CMAKE_MINIMUM_REQUIRED *\( *VERSION *\d(\.\d)* *( FATAL_ERROR)? *\) *$' - if re.search(exp, text, flags=re.MULTILINE | re.IGNORECASE): - return 0.8 - return 0.0 - - -class HttpLexer(RegexLexer): - """ - Lexer for HTTP sessions. - - .. versionadded:: 1.5 - """ - - name = 'HTTP' - aliases = ['http'] - - flags = re.DOTALL - - def header_callback(self, match): - if match.group(1).lower() == 'content-type': - content_type = match.group(5).strip() - if ';' in content_type: - content_type = content_type[:content_type.find(';')].strip() - self.content_type = content_type - yield match.start(1), Name.Attribute, match.group(1) - yield match.start(2), Text, match.group(2) - yield match.start(3), Operator, match.group(3) - yield match.start(4), Text, match.group(4) - yield match.start(5), Literal, match.group(5) - yield match.start(6), Text, match.group(6) - - def continuous_header_callback(self, match): - yield match.start(1), Text, match.group(1) - yield match.start(2), Literal, match.group(2) - yield match.start(3), Text, match.group(3) - - def content_callback(self, match): - content_type = getattr(self, 'content_type', None) - content = match.group() - offset = match.start() - if content_type: - from pygments.lexers import get_lexer_for_mimetype - try: - lexer = get_lexer_for_mimetype(content_type) - except ClassNotFound: - pass - else: - for idx, token, value in lexer.get_tokens_unprocessed(content): - yield offset + idx, token, value - return - yield offset, Text, content - - tokens = { - 'root': [ - (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' - r'(HTTP)(/)(1\.[01])(\r?\n|$)', - bygroups(Name.Function, Text, Name.Namespace, Text, - Keyword.Reserved, Operator, Number, Text), - 'headers'), - (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', - bygroups(Keyword.Reserved, Operator, Number, Text, Number, - Text, Name.Exception, Text), - 'headers'), - ], - 'headers': [ - (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|$)', header_callback), - (r'([\t ]+)([^\r\n]+)(\r?\n|$)', continuous_header_callback), - (r'\r?\n', Text, 'content') - ], - 'content': [ - (r'.+', content_callback) - ] - } - - -class PyPyLogLexer(RegexLexer): - """ - Lexer for PyPy log files. - - .. 
versionadded:: 1.5 - """ - name = "PyPy Log" - aliases = ["pypylog", "pypy"] - filenames = ["*.pypylog"] - mimetypes = ['application/x-pypylog'] - - tokens = { - "root": [ - (r"\[\w+\] {jit-log-.*?$", Keyword, "jit-log"), - (r"\[\w+\] {jit-backend-counts$", Keyword, "jit-backend-counts"), - include("extra-stuff"), - ], - "jit-log": [ - (r"\[\w+\] jit-log-.*?}$", Keyword, "#pop"), - (r"^\+\d+: ", Comment), - (r"--end of the loop--", Comment), - (r"[ifp]\d+", Name), - (r"ptr\d+", Name), - (r"(\()(\w+(?:\.\w+)?)(\))", - bygroups(Punctuation, Name.Builtin, Punctuation)), - (r"[\[\]=,()]", Punctuation), - (r"(\d+\.\d+|inf|-inf)", Number.Float), - (r"-?\d+", Number.Integer), - (r"'.*'", String), - (r"(None|descr|ConstClass|ConstPtr|TargetToken)", Name), - (r"<.*?>+", Name.Builtin), - (r"(label|debug_merge_point|jump|finish)", Name.Class), - (r"(int_add_ovf|int_add|int_sub_ovf|int_sub|int_mul_ovf|int_mul|" - r"int_floordiv|int_mod|int_lshift|int_rshift|int_and|int_or|" - r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|" - r"int_is_true|" - r"uint_floordiv|uint_ge|uint_lt|" - r"float_add|float_sub|float_mul|float_truediv|float_neg|" - r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|" - r"ptr_eq|ptr_ne|instance_ptr_eq|instance_ptr_ne|" - r"cast_int_to_float|cast_float_to_int|" - r"force_token|quasiimmut_field|same_as|virtual_ref_finish|" - r"virtual_ref|mark_opaque_ptr|" - r"call_may_force|call_assembler|call_loopinvariant|" - r"call_release_gil|call_pure|call|" - r"new_with_vtable|new_array|newstr|newunicode|new|" - r"arraylen_gc|" - r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|" - r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|" - r"getfield_gc|getinteriorfield_gc|setinteriorfield_gc|" - r"getfield_raw|setfield_gc|setfield_raw|" - r"strgetitem|strsetitem|strlen|copystrcontent|" - r"unicodegetitem|unicodesetitem|unicodelen|" - r"guard_true|guard_false|guard_value|guard_isnull|" - r"guard_nonnull_class|guard_nonnull|guard_class|guard_no_overflow|" - r"guard_not_forced|guard_no_exception|guard_not_invalidated)", - Name.Builtin), - include("extra-stuff"), - ], - "jit-backend-counts": [ - (r"\[\w+\] jit-backend-counts}$", Keyword, "#pop"), - (r":", Punctuation), - (r"\d+", Number), - include("extra-stuff"), - ], - "extra-stuff": [ - (r"\s+", Text), - (r"#.*?$", Comment), - ], - } - - -class HxmlLexer(RegexLexer): - """ - Lexer for `haXe build <http://haxe.org/doc/compiler>`_ files. - - .. 
versionadded:: 1.6 - """ - name = 'Hxml' - aliases = ['haxeml', 'hxml'] - filenames = ['*.hxml'] - - tokens = { - 'root': [ - # Separator - (r'(--)(next)', bygroups(Punctuation, Generic.Heading)), - # Compiler switches with one dash - (r'(-)(prompt|debug|v)', bygroups(Punctuation, Keyword.Keyword)), - # Compiler switches with two dashes - (r'(--)(neko-source|flash-strict|flash-use-stage|no-opt|no-traces|' - r'no-inline|times|no-output)', bygroups(Punctuation, Keyword)), - # Targets and other options that take an argument - (r'(-)(cpp|js|neko|x|as3|swf9?|swf-lib|php|xml|main|lib|D|resource|' - r'cp|cmd)( +)(.+)', - bygroups(Punctuation, Keyword, Whitespace, String)), - # Options that take only numerical arguments - (r'(-)(swf-version)( +)(\d+)', - bygroups(Punctuation, Keyword, Whitespace, Number.Integer)), - # An option that defines the size, the fps and the background - # color of a flash movie - (r'(-)(swf-header)( +)(\d+)(:)(\d+)(:)(\d+)(:)([A-Fa-f0-9]{6})', - bygroups(Punctuation, Keyword, Whitespace, Number.Integer, - Punctuation, Number.Integer, Punctuation, Number.Integer, - Punctuation, Number.Hex)), - # Options with two dashes that take arguments - (r'(--)(js-namespace|php-front|php-lib|remap|gen-hx-classes)( +)' - r'(.+)', bygroups(Punctuation, Keyword, Whitespace, String)), - # Single line comment, multiline ones are not allowed. - (r'#.*', Comment.Single) - ] - } - - -class EbnfLexer(RegexLexer): - """ - Lexer for `ISO/IEC 14977 EBNF - <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_ - grammars. - - .. versionadded:: 2.0 - """ - - name = 'EBNF' - aliases = ['ebnf'] - filenames = ['*.ebnf'] - mimetypes = ['text/x-ebnf'] - - tokens = { - 'root': [ - include('whitespace'), - include('comment_start'), - include('identifier'), - (r'=', Operator, 'production'), - ], - 'production': [ - include('whitespace'), - include('comment_start'), - include('identifier'), - (r'"[^"]*"', String.Double), - (r"'[^']*'", String.Single), - (r'(\?[^?]*\?)', Name.Entity), - (r'[\[\]{}(),|]', Punctuation), - (r'-', Operator), - (r';', Punctuation, '#pop'), - ], - 'whitespace': [ - (r'\s+', Text), - ], - 'comment_start': [ - (r'\(\*', Comment.Multiline, 'comment'), - ], - 'comment': [ - (r'[^*)]', Comment.Multiline), - include('comment_start'), - (r'\*\)', Comment.Multiline, '#pop'), - (r'[*)]', Comment.Multiline), - ], - 'identifier': [ - (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword), - ], - } - -class TodotxtLexer(RegexLexer): - """ - Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format. - - .. versionadded:: 2.0 - """ - - name = 'Todotxt' - aliases = ['todotxt'] - # *.todotxt is not a standard extension for Todo.txt files; including it - # makes testing easier, and also makes autodetecting file type easier. - filenames = ['todo.txt', '*.todotxt'] - mimetypes = ['text/x-todo'] - - ## Aliases mapping standard token types of Todo.txt format concepts - CompleteTaskText = Operator # Chosen to de-emphasize complete tasks - IncompleteTaskText = Text # Incomplete tasks should look like plain text - - # Priority should have most emphasis to indicate importance of tasks - Priority = Generic.Heading - # Dates should have next most emphasis because time is important - Date = Generic.Subheading - - # Project and context should have equal weight, and be in different colors - Project = Generic.Error - Context = String - - # If tag functionality is added, it should have the same weight as Project - # and Context, and a different color. Generic.Traceback would work well.
- - # Regex patterns for building up rules; dates, priorities, projects, and - # contexts are all atomic - # TODO: Make date regex more ISO 8601 compliant - date_regex = r'\d{4,}-\d{2}-\d{2}' - priority_regex = r'\([A-Z]\)' - project_regex = r'\+\S+' - context_regex = r'@\S+' - - # Compound regex expressions - complete_one_date_regex = r'(x )(' + date_regex + r')' - complete_two_date_regex = (complete_one_date_regex + r'( )(' + - date_regex + r')') - priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')' - - tokens = { - # Should parse starting at beginning of line; each line is a task - 'root': [ - ## Complete task entry points: two total: - # 1. Complete task with two dates - (complete_two_date_regex, bygroups(CompleteTaskText, Date, - CompleteTaskText, Date), - 'complete'), - # 2. Complete task with one date - (complete_one_date_regex, bygroups(CompleteTaskText, Date), - 'complete'), - - ## Incomplete task entry points: six total: - # 1. Priority plus date - (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date), - 'incomplete'), - # 2. Priority only - (priority_regex, Priority, 'incomplete'), - # 3. Leading date - (date_regex, Date, 'incomplete'), - # 4. Leading context - (context_regex, Context, 'incomplete'), - # 5. Leading project - (project_regex, Project, 'incomplete'), - # 6. Non-whitespace catch-all - ('\S+', IncompleteTaskText, 'incomplete'), - ], - - # Parse a complete task - 'complete': [ - # Newline indicates end of task, should return to root - (r'\s*\n', CompleteTaskText, '#pop'), - # Tokenize contexts and projects - (context_regex, Context), - (project_regex, Project), - # Tokenize non-whitespace text - ('\S+', CompleteTaskText), - # Tokenize whitespace not containing a newline - ('\s+', CompleteTaskText), - ], - - # Parse an incomplete task - 'incomplete': [ - # Newline indicates end of task, should return to root - (r'\s*\n', IncompleteTaskText, '#pop'), - # Tokenize contexts and projects - (context_regex, Context), - (project_regex, Project), - # Tokenize non-whitespace text - ('\S+', IncompleteTaskText), - # Tokenize whitespace not containing a newline - ('\s+', IncompleteTaskText), - ], - } - - -class DockerLexer(RegexLexer): - """ - Lexer for `Docker <http://docker.io>`_ configuration files. - - .. 
versionadded:: 2.0 - """ - name = 'Docker' - aliases = ['docker', 'dockerfile'] - filenames = ['Dockerfile', '*.docker'] - mimetypes = ['text/x-dockerfile-config'] - - _keywords = (r'(?:FROM|MAINTAINER|RUN|CMD|EXPOSE|ENV|ADD|ENTRYPOINT|' - r'VOLUME|WORKDIR)') - - flags = re.IGNORECASE | re.MULTILINE - - tokens = { - 'root': [ - (r'^(ONBUILD)(\s+)(%s)\b' % (_keywords,), - bygroups(Name.Keyword, Whitespace, Keyword)), - (_keywords + r'\b', Keyword), - (r'#.*', Comment), - (r'.+', using(BashLexer)), - ], - } +from pygments.lexers.configs import ApacheConfLexer, NginxConfLexer, \ + SquidConfLexer, LighttpdConfLexer, IniLexer, RegeditLexer, PropertiesLexer +from pygments.lexers.console import PyPyLogLexer +from pygments.lexers.textedit import VimLexer +from pygments.lexers.markup import BBCodeLexer, MoinWikiLexer, RstLexer, \ + TexLexer, GroffLexer +from pygments.lexers.installers import DebianControlLexer, SourcesListLexer +from pygments.lexers.misc.make import MakefileLexer, BaseMakefileLexer, \ + CMakeLexer +from pygments.lexers.dsls import HxmlLexer +from pygments.lexers.diff import DiffLexer, DarcsPatchLexer +from pygments.lexers.data import YamlLexer +from pygments.lexers.textfmts import IrcLogsLexer, GettextLexer, HttpLexer + +__all__ = [] diff --git a/pygments/lexers/textedit.py b/pygments/lexers/textedit.py index 66255fae..1f6d3fee 100644 --- a/pygments/lexers/textedit.py +++ b/pygments/lexers/textedit.py @@ -9,11 +9,16 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer, include, default +import re +from bisect import bisect + +from pygments.lexer import RegexLexer, include, default, bygroups, using, this from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation -__all__ = ['AwkLexer'] +from pygments.lexers.python import PythonLexer + +__all__ = ['AwkLexer', 'VimLexer'] class AwkLexer(RegexLexer): @@ -68,3 +73,97 @@ class AwkLexer(RegexLexer): (r"'(\\\\|\\'|[^'])*'", String.Single), ] } + + +class VimLexer(RegexLexer): + """ + Lexer for VimL script files. + + .. versionadded:: 0.8 + """ + name = 'VimL' + aliases = ['vim'] + filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc', + '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'] + mimetypes = ['text/x-vim'] + flags = re.MULTILINE + + _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?' + + tokens = { + 'root': [ + (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)', + bygroups(using(this), Keyword, Text, Operator, Text, Text, + using(PythonLexer), Text)), + (r'^([ \t:]*)(' + _python + r')([ \t])(.*)', + bygroups(using(this), Keyword, Text, using(PythonLexer))), + + (r'^\s*".*', Comment), + + (r'[ \t]+', Text), + # TODO: regexes can have other delims + (r'/(\\\\|\\/|[^\n/])*/', String.Regex), + (r'"(\\\\|\\"|[^\n"])*"', String.Double), + (r"'(''|[^\n'])*'", String.Single), + + # Who decided that doublequote was a good comment character?? + (r'(?<=\s)"[^\-:.%#=*].*', Comment), + (r'-?\d+', Number), + (r'#[0-9a-f]{6}', Number.Hex), + (r'^:', Punctuation), + (r'[()<>+=!|,~-]', Punctuation), # Inexact list. Looks decent. 
+ (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b', + Keyword), + (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin), + (r'\b\w+\b', Name.Other), # These are postprocessed below + (r'.', Text), + ], + } + + def __init__(self, **options): + from pygments.lexers._vimbuiltins import command, option, auto + self._cmd = command + self._opt = option + self._aut = auto + + RegexLexer.__init__(self, **options) + + def is_in(self, w, mapping): + r""" + It's kind of difficult to decide if something might be a keyword + in VimL because it allows you to abbreviate them. In fact, + 'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are + valid ways to call it so rather than making really awful regexps + like:: + + \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b + + we match `\b\w+\b` and then call is_in() on those tokens. See + `scripts/get_vimkw.py` for how the lists are extracted. + """ + p = bisect(mapping, (w,)) + if p > 0: + if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \ + mapping[p-1][1][:len(w)] == w: + return True + if p < len(mapping): + return mapping[p][0] == w[:len(mapping[p][0])] and \ + mapping[p][1][:len(w)] == w + return False + + def get_tokens_unprocessed(self, text): + # TODO: builtins are only subsequent tokens on lines + # and 'keywords' only happen at the beginning except + # for :au ones + for index, token, value in \ + RegexLexer.get_tokens_unprocessed(self, text): + if token is Name.Other: + if self.is_in(value, self._cmd): + yield index, Keyword, value + elif self.is_in(value, self._opt) or \ + self.is_in(value, self._aut): + yield index, Name.Builtin, value + else: + yield index, Text, value + else: + yield index, token, value diff --git a/pygments/lexers/textfmts.py b/pygments/lexers/textfmts.py new file mode 100644 index 00000000..63e67182 --- /dev/null +++ b/pygments/lexers/textfmts.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.textfmts + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for various text formats. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, bygroups +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Generic, Literal +from pygments.util import ClassNotFound + +__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer'] + + +class IrcLogsLexer(RegexLexer): + """ + Lexer for IRC logs in *irssi*, *xchat* or *weechat* style. + """ + + name = 'IRC logs' + aliases = ['irc'] + filenames = ['*.weechatlog'] + mimetypes = ['text/x-irclog'] + + flags = re.VERBOSE | re.MULTILINE + timestamp = r""" + ( + # irssi / xchat and others + (?: \[|\()? # Opening bracket or paren for the timestamp + (?: # Timestamp + (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits + [T ])? # Date/time separator: T or space + (?: \d?\d [:.]?)+ # Time as :/.-separated groups of 1 or 2 digits + ) + (?: \]|\))?\s+ # Closing bracket or paren for the timestamp + | + # weechat + \d{4}\s\w{3}\s\d{2}\s # Date + \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace + | + # xchat + \w{3}\s\d{2}\s # Date + \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace + )? 
+ """ + tokens = { + 'root': [ + # log start/end + (r'^\*\*\*\*(.*)\*\*\*\*$', Comment), + # hack + ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)), + # normal msgs + ("^" + timestamp + r""" + (\s*<.*?>\s*) # Nick """, + bygroups(Comment.Preproc, Name.Tag), 'msg'), + # /me msgs + ("^" + timestamp + r""" + (\s*[*]\s+) # Star + (\S+\s+.*?\n) # Nick + rest of message """, + bygroups(Comment.Preproc, Keyword, Generic.Inserted)), + # join/part msgs + ("^" + timestamp + r""" + (\s*(?:\*{3}|<?-[!@=P]?->?)\s*) # Star(s) or symbols + (\S+\s+) # Nick + Space + (.*?\n) # Rest of message """, + bygroups(Comment.Preproc, Keyword, String, Comment)), + (r"^.*?\n", Text), + ], + 'msg': [ + (r"\S+:(?!//)", Name.Attribute), # Prefix + (r".*\n", Text, '#pop'), + ], + } + + +class GettextLexer(RegexLexer): + """ + Lexer for Gettext catalog files. + + .. versionadded:: 0.9 + """ + name = 'Gettext Catalog' + aliases = ['pot', 'po'] + filenames = ['*.pot', '*.po'] + mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext'] + + tokens = { + 'root': [ + (r'^#,\s.*?$', Keyword.Type), + (r'^#:\s.*?$', Keyword.Declaration), + # (r'^#$', Comment), + (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single), + (r'^(")([A-Za-z-]+:)(.*")$', + bygroups(String, Name.Property, String)), + (r'^".*"$', String), + (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$', + bygroups(Name.Variable, Text, String)), + (r'^(msgstr\[)(\d)(\])(\s+)(".*")$', + bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)), + ] + } + + +class HttpLexer(RegexLexer): + """ + Lexer for HTTP sessions. + + .. versionadded:: 1.5 + """ + + name = 'HTTP' + aliases = ['http'] + + flags = re.DOTALL + + def header_callback(self, match): + if match.group(1).lower() == 'content-type': + content_type = match.group(5).strip() + if ';' in content_type: + content_type = content_type[:content_type.find(';')].strip() + self.content_type = content_type + yield match.start(1), Name.Attribute, match.group(1) + yield match.start(2), Text, match.group(2) + yield match.start(3), Operator, match.group(3) + yield match.start(4), Text, match.group(4) + yield match.start(5), Literal, match.group(5) + yield match.start(6), Text, match.group(6) + + def continuous_header_callback(self, match): + yield match.start(1), Text, match.group(1) + yield match.start(2), Literal, match.group(2) + yield match.start(3), Text, match.group(3) + + def content_callback(self, match): + content_type = getattr(self, 'content_type', None) + content = match.group() + offset = match.start() + if content_type: + from pygments.lexers import get_lexer_for_mimetype + try: + lexer = get_lexer_for_mimetype(content_type) + except ClassNotFound: + pass + else: + for idx, token, value in lexer.get_tokens_unprocessed(content): + yield offset + idx, token, value + return + yield offset, Text, content + + tokens = { + 'root': [ + (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' + r'(HTTP)(/)(1\.[01])(\r?\n|$)', + bygroups(Name.Function, Text, Name.Namespace, Text, + Keyword.Reserved, Operator, Number, Text), + 'headers'), + (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', + bygroups(Keyword.Reserved, Operator, Number, Text, Number, + Text, Name.Exception, Text), + 'headers'), + ], + 'headers': [ + (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|$)', header_callback), + (r'([\t ]+)([^\r\n]+)(\r?\n|$)', continuous_header_callback), + (r'\r?\n', Text, 'content') + ], + 'content': [ + (r'.+', content_callback) + ] + } + + +class TodotxtLexer(RegexLexer): + 
""" + Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format. + + .. versionadded:: 2.0 + """ + + name = 'Todotxt' + aliases = ['todotxt'] + # *.todotxt is not a standard extension for Todo.txt files; including it + # makes testing easier, and also makes autodetecting file type easier. + filenames = ['todo.txt', '*.todotxt'] + mimetypes = ['text/x-todo'] + + # Aliases mapping standard token types of Todo.txt format concepts + CompleteTaskText = Operator # Chosen to de-emphasize complete tasks + IncompleteTaskText = Text # Incomplete tasks should look like plain text + + # Priority should have most emphasis to indicate importance of tasks + Priority = Generic.Heading + # Dates should have next most emphasis because time is important + Date = Generic.Subheading + + # Project and context should have equal weight, and be in different colors + Project = Generic.Error + Context = String + + # If tag functionality is added, it should have the same weight as Project + # and Context, and a different color. Generic.Traceback would work well. + + # Regex patterns for building up rules; dates, priorities, projects, and + # contexts are all atomic + # TODO: Make date regex more ISO 8601 compliant + date_regex = r'\d{4,}-\d{2}-\d{2}' + priority_regex = r'\([A-Z]\)' + project_regex = r'\+\S+' + context_regex = r'@\S+' + + # Compound regex expressions + complete_one_date_regex = r'(x )(' + date_regex + r')' + complete_two_date_regex = (complete_one_date_regex + r'( )(' + + date_regex + r')') + priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')' + + tokens = { + # Should parse starting at beginning of line; each line is a task + 'root': [ + # Complete task entry points: two total: + # 1. Complete task with two dates + (complete_two_date_regex, bygroups(CompleteTaskText, Date, + CompleteTaskText, Date), + 'complete'), + # 2. Complete task with one date + (complete_one_date_regex, bygroups(CompleteTaskText, Date), + 'complete'), + + # Incomplete task entry points: six total: + # 1. Priority plus date + (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date), + 'incomplete'), + # 2. Priority only + (priority_regex, Priority, 'incomplete'), + # 3. Leading date + (date_regex, Date, 'incomplete'), + # 4. Leading context + (context_regex, Context, 'incomplete'), + # 5. Leading project + (project_regex, Project, 'incomplete'), + # 6. 
Non-whitespace catch-all + (r'\S+', IncompleteTaskText, 'incomplete'), + ], + + # Parse a complete task + 'complete': [ + # Newline indicates end of task, should return to root + (r'\s*\n', CompleteTaskText, '#pop'), + # Tokenize contexts and projects + (context_regex, Context), + (project_regex, Project), + # Tokenize non-whitespace text + (r'\S+', CompleteTaskText), + # Tokenize whitespace not containing a newline + (r'\s+', CompleteTaskText), + ], + + # Parse an incomplete task + 'incomplete': [ + # Newline indicates end of task, should return to root + (r'\s*\n', IncompleteTaskText, '#pop'), + # Tokenize contexts and projects + (context_regex, Context), + (project_regex, Project), + # Tokenize non-whitespace text + (r'\S+', IncompleteTaskText), + # Tokenize whitespace not containing a newline + (r'\s+', IncompleteTaskText), + ], + } diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index 0188508f..b28d3df7 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -18,8 +18,8 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Other, Punctuation, Literal from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \ html_doctype_matches, unirange, iteritems -from pygments.lexers.agile import RubyLexer -from pygments.lexers.compiled import ScalaLexer +from pygments.lexers.ruby import RubyLexer +from pygments.lexers.jvm import ScalaLexer __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'JsonLexer', 'CssLexer',
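
Taken together, the relocations above leave the public lookup API untouched: aliases are still resolved through the tuples in pygments/lexers/_mapping.py, which now point at the new modules. A minimal sketch, not part of this commit, that checks where an alias resolves after the split; it assumes this revision of Pygments is importable:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

# 'yaml' is registered in _mapping.py against pygments.lexers.data in this
# commit, so alias-based lookup keeps working after the move.
lexer = get_lexer_by_name('yaml')
print(type(lexer).__module__)   # expected: pygments.lexers.data
print(highlight('key: [1, 2]\n', lexer, TerminalFormatter()))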
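The RstLexer moved above advertises a handlecodeblocks option, which lets _handle_sourcecode hand the body of a ``.. code-block:: language`` directive to a nested lexer. A small usage sketch with an invented snippet; the trailing blank line matters because the directive regex consumes indented lines up to the dedent:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers.markup import RstLexer  # new home per this commit

rst = ('.. code-block:: python\n'
       '\n'
       '    print("highlighted by a nested PythonLexer")\n'
       '\n')
# handlecodeblocks defaults to True; passed explicitly here for clarity.
print(highlight(rst, RstLexer(handlecodeblocks=True), TerminalFormatter()))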
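VimLexer.is_in, kept verbatim in the move to textedit.py, decides whether a word could be an abbreviated Vim command by bisecting a sorted list of (minimal form, full form) pairs. A standalone sketch of the same test; the three sample pairs are invented, while the real list is generated into pygments/lexers/_vimbuiltins.py by scripts/get_vimkw.py:

from bisect import bisect

# Invented sample mapping; it only mirrors the two-tuple shape is_in expects.
mapping = sorted([('ab', 'abbreviate'), ('f', 'function'), ('se', 'set')])

def is_in(w, mapping):
    # A word qualifies when some entry's minimal form is a prefix of it
    # and the word is itself a prefix of that entry's full form.
    p = bisect(mapping, (w,))
    if p > 0 and w.startswith(mapping[p - 1][0]) \
            and mapping[p - 1][1].startswith(w):
        return True
    if p < len(mapping):
        return w.startswith(mapping[p][0]) and mapping[p][1].startswith(w)
    return False

for word in ('ab', 'abbre', 'abbreviate', 'abc', 'fu'):
    print(word, is_in(word, mapping))  # 'abc' -> False, the rest -> True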
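In the relocated HttpLexer, header_callback records the Content-Type so that content_callback can dispatch the message body to a matching sub-lexer via get_lexer_for_mimetype. A usage sketch with a made-up request; application/json resolves to the JSON lexer registered elsewhere in this tree:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers.textfmts import HttpLexer  # new home per this commit

session = ('POST /api HTTP/1.1\r\n'
           'Content-Type: application/json\r\n'
           '\r\n'
           '{"split": true}')
# The body after the blank line is tokenized by the JSON lexer, not as Text.
print(highlight(session, HttpLexer(), TerminalFormatter()))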
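For TodotxtLexer, the compound regexes above mean a task's first token picks its route: 'x ' plus one or two dates enters the 'complete' state, while a '(A)'-style priority, a bare date, or a +project/@context tag starts an 'incomplete' one. A quick sketch on two invented tasks:

from pygments.lexers.textfmts import TodotxtLexer

sample = ('x 2014-09-19 2014-09-01 file the report +work @office\n'
          '(A) 2014-09-20 split up text lexers +pygments @home\n')
for token, value in TodotxtLexer().get_tokens(sample):
    if value.strip():  # skip pure-whitespace tokens for readability
        print(token, repr(value))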