author     Georg Brandl <georg@python.org>  2012-02-04 10:49:13 +0100
committer  Georg Brandl <georg@python.org>  2012-02-04 10:49:13 +0100
commit     7cef32ec183e53298e76304b965f1ea2c5f3577c (patch)
tree       28cec35ab3f3bcf2eb8def50c49d6610356297be
parent     d541282daa49002937f56da77ac5bb3a4b4f15c3 (diff)
parent     2c1a1ade8db7b432ff2b5c42cb40f7a898c05f5d (diff)
download   pygments-7cef32ec183e53298e76304b965f1ea2c5f3577c.tar.gz
Merge and fix ECL lexer.
-rw-r--r--  AUTHORS                                   |    5
-rw-r--r--  CHANGES                                   |    7
-rw-r--r--  pygments/formatters/html.py               |    2
-rw-r--r--  pygments/lexer.py                         |    2
-rw-r--r--  pygments/lexers/_mapping.py               |    9
-rw-r--r--  pygments/lexers/agile.py                  |  139
-rw-r--r--  pygments/lexers/compiled.py               |   13
-rw-r--r--  pygments/lexers/dotnet.py                 |   17
-rw-r--r--  pygments/lexers/functional.py             |  324
-rw-r--r--  pygments/lexers/other.py                  |  157
-rw-r--r--  pygments/lexers/pypylog.py                |   14
-rw-r--r--  pygments/lexers/web.py                    |   81
-rw-r--r--  pygments/util.py                          |    2
-rw-r--r--  tests/examplefiles/classes.dylan          |   16
-rw-r--r--  tests/examplefiles/example.moon           |  629
-rw-r--r--  tests/examplefiles/example.snobol         |   15
-rw-r--r--  tests/examplefiles/http_request_example   |   14
-rw-r--r--  tests/examplefiles/http_response_example  |   27
-rw-r--r--  tests/examplefiles/nemerle_sample.n       |    6
-rw-r--r--  tests/examplefiles/test.cs                |   23
-rw-r--r--  tests/test_basic_api.py                   |    2
-rw-r--r--  tests/test_examplefiles.py                |    5
-rw-r--r--  tests/test_html_formatter.py              |   32
-rw-r--r--  tests/test_perllexer.py                   |  137
-rw-r--r--  tests/test_util.py                        |   38
25 files changed, 1631 insertions, 85 deletions
diff --git a/AUTHORS b/AUTHORS
@@ -22,6 +22,7 @@ Other contributors, listed alphabetically, are:
 * Frits van Bommel -- assembler lexers
 * Pierre Bourdon -- bugfixes
 * Hiram Chirino -- Scaml and Jade lexers
+* Leaf Corcoran -- MoonScript lexer
 * Christopher Creutzig -- MuPAD lexer
 * Pete Curry -- bugfixes
 * Owen Durni -- haXe lexer
@@ -36,6 +37,7 @@ Other contributors, listed alphabetically, are:
 * Matt Good -- Genshi, Cheetah lexers
 * Patrick Gotthardt -- PHP namespaces support
 * Olivier Guibe -- Asymptote lexer
+* Martin Harriman -- SNOBOL lexer
 * Matthew Harrison -- SVG formatter
 * Steven Hazel -- Tcl lexer
 * Aslak Hellesøy -- Gherkin lexer
@@ -45,6 +47,7 @@ Other contributors, listed alphabetically, are:
 * Ben Hollis -- Mason lexer
 * Tim Howard -- BlitzMax lexer
 * Dennis Kaarsemaker -- sources.list lexer
+* Adam Koprowski -- Opa lexer
 * Benjamin Kowarsch -- Modula-2 lexer
 * Marek Kubica -- Scheme lexer
 * Jochen Kupperschmidt -- Markdown processor
@@ -56,6 +59,7 @@ Other contributors, listed alphabetically, are:
 * Stephen McKamey -- Duel/JBST lexer
 * Brian McKenna -- F# lexer
 * Lukas Meuser -- BBCode formatter, Lua lexer
+* Hong Minhee -- HTTP lexer
 * Michael Mior -- Awk lexer
 * Paulo Moura -- Logtalk lexer
 * Mher Movsisyan -- DTD lexer
@@ -69,6 +73,7 @@ Other contributors, listed alphabetically, are:
 * Benjamin Peterson -- Test suite refactoring
 * Dominik Picheta -- Nimrod lexer
 * Justin Reidy -- MXML lexer
+* Norman Richards -- JSON lexer
 * Lubomir Rintel -- GoodData MAQL and CL lexers
 * Andre Roberge -- Tango style
 * Konrad Rudolph -- LaTeX formatter enhancements
diff --git a/CHANGES b/CHANGES
@@ -6,7 +6,7 @@ http://bitbucket.org/birkenfeld/pygments-main/issues.
 
 Version 1.5
 -----------
-(codename not decided, released Jul xx, 2011)
+(codename not decided, released 2012)
 
 - Lexers added:
@@ -24,6 +24,11 @@ Version 1.5
   * Octave (PR#22)
   * Standard ML (PR#14)
   * CFengine3 (#601)
+  * Opa (PR#37)
+  * HTTP sessions (PR#42)
+  * JSON (PR#31)
+  * SNOBOL (PR#30)
+  * MoonScript (PR#43)
 
 - In the LaTeX formatter, escape special &, < and > chars (#648).
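
All five lexers added in this merge are also registered in the lexer mapping, so they resolve through the standard lookup helpers. A quick sanity check (a minimal sketch; the expected names come from the mapping entries in this commit):

    from pygments.lexers import get_lexer_by_name

    # Aliases registered by this merge; each should resolve without error.
    for alias in ('opa', 'http', 'json', 'snobol', 'moon'):
        print(alias, '->', get_lexer_by_name(alias).name)
    # opa -> Opa, http -> HTTP, json -> JSON, snobol -> Snobol, moon -> MoonScript
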
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index 1f0ca680..f6c6400a 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -596,7 +596,7 @@ class HtmlFormatter(Formatter):
 
     def _wrap_lineanchors(self, inner):
         s = self.lineanchors
-        i = 0
+        i = self.linenostart - 1 # subtract 1 since we have to increment i *before* yielding
         for t, line in inner:
             if t:
                 i += 1
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 53ea5ac1..33427ae3 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -439,7 +439,7 @@ class RegexLexerMeta(LexerMeta):
 
     def __call__(cls, *args, **kwds):
         """Instantiate cls after preprocessing its token definitions."""
-        if not hasattr(cls, '_tokens'):
+        if '_tokens' not in cls.__dict__:
             cls._all_tokens = {}
             cls._tmpname = 0
             if hasattr(cls, 'token_variants') and cls.token_variants:
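
The `_wrap_lineanchors` change makes the HTML formatter's line anchors respect the `linenostart` option instead of always counting from 1, so anchors and visible line numbers stay in sync. A minimal sketch of the now-consistent behavior (the snippet and values are illustrative, not from the patch):

    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter

    code = "print('hello')\nprint('world')\n"

    # With linenostart=100 the printed line numbers begin at 100; after this
    # fix the <a name="..."> anchors follow suit (l-100, l-101), not l-1, l-2.
    html = highlight(code, PythonLexer(),
                     HtmlFormatter(linenos=True, linenostart=100, lineanchors='l'))
    print('name="l-100"' in html)  # expected: True
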
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 977df8c7..49ea9728 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -75,8 +75,8 @@ LEXERS = {
     'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)),
     'DuelLexer': ('pygments.lexers.web', 'Duel', ('duel', 'Duel Engine', 'Duel View', 'JBST', 'jbst', 'JsonML+BST'), ('*.duel', '*.jbst'), ('text/x-duel', 'text/x-jbst')),
     'DylanLexer': ('pygments.lexers.compiled', 'Dylan', ('dylan',), ('*.dylan', '*.dyl'), ('text/x-dylan',)),
+    'ECLLexer': ('pygments.lexers.other', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)),
     'ECLexer': ('pygments.lexers.compiled', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')),
-    'ECLLexer': ('pygments.lexers.other', 'ECL', ('ecl',), ('*.ecl'), ('text/x-ecl')),
     'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)),
     'ErlangLexer': ('pygments.lexers.functional', 'Erlang', ('erlang',), ('*.erl', '*.hrl'), ('text/x-erlang',)),
     'ErlangShellLexer': ('pygments.lexers.functional', 'Erlang erl session', ('erl',), ('*.erl-sh',), ('text/x-erl-shellsession',)),
@@ -109,17 +109,19 @@ LEXERS = {
     'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')),
     'HtmlPhpLexer': ('pygments.lexers.templates', 'HTML+PHP', ('html+php',), ('*.phtml',), ('application/x-php', 'application/x-httpd-php', 'application/x-httpd-php3', 'application/x-httpd-php4', 'application/x-httpd-php5')),
     'HtmlSmartyLexer': ('pygments.lexers.templates', 'HTML+Smarty', ('html+smarty',), (), ('text/html+smarty',)),
+    'HttpLexer': ('pygments.lexers.other', 'HTTP', ('http',), (), ()),
     'HybrisLexer': ('pygments.lexers.other', 'Hybris', ('hybris', 'hy'), ('*.hy', '*.hyb'), ('text/x-hybris', 'application/x-hybris')),
     'IniLexer': ('pygments.lexers.text', 'INI', ('ini', 'cfg'), ('*.ini', '*.cfg'), ('text/x-ini',)),
     'IoLexer': ('pygments.lexers.agile', 'Io', ('io',), ('*.io',), ('text/x-iosrc',)),
     'IokeLexer': ('pygments.lexers.agile', 'Ioke', ('ioke', 'ik'), ('*.ik',), ('text/x-iokesrc',)),
     'IrcLogsLexer': ('pygments.lexers.text', 'IRC logs', ('irc',), ('*.weechatlog',), ('text/x-irclog',)),
+    'JSONLexer': ('pygments.lexers.web', 'JSON', ('json',), ('*.json',), ('application/json',)),
     'JadeLexer': ('pygments.lexers.web', 'Jade', ('jade', 'JADE'), ('*.jade',), ('text/x-jade',)),
     'JavaLexer': ('pygments.lexers.compiled', 'Java', ('java',), ('*.java',), ('text/x-java',)),
     'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('js+django', 'javascript+django', 'js+jinja', 'javascript+jinja'), (), ('application/x-javascript+django', 'application/x-javascript+jinja', 'text/x-javascript+django', 'text/x-javascript+jinja', 'text/javascript+django', 'text/javascript+jinja')),
     'JavascriptErbLexer': ('pygments.lexers.templates', 'JavaScript+Ruby', ('js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby'), (), ('application/x-javascript+ruby', 'text/x-javascript+ruby', 'text/javascript+ruby')),
     'JavascriptGenshiLexer': ('pygments.lexers.templates', 'JavaScript+Genshi Text', ('js+genshitext', 'js+genshi', 'javascript+genshitext', 'javascript+genshi'), (), ('application/x-javascript+genshi', 'text/x-javascript+genshi', 'text/javascript+genshi')),
-    'JavascriptLexer': ('pygments.lexers.web', 'JavaScript', ('js', 'javascript'), ('*.js', '*.json'), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript', 'application/json')),
+    'JavascriptLexer': ('pygments.lexers.web', 'JavaScript', ('js', 'javascript'), ('*.js',), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript')),
     'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')),
     'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
     'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
@@ -143,6 +145,7 @@ LEXERS = {
     'ModelicaLexer': ('pygments.lexers.other', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)),
     'Modula2Lexer': ('pygments.lexers.compiled', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)),
     'MoinWikiLexer': ('pygments.lexers.text', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)),
+    'MoonScriptLexer': ('pygments.lexers.agile', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')),
     'MuPADLexer': ('pygments.lexers.math', 'MuPAD', ('mupad',), ('*.mu',), ()),
     'MxmlLexer': ('pygments.lexers.web', 'MXML', ('mxml',), ('*.mxml',), ()),
     'MySqlLexer': ('pygments.lexers.other', 'MySQL', ('mysql',), (), ('text/x-mysql',)),
@@ -164,6 +167,7 @@ LEXERS = {
     'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
     'OctaveLexer': ('pygments.lexers.math', 'Octave', ('octave',), ('*.m',), ('text/octave',)),
     'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)),
+    'OpaLexer': ('pygments.lexers.functional', 'Opa', ('opa',), ('*.opa',), ('text/x-opa',)),
     'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')),
     'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)),
     'PlPgsqlLexer': ('pygments.lexers.postgres', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
@@ -205,6 +209,7 @@ LEXERS = {
     'ScssLexer': ('pygments.lexers.web', 'SCSS', ('scss',), ('*.scss',), ('text/x-scss',)),
     'SmalltalkLexer': ('pygments.lexers.other', 'Smalltalk', ('smalltalk', 'squeak'), ('*.st',), ('text/x-smalltalk',)),
     'SmartyLexer': ('pygments.lexers.templates', 'Smarty', ('smarty',), ('*.tpl',), ('application/x-smarty',)),
+    'SnobolLexer': ('pygments.lexers.other', 'Snobol', ('snobol',), ('*.snobol',), ('text/x-snobol',)),
     'SourcesListLexer': ('pygments.lexers.text', 'Debian Sourcelist', ('sourceslist', 'sources.list'), ('sources.list',), ()),
     'SqlLexer': ('pygments.lexers.other', 'SQL', ('sql',), ('*.sql',), ('text/x-sql',)),
     'SqliteConsoleLexer': ('pygments.lexers.other', 'sqlite3con', ('sqlite3',), ('*.sqlite3-console',), ('text/x-sqlite3-console',)),
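
The corrected `ECLLexer` entry above also fixes a subtle Python pitfall: `('*.ecl')` is just a parenthesized string, not a one-element tuple, so code iterating over the filename patterns would walk the string character by character. A quick illustration (plain Python, not part of the patch):

    # ('*.ecl') is the string '*.ecl'; only the trailing comma makes a tuple.
    broken = ('*.ecl')
    fixed = ('*.ecl',)

    print(list(broken))  # ['*', '.', 'e', 'c', 'l'] -- each char "is" a pattern
    print(list(fixed))   # ['*.ecl']
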
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 0c0de54e..6fcb7183 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -21,9 +21,9 @@ from pygments import unistring as uni
 
 __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
            'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer',
-           'MiniDLexer', 'IoLexer', 'TclLexer', 'ClojureLexer',
-           'Python3Lexer', 'Python3TracebackLexer', 'FactorLexer',
-           'IokeLexer', 'FancyLexer', 'GroovyLexer']
+           'MoonScriptLexer', 'MiniDLexer', 'IoLexer', 'TclLexer',
+           'ClojureLexer', 'Python3Lexer', 'Python3TracebackLexer',
+           'FactorLexer', 'IokeLexer', 'FancyLexer', 'GroovyLexer']
 
 # b/w compatibility
 from pygments.lexers.functional import SchemeLexer
@@ -105,12 +105,12 @@ class PythonLexer(RegexLexer):
              r'WindowsError|ZeroDivisionError)\b', Name.Exception),
         ],
         'numbers': [
-            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
-            (r'\d+[eE][+-]?[0-9]+', Number.Float),
-            (r'0[0-7]+', Number.Oct),
+            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
+            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
+            (r'0[0-7]+j?', Number.Oct),
             (r'0[xX][a-fA-F0-9]+', Number.Hex),
             (r'\d+L', Number.Integer.Long),
-            (r'\d+', Number.Integer)
+            (r'\d+j?', Number.Integer)
         ],
         'backtick': [
             ('`.*?`', String.Backtick),
@@ -199,7 +199,7 @@ class Python3Lexer(RegexLexer):
     tokens = PythonLexer.tokens.copy()
     tokens['keywords'] = [
         (r'(assert|break|continue|del|elif|else|except|'
-         r'finally|for|global|if|lambda|pass|raise|'
+         r'finally|for|global|if|lambda|pass|raise|nonlocal|'
         r'return|try|while|yield|as|with|True|False|None)\b', Keyword),
     ]
     tokens['builtins'] = [
@@ -824,19 +824,19 @@ class PerlLexer(RegexLexer):
     mimetypes = ['text/x-perl', 'application/x-perl']
 
     flags = re.DOTALL | re.MULTILINE
-    # TODO: give this a perl guy who knows how to parse perl...
+    # TODO: give this to a perl guy who knows how to parse perl...
     tokens = {
         'balanced-regex': [
-            (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
-            (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
+            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
+            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
             (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
-            (r'{(\\\\|\\}|[^}])*}[egimosx]*', String.Regex, '#pop'),
-            (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
-            (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
-            (r'\((\\\\|\\\)|[^\)])*\)[egimosx]*', String.Regex, '#pop'),
-            (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'),
-            (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'),
-            (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'),
+            (r'{(\\\\|\\[^\\]|[^\\}])*}[egimosx]*', String.Regex, '#pop'),
+            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
+            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
+            (r'\((\\\\|\\[^\\]|[^\\\)])*\)[egimosx]*', String.Regex, '#pop'),
+            (r'@(\\\\|\\[^\\]|[^\\\@])*@[egimosx]*', String.Regex, '#pop'),
+            (r'%(\\\\|\\[^\\]|[^\\\%])*%[egimosx]*', String.Regex, '#pop'),
+            (r'\$(\\\\|\\[^\\]|[^\\\$])*\$[egimosx]*', String.Regex, '#pop'),
         ],
         'root': [
             (r'\#.*?$', Comment.Single),
@@ -848,20 +848,26 @@ class PerlLexer(RegexLexer):
              bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
             (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
             # common delimiters
-            (r's/(\\\\|\\/|[^/])*/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex),
+            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
+             String.Regex),
             (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
             (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
-            (r's@(\\\\|\\@|[^@])*@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex),
-            (r's%(\\\\|\\%|[^%])*%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex),
+            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
+             String.Regex),
+            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
+             String.Regex),
             # balanced delimiters
-            (r's{(\\\\|\\}|[^}])*}\s*', String.Regex, 'balanced-regex'),
-            (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
-            (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
-            (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'),
-
-            (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
+            (r's{(\\\\|\\[^\\]|[^\\}])*}\s*', String.Regex, 'balanced-regex'),
+            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
+            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
+             'balanced-regex'),
+            (r's\((\\\\|\\[^\\]|[^\\\)])*\)\s*', String.Regex,
+             'balanced-regex'),
+
+            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
             (r'm(?=[/!\\{<\[\(@%\$])', String.Regex, 'balanced-regex'),
-            (r'((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[gcimosx]*', String.Regex),
+            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
+             String.Regex),
             (r'\s+', Text),
             (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
              r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
@@ -903,9 +909,9 @@ class PerlLexer(RegexLexer):
              Number.Float),
             (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
             (r'\d+(_\d+)*', Number.Integer),
-            (r"'(\\\\|\\'|[^'])*'", String),
-            (r'"(\\\\|\\"|[^"])*"', String),
-            (r'`(\\\\|\\`|[^`])*`', String.Backtick),
+            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
+            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
+            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
             (r'<([^\s>]+)>', String.Regex),
             (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
             (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
@@ -1112,6 +1118,73 @@ class LuaLexer(RegexLexer):
             yield index, token, value
 
 
+class MoonScriptLexer(LuaLexer):
+    """
+    For `MoonScript <http://moonscript.org>`_ source code.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = "MoonScript"
+    aliases = ["moon", "moonscript"]
+    filenames = ["*.moon"]
+    mimetypes = ['text/x-moonscript', 'application/x-moonscript']
+
+    tokens = {
+        'root': [
+            (r'#!(.*?)$', Comment.Preproc),
+            (r'', Text, 'base'),
+        ],
+        'base': [
+            ('--.*$', Comment.Single),
+            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
+            (r'(?i)\d+e[+-]?\d+', Number.Float),
+            (r'(?i)0x[0-9a-f]*', Number.Hex),
+            (r'\d+', Number.Integer),
+            (r'\n', Text),
+            (r'[^\S\n]+', Text),
+            (r'(?s)\[(=*)\[.*?\]\1\]', String),
+            (r'(->|=>)', Name.Function),
+            (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
+            (r'(==|!=|~=|<=|>=|\.\.|\.\.\.|[=+\-*/%^<>#!.\\:])', Operator),
+            (r'[;,]', Punctuation),
+            (r'[\[\]\{\}\(\)]', Keyword.Type),
+            (r"(class|extends|if|then|super|do|with|import|export|"
+             r"while|elseif|return|for|in|from|when|using|else|"
+             r"and|or|not|switch|break)\b", Keyword),
+            (r'(true|false|nil)\b', Keyword.Constant),
+            (r'(and|or|not)\b', Operator.Word),
+            (r'(self)\b', Name.Builtin.Pseudo),
+            (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable),
+            (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
+            (r'[A-Z]\w+', Name.Class),  # proper name
+            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
+            ("'", String.Single, combined('stringescape', 'sqs')),
+            ('"', String.Double, combined('stringescape', 'dqs'))
+        ],
+        'stringescape': [
+            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
+        ],
+        'sqs': [
+            ("'", String.Single, '#pop'),
+            (".", String)
+        ],
+        'dqs': [
+            ('"', String.Double, '#pop'),
+            (".", String)
+        ]
+    }
+
+    def get_tokens_unprocessed(self, text):
+        # set . as Operator instead of Punctuation
+        for index, token, value in \
+                LuaLexer.get_tokens_unprocessed(self, text):
+            if token == Punctuation and value == ".":
+                token = Operator
+            yield index, token, value
+
+
 class MiniDLexer(RegexLexer):
     """
     For `MiniD <http://www.dsource.org/projects/minid>`_ (a D-like scripting
@@ -1430,8 +1503,8 @@ class ClojureLexer(RegexLexer):
     valid_name = r'[\w!$%*+,<=>?/.-]+'
 
     def _multi_escape(entries):
-        return '|'.join([re.escape(entry) + '(?![\\w-!$%*+,<=>?/.-])'
-                         for entry in entries])
+        return '(?:' + '|'.join(map(re.escape, entries)) + \
+            ')?![\\w!$%*+,<=>?/.-]'
 
     tokens = {
         'root' : [
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 5da2b875..af153a54 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -1232,8 +1232,10 @@ class DylanLexer(RegexLexer):
             (r'\$[a-zA-Z0-9-]+', Name.Constant),
             (r'[!$%&*/:<>=?~^.+\[\]{}-]+', Operator),
             (r'\s+', Text),
+            (r'#"[a-zA-Z0-9-]+"', Keyword),
             (r'#[a-zA-Z0-9-]+', Keyword),
-            (r'[a-zA-Z0-9-]+', Name.Variable),
+            (r'#(\(|\[)', Punctuation),
+            (r'[a-zA-Z0-9-_]+', Name.Variable),
         ],
         'string': [
             (r'"', String, '#pop'),
@@ -2296,7 +2298,7 @@ class AdaLexer(RegexLexer):
             include('numbers'),
             (r"'[^']'", String.Character),
             (r'([a-z0-9_]+)(\s*|[(,])', bygroups(Name, using(this))),
-            (r"(<>|=>|:=|[\(\)\|:;,.'])", Punctuation),
+            (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
             (r'[*<>+=/&-]', Operator),
             (r'\n+', Text),
         ],
@@ -2317,7 +2319,7 @@ class AdaLexer(RegexLexer):
         ],
         'end' : [
             ('(if|case|record|loop|select)', Keyword.Reserved),
-            ('"[^"]+"|[a-zA-Z0-9_]+', Name.Function),
+            ('"[^"]+"|[a-zA-Z0-9_.]+', Name.Function),
             ('[\n\s]+', Text),
             (';', Punctuation, '#pop'),
         ],
@@ -2337,11 +2339,12 @@ class AdaLexer(RegexLexer):
         ],
         'import': [
             (r'[a-z0-9_.]+', Name.Namespace, '#pop'),
+            (r'', Text, '#pop'),
         ],
         'formal_part' : [
             (r'\)', Punctuation, '#pop'),
-            (r'([a-z0-9_]+)(\s*)(,|:[^=])', bygroups(Name.Variable,
-             Text, Punctuation)),
+            (r'[a-z0-9_]+', Name.Variable),
+            (r',|:[^=]', Punctuation),
            (r'(in|not|null|out|access)\b', Keyword.Reserved),
             include('root'),
         ],
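
With `MoonScriptLexer` registered under the `moon` alias and the `*.moon` filename pattern, the new lexer is reachable through filename-based lookup. A quick smoke test (file name and snippet invented for illustration):

    from pygments import highlight
    from pygments.lexers import get_lexer_for_filename
    from pygments.formatters import TerminalFormatter

    # Hypothetical snippet; any *.moon path now resolves to MoonScriptLexer.
    moon_code = 'class Counter\n  new: => @n = 0\n  inc: => @n += 1\n'
    lexer = get_lexer_for_filename('example.moon')
    print(lexer.name)  # MoonScript
    print(highlight(moon_code, lexer, TerminalFormatter()))
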
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index f78df352..20f645e1 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -92,7 +92,7 @@ class CSharpLexer(RegexLexer):
             (r'\n', Text),
             (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
             (r'[{}]', Punctuation),
-            (r'@"(\\\\|\\"|[^"])*"', String),
+            (r'@"(""|[^"])*"', String),
             (r'"(\\\\|\\"|[^"\n])*["\n]', String),
             (r"'\\.'|'[^\\]'", String.Char),
             (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
@@ -111,10 +111,12 @@ class CSharpLexer(RegexLexer):
             r'ref|return|sealed|sizeof|stackalloc|static|'
             r'switch|this|throw|true|try|typeof|'
             r'unchecked|unsafe|virtual|void|while|'
-            r'get|set|new|partial|yield|add|remove|value)\b', Keyword),
+            r'get|set|new|partial|yield|add|remove|value|alias|ascending|'
+            r'descending|from|group|into|orderby|select|where|'
+            r'join|equals)\b', Keyword),
             (r'(global)(::)', bygroups(Keyword, Punctuation)),
-            (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
-             r'short|string|uint|ulong|ushort)\b\??', Keyword.Type),
+            (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|'
+             r'sbyte|short|string|uint|ulong|ushort|var)\b\??', Keyword.Type),
             (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
             (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'),
             (cs_ident, Name),
@@ -214,7 +216,7 @@ class NemerleLexer(RegexLexer):
             (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
             (r'[{}]', Punctuation),
 
-            (r'@"(\\\\|\\"|[^"])*"', String),
+            (r'@"(""|[^"])*"', String),
             (r'"(\\\\|\\"|[^"\n])*["\n]', String),
             (r"'\\.'|'[^\\]'", String.Char),
             (r"0[xX][0-9a-fA-F]+[Ll]?", Number),
@@ -278,8 +280,9 @@ class NemerleLexer(RegexLexer):
         ],
         'splice-string-content': [
             (r'if|match', Keyword),
-            (r'[~!%^&*+=|\[\]:;,.<>/?-]', Punctuation),
+            (r'[~!%^&*+=|\[\]:;,.<>/?-\\"$ ]', Punctuation),
             (cs_ident, Name),
+            (r'\d+', Number),
             (r'\(', Punctuation, '#push'),
             (r'\)', Punctuation, '#pop')
         ]
@@ -581,7 +584,7 @@ class FSharpLexer(RegexLexer):
              Name.Namespace, 'dotted'),
             (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
             (r'//.*?\n', Comment.Single),
-            (r'\(\*', Comment, 'comment'),
+            (r'\(\*(?!\))', Comment, 'comment'),
             (r'\b(%s)\b' % '|'.join(keywords), Keyword),
             (r'(%s)' % '|'.join(keyopts), Operator),
             (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index a8102cc8..c6f5421f 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -14,11 +14,11 @@ import re
 from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
 from pygments.token import Text, Comment, Operator, Keyword, Name, \
     String, Number, Punctuation, Literal, Generic, Error
-
+import pygments.formatters
 
 __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
     'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
-    'ErlangShellLexer']
+    'ErlangShellLexer', 'OpaLexer']
 
 
 class SchemeLexer(RegexLexer):
@@ -1079,3 +1079,323 @@ class ErlangShellLexer(Lexer):
                                    erlexer.get_tokens_unprocessed(curcode)):
                 yield item
+
+
+class OpaLexer(RegexLexer):
+    """
+    Lexer for the Opa language (http://opalang.org).
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'Opa'
+    aliases = ['opa']
+    filenames = ['*.opa']
+    mimetypes = ['text/x-opa']
+
+    # most of these aren't strictly keywords
+    # but if you color only real keywords, you might just
+    # as well not color anything
+    keywords = [
+        'and', 'as', 'begin', 'css', 'database', 'db', 'do', 'else', 'end',
+        'external', 'forall', 'if', 'import', 'match', 'package', 'parser',
+        'rec', 'server', 'then', 'type', 'val', 'with', 'xml_parser'
+    ]
+
+    # matches both stuff and `stuff`
+    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'
+
+    op_re = r'[.=\-<>,@~%/+?*&^!]'
+    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
+                              # because they are also used for inserts
+
+    tokens = {
+        # copied from the caml lexer, should be adapted
+        'escape-sequence': [
+            (r'\\[\\\"\'ntr}]', String.Escape),
+            (r'\\[0-9]{3}', String.Escape),
+            (r'\\x[0-9a-fA-F]{2}', String.Escape),
+        ],
+
+        # factorizing these rules, because they are inserted many times
+        'comments': [
+            (r'/\*', Comment, 'nested-comment'),
+            (r'//.*?$', Comment),
+        ],
+        'comments-and-spaces': [
+            include('comments'),
+            (r'\s+', Text),
+        ],
+
+        'root': [
+            include('comments-and-spaces'),
+            # keywords
+            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
+            # directives
+            # we could parse the actual set of directives instead of anything
+            # starting with @, but this is troublesome
+            # because it needs to be adjusted all the time
+            # and assuming we parse only sources that compile, it is useless
+            (r'@'+ident_re+r'\b', Name.Builtin.Pseudo),
+
+            # number literals
+            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
+            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
+            (r'-?\d+[eE][+\-]?\d+', Number.Float),
+            (r'0[xX][\da-fA-F]+', Number.Hex),
+            (r'0[oO][0-7]+', Number.Oct),
+            (r'0[bB][01]+', Number.Binary),
+            (r'\d+', Number.Integer),
+            # color literals
+            (r'#[\da-fA-F]{3,6}', Number.Integer),
+
+            # string literals
+            (r'"', String.Double, 'string'),
+            # char literal, should be checked because this is the regexp from
+            # the caml lexer
+            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
+             String.Char),
+
+            # this is meant to deal with embedded exprs in strings
+            # every time we find a '}' we pop a state so that if we were
+            # inside a string, we are back in the string state
+            # as a consequence, we must also push a state every time we find a
+            # '{' or else we will have errors when parsing {} for instance
+            (r'{', Operator, '#push'),
+            (r'}', Operator, '#pop'),
+
+            # html literals
+            # this is much more strict than the actual parser,
+            # since a<b would not be parsed as html
+            # but then again, the parser is way too lax, and we can't hope
+            # to have something as tolerant
+            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),
+
+            # db path
+            # matching the '[_]' in '/a[_]' because it is a part
+            # of the syntax of the db path definition
+            # unfortunately, i don't know how to match the ']' in
+            # /a[1], so this is somewhat inconsistent
+            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
+            # putting the same color on <- as on db path, since
+            # it can be used only to mean Db.write
+            (r'<-(?!'+op_re+r')', Name.Variable),
+
+            # 'modules'
+            # although modules are not distinguished by their names as in caml
+            # the standard library seems to follow the convention that modules
+            # only are capitalized
+            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),
+
+            # operators
+            # = has a special role because this is the only
+            # way to syntactically distinguish binding constructions
+            # unfortunately, this colors the equal in {x=2} too
+            (r'=(?!'+op_re+r')', Keyword),
+            (r'(%s)+' % op_re, Operator),
+            (r'(%s)+' % punc_re, Operator),
+
+            # coercions
+            (r':', Operator, 'type'),
+            # type variables
+            # we need this rule because we don't parse specially type
+            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
+            ("'"+ident_re, Keyword.Type),
+
+            # id literal, #something, or #{expr}
+            (r'#'+ident_re, String.Single),
+            (r'#(?={)', String.Single),
+
+            # identifiers
+            # this avoids to color '2' in 'a2' as an integer
+            (ident_re, Text),
+
+            # default, not sure if that is needed or not
+            # (r'.', Text),
+        ],
+
+        # it is quite painful to have to parse types to know where they end
+        # this is the general rule for a type
+        # a type is either:
+        # * -> ty
+        # * type-with-slash
+        # * type-with-slash -> ty
+        # * type-with-slash (, type-with-slash)+ -> ty
+        #
+        # the code is pretty funky in here, but this code would roughly
+        # translate in caml to:
+        # let rec type stream =
+        #   match stream with
+        #   | [< "->"; stream >] -> type stream
+        #   | [< ""; stream >] ->
+        #     type_with_slash stream
+        #     type_lhs_1 stream;
+        # and type_1 stream = ...
+        'type': [
+            include('comments-and-spaces'),
+            (r'->', Keyword.Type),
+            (r'', Keyword.Type, ('#pop', 'type-lhs-1', 'type-with-slash')),
+        ],
+
+        # parses all the atomic or closed constructions in the syntax of type
+        # expressions: record types, tuple types, type constructors, basic type
+        # and type variables
+        'type-1': [
+            include('comments-and-spaces'),
+            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
+            (r'~?{', Keyword.Type, ('#pop', 'type-record')),
+            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
+            (ident_re, Keyword.Type, '#pop'),
+            ("'"+ident_re, Keyword.Type),
+            # this case is not in the syntax but sometimes
+            # we think we are parsing types when in fact we are parsing
+            # some css, so we just pop the states until we get back into
+            # the root state
+            (r'', Keyword.Type, '#pop'),
+        ],
+
+        # type-with-slash is either:
+        # * type-1
+        # * type-1 (/ type-1)+
+        'type-with-slash': [
+            include('comments-and-spaces'),
+            (r'', Keyword.Type, ('#pop', 'slash-type-1', 'type-1')),
+        ],
+        'slash-type-1': [
+            include('comments-and-spaces'),
+            ('/', Keyword.Type, ('#pop', 'type-1')),
+            # same remark as above
+            (r'', Keyword.Type, '#pop'),
+        ],
+
+        # we go in this state after having parsed a type-with-slash
+        # while trying to parse a type
+        # and at this point we must determine if we are parsing an arrow
+        # type (in which case we must continue parsing) or not (in which
+        # case we stop)
+        'type-lhs-1': [
+            include('comments-and-spaces'),
+            (r'->', Keyword.Type, ('#pop', 'type')),
+            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
+            (r'', Keyword.Type, '#pop'),
+        ],
+        'type-arrow': [
+            include('comments-and-spaces'),
+            # the look ahead here allows to parse f(x : int, y : float -> truc)
+            # correctly
+            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
+            (r'->', Keyword.Type, ('#pop', 'type')),
+            # same remark as above
+            (r'', Keyword.Type, '#pop'),
+        ],
+
+        # no need to do precise parsing for tuples and records
+        # because they are closed constructions, so we can simply
+        # find the closing delimiter
+        # note that this function would not work if the source
+        # contained identifiers like `{)` (although it could be patched
+        # to support it)
+        'type-tuple': [
+            include('comments-and-spaces'),
+            (r'[^\(\)/*]+', Keyword.Type),
+            (r'[/*]', Keyword.Type),
+            (r'\(', Keyword.Type, '#push'),
+            (r'\)', Keyword.Type, '#pop'),
+        ],
+        'type-record': [
+            include('comments-and-spaces'),
+            (r'[^{}/*]+', Keyword.Type),
+            (r'[/*]', Keyword.Type),
+            (r'{', Keyword.Type, '#push'),
+            (r'}', Keyword.Type, '#pop'),
+        ],
+
+#        'type-tuple': [
+#            include('comments-and-spaces'),
+#            (r'\)', Keyword.Type, '#pop'),
+#            (r'', Keyword.Type, ('#pop', 'type-tuple-1', 'type-1')),
+#        ],
+#        'type-tuple-1': [
+#            include('comments-and-spaces'),
+#            (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
+#            (r',', Keyword.Type, 'type-1'),
+#        ],
+#        'type-record':[
+#            include('comments-and-spaces'),
+#            (r'}', Keyword.Type, '#pop'),
+#            (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
+#        ],
+#        'type-record-field-expr': [
+#
+#        ],
+
+        'nested-comment': [
+            (r'[^/*]+', Comment),
+            (r'/\*', Comment, '#push'),
+            (r'\*/', Comment, '#pop'),
+            (r'[/*]', Comment),
+        ],
+
+        # the copy pasting between string and single-string
+        # is kinda sad. Is there a way to avoid that??
+        'string': [
+            (r'[^\\"{]+', String.Double),
+            (r'"', String.Double, '#pop'),
+            (r'{', Operator, 'root'),
+            include('escape-sequence'),
+        ],
+        'single-string': [
+            (r'[^\\\'{]+', String.Double),
+            (r'\'', String.Double, '#pop'),
+            (r'{', Operator, 'root'),
+            include('escape-sequence'),
+        ],
+
+        # all the html stuff
+        # can't really reuse some existing html parser
+        # because we must be able to parse embedded expressions
+
+        # we are in this state after someone parsed the '<' that
+        # started the html literal
+        'html-open-tag': [
+            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
+            (r'>', String.Single, ('#pop', 'html-content')),
+        ],
+
+        # we are in this state after someone parsed the '</' that
+        # started the end of the closing tag
+        'html-end-tag': [
+            # this is a star, because </> is allowed
+            (r'[\w\-:]*>', String.Single, '#pop'),
+        ],
+
+        # we are in this state after having parsed '<ident(:ident)?'
+        # we thus parse a possibly empty list of attributes
+        'html-attr': [
+            (r'\s+', Text),
+            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
+            (r'/>', String.Single, '#pop'),
+            (r'>', String.Single, ('#pop', 'html-content')),
+        ],
+
+        'html-attr-value': [
+            (r"'", String.Single, ('#pop', 'single-string')),
+            (r'"', String.Single, ('#pop', 'string')),
+            (r'#'+ident_re, String.Single, '#pop'),
+            (r'#(?={)', String.Single, ('#pop', 'root')),
+            (r'{', Operator, ('#pop', 'root')),  # this is a tail call!
+        ],
+
+        # we should probably deal with '\' escapes here
+        'html-content': [
+            (r'<!--', Comment, 'html-comment'),
+            (r'</', String.Single, ('#pop', 'html-end-tag')),
+            (r'<', String.Single, 'html-open-tag'),
+            (r'{', Operator, 'root'),
+            (r'.|\s+', String.Single),
+        ],
+
+        'html-comment': [
+            (r'-->', Comment, '#pop'),
+            (r'[^\-]+|-', Comment),
+        ],
+    }
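
The `{`/`}` push/pop trick in `OpaLexer`'s `root` and `string` states generalizes to any grammar with brace-delimited interpolation inside strings; a stripped-down sketch of the same technique (a hypothetical toy lexer, not part of the patch):

    from pygments.lexer import RegexLexer
    from pygments.token import Operator, String, Text

    class ToyInterpLexer(RegexLexer):
        """Toy lexer: strings with {expr} interpolation via state push/pop."""
        name = 'ToyInterp'
        tokens = {
            'root': [
                (r'"', String, 'string'),   # enter string state
                (r'{', Operator, '#push'),  # bare { ... } must push too, so the
                (r'}', Operator, '#pop'),   # } below never pops us out of a string
                (r'[^"{}]+', Text),
            ],
            'string': [
                (r'{', Operator, 'root'),   # interpolation: back to expression rules
                (r'"', String, '#pop'),
                (r'[^"{]+', String),
            ],
        }

    print(list(ToyInterpLexer().get_tokens('"a{1}b" {}')))
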
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index c5172922..44cf4f47 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -15,7 +15,7 @@ from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
      this, do_insertions, combined
 from pygments.token import Error, Punctuation, Literal, Token, \
      Text, Comment, Operator, Keyword, Name, String, Number, Generic
-from pygments.util import shebang_matches
+from pygments.util import ClassNotFound, shebang_matches
 from pygments.lexers.web import HtmlLexer
 
@@ -27,7 +27,7 @@ __all__ = ['SqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'BrainfuckLexer',
            'NewspeakLexer', 'GherkinLexer', 'AsymptoteLexer', 'PostScriptLexer',
            'AutohotkeyLexer', 'GoodDataCLLexer', 'MaqlLexer', 'ProtoBufLexer',
            'HybrisLexer', 'AwkLexer',
-           'Cfengine3Lexer', 'ECLLexer']
+           'Cfengine3Lexer', 'HttpLexer', 'SnobolLexer', 'ECLLexer']
 
 line_re = re.compile('.*?\n')
@@ -221,17 +221,19 @@ class MySqlLexer(RegexLexer):
 
 class ECLLexer(RegexLexer):
     """
-    Lexer for the declarative big-data `ECL <http://hpccsystems.com/community/docs/ecl-language-reference/html>`_
+    Lexer for the declarative big-data `ECL
+    <http://hpccsystems.com/community/docs/ecl-language-reference/html>`_
     language.
 
-    *New in Pygments 0.7.*
+    *New in Pygments 1.5.*
     """
+
     name = 'ECL'
     aliases = ['ecl']
     filenames = ['*.ecl']
     mimetypes = ['application/x-ecl']
 
-    flags = re.IGNORECASE
+    flags = re.IGNORECASE | re.MULTILINE
 
     tokens = {
         'root': [
@@ -265,16 +267,17 @@ class ECLLexer(RegexLexer):
         ],
         'types': [
             (r'(RECORD|END)[^\d]', Keyword.Declaration),
-            (r'(ASCII|BIG_ENDIAN|BOOLEAN|DATA|DECIMAL|EBCDIC|INTEGER|PATTERN|'
+            (r'((?:ASCII|BIG_ENDIAN|BOOLEAN|DATA|DECIMAL|EBCDIC|INTEGER|PATTERN|'
              r'QSTRING|REAL|RECORD|RULE|SET OF|STRING|TOKEN|UDECIMAL|UNICODE|'
-             r'UNSIGNED|VARSTRING|VARUNICODE)\d*\s+', Keyword.Type),
+             r'UNSIGNED|VARSTRING|VARUNICODE)\d*)(\s+)',
+             bygroups(Keyword.Type, Text)),
         ],
         'keywords': [
             (r'(APPLY|ASSERT|BUILD|BUILDINDEX|EVALUATE|FAIL|KEYDIFF|KEYPATCH|'
              r'LOADXML|NOTHOR|NOTIFY|OUTPUT|PARALLEL|SEQUENTIAL|SOAPCALL|WAIT'
             r'CHECKPOINT|DEPRECATED|FAILCODE|FAILMESSAGE|FAILURE|GLOBAL|'
             r'INDEPENDENT|ONWARNING|PERSIST|PRIORITY|RECOVERY|STORED|SUCCESS|'
-             r'WAIT|WHEN)\W', Keyword.Reserved),
+             r'WAIT|WHEN)\b', Keyword.Reserved),
             # These are classed differently, check later
             (r'(ALL|AND|ANY|AS|ATMOST|BEFORE|BEGINC\+\+|BEST|BETWEEN|CASE|CONST|'
              r'COUNTER|CSV|DESCEND|ENCRYPT|ENDC\+\+|ENDMACRO|EXCEPT|EXCLUSIVE|'
@@ -287,7 +290,7 @@ class ECLLexer(RegexLexer):
             r'RIGHT|SCAN|SELF|SEPARATOR|SERVICE|SHARED|SKEW|SKIP|SQL|STORE|'
             r'TERMINATOR|THOR|THRESHOLD|TOKEN|TRANSFORM|TRIM|TRUE|TYPE|'
             r'UNICODEORDER|UNSORTED|VALIDATE|VIRTUAL|WHOLE|WILD|WITHIN|XML|'
-             r'XPATH|__COMPRESSED__)\W', Keyword.Reserved),
+             r'XPATH|__COMPRESSED__)\b', Keyword.Reserved),
         ],
         'functions': [
             (r'(ABS|ACOS|ALLNODES|ASCII|ASIN|ASSTRING|ATAN|ATAN2|AVE|CASE|'
@@ -306,11 +309,12 @@ class ECLLexer(RegexLexer):
             r'SOAPCALL|SORT|SORTED|SQRT|STEPPED|STORED|SUM|TABLE|TAN|TANH|'
             r'THISNODE|TOPN|TOUNICODE|TRANSFER|TRIM|TRUNCATE|TYPEOF|UNGROUP|'
             r'UNICODEORDER|VARIANCE|WHICH|WORKUNIT|XMLDECODE|XMLENCODE|'
-             r'XMLTEXT|XMLUNICODE)\W', Name.Function),
+             r'XMLTEXT|XMLUNICODE)\b', Name.Function),
         ],
         'string': [
             (r'"', String, '#pop'),
             (r'\'', String, '#pop'),
+            (r'[^"\']+', String),
         ],
     }
@@ -3012,11 +3016,12 @@ class Cfengine3Lexer(RegexLexer):
              bygroups(Keyword.Reserved,Text,Operator,Text)),
             (r'"', String, 'string'),
             (r'(\w+)(\()', bygroups(Name.Function, Punctuation)),
-            (r'([\w.!&|]+)(::)', bygroups(Name.Class, Punctuation)),
+            (r'([\w.!&|\(\)]+)(::)', bygroups(Name.Class, Punctuation)),
             (r'(\w+)(:)', bygroups(Keyword.Declaration,Punctuation)),
             (r'@[\{\(][^\)\}]+[\}\)]', Name.Variable),
             (r'[(){},;]', Punctuation),
             (r'=>', Operator),
+            (r'->', Operator),
             (r'\d+\.\d+', Number.Float),
             (r'\d+', Number.Integer),
             (r'\w+', Name.Function),
@@ -3041,3 +3046,133 @@ class Cfengine3Lexer(RegexLexer):
             (r'\s+', Text),
         ],
     }
+
+
+class HttpLexer(RegexLexer):
+    """
+    Lexer for HTTP sessions.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'HTTP'
+    aliases = ['http']
+
+    flags = re.DOTALL
+
+    def header_callback(self, match):
+        if match.group(1).lower() == 'content-type':
+            content_type = match.group(5).strip()
+            if ';' in content_type:
+                content_type = content_type[:content_type.find(';')].strip()
+            self.content_type = content_type
+        yield match.start(1), Name.Attribute, match.group(1)
+        yield match.start(2), Text, match.group(2)
+        yield match.start(3), Operator, match.group(3)
+        yield match.start(4), Text, match.group(4)
+        yield match.start(5), Literal, match.group(5)
+        yield match.start(6), Text, match.group(6)
+
+    def content_callback(self, match):
+        content_type = getattr(self, 'content_type', None)
+        content = match.group()
+        offset = match.start()
+        if content_type:
+            from pygments.lexers import get_lexer_for_mimetype
+            try:
+                lexer = get_lexer_for_mimetype(content_type)
+            except ClassNotFound:
+                pass
+            else:
+                for idx, token, value in lexer.get_tokens_unprocessed(content):
+                    yield offset + idx, token, value
+                return
+        yield offset, Text, content
+
+    tokens = {
+        'root': [
+            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE)( +)([^ ]+)( +)'
+             r'(HTTPS?)(/)(1\.[01])(\r?\n|$)',
+             bygroups(Name.Function, Text, Name.Namespace, Text,
+                      Keyword.Reserved, Operator, Number, Text),
+             'headers'),
+            (r'(HTTPS?)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)',
+             bygroups(Keyword.Reserved, Operator, Number, Text, Number,
+                      Text, Name.Exception, Text),
+             'headers'),
+        ],
+        'headers': [
+            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|$)', header_callback),
+            (r'\r?\n', Text, 'content')
+        ],
+        'content': [
+            (r'.+', content_callback)
+        ]
+    }
+
+
+class SnobolLexer(RegexLexer):
+    """
+    Lexer for the SNOBOL4 programming language.
+
+    Recognizes the common ASCII equivalents of the original SNOBOL4 operators.
+    Does not require spaces around binary operators.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = "Snobol"
+    aliases = ["snobol"]
+    filenames = ['*.snobol']
+    mimetypes = ['text/x-snobol']
+
+    tokens = {
+        # root state, start of line
+        # comments, continuation lines, and directives start in column 1
+        # as do labels
+        'root': [
+            (r'\*.*\n', Comment),
+            (r'[\+\.] ', Punctuation, 'statement'),
+            (r'-.*\n', Comment),
+            (r'END\s*\n', Name.Label, 'heredoc'),
+            (r'[A-Za-z\$][\w$]*', Name.Label, 'statement'),
+            (r'\s+', Text, 'statement'),
+        ],
+        # statement state, line after continuation or label
+        'statement': [
+            (r'\s*\n', Text, '#pop'),
+            (r'\s+', Text),
+            (r'(?<=[^\w.])(LT|LE|EQ|NE|GE|GT|INTEGER|IDENT|DIFFER|LGT|SIZE|'
+             r'REPLACE|TRIM|DUPL|REMDR|DATE|TIME|EVAL|APPLY|OPSYN|LOAD|UNLOAD|'
+             r'LEN|SPAN|BREAK|ANY|NOTANY|TAB|RTAB|REM|POS|RPOS|FAIL|FENCE|'
+             r'ABORT|ARB|ARBNO|BAL|SUCCEED|INPUT|OUTPUT|TERMINAL)(?=[^\w.])',
+             Name.Builtin),
+            (r'[A-Za-z][\w\.]*', Name),
+            # ASCII equivalents of original operators
+            # | for the EBCDIC equivalent, ! likewise
+            # \ for EBCDIC negation
+            (r'\*\*|[\?\$\.!%\*/#+\-@\|&\\!=]', Operator),
+            (r'"[^"]*"', String),
+            (r"'[^']*'", String),
+            # Accept SPITBOL syntax for real numbers
+            # as well as Macro SNOBOL4
+            (r'[0-9]+(?=[^\.EeDd])', Number.Integer),
+            (r'[0-9]+(\.[0-9]*)?([EDed][-+]?[0-9]+)?', Number.Float),
+            # Goto
+            (r':', Punctuation, 'goto'),
+            (r'[\(\)<>,;]', Punctuation),
+        ],
+        # Goto block
+        'goto': [
+            (r'\s*\n', Text, "#pop:2"),
+            (r'\s+', Text),
+            (r'F|S', Keyword),
+            (r'(\()([A-Za-z][\w.]*)(\))',
+             bygroups(Punctuation, Name.Label, Punctuation))
+        ],
+        # everything after the END statement is basically one
+        # big heredoc.
+        'heredoc': [
+            (r'.*\n', String.Heredoc)
+        ]
+    }
diff --git a/pygments/lexers/pypylog.py b/pygments/lexers/pypylog.py
index c3af3387..48f0dbb9 100644
--- a/pygments/lexers/pypylog.py
+++ b/pygments/lexers/pypylog.py
@@ -37,6 +37,7 @@ class PyPyLogLexer(RegexLexer):
 
         "jit-log": [
             (r"\[\w+\] jit-log-.*?}$", Keyword, "#pop"),
+            (r"^\+\d+: ", Comment),
             (r"[ifp]\d+", Name),
             (r"ptr\d+", Name),
             (r"(\()([\w_]+(?:\.[\w_]+)?)(\))",
@@ -45,9 +46,9 @@ class PyPyLogLexer(RegexLexer):
             (r"(\d+\.\d+|inf|-inf)", Number.Float),
             (r"-?\d+", Number.Integer),
             (r"'.*'", String),
-            (r"(None|descr|ConstClass|ConstPtr)", Name),
+            (r"(None|descr|ConstClass|ConstPtr|TargetToken)", Name),
             (r"<.*?>", Name.Builtin),
-            (r"(debug_merge_point|jump|finish)", Name.Class),
+            (r"(label|debug_merge_point|jump|finish)", Name.Class),
             (r"(int_add_ovf|int_add|int_sub_ovf|int_sub|int_mul_ovf|int_mul|"
              r"int_floordiv|int_mod|int_lshift|int_rshift|int_and|int_or|"
              r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|"
@@ -55,14 +56,15 @@ class PyPyLogLexer(RegexLexer):
              r"uint_floordiv|uint_ge|uint_lt|"
              r"float_add|float_sub|float_mul|float_truediv|"
              r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|"
-             r"ptr_eq|"
-             r"cast_int_to_float|cast_float_to_int|cast_opaque_ptr|"
-             r"force_token|quasiimmut_field|same_as|virtual_ref_finish|virtual_ref|"
+             r"ptr_eq|ptr_ne|instance_ptr_eq|instance_ptr_ne|"
+             r"cast_int_to_float|cast_float_to_int|"
+             r"force_token|quasiimmut_field|same_as|virtual_ref_finish|virtual_ref|mark_opaque_ptr|"
             r"call_may_force|call_assembler|call_loopinvariant|call_release_gil|call_pure|call|"
             r"new_with_vtable|new_array|newstr|newunicode|new|"
             r"arraylen_gc|"
             r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|"
-             r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|getfield_gc|"
+             r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|getfield_gc|getinteriorfield_gc|"
+             r"getinteriorfield_gc|setinteriorfield_gc|"
             r"getfield_raw|setfield_gc|setfield_raw|"
             r"strgetitem|strsetitem|strlen|copystrcontent|"
             r"unicodegetitem|unicodesetitem|unicodelen|"
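
The `HttpLexer` added above remembers the last `Content-Type` header seen in `header_callback`, and `content_callback` then re-lexes the message body with the lexer registered for that MIME type. A short demonstration (the request text is invented):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers import get_lexer_by_name

    # Hypothetical session: the JSON body below should be re-lexed by the
    # lexer registered for application/json, thanks to content_callback.
    session = (
        'POST /api/things HTTP/1.1\r\n'
        'Content-Type: application/json\r\n'
        '\r\n'
        '{"name": "demo", "count": 3}\n'
    )
    print(highlight(session, get_lexer_by_name('http'), TerminalFormatter()))
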
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index 148762fd..f76e3420 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -22,7 +22,7 @@ from pygments.lexers.agile import RubyLexer
 from pygments.lexers.compiled import ScalaLexer
 
-__all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'CssLexer',
+__all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'JSONLexer', 'CssLexer',
            'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer',
            'MxmlLexer', 'HaxeLexer', 'HamlLexer', 'SassLexer', 'ScssLexer',
            'ObjectiveJLexer', 'CoffeeScriptLexer', 'DuelLexer', 'ScamlLexer',
@@ -36,9 +36,9 @@ class JavascriptLexer(RegexLexer):
 
     name = 'JavaScript'
     aliases = ['js', 'javascript']
-    filenames = ['*.js', '*.json']
+    filenames = ['*.js', ]
     mimetypes = ['application/javascript', 'application/x-javascript',
-                 'text/x-javascript', 'text/javascript', 'application/json']
+                 'text/x-javascript', 'text/javascript', ]
 
     flags = re.DOTALL
     tokens = {
@@ -89,6 +89,74 @@ class JavascriptLexer(RegexLexer):
     }
 
 
+class JSONLexer(RegexLexer):
+    """
+    For JSON data structures.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'JSON'
+    aliases = ['json']
+    filenames = ['*.json']
+    mimetypes = [ 'application/json', ]
+
+    flags = re.DOTALL
+    tokens = {
+        'whitespace': [
+            (r'\s+', Text),
+        ],
+
+        # represents a simple terminal value
+        'simplevalue': [
+            (r'(true|false|null)\b', Keyword.Constant),
+            (r'-?[0-9]+', Number.Integer),
+            (r'"(\\\\|\\"|[^"])*"', String.Double),
+        ],
+
+        # the right hand side of an object, after the attribute name
+        'objectattribute': [
+            include('value'),
+            (r':', Punctuation),
+            # comma terminates the attribute but expects more
+            (r',', Punctuation, '#pop'),
+            # a closing bracket terminates the entire object, so pop twice
+            (r'}', Punctuation, ('#pop', '#pop')),
+        ],
+
+        # a json object - { attr, attr, ... }
+        'objectvalue': [
+            include('whitespace'),
+            (r'"(\\\\|\\"|[^"])*"', Name.Tag, 'objectattribute'),
+            (r'}', Punctuation, '#pop'),
+        ],
+
+        # json array - [ value, value, ... ]
+        'arrayvalue': [
+            include('whitespace'),
+            include('value'),
+            (r',', Punctuation),
+            (r']', Punctuation, '#pop'),
+        ],
+
+        # a json value - either a simple value or a complex value (object or array)
+        'value': [
+            include('whitespace'),
+            include('simplevalue'),
+            (r'{', Punctuation, 'objectvalue'),
+            (r'\[', Punctuation, 'arrayvalue'),
+        ],
+
+        # the root of a json document would be a value
+        'root': [
+            include('value'),
+        ],
+    }
+
+
 class ActionScriptLexer(RegexLexer):
     """
     For ActionScript source code.
@@ -1728,7 +1796,7 @@ class CoffeeScriptLexer(RegexLexer):
         'slashstartsregex': [
             include('commentsandwhitespace'),
             (r'///', String.Regex, ('#pop', 'multilineregex')),
-            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
+            (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
             (r'', Text, '#pop'),
         ],
@@ -1766,8 +1834,9 @@ class CoffeeScriptLexer(RegexLexer):
             ("'", String, 'sqs'),
         ],
         'strings': [
-            (r'[^#\\\'"]+', String) # note that all coffee script strings are multi-line.
-                                    # hashmarks, quotes and backslashes must be parsed one at a time
+            (r'[^#\\\'"]+', String),
+            # note that all coffee script strings are multi-line.
+            # hashmarks, quotes and backslashes must be parsed one at a time
         ],
         'interpoling_string' : [
             (r'}', String.Interpol, "#pop"),
diff --git a/pygments/util.py b/pygments/util.py
index 429e40a1..a400624b 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -118,7 +118,7 @@ def make_analysator(f):
             return 0.0
         try:
             return min(1.0, max(0.0, float(rv)))
-        except ValueError:
+        except (ValueError, TypeError):
             return 0.0
     text_analyse.__doc__ = f.__doc__
     return staticmethod(text_analyse)
diff --git a/tests/examplefiles/classes.dylan b/tests/examplefiles/classes.dylan
index ff435b77..6dd55ff2 100644
--- a/tests/examplefiles/classes.dylan
+++ b/tests/examplefiles/classes.dylan
@@ -22,3 +22,19 @@ end function;
 define constant $blue-car = make(<car>, model: "Viper");
 define constant $black-car = make(<car>, model: "Town Car", sunroof?: #t);
 define constant $red-car = make(<car>, model: "F40", sunroof?: #f);
+
+define method foo() => _ :: <boolean>
+  #t
+end method;
+
+define method foo() => _ :: <boolean>;
+  #t
+end method;
+
+define method \+()
+end;
+
+define constant $symbol = #"hello";
+define variable *vector* = #[3.5, 5]
+define constant $list = #(1, 2);
+define constant $pair = #(1 . "foo")
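
With JSON split out of `JavascriptLexer`, `*.json` files now get the dedicated grammar above, and object keys even get their own token type (`Name.Tag`). A short check (sample document invented):

    from pygments.lexers.web import JSONLexer
    from pygments.token import Name

    doc = '{"id": 1, "tags": ["a", "b"], "ok": true}'
    lexer = JSONLexer()

    # Object keys come back as Name.Tag, distinct from string *values*.
    keys = [value for token, value in lexer.get_tokens(doc) if token is Name.Tag]
    print(keys)  # ['"id"', '"tags"', '"ok"']
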
diff --git a/tests/examplefiles/example.moon b/tests/examplefiles/example.moon
new file mode 100644
index 00000000..d4415e32
--- /dev/null
+++ b/tests/examplefiles/example.moon
@@ -0,0 +1,629 @@
+-- transform.moon
+-- Leaf Corcoran (leafot@gmail.com) 2011
+--
+-- This is part of the MoonScript compiler. See <http://moonscript.org>
+-- MoonScript is licensed under the MIT License
+--
+
+module "moonscript.transform", package.seeall
+
+types = require "moonscript.types"
+util = require "moonscript.util"
+data = require "moonscript.data"
+
+import reversed from util
+import ntype, build, smart_node, is_slice from types
+import insert from table
+
+export Statement, Value, NameProxy, LocalName, Run
+
+-- always declares as local
+class LocalName
+  new: (@name) => self[1] = "temp_name"
+  get_name: => @name
+
+class NameProxy
+  new: (@prefix) =>
+    self[1] = "temp_name"
+
+  get_name: (scope) =>
+    if not @name
+      @name = scope\free_name @prefix, true
+    @name
+
+  chain: (...) =>
+    items = {...} -- todo: fix ... propagation
+    items = for i in *items
+      if type(i) == "string"
+        {"dot", i}
+      else
+        i
+
+    build.chain {
+      base: self
+      unpack items
+    }
+
+  index: (key) =>
+    build.chain {
+      base: self, {"index", key}
+    }
+
+  __tostring: =>
+    if @name
+      ("name<%s>")\format @name
+    else
+      ("name<prefix(%s)>")\format @prefix
+
+class Run
+  new: (@fn) =>
+    self[1] = "run"
+
+  call: (state) =>
+    self.fn state
+
+-- transform the last stm is a list of stms
+-- will puke on group
+apply_to_last = (stms, fn) ->
+  -- find last (real) exp
+  last_exp_id = 0
+  for i = #stms, 1, -1
+    stm = stms[i]
+    if stm and util.moon.type(stm) != Run
+      last_exp_id = i
+      break
+
+  return for i, stm in ipairs stms
+    if i == last_exp_id
+      fn stm
+    else
+      stm
+
+-- is a body a sindle expression/statement
+is_singular = (body) ->
+  return false if #body != 1
+  if "group" == ntype body
+    is_singular body[2]
+  else
+    true
+
+constructor_name = "new"
+
+class Transformer
+  new: (@transformers, @scope) =>
+    @seen_nodes = {}
+
+  transform: (scope, node, ...) =>
+    -- print scope, node, ...
+    return node if @seen_nodes[node]
+    @seen_nodes[node] = true
+    while true
+      transformer = @transformers[ntype node]
+      res = if transformer
+        transformer(scope, node, ...) or node
+      else
+        node
+      return node if res == node
+      node = res
+
+  __call: (node, ...) =>
+    @transform @scope, node, ...
+
+  instance: (scope) =>
+    Transformer @transformers, scope
+
+  can_transform: (node) =>
+    @transformers[ntype node] != nil
+
+construct_comprehension = (inner, clauses) ->
+  current_stms = inner
+  for _, clause in reversed clauses
+    t = clause[1]
+    current_stms = if t == "for"
+      _, names, iter = unpack clause
+      {"foreach", names, iter, current_stms}
+    elseif t == "when"
+      _, cond = unpack clause
+      {"if", cond, current_stms}
+    else
+      error "Unknown comprehension clause: "..t
+    current_stms = {current_stms}
+
+  current_stms[1]
+
+Statement = Transformer {
+  assign: (node) =>
+    _, names, values = unpack node
+    -- bubble cascading assigns
+    if #values == 1 and types.cascading[ntype values[1]]
+      values[1] = @transform.statement values[1], (stm) ->
+        t = ntype stm
+        if types.is_value stm
+          {"assign", names, {stm}}
+        else
+          stm
+
+      build.group {
+        {"declare", names}
+        values[1]
+      }
+    else
+      node
+
+  export: (node) =>
+    -- assign values if they are included
+    if #node > 2
+      if node[2] == "class"
+        cls = smart_node node[3]
+        build.group {
+          {"export", {cls.name}}
+          cls
+        }
+      else
+        build.group {
+          node
+          build.assign {
+            names: node[2]
+            values: node[3]
+          }
+        }
+    else
+      nil
+
+  update: (node) =>
+    _, name, op, exp = unpack node
+    op_final = op\match "^(.+)=$"
+    error "Unknown op: "..op if not op_final
+    build.assign_one name, {"exp", name, op_final, exp}
+
+  import: (node) =>
+    _, names, source = unpack node
+
+    stubs = for name in *names
+      if type(name) == "table"
+        name
+      else
+        {"dot", name}
+
+    real_names = for name in *names
+      type(name) == "table" and name[2] or name
+
+    if type(source) == "string"
+      build.assign {
+        names: real_names
+        values: [build.chain { base: source, stub} for stub in *stubs]
+      }
+    else
+      source_name = NameProxy "table"
+      build.group {
+        {"declare", real_names}
+        build["do"] {
+          build.assign_one source_name, source
+          build.assign {
+            names: real_names
+            values: [build.chain { base: source_name, stub} for stub in *stubs]
+          }
+        }
+      }
+
+  comprehension: (node, action) =>
+    _, exp, clauses = unpack node
+
+    action = action or (exp) -> {exp}
+    construct_comprehension action(exp), clauses
+
+  -- handle cascading return decorator
+  if: (node, ret) =>
+    if ret
+      smart_node node
+      -- mutate all the bodies
+      node['then'] = apply_to_last node['then'], ret
+      for i = 4, #node
+        case = node[i]
+        body_idx = #node[i]
+        case[body_idx] = apply_to_last case[body_idx], ret
+    node
+
+  with: (node, ret) =>
+    _, exp, block = unpack node
+    scope_name = NameProxy "with"
+    build["do"] {
+      build.assign_one scope_name, exp
+      Run => @set "scope_var", scope_name
+      build.group block
+      if ret
+        ret scope_name
+    }
+
+  foreach: (node) =>
+    smart_node node
+    if ntype(node.iter) == "unpack"
+      list = node.iter[2]
+
+      index_name = NameProxy "index"
+      list_name = NameProxy "list"
+
+      slice_var = nil
+      bounds = if is_slice list
+        slice = list[#list]
+        table.remove list
+        table.remove slice, 1
+
+        slice[2] = if slice[2] and slice[2] != ""
+          max_tmp_name = NameProxy "max"
+          slice_var = build.assign_one max_tmp_name, slice[2]
+          {"exp", max_tmp_name, "<", 0
+            "and", {"length", list_name}, "+", max_tmp_name
+            "or", max_tmp_name }
+        else
+          {"length", list_name}
+
+        slice
+      else
+        {1, {"length", list_name}}
+
+      build.group {
+        build.assign_one list_name, list
+        slice_var
+        build["for"] {
+          name: index_name
+          bounds: bounds
+          body: {
+            {"assign", node.names, {list_name\index index_name}}
+            build.group node.body
+          }
+        }
+      }
+
+  switch: (node, ret) =>
+    _, exp, conds = unpack node
+    exp_name = NameProxy "exp"
+
+    -- convert switch conds into if statment conds
+    convert_cond = (cond) ->
+      t, case_exp, body = unpack cond
+      out = {}
+      insert out, t == "case" and "elseif" or "else"
+      if t != "else"
+        insert out, {"exp", case_exp, "==", exp_name} if t != "else"
+      else
+        body = case_exp
+
+      if ret
+        body = apply_to_last body, ret
+
+      insert out, body
+
+      out
+
+    first = true
+    if_stm = {"if"}
+    for cond in *conds
+      if_cond = convert_cond cond
+      if first
+        first = false
+        insert if_stm, if_cond[2]
+        insert if_stm, if_cond[3]
+      else
+        insert if_stm, if_cond
+
+    build.group {
+      build.assign_one exp_name, exp
+      if_stm
+    }
+
+  class: (node) =>
+    _, name, parent_val, body = unpack node
+
+    -- split apart properties and statements
+    statements = {}
+    properties = {}
+    for item in *body
+      switch item[1]
+        when "stm"
+          insert statements, item[2]
+        when "props"
+          for tuple in *item[2,]
+            insert properties, tuple
+
+    -- find constructor
+    constructor = nil
+    properties = for tuple in *properties
+      if tuple[1] == constructor_name
+        constructor = tuple[2]
+        nil
+      else
+        tuple
+
+    parent_cls_name = NameProxy "parent"
+    base_name = NameProxy "base"
+    self_name = NameProxy "self"
+    cls_name = NameProxy "class"
+
+    if not constructor
+      constructor = build.fndef {
+        args: {{"..."}}
+        arrow: "fat"
+        body: {
+          build["if"] {
+            cond: parent_cls_name
+            then: {
+              build.chain { base: "super", {"call", {"..."}} }
+            }
+          }
+        }
+      }
+    else
+      smart_node constructor
+      constructor.arrow = "fat"
+
+    cls = build.table {
+      {"__init", constructor}
+      {"__base", base_name}
+      {"__name", {"string", '"', name}} -- "quote the string"
+      {"__parent", parent_cls_name}
+    }
+
+    -- look up a name in the class object
+    class_lookup = build["if"] {
+      cond: {"exp", "val", "==", "nil", "and", parent_cls_name}
+      then: {
+        parent_cls_name\index"name"
+      }
+    }
+    insert class_lookup, {"else", {"val"}}
+
+    cls_mt = build.table {
+      {"__index", build.fndef {
+        args: {{"cls"}, {"name"}}
+        body: {
+          build.assign_one LocalName"val", build.chain {
+            base: "rawget", {"call", {base_name, "name"}}
+          }
+          class_lookup
+        }
+      }}
+      {"__call", build.fndef {
+        args: {{"cls"}, {"..."}}
+        body: {
+          build.assign_one self_name, build.chain {
+            base: "setmetatable"
+            {"call", {"{}", base_name}}
+          }
+          build.chain {
+            base: "cls.__init"
+            {"call", {self_name, "..."}}
+          }
+          self_name
+        }
+      }}
+    }
+
+    cls = build.chain {
+      base: "setmetatable"
+      {"call", {cls, cls_mt}}
+    }
+
+    value = nil
+    with build
+      value = .block_exp {
+        Run =>
+          @set "super", (block, chain) ->
+            if chain
+              slice = [item for item in *chain[3,]]
+              new_chain = {"chain", parent_cls_name}
+
+              head = slice[1]
+
+              if head == nil
+                return parent_cls_name
+
+              switch head[1]
+                -- calling super, inject calling name and self into chain
+                when "call"
+                  calling_name = block\get"current_block"
+                  slice[1] = {"call", {"self", unpack head[2]}}
+                  act = if ntype(calling_name) != "value" then "index" else "dot"
+                  insert new_chain, {act, calling_name}
+
+                -- colon call on super, replace class with self as first arg
+                when "colon"
+                  call = head[3]
+                  insert new_chain, {"dot", head[2]}
+                  slice[1] = { "call", { "self", unpack call[2] } }
+
+              insert new_chain, item for item in *slice
+
+              new_chain
+            else
+              parent_cls_name
+
+        .assign_one parent_cls_name, parent_val == "" and "nil" or parent_val
+        .assign_one base_name, {"table", properties}
+        .assign_one base_name\chain"__index", base_name
+
+        build["if"] {
+          cond: parent_cls_name
+          then: {
+            .chain {
+              base: "setmetatable"
+              {"call", {
+                base_name,
+                .chain { base: parent_cls_name, {"dot", "__base"}}
+              }}
+            }
+          }
+        }
+
+        .assign_one cls_name, cls
+        .assign_one base_name\chain"__class", cls_name
+
+        .group if #statements > 0 {
+          .assign_one LocalName"self", cls_name
+          .group statements
+        } else {}
+
+        cls_name
+      }
+
+      value = .group {
+        .declare names: {name}
+        .assign {
+          names: {name}
+          values: {value}
+        }
+      }
+
+    value
+}
+
+class Accumulator
+  body_idx: { for: 4, while: 3, foreach: 4 }
+
+  new: =>
+    @accum_name = NameProxy "accum"
+    @value_name = NameProxy "value"
+    @len_name = NameProxy "len"
+
+  -- wraps node and mutates body
+  convert: (node) =>
+    index = @body_idx[ntype node]
+    node[index] = @mutate_body node[index]
+    @wrap node
+
+  -- wrap the node into a block_exp
+  wrap: (node) =>
+    build.block_exp {
+      build.assign_one @accum_name, build.table!
+      build.assign_one @len_name, 0
+      node
+      @accum_name
+    }
+
+  -- mutates the body of a loop construct to save last value into accumulator
+  -- can optionally skip nil results
+  mutate_body: (body, skip_nil=true) =>
+    val = if not skip_nil and is_singular body
+      with body[1]
+        body = {}
+    else
+      body = apply_to_last body, (n) ->
+        build.assign_one @value_name, n
+      @value_name
+
+    update = {
+      {"update", @len_name, "+=", 1}
+      build.assign_one @accum_name\index(@len_name), val
+    }
+
+    if skip_nil
+      table.insert body, build["if"] {
+        cond: {"exp", @value_name, "!=", "nil"}
+        then: update
+      }
+    else
+      table.insert body, build.group update
+
+    body
+
+default_accumulator = (node) =>
+  Accumulator!\convert node
+
+
+implicitly_return = (scope) ->
+  fn = (stm) ->
+    t = ntype stm
+    if types.manual_return[t] or not types.is_value stm
+      stm
+    elseif types.cascading[t]
+      scope.transform.statement stm, fn
+    else
+      if t == "comprehension" and not types.comprehension_has_value stm
+        stm
+      else
+        {"return", stm}
+
+  fn
+
+Value = Transformer {
+  for: default_accumulator
+  while: default_accumulator
+  foreach: default_accumulator
+
+  comprehension: (node) =>
+    a = Accumulator!
+    node = @transform.statement node, (exp) ->
+      a\mutate_body {exp}, false
+    a\wrap node
+
+  tblcomprehension: (node) =>
+    _, key_exp, value_exp, clauses = unpack node
+
+    accum = NameProxy "tbl"
+    dest = build.chain { base: accum, {"index", key_exp} }
+    inner = build.assign_one dest, value_exp
+
+    build.block_exp {
+      build.assign_one accum, build.table!
+      construct_comprehension {inner}, clauses
+      accum
+    }
+
+  fndef: (node) =>
+    smart_node node
+    node.body = apply_to_last node.body, implicitly_return self
+    node
+
+  if: (node) => build.block_exp { node }
+  with: (node) => build.block_exp { node }
+  switch: (node) =>
+    build.block_exp { node }
+
+  -- pull out colon chain
+  chain: (node) =>
+    stub = node[#node]
+    if type(stub) == "table" and stub[1] == "colon_stub"
+      table.remove node, #node
+
+      base_name = NameProxy "base"
+      fn_name = NameProxy "fn"
+
+      is_super = node[2] == "super"
+      @transform.value build.block_exp {
+        build.assign {
+          names: {base_name}
+          values: {node}
+        }
+
+        build.assign {
+          names: {fn_name}
+          values: {
+            build.chain { base: base_name, {"dot", stub[2]} }
+          }
+        }
+
+        build.fndef {
+          args: {{"..."}}
+          body: {
+            build.chain {
+              base: fn_name, {"call", {is_super and "self" or base_name, "..."}}
+            }
+          }
+        }
+      }
+
+  block_exp: (node) =>
+    _, body = unpack node
+
+    fn = nil
+    arg_list = {}
+
+    insert body, Run =>
+      if @has_varargs
+        insert arg_list, "..."
+        insert fn.args, {"..."}
+
+    fn = smart_node build.fndef body: body
+    build.chain { base: {"parens", fn}, {"call", arg_list} }
+}
+
diff --git a/tests/examplefiles/example.snobol b/tests/examplefiles/example.snobol
new file mode 100644
index 00000000..26ca5cf4
--- /dev/null
+++ b/tests/examplefiles/example.snobol
@@ -0,0 +1,15 @@
+-SOME RANDOM DIRECTIVE WOULD GO HERE
+*
+* SNOBOL4 example file for lexer
+*
+          SOME.THING_OR_OTHER32 = 1 + 1.0 - 1E3 * 1E-3 ** 2.718284590E0
++                                                      :F(END)S(IN_LOOP)
+          PATTERN = LEN(3) ("GAR" | "BAR")
+IN_LOOP   THING = INPUT                                :F(END)
+          THING LEN(3) ("GAR" | "BAR")                 :S(OK)
+          OUTPUT = THING " : Failure!"                 :(IN_LOOP)
+OK        OUTPUT = THING ' : "Success"!'               :(IN_LOOP)
+END
+FOOBAR
+FOOGAR
+THiNIg
diff --git a/tests/examplefiles/http_request_example b/tests/examplefiles/http_request_example
new file mode 100644
index 00000000..5d2a1d52
--- /dev/null
+++ b/tests/examplefiles/http_request_example
@@ -0,0 +1,14 @@
+POST /demo/submit/ HTTP/1.1
+Host: pygments.org
+Connection: keep-alive
+Cache-Control: max-age=0
+Origin: http://pygments.org
+User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7
+Content-Type: application/x-www-form-urlencoded
+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
+Referer: http://pygments.org/
+Accept-Encoding: gzip,deflate,sdch
+Accept-Language: en-US,en;q=0.8
+Accept-Charset: windows-949,utf-8;q=0.7,*;q=0.3
+
+name=test&lang=text&code=asdf&user=
diff --git a/tests/examplefiles/http_response_example b/tests/examplefiles/http_response_example
new file mode 100644
index 00000000..bf53d61d
--- /dev/null
+++ b/tests/examplefiles/http_response_example
@@ -0,0 +1,27 @@
+HTTP/1.1 200 OK
+Date: Tue, 13 Dec 2011 00:11:44 GMT
+Status: 200 OK
+X-Transaction: 50b85fff78dab4a3
+X-RateLimit-Limit: 150
+ETag: "b31143be48ebfe7512b65fe64fe092f3"
+X-Frame-Options: SAMEORIGIN
+Last-Modified: Tue, 13 Dec 2011 00:11:44 GMT
+X-RateLimit-Remaining: 145
+X-Runtime: 0.01190
+X-Transaction-Mask: a6183ffa5f8ca943ff1b53b5644ef1145f6f285d
+Content-Type: application/json; charset=utf-8
+Content-Length: 2389
+Pragma: no-cache
+X-RateLimit-Class: api
+X-Revision: DEV
+Expires: Tue, 31 Mar 1981 05:00:00 GMT
+Cache-Control: no-cache, no-store, must-revalidate, pre-check=0, post-check=0
+X-MID: a55f21733bc52bb11d1fc58f9b51b4974fbb8f83
+X-RateLimit-Reset: 1323738416
+Set-Cookie: k=10.34.234.116.1323735104238974; path=/; expires=Tue, 20-Dec-11 00:11:44 GMT; domain=.twitter.com
+Set-Cookie: guest_id=v1%3A13237351042425496; domain=.twitter.com; path=/; expires=Thu, 12-Dec-2013 12:11:44 GMT
+Set-Cookie: _twitter_sess=BAh7CDoPY3JlYXRlZF9hdGwrCPS6wjQ0AToHaWQiJTFiMTlhY2E1ZjczYThk%250ANDUwMWQxNjMwZGU2YTQ1ODBhIgpmbGFzaElDOidBY3Rpb25Db250cm9sbGVy%250AOjpGbGFzaDo6Rmxhc2hIYXNoewAGOgpAdXNlZHsA--6b502f30a083e8a41a64f10930e142ea362b1561; domain=.twitter.com; path=/; HttpOnly
+Vary: Accept-Encoding
+Server: tfe
+
+[{"contributors_enabled":false,"profile_background_tile":true,"followers_count":644,"protected":false,"profile_image_url":"http:\/\/a0.twimg.com\/profile_images\/69064242\/gb_normal.jpg","screen_name":"birkenfeld","default_profile_image":false,"following":null,"friends_count":88,"profile_sidebar_fill_color":"7AC3EE","url":"http:\/\/pythonic.pocoo.org\/","name":"Georg Brandl","default_profile":false,"is_translator":false,"utc_offset":3600,"profile_sidebar_border_color":"65B0DA","description":"","profile_background_image_url_https":"https:\/\/si0.twimg.com\/images\/themes\/theme10\/bg.gif","favourites_count":0,"profile_use_background_image":true,"created_at":"Tue Dec 30 22:25:11 +0000 2008","status":{"retweet_count":10,"favorited":false,"geo":null,"possibly_sensitive":false,"coordinates":null,"in_reply_to_screen_name":null,"in_reply_to_status_id_str":null,"retweeted":false,"in_reply_to_status_id":null,"in_reply_to_user_id_str":null,"created_at":"Sat Jul 09 13:42:35 +0000 2011","truncated":false,"id_str":"89690914515206144","contributors":null,"place":null,"source":"web","in_reply_to_user_id":null,"id":89690914515206144,"retweeted_status":{"retweet_count":10,"favorited":false,"geo":null,"possibly_sensitive":false,"coordinates":null,"in_reply_to_screen_name":null,"in_reply_to_status_id_str":null,"retweeted":false,"in_reply_to_status_id":null,"in_reply_to_user_id_str":null,"created_at":"Sat Jul 09 13:07:04 +0000 2011","truncated":false,"id_str":"89681976755372032","contributors":null,"place":null,"source":"web","in_reply_to_user_id":null,"id":89681976755372032,"text":"Excellent Python posts from @mitsuhiko - http:\/\/t.co\/k1wt6e4 and @ncoghlan_dev - http:\/\/t.co\/eTxacgZ (links fixed)"},"text":"RT @jessenoller: Excellent Python posts from @mitsuhiko - http:\/\/t.co\/k1wt6e4 and @ncoghlan_dev - http:\/\/t.co\/eTxacgZ (links fixed)"},"follow_request_sent":null,"statuses_count":553,"geo_enabled":false,"notifications":null,"profile_text_color":"3D1957","id_str":"18490730","lang":"en","profile_background_image_url":"http:\/\/a1.twimg.com\/images\/themes\/theme10\/bg.gif","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/69064242\/gb_normal.jpg","show_all_inline_media":true,"listed_count":65,"profile_link_color":"FF0000","verified":false,"id":18490730,"time_zone":"Berlin","profile_background_color":"642D8B","location":"Bavaria, Germany"}]
diff --git a/tests/examplefiles/nemerle_sample.n b/tests/examplefiles/nemerle_sample.n
index 2c05033a..5236857d 100644
--- a/tests/examplefiles/nemerle_sample.n
+++ b/tests/examplefiles/nemerle_sample.n
@@ -13,13 +13,15 @@ namespace Demo.Ns
     public virtual someMethod(str : string) : list[double]
     {
       def x = "simple string";
-      def x = $"simple $splice string $(spliceMethod())";
+      def x = $"simple $splice string $(spliceMethod() + 1)";
       def x = <# recursive <# string #> sample #>;
       def x = $<# recursive $splice <# string #> sample
+        ..$(lst; "; "; x => $"x * 2 = $(x * 2)") str #>;
+      def x = @"somestring \";
 
       def localFunc(arg) {
@@ -80,6 +82,6 @@ namespace Demo.Ns
   macro sampleMacro(expr)
   syntax ("write", expr)
   {
-    <[ WriteLine($expr) ]>
+    <[ WriteLine($(expr : dyn)) ]>
   }
 }
diff --git a/tests/examplefiles/test.cs b/tests/examplefiles/test.cs
index ffa9bfea..faab7e42 100644
--- a/tests/examplefiles/test.cs
+++ b/tests/examplefiles/test.cs
@@ -153,6 +153,29 @@ namespace Diva.Core {
         public OpenerTask (string fileName)
         {
             this.fileName = fileName;
+            var verbatimString = @"c:\test\";
+
+            var verbatimStringWithNewline = @"test \\ \n \t \r
+a
+b
+c";
+            var verbatimStringWithEscapedQuotes = @"He said
+""she says \"" is not an escaped character in verbatimstrings""
+";
+
+            int[] numbers = { 5,6,4,2,4,6,8,9,7,0 };
+            var linqExample = from n in numbers
+                              where n > 5
+                              select n;
+
+            var anotherlinqExample = from n in numbers
+                                     orderby n descending
+                                     select n;
+
+            int[] someMoreNumbers = { 8,2,17,34,8,9,9,5,3,4,2,1,5 };
+            var moreLinq = from n in numbers
+                           join mn in someMoreNumbers on n equals mn + 2
+                           select new {n, mn};
         }
 
         public override void Reset ()
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 02261d24..680a6085 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -57,7 +57,7 @@ def test_lexer_classes():
         assert 'root' in cls._tokens, \
                '%s has no root state' % cls
 
-        if cls.name == 'XQuery':  # XXX temporary
+        if cls.name in ['XQuery', 'Opa']:  # XXX temporary
            return
 
        tokens = list(inst.get_tokens(test_content))
diff --git a/tests/test_examplefiles.py b/tests/test_examplefiles.py
index e5dbcf4c..9326660c 100644
--- a/tests/test_examplefiles.py
+++ b/tests/test_examplefiles.py
@@ -60,8 +60,9 @@ def check_lexer(lx, absfn, outfn):
     tokens = []
     for type, val in lx.get_tokens(text):
         ntext.append(val)
-        assert type != Error, 'lexer %s generated error token for %s' % \
-            (lx, absfn)
+        assert type != Error, \
+            'lexer %s generated error token for %s: %r at position %d' % \
+            (lx, absfn, val, len(u''.join(ntext)))
         tokens.append((type, val))
     if u''.join(ntext) != text:
         print '\n'.join(difflib.unified_diff(u''.join(ntext).splitlines(),
diff --git a/tests/test_html_formatter.py b/tests/test_html_formatter.py
index 5a506755..58a50c74 100644
--- a/tests/test_html_formatter.py
+++ b/tests/test_html_formatter.py
@@ -75,6 +75,38 @@ class HtmlFormatterTest(unittest.TestCase):
         fmt = HtmlFormatter(**optdict)
         fmt.format(tokensource, outfile)
 
+    def test_linenos(self):
+        optdict = dict(linenos=True)
+        outfile = StringIO.StringIO()
+        fmt = HtmlFormatter(**optdict)
+        fmt.format(tokensource, outfile)
+        html = outfile.getvalue()
+        self.assert_(re.search("<pre>\s+1\s+2\s+3", html))
+
+    def test_linenos_with_startnum(self):
+        optdict = dict(linenos=True, linenostart=5)
+        outfile = StringIO.StringIO()
+        fmt = HtmlFormatter(**optdict)
+        fmt.format(tokensource, outfile)
+        html = outfile.getvalue()
+        self.assert_(re.search("<pre>\s+5\s+6\s+7", html))
+
+    def test_lineanchors(self):
+        optdict = dict(lineanchors="foo")
+        outfile = StringIO.StringIO()
+        fmt = HtmlFormatter(**optdict)
+        fmt.format(tokensource, outfile)
+        html = outfile.getvalue()
+        self.assert_(re.search("<pre><a name=\"foo-1\">", html))
+
+    def test_lineanchors_with_startnum(self):
+        optdict = dict(lineanchors="foo", linenostart=5)
+        outfile = StringIO.StringIO()
+        fmt = HtmlFormatter(**optdict)
+        fmt.format(tokensource, outfile)
+        html = outfile.getvalue()
+        self.assert_(re.search("<pre><a name=\"foo-5\">", html))
+
     def test_valid_output(self):
         # test all available wrappers
         fmt = HtmlFormatter(full=True, linenos=True, noclasses=True,
diff --git a/tests/test_perllexer.py b/tests/test_perllexer.py
new file mode 100644
index 00000000..4f99af6b
--- /dev/null
+++ b/tests/test_perllexer.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+"""
+    Pygments regex lexer tests
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2011 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import time
+import unittest
+
+from pygments.token import String
+from pygments.lexers.agile import PerlLexer
+
+
+class RunawayRegexTest(unittest.TestCase):
+    # A previous version of the Perl lexer would spend a great deal of
+    # time backtracking when given particular strings. These tests show that
+    # the runaway backtracking doesn't happen any more (at least for the given
+    # cases).
+
+    lexer = PerlLexer()
+
+    ### Test helpers.
+
+    def assert_single_token(self, s, token):
+        """Show that a given string generates only one token."""
+        tokens = list(self.lexer.get_tokens_unprocessed(s))
+        self.assertEqual(len(tokens), 1, tokens)
+        self.assertEqual(s, tokens[0][2])
+        self.assertEqual(token, tokens[0][1])
+
+    def assert_tokens(self, strings, expected_tokens):
+        """Show that a given string generates the expected tokens."""
+        tokens = list(self.lexer.get_tokens_unprocessed(''.join(strings)))
+        self.assertEqual(len(tokens), len(expected_tokens), tokens)
+        for index, s in enumerate(strings):
+            self.assertEqual(s, tokens[index][2])
+            self.assertEqual(expected_tokens[index], tokens[index][1])
+
+    def assert_fast_tokenization(self, s):
+        """Show that a given string is tokenized quickly."""
+        start = time.time()
+        tokens = list(self.lexer.get_tokens_unprocessed(s))
+        end = time.time()
+        # Isn't 10 seconds kind of a long time? Yes, but we don't want false
+        # positives when the tests are starved for CPU time.
+        if end-start > 10:
+            self.fail('tokenization took too long')
+        return tokens
+
+    ### Strings.
+
+    def test_single_quote_strings(self):
+        self.assert_single_token(r"'foo\tbar\\\'baz'", String)
+        self.assert_fast_tokenization("'" + '\\'*999)
+
+    def test_double_quote_strings(self):
+        self.assert_single_token(r'"foo\tbar\\\"baz"', String)
+        self.assert_fast_tokenization('"' + '\\'*999)
+
+    def test_backtick_strings(self):
+        self.assert_single_token(r'`foo\tbar\\\`baz`', String.Backtick)
+        self.assert_fast_tokenization('`' + '\\'*999)
+
+    ### Regex matches with various delimiters.
+
+    def test_match(self):
+        self.assert_single_token(r'/aa\tbb/', String.Regex)
+        self.assert_fast_tokenization('/' + '\\'*999)
+
+    def test_match_with_slash(self):
+        self.assert_tokens(['m', '/\n\\t\\\\/'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m/xxx\n' + '\\'*999)
+
+    def test_match_with_bang(self):
+        self.assert_tokens(['m', r'!aa\t\!bb!'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m!' + '\\'*999)
+
+    def test_match_with_brace(self):
+        self.assert_tokens(['m', r'{aa\t\}bb}'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m{' + '\\'*999)
+
+    def test_match_with_angle_brackets(self):
+        self.assert_tokens(['m', r'<aa\t\>bb>'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m<' + '\\'*999)
+
+    def test_match_with_parenthesis(self):
+        self.assert_tokens(['m', r'(aa\t\)bb)'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m(' + '\\'*999)
+
+    def test_match_with_at_sign(self):
+        self.assert_tokens(['m', r'@aa\t\@bb@'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m@' + '\\'*999)
+
+    def test_match_with_percent_sign(self):
+        self.assert_tokens(['m', r'%aa\t\%bb%'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m%' + '\\'*999)
+
+    def test_match_with_dollar_sign(self):
+        self.assert_tokens(['m', r'$aa\t\$bb$'], [String.Regex, String.Regex])
+        self.assert_fast_tokenization('m$' + '\\'*999)
+
+    ### Regex substitutions with various delimiters.
+
+    def test_substitution_with_slash(self):
+        self.assert_single_token('s/aaa/bbb/g', String.Regex)
+        self.assert_fast_tokenization('s/foo/' + '\\'*999)
+
+    def test_substitution_with_at_sign(self):
+        self.assert_single_token(r's@aaa@bbb@g', String.Regex)
+        self.assert_fast_tokenization('s@foo@' + '\\'*999)
+
+    def test_substitution_with_percent_sign(self):
+        self.assert_single_token(r's%aaa%bbb%g', String.Regex)
+        self.assert_fast_tokenization('s%foo%' + '\\'*999)
+
+    def test_substitution_with_brace(self):
+        self.assert_single_token(r's{aaa}', String.Regex)
+        self.assert_fast_tokenization('s{' + '\\'*999)
+
+    def test_substitution_with_angle_bracket(self):
+        self.assert_single_token(r's<aaa>', String.Regex)
+        self.assert_fast_tokenization('s<' + '\\'*999)
+
+    def test_substitution_with_angle_bracket(self):
+        self.assert_single_token(r's<aaa>', String.Regex)
+        self.assert_fast_tokenization('s<' + '\\'*999)
+
+    def test_substitution_with_square_bracket(self):
+        self.assert_single_token(r's[aaa]', String.Regex)
+        self.assert_fast_tokenization('s[' + '\\'*999)
+
+    def test_substitution_with_parenthesis(self):
+        self.assert_single_token(r's(aaa)', String.Regex)
+        self.assert_fast_tokenization('s(' + '\\'*999)
diff --git a/tests/test_util.py b/tests/test_util.py
index 0876cf70..af1f4e44 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -13,6 +13,12 @@ import os
 
 from pygments import util
 
+class FakeLexer(object):
+    def analyse(text):
+        return float(text)
+    analyse = util.make_analysator(analyse)
+
+
 class UtilTest(unittest.TestCase):
 
     def test_getoptions(self):
@@ -53,12 +59,36 @@ class UtilTest(unittest.TestCase):
         self.assertEquals(util.docstring_headline(f1), "docstring headline")
         self.assertEquals(util.docstring_headline(f2), "docstring headline")
 
-    def test_analysator(self):
-        class X(object):
+    def test_analysator_returns_float(self):
+        # If an analysator wrapped by make_analysator returns a floating point
+        # number, then that number will be returned by the wrapper.
+        self.assertEquals(FakeLexer.analyse('0.5'), 0.5)
+
+    def test_analysator_returns_boolean(self):
+        # If an analysator wrapped by make_analysator returns a boolean value,
+        # then the wrapper will return 1.0 if the boolean was True or 0.0 if
+        # it was False.
+        self.assertEquals(FakeLexer.analyse(True), 1.0)
+        self.assertEquals(FakeLexer.analyse(False), 0.0)
+
+    def test_analysator_raises_exception(self):
+        # If an analysator wrapped by make_analysator raises an exception,
+        # then the wrapper will return 0.0.
+        class ErrorLexer(object):
             def analyse(text):
-                return 0.5
+                raise RuntimeError('something bad happened')
             analyse = util.make_analysator(analyse)
-        self.assertEquals(X.analyse(''), 0.5)
+        self.assertEquals(ErrorLexer.analyse(''), 0.0)
+
+    def test_analysator_value_error(self):
+        # When converting the analysator's return value to a float a
+        # ValueError may occur. If that happens 0.0 is returned instead.
+        self.assertEquals(FakeLexer.analyse('bad input'), 0.0)
+
+    def test_analysator_type_error(self):
+        # When converting the analysator's return value to a float a
+        # TypeError may occur. If that happens 0.0 is returned instead.
+        self.assertEquals(FakeLexer.analyse(None), 0.0)
 
     def test_shebang_matches(self):
         self.assert_(util.shebang_matches('#!/usr/bin/env python',
                                           r'python(2\.\d)?'))
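
The HtmlFormatter tests added in this patch assert directly against the
generated markup. For readers unfamiliar with the options they exercise, here
is a minimal usage sketch; the sample code, variable names and option values
are illustrative only and not part of the patch:

    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter

    code = "x = 1\ny = 2\nz = 3\n"

    # linenos=True adds a line-number column, linenostart=5 makes numbering
    # begin at 5, and lineanchors="foo" prefixes each line with an
    # <a name="foo-N"> anchor -- the same output shapes that
    # test_lineanchors_with_startnum greps for.
    formatter = HtmlFormatter(linenos=True, linenostart=5, lineanchors="foo")
    print highlight(code, PythonLexer(), formatter)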
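
Similarly, the test_util.py changes pin down the contract of
pygments.util.make_analysator: whatever the wrapped analyse function returns
or raises, the caller always gets a float back. A short sketch of that
behaviour, with DemoLexer as a hypothetical example that is not part of the
patch:

    from pygments.util import make_analysator

    class DemoLexer(object):
        def analyse(text):
            # Booleans come back as 1.0/0.0, plain floats pass through, and
            # any exception -- including float() conversion errors -- makes
            # the wrapper return 0.0, exactly as the new tests assert.
            return '#!/usr/bin/perl' in text
        analyse = make_analysator(analyse)

    print DemoLexer.analyse('#!/usr/bin/perl -w')  # 1.0
    print DemoLexer.analyse(None)                  # 0.0 (TypeError swallowed)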