diff options
author | Georg Brandl <georg@python.org> | 2011-06-18 13:49:00 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2011-06-18 13:49:00 +0200 |
commit | 1507dac8fd099aad7afca378965ceb508b00dddd (patch) | |
tree | 7c7a3448b6b7628f8bd33e207c1b68854ad375ae /pygments | |
parent | 522b59a55921a278a847c573974811417cd9fac6 (diff) | |
parent | 0bf6072b0e75fa4b6afab5d23477d7aa4aa1c77c (diff) | |
download | pygments-1507dac8fd099aad7afca378965ceb508b00dddd.tar.gz |
merge with bgoetzmann/pygments-main
Diffstat (limited to 'pygments')
-rw-r--r-- | pygments/formatters/latex.py | 6 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 4 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 30 | ||||
-rw-r--r-- | pygments/lexers/asm.py | 6 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 22 | ||||
-rw-r--r-- | pygments/lexers/dotnet.py | 269 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 6 | ||||
-rw-r--r-- | pygments/lexers/other.py | 33 | ||||
-rw-r--r-- | pygments/lexers/pypylog.py | 2 | ||||
-rw-r--r-- | pygments/lexers/text.py | 32 | ||||
-rw-r--r-- | pygments/lexers/web.py | 28 |
11 files changed, 371 insertions, 67 deletions
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py index cc464420..8c71f3e1 100644 --- a/pygments/formatters/latex.py +++ b/pygments/formatters/latex.py @@ -26,6 +26,9 @@ def escape_tex(text, commandprefix): replace('\x02', r'\%sZcb{}' % commandprefix). \ replace('^', r'\%sZca{}' % commandprefix). \ replace('_', r'\%sZus{}' % commandprefix). \ + replace('&', r'\%sZam{}' % commandprefix). \ + replace('<', r'\%sZlt{}' % commandprefix). \ + replace('>', r'\%sZgt{}' % commandprefix). \ replace('#', r'\%sZsh{}' % commandprefix). \ replace('%', r'\%sZpc{}' % commandprefix). \ replace('$', r'\%sZdl{}' % commandprefix). \ @@ -106,6 +109,9 @@ STYLE_TEMPLATE = r''' \def\%(cp)sZob{\char`\{} \def\%(cp)sZcb{\char`\}} \def\%(cp)sZca{\char`\^} +\def\%(cp)sZam{\char`\&} +\def\%(cp)sZlt{\char`\<} +\def\%(cp)sZgt{\char`\>} \def\%(cp)sZsh{\char`\#} \def\%(cp)sZpc{\char`\%%} \def\%(cp)sZdl{\char`\$} diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index f6dfb56f..aa6a9180 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -41,7 +41,7 @@ LEXERS = { 'BlitzMaxLexer': ('pygments.lexers.compiled', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)), 'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), 'BrainfuckLexer': ('pygments.lexers.other', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), - 'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h'), ('text/x-chdr', 'text/x-csrc')), + 'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h', '*.idc'), ('text/x-chdr', 'text/x-csrc')), 'CMakeLexer': ('pygments.lexers.text', 'CMake', ('cmake',), ('*.cmake', 'CMakeLists.txt'), ('text/x-cmake',)), 'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)), 'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()), @@ -80,6 +80,7 @@ LEXERS = { 'EvoqueHtmlLexer': ('pygments.lexers.templates', 'HTML+Evoque', ('html+evoque',), ('*.html',), ('text/html+evoque',)), 'EvoqueLexer': ('pygments.lexers.templates', 'Evoque', ('evoque',), ('*.evoque',), ('application/x-evoque',)), 'EvoqueXmlLexer': ('pygments.lexers.templates', 'XML+Evoque', ('xml+evoque',), ('*.xml',), ('application/xml+evoque',)), + 'FSharpLexer': ('pygments.lexers.dotnet', 'FSharp', ('fsharp',), ('*.fs', '*.fsi'), ('text/x-fsharp',)), 'FactorLexer': ('pygments.lexers.agile', 'Factor', ('factor',), ('*.factor',), ('text/x-factor',)), 'FancyLexer': ('pygments.lexers.agile', 'Fancy', ('fancy', 'fy'), ('*.fy', '*.fancypack'), ('text/x-fancysrc',)), 'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)), @@ -145,6 +146,7 @@ LEXERS = { 'MyghtyLexer': ('pygments.lexers.templates', 'Myghty', ('myghty',), ('*.myt', 'autodelegate'), ('application/x-myghty',)), 'MyghtyXmlLexer': ('pygments.lexers.templates', 'XML+Myghty', ('xml+myghty',), (), ('application/xml+myghty',)), 'NasmLexer': ('pygments.lexers.asm', 'NASM', ('nasm',), ('*.asm', '*.ASM'), ('text/x-nasm',)), + 'NemerleLexer': ('pygments.lexers.dotnet', 'Nemerle', ('nemerle',), ('*.n',), ('text/x-nemerle',)), 'NewspeakLexer': ('pygments.lexers.other', 'Newspeak', ('newspeak',), ('*.ns2',), ('text/x-newspeak',)), 'NginxConfLexer': ('pygments.lexers.text', 'Nginx configuration file', ('nginx',), (), ('text/x-nginx-conf',)), 'NimrodLexer': ('pygments.lexers.compiled', 'Nimrod', ('nimrod', 'nim'), ('*.nim', '*.nimrod'), ('text/x-nimrod',)), diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 662f8b7e..22fd2dbc 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ - LexerContext, include, combined, do_insertions, bygroups, using + LexerContext, include, combined, do_insertions, bygroups, using, this from pygments.token import Error, Text, Other, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches @@ -174,7 +174,7 @@ class PythonLexer(RegexLexer): } def analyse_text(text): - return shebang_matches(text, r'pythonw?(2\.\d)?') + return shebang_matches(text, r'pythonw?(2(\.\d)?)?') class Python3Lexer(RegexLexer): @@ -484,11 +484,11 @@ class RubyLexer(ExtendedRegexLexer): def gen_rubystrings_rules(): def intp_regex_callback(self, match, ctx): - yield match.start(1), String.Regex, match.group(1) # begin + yield match.start(1), String.Regex, match.group(1) # begin nctx = LexerContext(match.group(3), 0, ['interpolated-regex']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3)+i, t, v - yield match.start(4), String.Regex, match.group(4) # end[mixounse]* + yield match.start(4), String.Regex, match.group(4) # end[mixounse]* ctx.pos = match.end() def intp_string_callback(self, match, ctx): @@ -496,13 +496,13 @@ class RubyLexer(ExtendedRegexLexer): nctx = LexerContext(match.group(3), 0, ['interpolated-string']) for i, t, v in self.get_tokens_unprocessed(context=nctx): yield match.start(3)+i, t, v - yield match.start(4), String.Other, match.group(4) # end + yield match.start(4), String.Other, match.group(4) # end ctx.pos = match.end() states = {} states['strings'] = [ # easy ones - (r'\:([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|' + (r'\:@{0,2}([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|' r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol), (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), @@ -661,7 +661,8 @@ class RubyLexer(ExtendedRegexLexer): # multiline regex (in method calls) (r'(?<=\(|,)/', String.Regex, 'multiline-regex'), # multiline regex (this time the funny no whitespace rule) - (r'(\s+)(/[^\s=])', String.Regex, 'multiline-regex'), + (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex), + 'multiline-regex'), # lex numbers and ignore following regular expressions which # are division operators in fact (grrrr. i hate that. any # better ideas?) @@ -1044,7 +1045,6 @@ class LuaLexer(RegexLexer): (r'(true|false|nil)\b', Keyword.Constant), (r'(function)(\s+)', bygroups(Keyword, Text), 'funcname'), - (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name), @@ -1059,10 +1059,6 @@ class LuaLexer(RegexLexer): ('\(', Punctuation, '#pop'), ], - 'classname': [ - ('[A-Za-z_][A-Za-z0-9_]*', Name.Class, '#pop') - ], - # if I understand correctly, every character is valid in a lua string, # so this state is only for later corrections 'string': [ @@ -1155,7 +1151,7 @@ class MiniDLexer(RegexLexer): ), # StringLiteral # -- WysiwygString - (r'@"(""|.)*"', String), + (r'@"(""|[^"])*"', String), # -- AlternateWysiwygString (r'`(``|.)*`', String), # -- DoubleQuotedString @@ -2003,7 +1999,7 @@ class FancyLexer(RegexLexer): # Comments (r'#(.*?)\n', Comment.Single), # Symbols - (r'\'[^\'\s]+', String.Symbol), + (r'\'([^\'\s\[\]\(\)\{\}]+|\[\])', String.Symbol), # Multi-line DoubleQuotedString (r'"""(\\\\|\\"|[^"])*"""', String), # DoubleQuotedString @@ -2026,6 +2022,7 @@ class FancyLexer(RegexLexer): ('[A-Z][a-zA-Z0-9_]*', Name.Constant), ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance), ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class), + ('(@|@@)', Operator), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), # numbers - / checks are necessary to avoid mismarking regexes, # see comment in RubyLexer @@ -2046,14 +2043,15 @@ class FancyLexer(RegexLexer): class GroovyLexer(RegexLexer): """ For `Groovy <http://groovy.codehaus.org/>`_ source code. - Syntax can be found at http://svn.codehaus.org/groovy/trunk/groovy/groovy-core/src/main/org/codehaus/groovy/antlr/groovy.g + + *New in Pygments 1.5.* """ name = 'Groovy' aliases = ['groovy'] filenames = ['*.groovy'] mimetypes = ['text/x-groovy'] - + flags = re.MULTILINE | re.DOTALL #: optional Comment or Whitespace diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index 4740569c..a57ea69d 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -88,7 +88,11 @@ class GasLexer(RegexLexer): } def analyse_text(text): - return re.match(r'^\.\w+', text, re.M) + if re.match(r'^\.(text|data|section)', text, re.M): + return True + elif re.match(r'^\.\w+', text, re.M): + return 0.1 + class ObjdumpLexer(RegexLexer): """ diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index 2945be98..6c9d7c87 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -35,7 +35,7 @@ class CLexer(RegexLexer): """ name = 'C' aliases = ['c'] - filenames = ['*.c', '*.h'] + filenames = ['*.c', '*.h', '*.idc'] mimetypes = ['text/x-chdr', 'text/x-csrc'] #: optional Comment or Whitespace @@ -1136,7 +1136,7 @@ class ScalaLexer(RegexLexer): (r'(true|false|null)\b', Keyword.Constant), (r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'), (r'(type)(\s+)', bygroups(Keyword, Text), 'type'), - (r'"""(?:.|\n)*?"""', String), + (r'""".*?"""', String), (r'"(\\\\|\\"|[^"])*"', String), (ur"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), # (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, @@ -1144,7 +1144,7 @@ class ScalaLexer(RegexLexer): (idrest, Name), (r'`[^`]+`', Name), (r'\[', Operator, 'typeparam'), - (r'[\(\)\{\};,.]', Operator), + (r'[\(\)\{\};,.#]', Operator), (op, Operator), (ur'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?', Number.Float), @@ -1309,6 +1309,13 @@ class ObjectiveCLexer(RegexLexer): bygroups(using(this), Name.Function, using(this), Text, Punctuation), 'function'), + # methods + (r'^([-+])(\s*)' # method marker + r'(\(.*?\))?(\s*)' # return type + r'([a-zA-Z$_][a-zA-Z0-9$_]*:?)', # begin of method name + bygroups(Keyword, Text, using(this), + Text, Name.Function), + 'method'), # function declarations (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z$_][a-zA-Z0-9$_]*)' # method name @@ -1352,6 +1359,15 @@ class ObjectiveCLexer(RegexLexer): ('{', Punctuation, '#push'), ('}', Punctuation, '#pop'), ], + 'method': [ + include('whitespace'), + (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this), + Name.Variable)), + (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function), + (';', Punctuation, '#pop'), + ('{', Punctuation, 'function'), + ('', Text, '#pop'), + ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index 48feeb85..7c13e8a4 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -10,7 +10,8 @@ """ import re -from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, using, this +from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \ + using, this from pygments.token import Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other from pygments.util import get_choice_opt @@ -18,8 +19,8 @@ from pygments import unistring as uni from pygments.lexers.web import XmlLexer -__all__ = ['CSharpLexer', 'BooLexer', 'VbNetLexer', 'CSharpAspxLexer', - 'VbNetAspxLexer'] +__all__ = ['CSharpLexer', 'NemerleLexer', 'BooLexer', 'VbNetLexer', + 'CSharpAspxLexer', 'VbNetAspxLexer', 'FSharpLexer'] def _escape(st): @@ -138,6 +139,164 @@ class CSharpLexer(RegexLexer): RegexLexer.__init__(self, **options) +class NemerleLexer(RegexLexer): + """ + For `Nemerle <http://nemerle.org>`_ source code. + + Additional options accepted: + + `unicodelevel` + Determines which Unicode characters this lexer allows for identifiers. + The possible values are: + + * ``none`` -- only the ASCII letters and numbers are allowed. This + is the fastest selection. + * ``basic`` -- all Unicode characters from the specification except + category ``Lo`` are allowed. + * ``full`` -- all Unicode characters as specified in the C# specs + are allowed. Note that this means a considerable slowdown since the + ``Lo`` category has more than 40,000 characters in it! + + The default value is ``basic``. + + *New in Pygments 1.5.* + """ + + name = 'Nemerle' + aliases = ['nemerle'] + filenames = ['*.n'] + mimetypes = ['text/x-nemerle'] # inferred + + flags = re.MULTILINE | re.DOTALL | re.UNICODE + + # for the range of allowed unicode characters in identifiers, + # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf + + levels = { + 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', + 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), + 'full': ('@?(?:_|[^' + + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])' + + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', + 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', + 'Mc')) + ']*'), + } + + tokens = {} + token_variants = True + + for levelname, cs_ident in levels.items(): + tokens[levelname] = { + 'root': [ + # method names + (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type + r'(' + cs_ident + ')' # method name + r'(\s*)(\()', # signature start + bygroups(using(this), Name.Function, Text, Punctuation)), + (r'^\s*\[.*?\]', Name.Attribute), + (r'[^\S\n]+', Text), + (r'\\\n', Text), # line continuation + (r'//.*?\n', Comment.Single), + (r'/[*](.|\n)*?[*]/', Comment.Multiline), + (r'\n', Text), + (r'\$\s*"', String, 'splice-string'), + (r'\$\s*<#', String, 'splice-string2'), + (r'<#', String, 'recursive-string'), + + (r'(<\[)\s*(' + cs_ident + ':)?', Keyword), + (r'\]\>', Keyword), + + # quasiquotation only + (r'\$' + cs_ident, Name), + (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'), + + (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), + (r'[{}]', Punctuation), + (r'@"(\\\\|\\"|[^"])*"', String), + (r'"(\\\\|\\"|[^"\n])*["\n]', String), + (r"'\\.'|'[^\\]'", String.Char), + (r"0[xX][0-9a-fA-F]+[Ll]?", Number), + (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number), + (r'#[ \t]*(if|endif|else|elif|define|undef|' + r'line|error|warning|region|endregion|pragma)\b.*?\n', + Comment.Preproc), + (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text, + Keyword)), + (r'(abstract|and|as|base|catch|def|delegate|' + r'enum|event|extern|false|finally|' + r'fun|implements|interface|internal|' + r'is|macro|match|matches|module|mutable|new|' + r'null|out|override|params|partial|private|' + r'protected|public|ref|sealed|static|' + r'syntax|this|throw|true|try|type|typeof|' + r'virtual|volatile|when|where|with|' + r'assert|assert2|async|break|checked|continue|do|else|' + r'ensures|for|foreach|if|late|lock|new|nolate|' + r'otherwise|regexp|repeat|requires|return|surroundwith|' + r'unchecked|unless|using|while|yield)\b', Keyword), + (r'(global)(::)', bygroups(Keyword, Punctuation)), + (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' + r'short|string|uint|ulong|ushort|void|array|list)\b\??', + Keyword.Type), + (r'(:>?)\s*(' + cs_ident + r'\??)', + bygroups(Punctuation, Keyword.Type)), + (r'(class|struct|variant|module)(\s+)', + bygroups(Keyword, Text), 'class'), + (r'(namespace|using)(\s+)', bygroups(Keyword, Text), + 'namespace'), + (cs_ident, Name), + ], + 'class': [ + (cs_ident, Name.Class, '#pop') + ], + 'namespace': [ + (r'(?=\()', Text, '#pop'), # using (resource) + ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') + ], + 'splice-string': [ + (r'[^"$]', String), + (r'\$' + cs_ident, Name), + (r'(\$)(\()', bygroups(Name, Punctuation), + 'splice-string-content'), + (r'\\"', String), + (r'"', String, '#pop') + ], + 'splice-string2': [ + (r'[^#<>$]', String), + (r'\$' + cs_ident, Name), + (r'(\$)(\()', bygroups(Name, Punctuation), + 'splice-string-content'), + (r'<#', String, '#push'), + (r'#>', String, '#pop') + ], + 'recursive-string': [ + (r'[^#<>]', String), + (r'<#', String, '#push'), + (r'#>', String, '#pop') + ], + 'splice-string-content': [ + (r'if|match', Keyword), + (r'[~!%^&*+=|\[\]:;,.<>/?-]', Punctuation), + (cs_ident, Name), + (r'\(', Punctuation, '#push'), + (r'\)', Punctuation, '#pop') + ] + } + + def __init__(self, **options): + level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), + 'basic') + if level not in self._all_tokens: + # compile the regexes now + self._tokens = self.__class__.process_tokendef(level) + else: + self._tokens = self._all_tokens[level] + + RegexLexer.__init__(self, **options) + + class BooLexer(RegexLexer): """ For `Boo <http://boo.codehaus.org/>`_ source code. @@ -365,3 +524,107 @@ class VbNetAspxLexer(DelegatingLexer): return 0.2 elif re.search(r'script[^>]+language=["\']vb', text, re.I) is not None: return 0.15 + + +# Very close to functional.OcamlLexer +class FSharpLexer(RegexLexer): + """ + For the F# language. + + *New in Pygments 1.5.* + """ + + name = 'FSharp' + aliases = ['fsharp'] + filenames = ['*.fs', '*.fsi'] + mimetypes = ['text/x-fsharp'] + + keywords = [ + 'abstract', 'and', 'as', 'assert', 'base', 'begin', 'class', + 'default', 'delegate', 'do', 'do!', 'done', 'downcast', + 'downto', 'elif', 'else', 'end', 'exception', 'extern', + 'false', 'finally', 'for', 'fun', 'function', 'global', 'if', + 'in', 'inherit', 'inline', 'interface', 'internal', 'lazy', + 'let', 'let!', 'match', 'member', 'module', 'mutable', + 'namespace', 'new', 'null', 'of', 'open', 'or', 'override', + 'private', 'public', 'rec', 'return', 'return!', 'sig', + 'static', 'struct', 'then', 'to', 'true', 'try', 'type', + 'upcast', 'use', 'use!', 'val', 'void', 'when', 'while', + 'with', 'yield', 'yield!' + ] + keyopts = [ + '!=','#','&','&&','\(','\)','\*','\+',',','-', + '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<', + '<-','>','>]','\?','\?\?','\[','\[<','\[>','\[\|', + ']','_','`','{','\|','\|]','}','~','<@','=','@>' + ] + + operators = r'[!$%&*+\./:<=>?@^|~-]' + word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'not', 'or'] + prefix_syms = r'[!?~]' + infix_syms = r'[=<>@^|&+\*/$%-]' + primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array', + 'byte', 'sbyte', 'int16', 'uint16', 'uint32', 'int64', 'uint64' + 'nativeint', 'unativeint', 'decimal', 'void', 'float32', 'single', + 'double'] + + tokens = { + 'escape-sequence': [ + (r'\\[\\\"\'ntbr]', String.Escape), + (r'\\[0-9]{3}', String.Escape), + (r'\\x[0-9a-fA-F]{2}', String.Escape), + ], + 'root': [ + (r'\s+', Text), + (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo), + (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', + Name.Namespace, 'dotted'), + (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), + (r'//.*?\n', Comment.Single), + (r'\(\*', Comment, 'comment'), + (r'\b(%s)\b' % '|'.join(keywords), Keyword), + (r'(%s)' % '|'.join(keyopts), Operator), + (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), + (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), + (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), + + (r'#[ \t]*(if|endif|else|line|nowarn|light)\b.*?\n', + Comment.Preproc), + + (r"[^\W\d][\w']*", Name), + + (r'\d[\d_]*', Number.Integer), + (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex), + (r'0[oO][0-7][0-7_]*', Number.Oct), + (r'0[bB][01][01_]*', Number.Binary), + (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), + + (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'", + String.Char), + (r"'.'", String.Char), + (r"'", Keyword), # a stray quote is another syntax element + + (r'"', String.Double, 'string'), + + (r'[~?][a-z][\w\']*:', Name.Variable), + ], + 'comment': [ + (r'[^(*)]+', Comment), + (r'\(\*', Comment, '#push'), + (r'\*\)', Comment, '#pop'), + (r'[(*)]', Comment), + ], + 'string': [ + (r'[^\\"]+', String.Double), + include('escape-sequence'), + (r'\\\n', String.Double), + (r'"', String.Double, '#pop'), + ], + 'dotted': [ + (r'\s+', Text), + (r'\.', Punctuation), + (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace), + (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'), + (r'[a-z_][A-Za-z0-9_\']*', Name, '#pop'), + ], + } diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 9115f653..724ee833 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -570,11 +570,11 @@ class OcamlLexer(RegexLexer): (r"[^\W\d][\w']*", Name), - (r'\d[\d_]*', Number.Integer), + (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex), (r'0[oO][0-7][0-7_]*', Number.Oct), (r'0[bB][01][01_]*', Number.Binary), - (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), + (r'\d[\d_]*', Number.Integer), (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'", String.Char), @@ -656,7 +656,7 @@ class ErlangLexer(RegexLexer): 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' ] - operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)' + operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!|\?)' word_operators = [ 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 'div', 'not', 'or', 'orelse', 'rem', 'xor' diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 0cf785cc..88236a26 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -817,7 +817,7 @@ class LogtalkLexer(RegexLexer): Keyword), (r'(object|protocol|category)_property(?=[(])', Keyword), # Entity relations - (r'complements_object(?=[(])', Keyword), + (r'co(mplements_object|nforms_to_protocol)(?=[(])', Keyword), (r'extends_(object|protocol|category)(?=[(])', Keyword), (r'imp(lements_protocol|orts_category)(?=[(])', Keyword), (r'(instantiat|specializ)es_class(?=[(])', Keyword), @@ -826,11 +826,12 @@ class LogtalkLexer(RegexLexer): # Flags (r'(current|set)_logtalk_flag(?=[(])', Keyword), # Compiling, loading, and library paths - (r'logtalk_(compile|l(ibrary_path|oad))(?=[(])', Keyword), + (r'logtalk_(compile|l(ibrary_path|oad_context|oad))(?=[(])', + Keyword), # Database (r'(clause|retract(all)?)(?=[(])', Keyword), (r'a(bolish|ssert(a|z))(?=[(])', Keyword), - # Control + # Control constructs (r'(ca(ll|tch)|throw)(?=[(])', Keyword), (r'(fail|true)\b', Keyword), # All solutions @@ -841,7 +842,7 @@ class LogtalkLexer(RegexLexer): # Term unification (r'unify_with_occurs_check(?=[(])', Keyword), # Term creation and decomposition - (r'(functor|arg|copy_term)(?=[(])', Keyword), + (r'(functor|arg|copy_term|numbervars)(?=[(])', Keyword), # Evaluable functors (r'(rem|mod|abs|sign)(?=[(])', Keyword), (r'float(_(integer|fractional)_part)?(?=[(])', Keyword), @@ -849,8 +850,10 @@ class LogtalkLexer(RegexLexer): # Other arithmetic functors (r'(cos|atan|exp|log|s(in|qrt))(?=[(])', Keyword), # Term testing - (r'(var|atom(ic)?|integer|float|compound|n(onvar|umber))(?=[(])', - Keyword), + (r'(var|atom(ic)?|integer|float|c(allable|ompound)|n(onvar|umber))' + r'(?=[(])', Keyword), + # Term comparison + (r'compare(?=[(])', Keyword), # Stream selection and control (r'(curren|se)t_(in|out)put(?=[(])', Keyword), (r'(open|close)(?=[(])', Keyword), @@ -879,8 +882,10 @@ class LogtalkLexer(RegexLexer): # External call (r'[{}]', Keyword), # Logic and control - (r'\bonce(?=[(])', Keyword), + (r'\b(ignore|once)(?=[(])', Keyword), (r'\brepeat\b', Keyword), + # Sorting + (r'(key)?sort(?=[(])', Keyword), # Bitwise functors (r'(>>|<<|/\\|\\\\|\\)', Operator), # Arithemtic evaluation @@ -895,7 +900,7 @@ class LogtalkLexer(RegexLexer): (r'(==|\\==|@=<|@<|@>=|@>)', Operator), # Evaluable functors (r'(//|[-+*/])', Operator), - (r'\b(mod|rem)\b', Operator), + (r'\b(e|pi|mod|rem)\b', Operator), # Other arithemtic functors (r'\b\*\*\b', Operator), # DCG rules @@ -906,6 +911,8 @@ class LogtalkLexer(RegexLexer): (r'\\+', Operator), # Mode operators (r'[?@]', Operator), + # Existential quantifier + (r'\^', Operator), # Strings (r'"(\\\\|\\"|[^"])*"', String), # Ponctuation @@ -937,16 +944,18 @@ class LogtalkLexer(RegexLexer): (r'in(fo|itialization)(?=[(])', Keyword, 'root'), (r'(dynamic|synchronized|threaded)[.]', Keyword, 'root'), (r'(alias|d(ynamic|iscontiguous)|m(eta_predicate|ode|ultifile)|' - r's(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', Keyword, 'root'), + r's(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', + Keyword, 'root'), (r'op(?=[(])', Keyword, 'root'), - (r'(calls|reexport|use(s|_module))(?=[(])', Keyword, 'root'), + (r'(c(alls|oinductive)|reexport|use(s|_module))(?=[(])', + Keyword, 'root'), (r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'), (r'[a-z][a-zA-Z0-9_]*[.]', Text, 'root'), ], 'entityrelations': [ - (r'(extends|i(nstantiates|mp(lements|orts))|specializes)(?=[(])', - Keyword), + (r'(complements|extends|i(nstantiates|mp(lements|orts))|specializes)' + b'(?=[(])', Keyword), # Numbers (r"0'.", Number), (r'0b[01]+', Number), diff --git a/pygments/lexers/pypylog.py b/pygments/lexers/pypylog.py index 186a532f..f2363c5d 100644 --- a/pygments/lexers/pypylog.py +++ b/pygments/lexers/pypylog.py @@ -29,7 +29,7 @@ class PyPyLogLexer(RegexLexer): (r"[ifp]\d+", Name), (r"ptr\d+", Name), - (r"(\()([\w_]+(:\.[\w_]+)?)(\))", bygroups(Punctuation, Name.Builtin, Punctuation)), + (r"(\()([\w_]+(?:\.[\w_]+)?)(\))", bygroups(Punctuation, Name.Builtin, Punctuation)), (r"[\[\]=,()]", Punctuation), (r"(\d+\.\d+|inf|-inf)", Number.Float), (r"-?\d+", Number.Integer), diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 5da30496..e481bb62 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -653,6 +653,13 @@ class RstLexer(RegexLexer): for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)): yield item + # from docutils.parsers.rst.states + closers = u'\'")]}>\u2019\u201d\xbb!?' + unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0' + end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' + % (re.escape(unicode_delimiters), + re.escape(closers))) + tokens = { 'root': [ # Heading with overline @@ -689,9 +696,9 @@ class RstLexer(RegexLexer): bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, using(this, state='inline'))), # A reference target - (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$', + (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), - # A footnote target + # A footnote/citation target (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), # A substitution def @@ -730,10 +737,9 @@ class RstLexer(RegexLexer): (r'.', Text), ], 'literal': [ - (r'[^`\\]+', String), - (r'\\.', String), - (r'``', String, '#pop'), - (r'[`\\]', String), + (r'[^`]+', String), + (r'``' + end_string_suffix, String, '#pop'), + (r'`', String), ] } @@ -877,7 +883,7 @@ class SquidConfLexer(RegexLexer): mimetypes = ['text/x-squidconf'] flags = re.IGNORECASE - keywords = [ "acl", "always_direct", "announce_host", + keywords = [ "access_log", "acl", "always_direct", "announce_host", "announce_period", "announce_port", "announce_to", "anonymize_headers", "append_domain", "as_whois_server", "auth_param_basic", "authenticate_children", @@ -902,7 +908,7 @@ class SquidConfLexer(RegexLexer): "ftpget_options", "ftpget_program", "ftp_list_width", "ftp_passive", "ftp_user", "half_closed_clients", "header_access", "header_replace", "hierarchy_stoplist", - "high_response_time_warning", "high_page_fault_warning", + "high_response_time_warning", "high_page_fault_warning", "hosts_file", "htcp_port", "http_access", "http_anonymizer", "httpd_accel", "httpd_accel_host", "httpd_accel_port", "httpd_accel_uses_host_header", "httpd_accel_with_proxy", @@ -969,14 +975,14 @@ class SquidConfLexer(RegexLexer): "snmp_community", ] - ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b' + ip_re = r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))' def makelistre(list): - return r'\b(?:'+'|'.join(list)+r')\b' + return r'\b(?:' + '|'.join(list) + r')\b' tokens = { 'root': [ - (r'\s+', Text), + (r'\s+', Whitespace), (r'#', Comment, 'comment'), (makelistre(keywords), Keyword), (makelistre(opts), Name.Constant), @@ -985,8 +991,8 @@ class SquidConfLexer(RegexLexer): (r'stats/'+makelistre(actions), String), (r'log/'+makelistre(actions)+r'=', String), (makelistre(acls), Keyword), - (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number), - (r'\b\d+\b', Number), + (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float), + (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number), (r'\S+', Text), ], 'comment': [ diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index 88d64a6f..56798441 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -172,9 +172,6 @@ class ActionScriptLexer(RegexLexer): ] } - def analyse_text(text): - return 0.05 - class ActionScript3Lexer(RegexLexer): """ @@ -190,6 +187,7 @@ class ActionScript3Lexer(RegexLexer): 'text/actionscript'] identifier = r'[$a-zA-Z_][a-zA-Z0-9_]*' + typeidentifier = identifier + '(?:\.<\w+>)?' flags = re.DOTALL | re.MULTILINE tokens = { @@ -198,12 +196,13 @@ class ActionScript3Lexer(RegexLexer): (r'(function\s+)(' + identifier + r')(\s*)(\()', bygroups(Keyword.Declaration, Name.Function, Text, Operator), 'funcparams'), - (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r')', + (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + + typeidentifier + r')', bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text, Keyword.Type)), (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)', bygroups(Keyword, Text, Name.Namespace, Text)), - (r'(new)(\s+)(' + identifier + r')(\s*)(\()', + (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()', bygroups(Keyword, Text, Keyword.Type, Text, Operator)), (r'//.*?\n', Comment.Single), (r'/\*.*?\*/', Comment.Multiline), @@ -234,13 +233,13 @@ class ActionScript3Lexer(RegexLexer): 'funcparams': [ (r'\s+', Text), (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' + - identifier + r'|\*)(\s*)', + typeidentifier + r'|\*)(\s*)', bygroups(Text, Punctuation, Name, Text, Operator, Text, Keyword.Type, Text), 'defval'), (r'\)', Operator, 'type') ], 'type': [ - (r'(\s*)(:)(\s*)(' + identifier + r'|\*)', + (r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)', bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'), (r'\s*', Text, '#pop:2') ], @@ -252,8 +251,9 @@ class ActionScript3Lexer(RegexLexer): } def analyse_text(text): - if re.match(r'\w+\s*:\s*\w', text): return 0.3 - return 0.1 + if re.match(r'\w+\s*:\s*\w', text): + return 0.3 + return 0 class CssLexer(RegexLexer): @@ -2216,9 +2216,9 @@ class XQueryLexer(ExtendedRegexLexer): punctuation_root_callback), (r'(castable|cast)(\s+)(as)', bygroups(Keyword, Text, Keyword), 'singletype'), - (r'(instance)(\s+)(of)|(treat)(\s+)(as)', - bygroups(Keyword, Text, Keyword), 'itemtype'), - (r'(case)|(as)', Keyword, 'itemtype'), + (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword), 'itemtype'), + (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword), 'itemtype'), + (r'case|as', Keyword, 'itemtype'), (r'(\))(\s*)(as)', bygroups(Punctuation, Text, Keyword), 'itemtype'), (r'\$', Name.Variable, 'varname'), @@ -2304,8 +2304,8 @@ class XQueryLexer(ExtendedRegexLexer): bygroups(Keyword, Text, Keyword, Text, Keyword), 'root'), (r'(castable|cast)(\s+)(as)', bygroups(Keyword, Text, Keyword), 'singletype'), - (r'(instance)(\s+)(of)|(treat)(\s+)(as)', - bygroups(Keyword, Text, Keyword)), + (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword)), + (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword)), (r'case|as', Keyword, 'itemtype'), (r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'), (ncname + r'(:\*)', Keyword.Type, 'operator'), |