# -*- coding: utf-8 -*- """ pygments.lexers.dotnet ~~~~~~~~~~~~~~~~~~~~~~ Lexers for .net languages. :copyright: 2006-2007 by Georg Brandl, Armin Ronacher. :license: BSD, see LICENSE for more details. """ import re from pygments.lexer import RegexLexer, bygroups, using, this from pygments.token import Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Literal from pygments.util import get_choice_opt from pygments import unistring as uni __all__ = ['CSharpLexer', 'BooLexer', 'VbNetLexer'] def _escape(st): return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\ replace(u'[', ur'\[').replace(u']', ur'\]') class CSharpLexer(RegexLexer): """ For `C# `_ source code. Additional options accepted: `unicodelevel` Determines which Unicode characters this lexer allows for identifiers. The possible values are: * ``none`` -- only the ASCII letters and numbers are allowed. This is the fastest selection. * ``basic`` -- all Unicode characters from the specification except category ``Lo`` are allowed. * ``full`` -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the ``Lo`` category has more than 40,000 characters in it! The default value is ``basic``. *New in Pygments 0.8.* """ name = 'C#' aliases = ['csharp', 'c#'] filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred flags = re.MULTILINE | re.DOTALL | re.UNICODE # for the range of allowed unicode characters in identifiers, # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf levels = { 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), 'full': ('@?(?:_|[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])' + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc')) + ']*'), } tokens = {} token_variants = True for levelname, cs_ident in levels.items(): tokens[levelname] = { 'root': [ # method names (r'^([ \t]*(?:' + cs_ident + r'\s+)+?)' # return arguments r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start bygroups(using(this), Name.Function, Text, Punctuation)), (r'^\s*\[.*?\]', Name.Attribute), (r'[^\S\n]+', Text), (r'\\\n', Text), # line continuation (r'//.*?\n', Comment), (r'/[*](.|\n)*?[*]/', Comment), (r'\n', Text), (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation), (r'[{}]', Keyword), (r'@"(\\\\|\\"|[^"])*"', String), (r'"(\\\\|\\"|[^"\n])*["\n]', String), (r"'\\.'|'[^\\]'", String.Char), (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'#[ \t]*(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion)\b.*?\n', Comment.Preproc), (r'(abstract|as|base|break|case|catch|' r'checked|const|continue|default|delegate|' r'do|else|enum|event|explicit|extern|false|finally|' r'fixed|for|foreach|goto|if|implicit|in|interface|' r'internal|is|lock|new|null|operator|' r'out|override|params|private|protected|public|readonly|' r'ref|return|sealed|sizeof|stackalloc|static|' r'switch|this|throw|true|try|typeof|' r'unchecked|unsafe|virtual|void|while|' r'get|set|new|partial)\b', Keyword), (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|' r'short|string|uint|ulong|ushort)\b', Keyword.Type), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'), (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'), (cs_ident, Name), ], 'class': [ (cs_ident, Name.Class, '#pop') ], 'namespace': [ (r'(?=\()', Text, '#pop'), # using (resource) ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop') ] } def __init__(self, **options): level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), 'basic') if level not in self._all_tokens: # compile the regexes now self._tokens = self.__class__.process_tokendef(level) else: self._tokens = self._all_tokens[level] RegexLexer.__init__(self, **options) class BooLexer(RegexLexer): """ For `Boo `_ source code. """ name = 'Boo' aliases = ['boo'] filenames = ['*.boo'] mimetypes = ['text/x-boo'] tokens = { 'root': [ (r'\s+', Text), (r'(#|//).*$', Comment), (r'/[*]', Comment, 'comment'), (r'[]{}:(),.;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'/(\\\\|\\/|[^/\s])/', String.Regex), (r'@/(\\\\|\\/|[^/])*/', String.Regex), (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator), (r'(as|abstract|callable|constructor|destructor|do|import|' r'enum|event|final|get|interface|internal|of|override|' r'partial|private|protected|public|return|set|static|' r'struct|transient|virtual|yield|super|and|break|cast|' r'continue|elif|else|ensure|except|for|given|goto|if|in|' r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|' r'while|from|as)\b', Keyword), (r'def(?=\s+\(.*?\))', Keyword), (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'), (r'(?`_ source code. """ name = 'VB.net' aliases = ['vb.net', 'vbnet'] filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'^\s*<.*?>', Name.Attribute), (r'\s+', Text), (r'\n', Text), (r'rem\b.*?\n', Comment), (r"'.*?\n", Comment), (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#End\s+If|#Const|' r'#ExternalSource.*?\n|#End\s+ExternalSource|' r'#Region.*?\n|#End\s+Region|#ExternalChecksum', Comment.Preproc), (r'[\(\){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), (r'(?>=|<<|>>|:=|' r'<=|>=|<>|[-&*/\\^+=<>]', Operator), ('"', String, 'string'), ('[a-zA-Z_][a-zA-Z0-9_]*[%&@!#$]?', Name), ('#.*?#', Literal.Date), (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float), (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer), (r'_\n', Text), # Line continuation ], 'string': [ (r'""', String), (r'"C?', String, '#pop'), (r'[^"]+', String), ], 'funcname': [ (r'[a-z_][a-z0-9_]*', Name.Function, '#pop') ], 'classname': [ (r'[a-z_][a-z0-9_]*', Name.Class, '#pop') ], 'namespace': [ (r'[a-z_][a-z0-9_.]*', Name.Namespace, '#pop') ], }