# -*- coding: utf-8 -*- """ pygments.lexers.compiled ~~~~~~~~~~~~~~~~~~~~~~~~ Lexers for compiled languages: C/C++, Delphi, Java. :copyright: 2006 by Georg Brandl, Armin Ronacher, Christoph Hack. :license: BSD, see LICENSE for more details. """ import re try: set except NameError: from sets import Set as set from pygments.scanner import Scanner from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ this from pygments.util import get_bool_opt, get_list_opt from pygments.token import \ Text, Comment, Operator, Keyword, Name, String, Number, Punctuation, \ Error __all__ = ['CLexer', 'CppLexer', 'DelphiLexer', 'JavaLexer'] class CLexer(RegexLexer): name = 'C' aliases = ['c'] filenames = ['*.c', '*.h'] mimetypes = ['text/x-chdr', 'text/x-csrc'] #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' tokens = { 'whitespace': [ (r'^\s*#if\s+0', Comment.Preproc, 'if0'), (r'^\s*#', Comment.Preproc, 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|(.|\n)*?[^\\]\n)', Comment), (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment), ], 'statements': [ (r'L?"', String, 'string'), (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'(0x[0-9a-fA-F]|0[0-7]+|(\d+\.\d*|\.\d+)|\d+)' r'e[+-]\d+[lL]?', Number.Float), (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'0[0-7]+[Ll]?', Number.Oct), (r'(\d+\.\d*|\.\d+)', Number.Float), (r'\d+', Number.Integer), (r'[~!%^&*()+=|\[\]:,.<>/?-]', Punctuation), # missing: Operators (r'(auto|break|case|const|continue|default|do|else|enum|extern|' r'for|goto|if|register|restricted|return|sizeof|static|struct|' r'switch|typedef|union|volatile|virtual|while)\b', Keyword), (r'(int|long|float|short|double|char|unsigned|signed|void|' r'_Complex|_Imaginary|_Bool)\b', Keyword.Type), (r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved), (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|' r'declspec|finally|int64|try|leave)\b', Keyword.Reserved), 
(r'(true|false|NULL)\b', Keyword.Constant), ('[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), ], 'root': [ include('whitespace'), # functions (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')({)', bygroups(using(this), Name.Function, using(this), Text, Keyword), 'function'), # function declarations (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name r'(\s*\([^;]*?\))' # signature r'(' + _ws + r')(;)', bygroups(using(this), Name.Function, using(this), Text, Punctuation)), ('', Text, 'statement'), ], 'statement' : [ include('whitespace'), include('statements'), ('[{}]', Keyword), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), ('{', Keyword, '#push'), ('}', Keyword, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment), (r'//.*?\n', Comment, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?/?-]', Punctuation), (r'(asm|auto|break|case|catch|const|const_cast|continue|' r'default|delete|do|dynamic_cast|else|enum|explicit|export|' r'extern|for|friend|goto|if|mutable|namespace|new|operator|' r'private|protected|public|register|reinterpret_cast|return|' r'sizeof|static|static_cast|struct|switch|template|this|throw|' r'throws|try|typedef|typeid|typename|union|using|volatile|' r'virtual|while)\b', Keyword), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(bool|int|long|float|short|double|char|unsigned|signed|' r'void|wchar_t)\b', Keyword.Type), (r'(_{0,2}inline|naked|thread)\b', Keyword.Reserved), 
(r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|' r'declspec|finally|int64|try|leave|wchar_t|w64|virtual_inheritance|' r'uuidof|unaligned|super|single_inheritance|raise|noop|' r'multiple_inheritance|m128i|m128d|m128|m64|interface|' r'identifier|forceinline|event|assume)\b', Keyword.Reserved), (r'(true|false|NULL)\b', Keyword.Constant), ('[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), ], 'classname': [ (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'), # template specification (r'\s*(?=>)', Text, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment), (r'//.*?\n', Comment, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?:;,.@\^]'): token = Operator # stop label highlighting on next ";" if collect_labels and scanner.match == ';': collect_labels = False elif scanner.scan(r'[\(\)\[\]]+'): token = Punctuation # abort function naming ``foo = Function(...)`` next_token_is_function = False # if we are in a function block we count the open # braces because ootherwise it's impossible to # determine the end of the modifier context if in_function_block or in_property_block: if scanner.match == '(': brace_balance[0] += 1 elif scanner.match == ')': brace_balance[0] -= 1 elif scanner.match == '[': brace_balance[1] += 1 elif scanner.match == ']': brace_balance[1] -= 1 elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'): lowercase_name = scanner.match.lower() if lowercase_name == 'result': token = Name.Builtin.Pseudo elif lowercase_name in self.keywords: token = Keyword # if we are in a special block and a # block ending keyword occours (and the parenthesis # is balanced) we end the current block context if 
(in_function_block or in_property_block) and \ lowercase_name in self.BLOCK_KEYWORDS and \ brace_balance[0] <= 0 and \ brace_balance[1] <= 0: in_function_block = False in_property_block = False brace_balance = [0, 0] block_labels = set() if lowercase_name in ('label', 'goto'): collect_labels = True elif lowercase_name == 'asm': stack.append('asm') elif lowercase_name == 'property': in_property_block = True next_token_is_property = True elif lowercase_name in ('procedure', 'operator', 'function', 'constructor', 'destructor'): in_function_block = True next_token_is_function = True # we are in a function block and the current name # is in the set of registered modifiers. highlight # it as pseudo keyword elif in_function_block and \ lowercase_name in self.FUNCTION_MODIFIERS: token = Keyword.Pseudo # if we are in a property highlight some more # modifiers elif in_property_block and \ lowercase_name in ('read', 'write'): token = Keyword.Pseudo next_token_is_function = True # if the last iteration set next_token_is_function # to true we now want this name highlighted as # function. so do that and reset the state elif next_token_is_function: # Look if the next token is a dot. 
If yes it's # not a function, but a class name and the # part after the dot a function name if scanner.test(r'\s*\.\s*'): token = Name.Class # it's not a dot, our job is done else: token = Name.Function next_token_is_function = False # same for properties elif next_token_is_property: token = Name.Property next_token_is_property = False # Highlight this token as label and add it # to the list of known labels elif collect_labels: token = Name.Label block_labels.add(scanner.match.lower()) # name is in list of known labels elif lowercase_name in block_labels: token = Name.Label elif lowercase_name in self.BUILTIN_TYPES: token = Keyword.Type elif lowercase_name in self.DIRECTIVES: token = Keyword.Pseudo # builtins are just builtins if the token # before isn't a dot elif not was_dot and lowercase_name in self.builtins: token = Name.Builtin else: token = Name elif scanner.scan(r"'"): token = String stack.append('string') elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'): token = String.Char elif scanner.scan(r'\$[0-9A-Fa-f]+'): token = Number.Hex elif scanner.scan(r'\d+(?![eE]|\.[^.])'): token = Number.Integer elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'): token = Number.Float else: # if the stack depth is deeper than once, pop if len(stack) > 1: stack.pop() scanner.get_char() elif stack[-1] == 'string': if scanner.scan(r"''"): token = String.Escape elif scanner.scan(r"'"): token = String stack.pop() elif scanner.scan(r"[^']*"): token = String else: scanner.get_char() stack.pop() elif stack[-1] == 'asm': if scanner.scan(r'\s+'): token = Text elif scanner.scan(r'end'): token = Keyword stack.pop() elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'): if scanner.match.startswith('$'): token = Comment.Preproc else: token = Comment.Multiline elif scanner.scan(r'//.*?$'): token = Comment.Single elif scanner.scan(r"'"): token = String stack.append('string') elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'): token = Name.Label elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'): 
class JavaLexer(RegexLexer):
    """
    Regex-based lexer for Java source code.

    The ``root`` state tokenizes method headers, comments, annotations,
    keywords, literals, names and operators.  The ``class`` state is
    entered right after the ``class`` keyword and marks the following
    identifier as ``Name.Class`` before returning to ``root``.
    """

    name = 'Java'
    aliases = ['java']
    filenames = ['*.java']
    mimetypes = ['text/x-java']

    # MULTILINE lets '^' anchor at every line start (needed by the method
    # rule); DOTALL lets '.' cross newlines so /* ... */ can span lines.
    # Presumably applied by RegexLexer when it compiles the patterns below.
    flags = re.MULTILINE | re.DOTALL

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            # method names
            # NOTE(review): `_ws` is required both before and after the
            # optional throws clause, so ") {" separated by a single space
            # does not appear to satisfy the lookahead -- confirm whether
            # that is intended before relaxing it.
            (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.]*\s+)+?)'  # return arguments
             r'([a-zA-Z_][a-zA-Z0-9_]*)'                  # method name
             r'(\s*\([^;]*?\))'                           # signature
             r'(?=' + _ws +                               # exception declaration
             r'(?:throws\s+(?:[a-zA-Z_][a-zA-Z0-9_]*,?\s*)+)?' + _ws + r'\{)',
             bygroups(using(this), Name.Function, using(this))),
            # horizontal whitespace only; newlines are handled by the
            # last rule of this state
            (r'[^\S\n]+', Text),
            (r'//.*?\n', Comment),
            (r'/\*.*?\*/', Comment),
            (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator),
            (r'(abstract|assert|break|case|catch|'
             r'const|continue|default|do|else|enum|extends|final|'
             r'finally|for|if|goto|implements|import|instanceof|'
             r'interface|native|new|package|private|protected|public|'
             r'return|static|strictfp|super|switch|synchronized|this|'
             r'throw|throws|transient|try|volatile|while)\b', Keyword),
            (r'(boolean|byte|char|double|float|int|long|short|void)\b',
             Keyword.Type),
            (r'(true|false|null)\b', Keyword.Constant),
            # the identifier following "class" is lexed in the 'class' state
            (r'(class)(\s+)', bygroups(Keyword, Text), 'class'),
            (r'"(\\\\|\\"|[^"])*"', String),
            # unicode escapes may use upper- or lowercase hex digits
            (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
            (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label),
            (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name),
            (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
            # Hex literals must be tried before the decimal integer rule:
            # otherwise the leading "0" is consumed as a decimal number and
            # the remaining "x..." falls through to the Name rule.
            (r'0[xX][0-9a-fA-F]+[lL]?', Number),
            # floats: optional signed exponent, optional f/F/d/D suffix
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fFdD]?', Number),
            # decimal integers with optional long suffix in either case
            (r'[0-9]+[lL]?', Number),
            (r'\n', Text)
        ],
        'class': [
            # class name right after the "class" keyword, then back to root
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ]
    }