summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alastair Houghton <alastair@coriolis-systems.com> 2012-12-19 13:34:13 +0000
committer: Alastair Houghton <alastair@coriolis-systems.com> 2012-12-19 13:34:13 +0000
commit: 3c081914779ce96aff16717742f522a50af97c66 (patch)
tree: 1ef56618ce4a1be5581bc643b16803caff3a28bc
parent: 33b11aa673efe7d697841758682170e991cbd8ed (diff)
download: pygments-3c081914779ce96aff16717742f522a50af97c66.tar.gz
Added support for inheritance to RegexLexer, so that subclasses can
selectively inherit tokendefs from their superclasses. Used this new ability to simplify and unify the C family languages in compiled.py, and to add support for Objective-C++. Also added code to support autodetection of language for .h files, with the default being C if no content is provided, since Objective-C uses the .h file extension.
-rw-r--r-- pygments/lexer.py 70
-rw-r--r-- pygments/lexers/__init__.py 2
-rw-r--r-- pygments/lexers/_mapping.py 3
-rw-r--r-- pygments/lexers/compiled.py 573
4 files changed, 256 insertions, 392 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index ad2c72d1..8535d086 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -8,7 +8,7 @@
:copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import re
+import re, itertools
from pygments.filter import apply_filters, Filter
from pygments.filters import get_filter_by_name
@@ -18,7 +18,7 @@ from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
- 'LexerContext', 'include', 'bygroups', 'using', 'this']
+ 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
@@ -81,6 +81,9 @@ class Lexer(object):
#: mime types
mimetypes = []
+ #: Priority, should multiple lexers match and no content is provided
+ priority = 0
+
__metaclass__ = LexerMeta
def __init__(self, **options):
@@ -237,6 +240,14 @@ class include(str):
"""
pass
+class _inherit(object):
+ """
+ Indicates that a state should inherit from its superclass.
+ """
+ def __repr__(self):
+ return 'inherit'
+
+inherit = _inherit()
class combined(tuple):
"""
@@ -428,7 +439,10 @@ class RegexLexerMeta(LexerMeta):
tokens.extend(cls._process_state(unprocessed, processed,
str(tdef)))
continue
-
+ if isinstance(tdef, _inherit):
+ # processed already
+ continue
+
assert type(tdef) is tuple, "wrong rule def %r" % tdef
try:
@@ -456,6 +470,54 @@ class RegexLexerMeta(LexerMeta):
cls._process_state(tokendefs, processed, state)
return processed
+ def get_tokendefs(cls):
+ """
+ Merge tokens from superclasses in MRO order, returning a single
+ tokendef dictionary.
+
+ Any state that is not defined by a subclass will be inherited
+ automatically. States that *are* defined by subclasses will, by
+ default, override that state in the superclass. If a subclass
+ wishes to inherit definitions from a superclass, it can use the
+ special value "inherit", which will cause the superclass' state
+ definition to be included at that point in the state.
+ """
+ tokens = {}
+ inheritable = {}
+ for c in itertools.chain((cls,), cls.__mro__):
+ toks = c.__dict__.get('tokens', {})
+
+ for state, items in toks.iteritems():
+ curitems = tokens.get(state)
+ if curitems is None:
+ tokens[state] = items
+
+ try:
+ inherit_ndx = items.index(inherit)
+ except:
+ inherit_ndx = -1
+
+ if inherit_ndx != -1:
+ inheritable[state] = inherit_ndx
+ continue
+
+ inherit_ndx = inheritable.pop(state, None)
+ if inherit_ndx is None:
+ continue
+
+ # Replace the "inherit" value with the items
+ curitems[inherit_ndx:inherit_ndx+1] = items
+
+ try:
+ new_inh_ndx = items.index(inherit)
+ except:
+ new_inh_ndx = -1
+
+ if new_inh_ndx != -1:
+ inheritable[state] = inherit_ndx + new_inh_ndx
+
+ return tokens
+
def __call__(cls, *args, **kwds):
"""Instantiate cls after preprocessing its token definitions."""
if '_tokens' not in cls.__dict__:
@@ -465,7 +527,7 @@ class RegexLexerMeta(LexerMeta):
# don't process yet
pass
else:
- cls._tokens = cls.process_tokendef('', cls.tokens)
+ cls._tokens = cls.process_tokendef('', cls.get_tokendefs())
return type.__call__(cls, *args, **kwds)
diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py
index eb8609d4..c2b9aaaa 100644
--- a/pygments/lexers/__init__.py
+++ b/pygments/lexers/__init__.py
@@ -113,7 +113,7 @@ def get_lexer_for_filename(_fn, code=None, **options):
# to find lexers which need it overridden.
if code:
return cls.analyse_text(code) + bonus
- return bonus
+ return cls.priority + bonus
if matches:
matches.sort(key=get_rating)
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 68dd2660..3681ea05 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -188,7 +188,8 @@ LEXERS = {
'NimrodLexer': ('pygments.lexers.compiled', 'Nimrod', ('nimrod', 'nim'), ('*.nim', '*.nimrod'), ('text/x-nimrod',)),
'NumPyLexer': ('pygments.lexers.math', 'NumPy', ('numpy',), (), ()),
'ObjdumpLexer': ('pygments.lexers.asm', 'objdump', ('objdump',), ('*.objdump',), ('text/x-objdump',)),
- 'ObjectiveCLexer': ('pygments.lexers.compiled', 'Objective-C', ('objective-c', 'objectivec', 'obj-c', 'objc'), ('*.m',), ('text/x-objective-c',)),
+ 'ObjectiveCLexer': ('pygments.lexers.compiled', 'Objective-C', ('objective-c', 'objectivec', 'obj-c', 'objc'), ('*.m','*.h'), ('text/x-objective-c',)),
+ 'ObjectiveCppLexer': ('pygments.lexers.compiled', 'Objective-C++', ('objective-c++', 'objectivec++', 'obj-c++', 'objc++'), ('*.mm','*.hh'), ('text/x-objective-c++',)),
'ObjectiveJLexer': ('pygments.lexers.web', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)),
'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
'OctaveLexer': ('pygments.lexers.math', 'Octave', ('octave',), ('*.m',), ('text/octave',)),
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 66b782e5..5ecc7759 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -13,7 +13,7 @@ import re
from string import Template
from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
- this, combined
+ this, combined, inherit
from pygments.util import get_bool_opt, get_list_opt
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Error, Literal
@@ -24,20 +24,17 @@ from pygments.lexers.functional import OcamlLexer
from pygments.lexers.jvm import JavaLexer, ScalaLexer
__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer',
- 'DylanLexer', 'ObjectiveCLexer', 'FortranLexer', 'GLShaderLexer',
- 'PrologLexer', 'CythonLexer', 'ValaLexer', 'OocLexer', 'GoLexer',
- 'FelixLexer', 'AdaLexer', 'Modula2Lexer', 'BlitzMaxLexer',
- 'NimrodLexer', 'FantomLexer', 'RustLexer', 'CudaLexer', 'MonkeyLexer']
+ 'DylanLexer', 'ObjectiveCLexer', 'ObjectiveCppLexer',
+ 'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer',
+ 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer',
+ 'Modula2Lexer', 'BlitzMaxLexer', 'NimrodLexer', 'FantomLexer',
+ 'RustLexer', 'CudaLexer', 'MonkeyLexer']
-
-class CLexer(RegexLexer):
+class CFamilyLexer(RegexLexer):
"""
- For C source code with preprocessor directives.
+ For C family source code. This is used as a base class to avoid repetitious
+ definitions.
"""
- name = 'C'
- aliases = ['c']
- filenames = ['*.c', '*.h', '*.idc']
- mimetypes = ['text/x-chdr', 'text/x-csrc']
#: optional Comment or Whitespace
_ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
@@ -76,12 +73,17 @@ class CLexer(RegexLexer):
(r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
(r'(auto|break|case|const|continue|default|do|else|enum|extern|'
r'for|goto|if|register|restricted|return|sizeof|static|struct|'
- r'switch|typedef|union|volatile|virtual|while)\b', Keyword),
- (r'(int|long|float|short|double|char|unsigned|signed|void)\b',
+ r'switch|typedef|union|volatile|while)\b', Keyword),
+ (r'(bool|int|long|float|short|double|char|unsigned|signed|void|'
+ r'[a-z_][a-z0-9_]*_t)\b',
Keyword.Type),
(r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved),
+ # Vector intrinsics
+ (r'(__(m128i|m128d|m128|m64))\b', Keyword.Reserved),
+ # Microsoft-isms
(r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
- r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
+ r'declspec|finally|int64|try|leave|wchar_t|w64|unaligned|'
+ r'raise|noop|identifier|forceinline|assume)\b', Keyword.Reserved),
(r'(true|false|NULL)\b', Name.Builtin),
('[a-zA-Z_][a-zA-Z0-9_]*', Name),
],
@@ -119,7 +121,8 @@ class CLexer(RegexLexer):
],
'string': [
(r'"', String, '#pop'),
- (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
+ (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
+ r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
(r'[^\\"\n]+', String), # all other characters
(r'\\\n', String), # line continuation
(r'\\', String), # stray backslash
@@ -141,16 +144,17 @@ class CLexer(RegexLexer):
}
stdlib_types = ['size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t',
- 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list',
- 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t',
- 'wctrans_t', 'wint_t', 'wctype_t']
+ 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list',
+ 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t',
+ 'wctrans_t', 'wint_t', 'wctype_t']
c99_types = ['_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
- 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
- 'int_least16_t', 'int_least32_t', 'int_least64_t',
- 'uint_least8_t', 'uint_least16_t', 'uint_least32_t',
- 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
- 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t',
- 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t']
+ 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
+ 'int_least16_t', 'int_least32_t', 'int_least64_t',
+ 'uint_least8_t', 'uint_least16_t', 'uint_least32_t',
+ 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
+ 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t',
+ 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t',
+ 'uintmax_t']
def __init__(self, **options):
self.stdlibhighlighting = get_bool_opt(options,
@@ -169,7 +173,22 @@ class CLexer(RegexLexer):
token = Keyword.Type
yield index, token, value
-class CppLexer(RegexLexer):
+class CLexer(CFamilyLexer):
+ """
+ For C source code with preprocessor directives.
+ """
+ name = 'C'
+ aliases = ['c']
+ filenames = ['*.c', '*.h', '*.idc']
+ mimetypes = ['text/x-chdr', 'text/x-csrc']
+ priority = 0.5
+
+ def analyse_text(text):
+ # We return 0.5 to allow other C-family languages that use .h files
+ # to check them to see if they are, in fact, not plain C
+ return 0.5
+
+class CppLexer(CFamilyLexer):
"""
For C++ source code with preprocessor directives.
"""
@@ -179,90 +198,39 @@ class CppLexer(RegexLexer):
'*.cc', '*.hh', '*.cxx', '*.hxx',
'*.C', '*.H', '*.cp', '*.CPP']
mimetypes = ['text/x-c++hdr', 'text/x-c++src']
-
- #: optional Comment or Whitespace
- _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
- #: only one /* */ style comment
- _ws1 = r':\s*/[*].*?[*]/\s*'
-
+ priority = 0.5
+
tokens = {
- 'root': [
- # preprocessor directives: without whitespace
- ('^#if\s+0', Comment.Preproc, 'if0'),
- ('^#', Comment.Preproc, 'macro'),
- # or with whitespace
- ('^(' + _ws1 + r')(#if\s+0)',
- bygroups(using(this), Comment.Preproc), 'if0'),
- ('^(' + _ws1 + ')(#)',
- bygroups(using(this), Comment.Preproc), 'macro'),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
- (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
- (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
- (r'[{}]', Punctuation),
- (r'L?"', String, 'string'),
- (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
- (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
- (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
- (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
- (r'0[0-7]+[LlUu]*', Number.Oct),
- (r'\d+[LlUu]*', Number.Integer),
- (r'\*/', Error),
- (r'[~!%^&*+=|?:<>/-]', Operator),
- (r'[()\[\],.;]', Punctuation),
- (r'(asm|auto|break|case|catch|const|const_cast|continue|'
- r'default|delete|do|dynamic_cast|else|enum|explicit|export|'
- r'extern|for|friend|goto|if|mutable|namespace|new|operator|'
- r'private|protected|public|register|reinterpret_cast|return|'
- r'restrict|sizeof|static|static_cast|struct|switch|template|'
- r'this|throw|throws|try|typedef|typeid|typename|union|using|'
- r'volatile|virtual|while)\b', Keyword),
+ 'statements': [
+ (r'(asm|catch|const_cast|delete|dynamic_cast|explicit|'
+ r'export|friend|mutable|namespace|new|operator|'
+ r'private|protected|public|reinterpret_cast|'
+ r'restrict|static_cast|template|this|throw|throws|'
+ r'typeid|typename|using|virtual)\b', Keyword),
(r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
- (r'(bool|int|long|float|short|double|char|unsigned|signed|'
- r'void|wchar_t)\b', Keyword.Type),
- (r'(_{0,2}inline|naked|thread)\b', Keyword.Reserved),
- (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
- r'declspec|finally|int64|try|leave|wchar_t|w64|virtual_inheritance|'
- r'uuidof|unaligned|super|single_inheritance|raise|noop|'
- r'multiple_inheritance|m128i|m128d|m128|m64|interface|'
- r'identifier|forceinline|event|assume)\b', Keyword.Reserved),
+ inherit,
+ ],
+ 'root': [
+ inherit,
+ # C++ Microsoft-isms
+ (r'__(virtual_inheritance|uuidof|super|single_inheritance|'
+ r'multiple_inheritance|interface|event)\b', Keyword.Reserved),
# Offload C++ extensions, http://offload.codeplay.com/
(r'(__offload|__blockingoffload|__outer)\b', Keyword.Pseudo),
- (r'(true|false)\b', Keyword.Constant),
- (r'NULL\b', Name.Builtin),
- ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
],
'classname': [
(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'),
# template specification
(r'\s*(?=>)', Text, '#pop'),
],
- 'string': [
- (r'"', String, '#pop'),
- (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
- (r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
- (r'\\', String), # stray backslash
- ],
- 'macro': [
- (r'[^/\n]+', Comment.Preproc),
- (r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'//.*?\n', Comment.Single, '#pop'),
- (r'/', Comment.Preproc),
- (r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
- ],
- 'if0': [
- (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
- (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
- (r'.*?\n', Comment),
- ]
}
+ def analyse_text(text):
+ # We return 0.5 to allow other C-family languages that use .h files
+ # to check them to see if they are, in fact, not plain C++
+ return 0.5
-class ECLexer(RegexLexer):
+class ECLexer(CLexer):
"""
For eC source code with preprocessor directives.
@@ -273,147 +241,29 @@ class ECLexer(RegexLexer):
filenames = ['*.ec', '*.eh']
mimetypes = ['text/x-echdr', 'text/x-ecsrc']
- #: optional Comment or Whitespace
- _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
- #: only one /* */ style comment
- _ws1 = r':\s*/[*].*?[*]/\s*'
-
tokens = {
- 'whitespace': [
- # preprocessor directives: without whitespace
- ('^#if\s+0', Comment.Preproc, 'if0'),
- ('^#', Comment.Preproc, 'macro'),
- # or with whitespace
- ('^' + _ws1 + r'#if\s+0', Comment.Preproc, 'if0'),
- ('^' + _ws1 + '#', Comment.Preproc, 'macro'),
- (r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*:(?!:))', bygroups(Text, Name.Label)),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
- (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
- (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
- ],
'statements': [
- (r'L?"', String, 'string'),
- (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
- (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
- (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
- (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
- (r'0[0-7]+[LlUu]*', Number.Oct),
- (r'\d+[LlUu]*', Number.Integer),
- (r'\*/', Error),
- (r'[~!%^&*+=|?:<>/-]', Operator),
- (r'[()\[\],.]', Punctuation),
- (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
- (r'(auto|break|case|const|continue|default|do|else|enum|extern|'
- r'for|goto|if|register|restricted|return|sizeof|static|struct|'
- r'switch|typedef|union|volatile|virtual|while|class|private|public|'
- r'property|import|delete|new|new0|renew|renew0|define|get|set|remote|dllexport|dllimport|stdcall|'
- r'subclass|__on_register_module|namespace|using|typed_object|any_object|incref|register|watch|'
- r'stopwatching|firewatchers|watchable|class_designer|class_fixed|class_no_expansion|isset|'
- r'class_default_property|property_category|class_data|class_property|virtual|thisclass|'
+ (r'(virtual|class|private|public|property|import|delete|new|new0|'
+ r'renew|renew0|define|get|set|remote|dllexport|dllimport|stdcall|'
+ r'subclass|__on_register_module|namespace|using|typed_object|'
+ r'any_object|incref|register|watch|stopwatching|firewatchers|'
+ r'watchable|class_designer|class_fixed|class_no_expansion|isset|'
+ r'class_default_property|property_category|class_data|'
+ r'class_property|virtual|thisclass|'
r'dbtable|dbindex|database_open|dbfield)\b', Keyword),
- (r'(int|long|float|short|double|char|unsigned|signed|void)\b',
- Keyword.Type),
(r'(uint|uint16|uint32|uint64|bool|byte|unichar|int64)\b',
Keyword.Type),
(r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
- (r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved),
- (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
- r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
- (r'(true|false|null|value|this|NULL)\b', Name.Builtin),
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
- ],
- 'root': [
- include('whitespace'),
- # functions
- (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments
- r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name
- r'(\s*\([^;]*?\))' # signature
- r'(' + _ws + r')?({)',
- bygroups(using(this), Name.Function, using(this), using(this),
- Punctuation),
- 'function'),
- # function declarations
- (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments
- r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name
- r'(\s*\([^;]*?\))' # signature
- r'(' + _ws + r')?(;)',
- bygroups(using(this), Name.Function, using(this), using(this),
- Punctuation)),
- ('', Text, 'statement'),
+ (r'(null|value|this)\b', Name.Builtin),
+ inherit,
],
'classname': [
(r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'),
# template specification
(r'\s*(?=>)', Text, '#pop'),
],
- 'statement' : [
- include('whitespace'),
- include('statements'),
- ('[{}]', Punctuation),
- (';', Punctuation, '#pop'),
- ],
- 'function': [
- include('whitespace'),
- include('statements'),
- (';', Punctuation),
- ('{', Punctuation, '#push'),
- ('}', Punctuation, '#pop'),
- ],
- 'string': [
- (r'"', String, '#pop'),
- (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
- (r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
- (r'\\', String), # stray backslash
- ],
- 'macro': [
- (r'[^/\n]+', Comment.Preproc),
- (r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'//.*?\n', Comment.Single, '#pop'),
- (r'/', Comment.Preproc),
- (r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
- ],
- 'if0': [
- (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
- (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
- (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
- (r'.*?\n', Comment),
- ]
}
-
- stdlib_types = ['size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t',
- 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list',
- 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t',
- 'wctrans_t', 'wint_t', 'wctype_t']
- c99_types = ['_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
- 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
- 'int_least16_t', 'int_least32_t', 'int_least64_t',
- 'uint_least8_t', 'uint_least16_t', 'uint_least32_t',
- 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
- 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t',
- 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t']
-
- def __init__(self, **options):
- self.stdlibhighlighting = get_bool_opt(options,
- 'stdlibhighlighting', True)
- self.c99highlighting = get_bool_opt(options,
- 'c99highlighting', True)
- RegexLexer.__init__(self, **options)
-
- def get_tokens_unprocessed(self, text):
- for index, token, value in \
- RegexLexer.get_tokens_unprocessed(self, text):
- if token is Name:
- if self.stdlibhighlighting and value in self.stdlib_types:
- token = Keyword.Type
- elif self.c99highlighting and value in self.c99_types:
- token = Keyword.Type
- yield index, token, value
-
-
+
class DLexer(RegexLexer):
"""
For D source.
@@ -1101,176 +951,127 @@ class DylanLexer(RegexLexer):
],
}
+def objective(baselexer):
+ """
+ Generate a subclass of baselexer that accepts the Objective-C syntax
+ extensions.
+ """
-class ObjectiveCLexer(RegexLexer):
+ # Have to be careful not to accidentally match JavaDoc/Doxygen syntax here,
+ # since that's quite common in ordinary C/C++ files. It's OK to match
+ # JavaDoc/Doxygen keywords that only apply to Objective-C, mind.
+ #
+ # The upshot of this is that we CANNOT match @class or @interface
+ _oc_keywords = re.compile(r'@(?:end|implementation|protocol)')
+
+ # Matches [ <ws>? identifier <ws> ( identifier <ws>? ] | identifier? : )
+ # (note the identifier is *optional* when there is a ':'!)
+ _oc_message = re.compile(r'\[\s*[a-zA-Z_][a-zA-Z0-9_]*\s+'
+ r'(?:[a-zA-Z_][a-zA-Z0-9_]*\s*\]|'
+ r'(?:[a-zA-Z_][a-zA-Z0-9_]*)?:)')
+
+ class GeneratedObjectiveCVariant(baselexer):
+ """
+ Implements Objective-C syntax on top of an existing C family lexer.
+ """
+
+ tokens = {
+ 'statements': [
+ (r'@"', String, 'string'),
+ (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
+ String.Char),
+ (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
+ (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
+ (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex),
+ (r'@0[0-7]+[Ll]?', Number.Oct),
+ (r'@\d+[Ll]?', Number.Integer),
+ (r'(in|@selector|@private|@protected|@public|@encode|'
+ r'@synchronized|@try|@throw|@catch|@finally|@end|@property|'
+ r'@synthesize|@dynamic|@optional)\b', Keyword),
+ (r'(id|Class|IMP|SEL|BOOL|IBOutlet|IBAction|unichar)\b',
+ Keyword.Type),
+ (r'@(true|false|YES|NO)\n', Name.Builtin),
+ (r'(YES|NO|nil)\b', Name.Builtin),
+ (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
+ ('#pop', 'oc_classname')),
+ (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
+ ('#pop', 'oc_forward_classname')),
+ inherit,
+ ],
+ 'oc_classname' : [
+ # interface definition that inherits
+ ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*:\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)?',
+ bygroups(Name.Class, Text, Name.Class), '#pop'),
+ # interface definition for a category
+ ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(\([a-zA-Z$_][a-zA-Z0-9$_]*\))',
+ bygroups(Name.Class, Text, Name.Label), '#pop'),
+ # simple interface / implementation
+ ('([a-zA-Z$_][a-zA-Z0-9$_]*)', Name.Class, '#pop')
+ ],
+ 'oc_forward_classname' : [
+ ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*,\s*)',
+ bygroups(Name.Class, Text), 'oc_forward_classname'),
+ ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*;?)',
+ bygroups(Name.Class, Text), '#pop')
+ ],
+ 'root': [
+ # methods
+ (r'^([-+])(\s*)' # method marker
+ r'(\(.*?\))?(\s*)' # return type
+ r'([a-zA-Z$_][a-zA-Z0-9$_]*:?)', # begin of method name
+ bygroups(Keyword, Text, using(this),
+ Text, Name.Function),
+ 'method'),
+ inherit,
+ ],
+ 'method': [
+ include('whitespace'),
+ # TODO unsure if ellipses are allowed elsewhere, see
+ # discussion in Issue 789
+ (r',', Punctuation),
+ (r'\.\.\.', Punctuation),
+ (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this),
+ Name.Variable)),
+ (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function),
+ (';', Punctuation, '#pop'),
+ ('{', Punctuation, 'function'),
+ ('', Text, '#pop'),
+ ],
+ }
+
+ def analyse_text(text):
+ if _oc_keywords.search(text):
+ return 1.0
+ elif '@"' in text: # strings
+ return 0.8
+ elif _oc_message.search(text):
+ return 0.8
+ return 0
+
+ return GeneratedObjectiveCVariant
+
+class ObjectiveCLexer(objective(CLexer)):
"""
For Objective-C source code with preprocessor directives.
"""
name = 'Objective-C'
aliases = ['objective-c', 'objectivec', 'obj-c', 'objc']
- # XXX: objc has .h files too :-/
- filenames = ['*.m']
+ filenames = ['*.m', '*.h']
mimetypes = ['text/x-objective-c']
+ priority = 0.25
+
+class ObjectiveCppLexer(objective(CppLexer)):
+ """
+ For Objective-C++ source code with preprocessor directives.
+ """
- #: optional Comment or Whitespace
- _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
- #: only one /* */ style comment
- _ws1 = r':\s*/[*].*?[*]/\s*'
-
- tokens = {
- 'whitespace': [
- # preprocessor directives: without whitespace
- ('^#if\s+0', Comment.Preproc, 'if0'),
- ('^#', Comment.Preproc, 'macro'),
- # or with whitespace
- ('^(' + _ws1 + r')(#if\s+0)',
- bygroups(using(this), Comment.Preproc), 'if0'),
- ('^(' + _ws1 + ')(#)',
- bygroups(using(this), Comment.Preproc), 'macro'),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
- (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
- (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
- ],
- 'statements': [
- (r'(L|@)?"', String, 'string'),
- (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
- String.Char),
- (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
- (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
- (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
- (r'0[0-7]+[Ll]?', Number.Oct),
- (r'\d+[Ll]?', Number.Integer),
- (r'[~!%^&*+=|?:<>/-]', Operator),
- (r'[()\[\],.]', Punctuation),
- (r'(auto|break|case|const|continue|default|do|else|enum|extern|'
- r'for|goto|if|register|restricted|return|sizeof|static|struct|'
- r'switch|typedef|union|volatile|virtual|while|in|@selector|'
- r'@private|@protected|@public|@encode|'
- r'@synchronized|@try|@throw|@catch|@finally|@end|@property|'
- r'@synthesize|@dynamic|@optional)\b', Keyword),
- (r'(int|long|float|short|double|char|unsigned|signed|void|'
- r'id|BOOL|IBOutlet|IBAction|SEL)\b', Keyword.Type),
- (r'(_{0,2}inline|naked|restrict|thread|typename)\b',
- Keyword.Reserved),
- (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
- r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
- (r'(TRUE|FALSE|nil|NULL)\b', Name.Builtin),
- ('[a-zA-Z$_][a-zA-Z0-9$_]*:(?!:)', Name.Label),
- ('[a-zA-Z$_][a-zA-Z0-9$_]*', Name),
- (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
- ('#pop', 'classname')),
- (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
- ('#pop', 'forward_classname')),
- ],
- 'root': [
- include('whitespace'),
- # functions
- (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments
- r'([a-zA-Z$_][a-zA-Z0-9$_]*)' # method name
- r'(\s*\([^;]*?\))' # signature
- r'(' + _ws + r')?({)',
- bygroups(using(this), Name.Function,
- using(this), Text, Punctuation),
- 'function'),
- # methods
- (r'^([-+])(\s*)' # method marker
- r'(\(.*?\))?(\s*)' # return type
- r'([a-zA-Z$_][a-zA-Z0-9$_]*:?)', # begin of method name
- bygroups(Keyword, Text, using(this),
- Text, Name.Function),
- 'method'),
- # function declarations
- (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments
- r'([a-zA-Z$_][a-zA-Z0-9$_]*)' # method name
- r'(\s*\([^;]*?\))' # signature
- r'(' + _ws + r')?(;)',
- bygroups(using(this), Name.Function,
- using(this), Text, Punctuation)),
- (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
- 'classname'),
- (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
- 'forward_classname'),
- (r'(\s*)(@end)(\s*)', bygroups(Text, Keyword, Text)),
- ('', Text, 'statement'),
- ],
- 'classname' : [
- # interface definition that inherits
- ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*:\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)?',
- bygroups(Name.Class, Text, Name.Class), '#pop'),
- # interface definition for a category
- ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*)(\([a-zA-Z$_][a-zA-Z0-9$_]*\))',
- bygroups(Name.Class, Text, Name.Label), '#pop'),
- # simple interface / implementation
- ('([a-zA-Z$_][a-zA-Z0-9$_]*)', Name.Class, '#pop')
- ],
- 'forward_classname' : [
- ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*,\s*)',
- bygroups(Name.Class, Text), 'forward_classname'),
- ('([a-zA-Z$_][a-zA-Z0-9$_]*)(\s*;?)',
- bygroups(Name.Class, Text), '#pop')
- ],
- 'statement' : [
- include('whitespace'),
- include('statements'),
- ('[{}]', Punctuation),
- (';', Punctuation, '#pop'),
- ],
- 'function': [
- include('whitespace'),
- include('statements'),
- (';', Punctuation),
- ('{', Punctuation, '#push'),
- ('}', Punctuation, '#pop'),
- ],
- 'method': [
- include('whitespace'),
- # TODO unsure if ellipses are allowed elsewhere, see discussion in
- # Issue 789
- (r',', Punctuation),
- (r'\.\.\.', Punctuation),
- (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this),
- Name.Variable)),
- (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function),
- (';', Punctuation, '#pop'),
- ('{', Punctuation, 'function'),
- ('', Text, '#pop'),
- ],
- 'string': [
- (r'"', String, '#pop'),
- (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
- (r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
- (r'\\', String), # stray backslash
- ],
- 'macro': [
- (r'[^/\n]+', Comment.Preproc),
- (r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'//.*?\n', Comment.Single, '#pop'),
- (r'/', Comment.Preproc),
- (r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
- ],
- 'if0': [
- (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
- (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
- (r'.*?\n', Comment),
- ]
- }
-
- def analyse_text(text):
- if '@import' in text or '@interface' in text or \
- '@implementation' in text:
- return True
- elif '@"' in text: # strings
- return True
- elif re.match(r'\[[a-zA-Z0-9.]:', text): # message
- return True
- return False
-
-
+ name = 'Objective-C++'
+ aliases = ['objective-c++', 'objectivec++', 'obj-c++', 'objc++']
+ filenames = ['*.mm', '*.hh']
+ mimetypes = ['text/x-objective-c++']
+ priority = 0.25
+
class FortranLexer(RegexLexer):
"""
Lexer for FORTRAN 90 code.