diff options
Diffstat (limited to 'pygments/lexers/compiled.py')
-rw-r--r-- | pygments/lexers/compiled.py | 198 |
1 files changed, 197 insertions, 1 deletions
# Reconstructed from the added ("+") lines of the diff against
# pygments/lexers/compiled.py.  The same change also appends
# 'Modula2Lexer' to that module's ``__all__`` list.


class Modula2Lexer(RegexLexer):
    """
    For `Modula-2 <http://www.modula2.org/>`_ source code.

    Additional options that determine which keywords are highlighted:

    `pim`
        Select PIM Modula-2 dialect (default: True).
    `iso`
        Select ISO Modula-2 dialect (default: False).
    `objm2`
        Select Objective Modula-2 dialect (default: False).
    `gm2ext`
        Also highlight GNU extensions (default: False).

    Exactly one dialect is active per lexer instance.  `iso` takes
    precedence over `objm2`; when neither is enabled the PIM word lists
    are used.  The value of the `pim` option itself is not consulted.

    The regex rules emit every identifier as ``Name``;
    `get_tokens_unprocessed` then re-tags identifiers found in the active
    reserved-word and pervasive sets.

    *New in Pygments 1.3.*
    """
    name = 'Modula-2'
    aliases = ['modula2', 'm2']
    filenames = ['*.def', '*.mod']
    mimetypes = ['text/x-modula2']

    # DOTALL lets ``.*?`` in the comment/pragma rules span several lines.
    flags = re.MULTILINE | re.DOTALL

    # Not referenced by the rules below; kept as a class attribute.
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'whitespace': [
            (r'\n+', Text),  # blank lines
            (r'\s+', Text),  # whitespace
        ],
        'identifiers': [
            (r'([a-zA-Z_\$][a-zA-Z0-9_\$]*)', Name),
        ],
        'numliterals': [
            (r'[01]+B', Number.Binary),        # binary number (ObjM2)
            (r'[0-7]+B', Number.Oct),          # octal number (PIM + ISO)
            (r'[0-7]+C', Number.Oct),          # char code (PIM + ISO)
            (r'[0-9A-F]+C', Number.Hex),       # char code (ObjM2)
            (r'[0-9A-F]+H', Number.Hex),       # hexadecimal number
            # real number with scale factor; the exponent sign is optional,
            # otherwise "1.0E10" would split into Float "1.0" + Name "E10"
            (r'[0-9]+\.[0-9]+E[+-]?[0-9]+', Number.Float),
            (r'[0-9]+\.[0-9]+', Number.Float),  # real number
            (r'[0-9]+', Number.Integer),       # decimal whole number
        ],
        'strings': [
            (r"'(\\\\|\\'|[^'])*'", String),  # single quoted string
            (r'"(\\\\|\\"|[^"])*"', String),  # double quoted string
        ],
        'operators': [
            (r'[*/+=#~&<>\^-]', Operator),
            (r':=', Operator),    # assignment
            (r'@', Operator),     # pointer deref (ISO)
            (r'\.\.', Operator),  # ellipsis or range
            (r'`', Operator),     # Smalltalk message (ObjM2)
            (r'::', Operator),    # type conversion (ObjM2)
        ],
        'punctuation': [
            (r'[\(\)\[\]{},.:;|]', Punctuation),
        ],
        'comments': [
            (r'//.*?\n', Comment.Single),         # ObjM2
            (r'/\*(.*?)\*/', Comment.Multiline),  # ObjM2
            # "(*$" opens a PIM pragma, so it is excluded here with a
            # zero-width lookahead.  Consuming a character instead would
            # make the empty comment "(**)" run on to the next "*)".
            (r'\(\*(?!\$)(.*?)\*\)', Comment.Multiline),
            # TODO: nesting of (* ... *) comments
        ],
        'pragmas': [
            (r'\(\*\$(.*?)\*\)', Comment.Preproc),  # PIM
            (r'<\*(.*?)\*>', Comment.Preproc),      # ISO + ObjM2
        ],
        # Order matters: comments and pragmas are tried before operators
        # and punctuation so that "(*", "/*", "//" and "<*" are not split
        # into single-character tokens.
        'root': [
            include('whitespace'),
            include('comments'),
            include('pragmas'),
            include('identifiers'),
            include('numliterals'),
            include('strings'),
            include('operators'),
            include('punctuation'),
        ]
    }

    pim_reserved_words = [
        # 40 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
        'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
        'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'WHILE', 'WITH',
    ]

    pim_pervasives = [
        # 31 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
        'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
        'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
        'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
    ]

    iso_reserved_words = [
        # 46 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
        'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
        'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
        'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
        'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
        'WITH',
    ]

    iso_pervasives = [
        # 42 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
        'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
        'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
        'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
        'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
        'TRUE', 'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
    ]

    objm2_reserved_words = [
        # base language, 42 reserved words
        'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
        'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
        'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
        'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
        'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
        'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
        # OO extensions, 16 reserved words
        'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
        'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
        'SUPER', 'TRY',
    ]

    objm2_pervasives = [
        # base language, 38 pervasives
        'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
        'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
        'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
        'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
        'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
        # OO extensions, 3 pervasives
        'OBJECT', 'NO', 'YES',
    ]

    gnu_reserved_words = [
        # 10 additional reserved words
        'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
        '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
    ]

    gnu_pervasives = [
        # 21 identifiers, actually from pseudo-module SYSTEM
        # but we will highlight them as if they were pervasives
        'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
        'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
        'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
        'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
    ]

    def __init__(self, **options):
        """
        Build the keyword sets for the selected dialect.

        Reads the boolean options `iso`, `objm2` and `gm2ext` from
        *options* (each defaulting to False) and passes *options* on to
        ``RegexLexer.__init__`` unchanged.
        """
        self.reserved_words = set()
        self.pervasives = set()
        # ISO Modula-2
        if get_bool_opt(options, 'iso', False):
            self.reserved_words.update(self.iso_reserved_words)
            self.pervasives.update(self.iso_pervasives)
        # Objective Modula-2
        elif get_bool_opt(options, 'objm2', False):
            self.reserved_words.update(self.objm2_reserved_words)
            self.pervasives.update(self.objm2_pervasives)
        # PIM Modula-2 (DEFAULT)
        else:
            self.reserved_words.update(self.pim_reserved_words)
            self.pervasives.update(self.pim_pervasives)
        # GNU extensions are additive on top of whichever dialect is active
        if get_bool_opt(options, 'gm2ext', False):
            self.reserved_words.update(self.gnu_reserved_words)
            self.pervasives.update(self.gnu_pervasives)
        # initialise
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples for *text*.

        Tokens come from ``RegexLexer.get_tokens_unprocessed``; those typed
        exactly ``Name`` are re-tagged as ``Keyword.Reserved`` or
        ``Keyword.Pervasive`` when their value is in the corresponding set
        built by ``__init__``.  Reserved words are checked first.
        """
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            # check for reserved words and pervasives
            if token is Name:
                if value in self.reserved_words:
                    token = Keyword.Reserved
                elif value in self.pervasives:
                    token = Keyword.Pervasive
            # return result
            yield index, token, value