summaryrefslogtreecommitdiff
path: root/pygments/lexers/compiled.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/compiled.py')
-rw-r--r--pygments/lexers/compiled.py198
1 files changed, 197 insertions, 1 deletions
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index a6069611..cf2bc737 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -25,7 +25,8 @@ from pygments.lexers.functional import OcamlLexer
__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'JavaLexer',
'ScalaLexer', 'DylanLexer', 'OcamlLexer', 'ObjectiveCLexer',
'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer',
- 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer']
+ 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer',
+ 'Modula2Lexer']
class CLexer(RegexLexer):
@@ -2169,3 +2170,198 @@ class AdaLexer(RegexLexer):
include('root'),
],
}
+
+
+class Modula2Lexer(RegexLexer):
+ """
+ For `Modula-2 <http://www.modula2.org/>`_ source code.
+
+ Additional options that determine which keywords are highlighted:
+
+ `pim`
+ Select PIM Modula-2 dialect (default: True).
+ `iso`
+ Select ISO Modula-2 dialect (default: False).
+ `objm2`
+ Select Objective Modula-2 dialect (default: False).
+ `gm2ext`
+ Also highlight GNU extensions (default: False).
+
+ *New in Pygments 1.3.*
+ """
+ name = 'Modula-2'
+ aliases = ['modula2', 'm2']
+ filenames = ['*.def', '*.mod']
+ mimetypes = ['text/x-modula2']
+
+ flags = re.MULTILINE | re.DOTALL
+
+ _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
+
+ tokens = {
+ 'whitespace': [
+ (r'\n+', Text), # blank lines
+ (r'\s+', Text), # whitespace
+ ],
+ 'identifiers': [
+ (r'([a-zA-Z_\$][a-zA-Z0-9_\$]*)', Name),
+ ],
+ 'numliterals': [
+ (r'[01]+B', Number.Binary), # binary number (ObjM2)
+ (r'[0-7]+B', Number.Oct), # octal number (PIM + ISO)
+ (r'[0-7]+C', Number.Oct), # char code (PIM + ISO)
+ (r'[0-9A-F]+C', Number.Hex), # char code (ObjM2)
+ (r'[0-9A-F]+H', Number.Hex), # hexadecimal number
+ (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number
+ (r'[0-9]+\.[0-9]+', Number.Float), # real number
+ (r'[0-9]+', Number.Integer), # decimal whole number
+ ],
+ 'strings': [
+ (r"'(\\\\|\\'|[^'])*'", String), # single quoted string
+ (r'"(\\\\|\\"|[^"])*"', String), # double quoted string
+ ],
+ 'operators': [
+ (r'[*/+=#~&<>\^-]', Operator),
+ (r':=', Operator), # assignment
+ (r'@', Operator), # pointer deref (ISO)
+ (r'\.\.', Operator), # ellipsis or range
+ (r'`', Operator), # Smalltalk message (ObjM2)
+ (r'::', Operator), # type conversion (ObjM2)
+ ],
+ 'punctuation': [
+ (r'[\(\)\[\]{},.:;|]', Punctuation),
+ ],
+ 'comments': [
+ (r'//.*?\n', Comment.Single), # ObjM2
+ (r'/\*(.*?)\*/', Comment.Multiline), # ObjM2
+ (r'\(\*([^\$].*?)\*\)', Comment.Multiline),
+ # TO DO: nesting of (* ... *) comments
+ ],
+ 'pragmas': [
+ (r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM
+ (r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2
+ ],
+ 'root': [
+ include('whitespace'),
+ include('comments'),
+ include('pragmas'),
+ include('identifiers'),
+ include('numliterals'),
+ include('strings'),
+ include('operators'),
+ include('punctuation'),
+ ]
+ }
+
+ pim_reserved_words = [
+ # 40 reserved words
+ 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
+ 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
+ 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
+ 'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
+ 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
+ 'UNTIL', 'VAR', 'WHILE', 'WITH',
+ ]
+
+ pim_pervasives = [
+ # 31 pervasives
+ 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
+ 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
+ 'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
+ 'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
+ ]
+
+ iso_reserved_words = [
+ # 46 reserved words
+ 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
+ 'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
+ 'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
+ 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
+ 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
+ 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
+ 'WITH',
+ ]
+
+ iso_pervasives = [
+ # 42 pervasives
+ 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
+ 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
+ 'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
+ 'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
+ 'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
+ 'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL',
+ ]
+
+ objm2_reserved_words = [
+ # base language, 42 reserved words
+ 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
+ 'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
+ 'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
+ 'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
+ 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
+ 'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
+ # OO extensions, 16 reserved words
+ 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
+ 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
+ 'SUPER', 'TRY',
+ ]
+
+ objm2_pervasives = [
+ # base language, 38 pervasives
+ 'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
+ 'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
+ 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
+ 'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
+ 'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
+ # OO extensions, 3 pervasives
+ 'OBJECT', 'NO', 'YES',
+ ]
+
+ gnu_reserved_words = [
+ # 10 additional reserved words
+ 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
+ '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
+ ]
+
+ gnu_pervasives = [
+ # 21 identifiers, actually from pseudo-module SYSTEM
+ # but we will highlight them as if they were pervasives
+ 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
+ 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
+ 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
+ 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
+ ]
+
+ def __init__(self, **options):
+ self.reserved_words = set()
+ self.pervasives = set()
+ # ISO Modula-2
+ if get_bool_opt(options, 'iso', False):
+ self.reserved_words.update(self.iso_reserved_words)
+ self.pervasives.update(self.iso_pervasives)
+ # Objective Modula-2
+ elif get_bool_opt(options, 'objm2', False):
+ self.reserved_words.update(self.objm2_reserved_words)
+ self.pervasives.update(self.objm2_pervasives)
+ # PIM Modula-2 (DEFAULT)
+ else:
+ self.reserved_words.update(self.pim_reserved_words)
+ self.pervasives.update(self.pim_pervasives)
+ # GNU extensions
+ if get_bool_opt(options, 'gm2ext', False):
+ self.reserved_words.update(self.gnu_reserved_words)
+ self.pervasives.update(self.gnu_pervasives)
+ # initialise
+ RegexLexer.__init__(self, **options)
+
+ def get_tokens_unprocessed(self, text):
+ for index, token, value in \
+ RegexLexer.get_tokens_unprocessed(self, text):
+ # check for reserved words and pervasives
+ if token is Name:
+ if value in self.reserved_words:
+ token = Keyword.Reserved
+ elif value in self.pervasives:
+ token = Keyword.Pervasive
+ # return result
+ yield index, token, value