diff options
author | Georg Brandl <georg@python.org> | 2015-03-08 07:29:37 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2015-03-08 07:29:37 +0100 |
commit | 48eb4ba2f472b5d3edaf8446f7bc127a5272c46f (patch) | |
tree | 54b50cd2c8a0450490aab94673aec76f6b295c3a | |
parent | 0064569c71402121242a09d5e5cb3eddf1ebd6d6 (diff) | |
download | pygments-48eb4ba2f472b5d3edaf8446f7bc127a5272c46f.tar.gz |
Add overhauled Modula-2 lexer from https://bitbucket.org/trijezdci/m2r10/src/tip/_GRAMMAR/
-rw-r--r-- | pygments/lexers/_mapping.py | 2 | ||||
-rw-r--r-- | pygments/lexers/modula2.py | 1568 | ||||
-rw-r--r-- | pygments/lexers/pascal.py | 196 | ||||
-rw-r--r-- | tests/examplefiles/modula2_test_cases.def | 354 |
4 files changed, 1926 insertions, 194 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 99461c55..2b836ac6 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -221,7 +221,7 @@ LEXERS = { 'MatlabSessionLexer': ('pygments.lexers.matlab', 'Matlab session', ('matlabsession',), (), ()), 'MiniDLexer': ('pygments.lexers.d', 'MiniD', ('minid',), (), ('text/x-minidsrc',)), 'ModelicaLexer': ('pygments.lexers.modeling', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)), - 'Modula2Lexer': ('pygments.lexers.pascal', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), + 'Modula2Lexer': ('pygments.lexers.modula2', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), 'MoinWikiLexer': ('pygments.lexers.markup', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), 'MonkeyLexer': ('pygments.lexers.basic', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)), 'MoonScriptLexer': ('pygments.lexers.scripting', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')), diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py new file mode 100644 index 00000000..eec913b6 --- /dev/null +++ b/pygments/lexers/modula2.py @@ -0,0 +1,1568 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.modula2 + ~~~~~~~~~~~~~~~~~~~~~~~ + + Multi-Dialect Lexer for Modula-2. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include +from pygments.util import get_bool_opt, get_list_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, \ + String, Number, Punctuation, Error + +__all__ = ['Modula2Lexer'] + + +# Multi-Dialect Modula-2 Lexer +class Modula2Lexer(RegexLexer): + """ + For `Modula-2 <http://www.modula2.org/>`_ source code. + + The Modula-2 lexer supports several dialects. By default, it operates in + fallback mode, recognising the *combined* literals, punctuation symbols + and operators of all supported dialects, and the *combined* reserved words + and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not + differentiating between library defined identifiers. + + To select a specific dialect, a dialect option may be passed + or a dialect tag may be embedded into a source file. + + Dialect Options: + + `m2pim` + Select PIM Modula-2 dialect. + `m2iso` + Select ISO Modula-2 dialect. + `m2r10` + Select Modula-2 R10 dialect. + `objm2` + Select Objective Modula-2 dialect. + + The PIM and ISO dialect options may be qualified with a language extension. + + Language Extensions: + + `+aglet` + Select Aglet Modula-2 extensions, available with m2iso. + `+gm2` + Select GNU Modula-2 extensions, available with m2pim. + `+p1` + Select p1 Modula-2 extensions, available with m2iso. + `+xds` + Select XDS Modula-2 extensions, available with m2iso. + + + Passing a Dialect Option via Unix Commandline Interface + + Dialect options may be passed to the lexer using the `dialect` key. + Only one such option should be passed. If multiple dialect options are + passed, the first valid option is used, any subsequent options are ignored. + + Examples: + + `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input` + Use ISO dialect to render input to HTML output + `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input` + Use ISO dialect with p1 extensions to render input to RTF output + + + Embedding a Dialect Option within a source file + + A dialect option may be embedded in a source file in form of a dialect + tag, a specially formatted comment that specifies a dialect option. + + Dialect Tag EBNF: + + dialectTag : + OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ; + + dialectOption : + 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' | + 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ; + + Prefix : '!' ; + + OpeningCommentDelim : '(*' ; + + ClosingCommentDelim : '*)' ; + + No whitespace is permitted between the tokens of a dialect tag. + + In the event that a source file contains multiple dialect tags, the first + tag that contains a valid dialect option will be used and any subsequent + dialect tags will be ignored. Ideally, a dialect tag should be placed + at the beginning of a source file. + + An embedded dialect tag overrides a dialect option set via command line. + + Examples: + + `(*!m2r10*) DEFINITION MODULE Foobar; ...` + Use Modula2 R10 dialect to render this source file. + `(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...` + Use PIM dialect with GNU extensions to render this source file. + + + Algol Publication Mode: + + In Algol publication mode, source text is rendered for publication of + algorithms in scientific papers and academic texts, following the format + of the Revised Algol-60 Language Report. It is activated by passing + one of two corresponding styles as an option: + + `algol` + render reserved words lowercase underline boldface + and builtins lowercase boldface italic + `algol_nu` + render reserved words lowercase boldface (no underlining) + and builtins lowercase boldface italic + + The lexer automatically performs the required lowercase conversion when + this mode is activated. + + Example: + + `$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input` + Render input file in Algol publication mode to LaTeX output. + + + Rendering Mode of First Class ADT Identifiers: + + The rendering of standard library first class ADT identifiers is controlled + by option flag "treat_stdlib_adts_as_builtins". + + When this option is turned on, standard library ADT identifiers are rendered + as builtins. When it is turned off, they are rendered as ordinary library + identifiers. + + `treat_stdlib_adts_as_builtins` (default: On) + + The option is useful for dialects that support ADTs as first class objects + and provide ADTs in the standard library that would otherwise be built-in. + + At present, only Modula-2 R10 supports library ADTs as first class objects + and therefore, no ADT identifiers are defined for any other dialects. + + Example: + + `$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...` + Render standard library ADTs as ordinary library types. + + .. versionadded:: 1.3 + + .. versionchanged:: 2.1 + Added multi-dialect support. + """ + name = 'Modula-2' + aliases = ['modula2', 'm2'] + filenames = ['*.def', '*.mod'] + mimetypes = ['text/x-modula2'] + + flags = re.MULTILINE | re.DOTALL + + tokens = { + 'whitespace': [ + (r'\n+', Text), # blank lines + (r'\s+', Text), # whitespace + ], + 'dialecttags': [ + # PIM Dialect Tag + (r'\(\*!m2pim\*\)', Comment.Special), + # ISO Dialect Tag + (r'\(\*!m2iso\*\)', Comment.Special), + # M2R10 Dialect Tag + (r'\(\*!m2r10\*\)', Comment.Special), + # ObjM2 Dialect Tag + (r'\(\*!objm2\*\)', Comment.Special), + # Aglet Extensions Dialect Tag + (r'\(\*!m2iso\+aglet\*\)', Comment.Special), + # GNU Extensions Dialect Tag + (r'\(\*!m2pim\+gm2\*\)', Comment.Special), + # p1 Extensions Dialect Tag + (r'\(\*!m2iso\+p1\*\)', Comment.Special), + # XDS Extensions Dialect Tag + (r'\(\*!m2iso\+xds\*\)', Comment.Special), + ], + 'identifiers': [ + (r'([a-zA-Z_$][\w$]*)', Name), + ], + 'prefixed_number_literals': [ + # + # Base-2, whole number + (r'0b[01]+(\'[01]+)*', Number.Bin), + # + # Base-16, whole number + (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex), + ], + 'plain_number_literals': [ + # + # Base-10, real number with exponent + (r'[0-9]+(\'[0-9]+)*' # integral part \ + r'\.[0-9]+(\'[0-9]+)*' # fractional part \ + r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent \ + Number.Float), + # + # Base-10, real number without exponent + (r'[0-9]+(\'[0-9]+)*' # integral part \ + r'\.[0-9]+(\'[0-9]+)*', # fractional part \ + Number.Float), + # + # Base-10, whole number + (r'[0-9]+(\'[0-9]+)*', Number.Integer), + ], + 'suffixed_number_literals': [ + # + # Base-8, whole number + (r'[0-7]+B', Number.Oct), + # + # Base-8, character code + (r'[0-7]+C', Number.Oct), + # + # Base-16, number + (r'[0-9A-F]+H', Number.Hex), + ], + 'string_literals': [ + (r"'(\\\\|\\'|[^'])*'", String), # single quoted string + (r'"(\\\\|\\"|[^"])*"', String), # double quoted string + ], + 'digraph_operators': [ + # Dot Product Operator + (r'\*\.', Operator), + # Array Concatenation Operator + (r'\+>', Operator), # M2R10 + ObjM2 + # Inequality Operator + (r'<>', Operator), # ISO + PIM + # Less-Or-Equal, Subset + (r'<=', Operator), + # Greater-Or-Equal, Superset + (r'>=', Operator), + # Identity Operator + (r'==', Operator), # M2R10 + ObjM2 + # Type Conversion Operator + (r'::', Operator), # M2R10 + ObjM2 + # Assignment Symbol + (r':=', Operator), + # Postfix Increment Mutator + (r'\+\+', Operator), # M2R10 + ObjM2 + # Postfix Decrement Mutator + (r'--', Operator), # M2R10 + ObjM2 + ], + 'unigraph_operators': [ + # Arithmetic Operators + (r'[+-]', Operator), + (r'[*/]', Operator), + # ISO 80000-2 compliant Set Difference Operator + (r'\\', Operator), # M2R10 + ObjM2 + # Relational Operators + (r'[=#<>]', Operator), + # Dereferencing Operator + (r'\^', Operator), + # Dereferencing Operator Synonym + (r'@', Operator), # ISO + # Logical AND Operator Synonym + (r'&', Operator), # PIM + ISO + # Logical NOT Operator Synonym + (r'~', Operator), # PIM + ISO + # Smalltalk Message Prefix + (r'`', Operator), # ObjM2 + ], + 'digraph_punctuation': [ + # Range Constructor + (r'\.\.', Punctuation), + # Opening Chevron Bracket + (r'<<', Punctuation), # M2R10 + ISO + # Closing Chevron Bracket + (r'>>', Punctuation), # M2R10 + ISO + # Blueprint Punctuation + (r'->', Punctuation), # M2R10 + ISO + # Distinguish |# and # in M2 R10 + (r'\|#', Punctuation), + # Distinguish ## and # in M2 R10 + (r'##', Punctuation), + # Distinguish |* and * in M2 R10 + (r'\|\*', Punctuation), + ], + 'unigraph_punctuation': [ + # Common Punctuation + (r'[\(\)\[\]{},.:;\|]', Punctuation), + # Case Label Separator Synonym + (r'!', Punctuation), # ISO + # Blueprint Punctuation + (r'\?', Punctuation), # M2R10 + ObjM2 + ], + 'comments': [ + # Single Line Comment + (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2 + # Block Comment + (r'\(\*([^$].*?)\*\)', Comment.Multiline), + # Template Block Comment + (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2 + ], + 'pragmas': [ + # ISO Style Pragmas + (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2 + # Pascal Style Pragmas + (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM + ], + 'root': [ + include('whitespace'), + include('dialecttags'), + include('pragmas'), + include('comments'), + include('identifiers'), + include('suffixed_number_literals'), # PIM + ISO + include('prefixed_number_literals'), # M2R10 + ObjM2 + include('plain_number_literals'), + include('string_literals'), + include('digraph_punctuation'), + include('digraph_operators'), + include('unigraph_punctuation'), + include('unigraph_operators'), + ] + } + +# C o m m o n D a t a s e t s + + # Common Reserved Words Dataset + common_reserved_words = ( + # 37 common reserved words + 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', + 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF', + 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', + 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN', + 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', + ) + + # Common Builtins Dataset + common_builtins = ( + # 16 common builtins + 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER', + 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL', + 'TRUE', + ) + + # Common Pseudo-Module Builtins Dataset + common_pseudo_builtins = ( + # 4 common pseudo builtins + 'ADDRESS', 'BYTE', 'WORD', 'ADR' + ) + +# P I M M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for PIM Modula-2 + pim_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', + '+>', '->', '<<', '>>', '|#', '##', + ) + + # PIM Modula-2 Additional Reserved Words Dataset + pim_additional_reserved_words = ( + # 3 additional reserved words + 'EXPORT', 'QUALIFIED', 'WITH', + ) + + # PIM Modula-2 Additional Builtins Dataset + pim_additional_builtins = ( + # 16 additional builtins + 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH', + 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL', + ) + + # PIM Modula-2 Additional Pseudo-Module Builtins Dataset + pim_additional_pseudo_builtins = ( + # 5 additional pseudo builtins + 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER', + ) + +# I S O M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for ISO Modula-2 + iso_lexemes_to_reject = ( + '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->', + '<<', '>>', '|#', '##', + ) + + # ISO Modula-2 Additional Reserved Words Dataset + iso_additional_reserved_words = ( + # 9 additional reserved words (ISO 10514-1) + 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED', + 'REM', 'RETRY', 'WITH', + # 10 additional reserved words (ISO 10514-2 & ISO 10514-3) + 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY', + 'REVEAL', 'TRACED', 'UNSAFEGUARDED', + ) + + # ISO Modula-2 Additional Builtins Dataset + iso_additional_builtins = ( + # 26 additional builtins (ISO 10514-1) + 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', + 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH', + 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE', + 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', + # 5 additional builtins (ISO 10514-2 & ISO 10514-3) + 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF', + ) + + # ISO Modula-2 Additional Pseudo-Module Builtins Dataset + iso_additional_pseudo_builtins = ( + # 14 additional builtins (SYSTEM) + 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC', + 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR', + 'ROTATE', 'SHIFT', 'CAST', 'TSIZE', + # 13 additional builtins (COROUTINES) + 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER', + 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN', + 'NEWCOROUTINE', 'PROT', 'TRANSFER', + # 9 additional builtins (EXCEPTIONS) + 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber', + 'ExceptionSource', 'GetMessage', 'IsCurrentSource', + 'IsExceptionalExecution', 'RAISE', + # 3 additional builtins (TERMINATION) + 'TERMINATION', 'IsTerminating', 'HasHalted', + # 4 additional builtins (M2EXCEPTION) + 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception', + 'indexException', 'rangeException', 'caseSelectException', + 'invalidLocation', 'functionException', 'wholeValueException', + 'wholeDivException', 'realValueException', 'realDivException', + 'complexValueException', 'complexDivException', 'protException', + 'sysException', 'coException', 'exException', + ) + +# M o d u l a - 2 R 1 0 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Modula-2 R10 + m2r10_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '&', '<>', + ) + + # Modula-2 R10 reserved words in addition to the common set + m2r10_additional_reserved_words = ( + # 12 additional reserved words + 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE', + 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN', + # 2 additional reserved words with symbolic assembly option + 'ASM', 'REG', + ) + + # Modula-2 R10 builtins in addition to the common set + m2r10_additional_builtins = ( + # 26 additional builtins + 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD', + 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT', + 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE', + 'UNICHAR', 'WRITE', 'WRITEF', + ) + + # Modula-2 R10 Additional Pseudo-Module Builtins Dataset + m2r10_additional_pseudo_builtins = ( + # 13 additional builtins (TPROPERTIES) + 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL', + 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION', + 'TMAXEXP', 'TMINEXP', + # 4 additional builtins (CONVERSION) + 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL', + # 35 additional builtins (UNSAFE) + 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC', + 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC', + 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR', + 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT', + 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC', + # 11 additional builtins (ATOMIC) + 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND', + 'BWNAND', 'BWOR', 'BWXOR', + # 7 additional builtins (COMPILER) + 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT', + 'HASH', + # 5 additional builtins (ASSEMBLER) + 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE', + ) + +# O b j e c t i v e M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Objective Modula-2 + objm2_lexemes_to_reject = ( + '!', '$', '%', '&', '<>', + ) + + # Objective Modula-2 Extensions + # reserved words in addition to Modula-2 R10 + objm2_additional_reserved_words = ( + # 16 additional reserved words + 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', + 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', + 'SUPER', 'TRY', + ) + + # Objective Modula-2 Extensions + # builtins in addition to Modula-2 R10 + objm2_additional_builtins = ( + # 3 additional builtins + 'OBJECT', 'NO', 'YES', + ) + + # Objective Modula-2 Extensions + # pseudo-module builtins in addition to Modula-2 R10 + objm2_additional_pseudo_builtins = ( + # None + ) + +# A g l e t M o d u l a - 2 D a t a s e t s + + # Aglet Extensions + # reserved words in addition to ISO Modula-2 + aglet_additional_reserved_words = ( + # None + ) + + # Aglet Extensions + # builtins in addition to ISO Modula-2 + aglet_additional_builtins = ( + # 9 additional builtins + 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', + 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32', + ) + + # Aglet Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + aglet_additional_pseudo_builtins = ( + # None + ) + +# G N U M o d u l a - 2 D a t a s e t s + + # GNU Extensions + # reserved words in addition to PIM Modula-2 + gm2_additional_reserved_words = ( + # 10 additional reserved words + 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', + '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', + ) + + # GNU Extensions + # builtins in addition to PIM Modula-2 + gm2_additional_builtins = ( + # 21 additional builtins + 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', + 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', + 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', + 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', + ) + + # GNU Extensions + # pseudo-module builtins in addition to PIM Modula-2 + gm2_additional_pseudo_builtins = ( + # None + ) + +# p 1 M o d u l a - 2 D a t a s e t s + + # p1 Extensions + # reserved words in addition to ISO Modula-2 + p1_additional_reserved_words = ( + # None + ) + + # p1 Extensions + # builtins in addition to ISO Modula-2 + p1_additional_builtins = ( + # None + ) + + # p1 Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + p1_additional_pseudo_builtins = ( + # 1 additional builtin + 'BCD', + ) + +# X D S M o d u l a - 2 D a t a s e t s + + # XDS Extensions + # reserved words in addition to ISO Modula-2 + xds_additional_reserved_words = ( + # 1 additional reserved word + 'SEQ', + ) + + # XDS Extensions + # builtins in addition to ISO Modula-2 + xds_additional_builtins = ( + # 9 additional builtins + 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN', + 'LONGCARD', 'SHORTCARD', 'SHORTINT', + ) + + # XDS Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + xds_additional_pseudo_builtins = ( + # 22 additional builtins (SYSTEM) + 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8', + 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE', + 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void' + # 3 additional builtins (COMPILER) + 'COMPILER', 'OPTION', 'EQUATION' + ) + +# P I M S t a n d a r d L i b r a r y D a t a s e t s + + # PIM Modula-2 Standard Library Modules Dataset + pim_stdlib_module_identifiers = ( + 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage', + ) + + # PIM Modula-2 Standard Library Types Dataset + pim_stdlib_type_identifiers = ( + 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission', + 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand', + 'DirectoryCommand', + ) + + # PIM Modula-2 Standard Library Procedures Dataset + pim_stdlib_proc_identifiers = ( + 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn', + 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite', + 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset', + 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar', + 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName', + 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput', + 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd', + 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd', + 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp', + 'ln', 'sin', 'cos', 'arctan', 'entier','ALLOCATE', 'DEALLOCATE', + ) + + # PIM Modula-2 Standard Library Variables Dataset + pim_stdlib_var_identifiers = ( + 'Done', 'termCH', 'in', 'out' + ) + + # PIM Modula-2 Standard Library Constants Dataset + pim_stdlib_const_identifiers = ( + 'EOL', + ) + +# I S O S t a n d a r d L i b r a r y D a t a s e t s + + # ISO Modula-2 Standard Library Modules Dataset + iso_stdlib_module_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Types Dataset + iso_stdlib_type_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Procedures Dataset + iso_stdlib_proc_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Variables Dataset + iso_stdlib_var_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Constants Dataset + iso_stdlib_const_identifiers = ( + # TO DO + ) + +# M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s + + # Modula-2 R10 Standard Library ADTs Dataset + m2r10_stdlib_adt_identifiers = ( + 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET', + 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD', + 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT', + 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64', + 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8', + 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8', + 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16', + 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32', + 'INT64', 'INT128', 'STRING', 'UNISTRING', + ) + + # Modula-2 R10 Standard Library Blueprints Dataset + m2r10_stdlib_blueprint_identifiers = ( + 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar', + 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal', + 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray', + 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet', + 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet', + 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension', + 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath', + ) + + # Modula-2 R10 Standard Library Modules Dataset + m2r10_stdlib_module_identifiers = ( + 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO', + 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO', + 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath', + 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath', + 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport', + ) + + # Modula-2 R10 Standard Library Types Dataset + m2r10_stdlib_type_identifiers = ( + 'File', 'Status', + # TO BE COMPLETED + ) + + # Modula-2 R10 Standard Library Procedures Dataset + m2r10_stdlib_proc_identifiers = ( + 'ALLOCATE', 'DEALLOCATE', 'SIZE', + # TO BE COMPLETED + ) + + # Modula-2 R10 Standard Library Variables Dataset + m2r10_stdlib_var_identifiers = ( + 'stdIn', 'stdOut', 'stdErr', + ) + + # Modula-2 R10 Standard Library Constants Dataset + m2r10_stdlib_const_identifiers = ( + 'pi', 'tau', + ) + +# D i a l e c t s + + + # Dialect modes + dialects = ( + 'unknown', + 'm2pim', 'm2iso', 'm2r10', 'objm2', + 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds', + ) + +# D a t a b a s e s + + # Lexemes to Mark as Errors Database + lexemes_to_reject_db = { + # Lexemes to reject for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Lexemes to reject for PIM Modula-2 + 'm2pim' : ( + pim_lexemes_to_reject, + ), + # Lexemes to reject for ISO Modula-2 + 'm2iso' : ( + iso_lexemes_to_reject, + ), + # Lexemes to reject for Modula-2 R10 + 'm2r10' : ( + m2r10_lexemes_to_reject, + ), + # Lexemes to reject for Objective Modula-2 + 'objm2' : ( + objm2_lexemes_to_reject, + ), + # Lexemes to reject for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_lexemes_to_reject, + ), + # Lexemes to reject for GNU Modula-2 + 'm2pim+gm2' : ( + pim_lexemes_to_reject, + ), + # Lexemes to reject for p1 Modula-2 + 'm2iso+p1' : ( + iso_lexemes_to_reject, + ), + # Lexemes to reject for XDS Modula-2 + 'm2iso+xds' : ( + iso_lexemes_to_reject, + ), + } + + # Reserved Words Database + reserved_words_db = { + # Reserved words for unknown dialect + 'unknown' : ( + common_reserved_words, + pim_additional_reserved_words, + iso_additional_reserved_words, + m2r10_additional_reserved_words, + ), + + # Reserved words for PIM Modula-2 + 'm2pim' : ( + common_reserved_words, + pim_additional_reserved_words, + ), + + # Reserved words for Modula-2 R10 + 'm2iso' : ( + common_reserved_words, + iso_additional_reserved_words, + ), + + # Reserved words for ISO Modula-2 + 'm2r10' : ( + common_reserved_words, + m2r10_additional_reserved_words, + ), + + # Reserved words for Objective Modula-2 + 'objm2' : ( + common_reserved_words, + m2r10_additional_reserved_words, + objm2_additional_reserved_words, + ), + + # Reserved words for Aglet Modula-2 Extensions + 'm2iso+aglet' : ( + common_reserved_words, + iso_additional_reserved_words, + aglet_additional_reserved_words, + ), + + # Reserved words for GNU Modula-2 Extensions + 'm2pim+gm2' : ( + common_reserved_words, + pim_additional_reserved_words, + gm2_additional_reserved_words, + ), + + # Reserved words for p1 Modula-2 Extensions + 'm2iso+p1' : ( + common_reserved_words, + iso_additional_reserved_words, + p1_additional_reserved_words, + ), + + # Reserved words for XDS Modula-2 Extensions + 'm2iso+xds' : ( + common_reserved_words, + iso_additional_reserved_words, + xds_additional_reserved_words, + ), + } + + # Builtins Database + builtins_db = { + # Builtins for unknown dialect + 'unknown' : ( + common_builtins, + pim_additional_builtins, + iso_additional_builtins, + m2r10_additional_builtins, + ), + + # Builtins for PIM Modula-2 + 'm2pim' : ( + common_builtins, + pim_additional_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2iso' : ( + common_builtins, + iso_additional_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2r10' : ( + common_builtins, + m2r10_additional_builtins, + ), + + # Builtins for Objective Modula-2 + 'objm2' : ( + common_builtins, + m2r10_additional_builtins, + objm2_additional_builtins, + ), + + # Builtins for Aglet Modula-2 Extensions + 'm2iso+aglet' : ( + common_builtins, + iso_additional_builtins, + aglet_additional_builtins, + ), + + # Builtins for GNU Modula-2 Extensions + 'm2pim+gm2' : ( + common_builtins, + pim_additional_builtins, + gm2_additional_builtins, + ), + + # Builtins for p1 Modula-2 Extensions + 'm2iso+p1' : ( + common_builtins, + iso_additional_builtins, + p1_additional_builtins, + ), + + # Builtins for XDS Modula-2 Extensions + 'm2iso+xds' : ( + common_builtins, + iso_additional_builtins, + xds_additional_builtins, + ), + } + + # Pseudo-Module Builtins Database + pseudo_builtins_db = { + # Builtins for unknown dialect + 'unknown' : ( + common_pseudo_builtins, + pim_additional_pseudo_builtins, + iso_additional_pseudo_builtins, + m2r10_additional_pseudo_builtins, + ), + + # Builtins for PIM Modula-2 + 'm2pim' : ( + common_pseudo_builtins, + pim_additional_pseudo_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2iso' : ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2r10' : ( + common_pseudo_builtins, + m2r10_additional_pseudo_builtins, + ), + + # Builtins for Objective Modula-2 + 'objm2' : ( + common_pseudo_builtins, + m2r10_additional_pseudo_builtins, + objm2_additional_pseudo_builtins, + ), + + # Builtins for Aglet Modula-2 Extensions + 'm2iso+aglet' : ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + aglet_additional_pseudo_builtins, + ), + + # Builtins for GNU Modula-2 Extensions + 'm2pim+gm2' : ( + common_pseudo_builtins, + pim_additional_pseudo_builtins, + gm2_additional_pseudo_builtins, + ), + + # Builtins for p1 Modula-2 Extensions + 'm2iso+p1' : ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + p1_additional_pseudo_builtins, + ), + + # Builtins for XDS Modula-2 Extensions + 'm2iso+xds' : ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + xds_additional_pseudo_builtins, + ), + } + + # Standard Library ADTs Database + stdlib_adts_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library ADTs for PIM Modula-2 + 'm2pim' : ( + # No first class library types + ), + + # Standard Library ADTs for ISO Modula-2 + 'm2iso' : ( + # No first class library types + ), + + # Standard Library ADTs for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_adt_identifiers, + ), + + # Standard Library ADTs for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_adt_identifiers, + ), + + # Standard Library ADTs for Aglet Modula-2 + 'm2iso+aglet' : ( + # No first class library types + ), + + # Standard Library ADTs for GNU Modula-2 + 'm2pim+gm2' : ( + # No first class library types + ), + + # Standard Library ADTs for p1 Modula-2 + 'm2iso+p1' : ( + # No first class library types + ), + + # Standard Library ADTs for XDS Modula-2 + 'm2iso+xds' : ( + # No first class library types + ), + } + + # Standard Library Modules Database + stdlib_modules_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library Modules for PIM Modula-2 + 'm2pim' : ( + pim_stdlib_module_identifiers, + ), + + # Standard Library Modules for ISO Modula-2 + 'm2iso' : ( + iso_stdlib_module_identifiers, + ), + + # Standard Library Modules for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_blueprint_identifiers, + m2r10_stdlib_module_identifiers, + m2r10_stdlib_adt_identifiers, + ), + + # Standard Library Modules for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_blueprint_identifiers, + m2r10_stdlib_module_identifiers, + ), + + # Standard Library Modules for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_module_identifiers, + ), + + # Standard Library Modules for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_module_identifiers, + ), + + # Standard Library Modules for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_module_identifiers, + ), + + # Standard Library Modules for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_module_identifiers, + ), + } + + # Standard Library Types Database + stdlib_types_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library Types for PIM Modula-2 + 'm2pim' : ( + pim_stdlib_type_identifiers, + ), + + # Standard Library Types for ISO Modula-2 + 'm2iso' : ( + iso_stdlib_type_identifiers, + ), + + # Standard Library Types for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_type_identifiers, + ), + + # Standard Library Types for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_type_identifiers, + ), + + # Standard Library Types for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_type_identifiers, + ), + + # Standard Library Types for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_type_identifiers, + ), + + # Standard Library Types for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_type_identifiers, + ), + + # Standard Library Types for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_type_identifiers, + ), + } + + # Standard Library Procedures Database + stdlib_procedures_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library Procedures for PIM Modula-2 + 'm2pim' : ( + pim_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for ISO Modula-2 + 'm2iso' : ( + iso_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_proc_identifiers, + ), + } + + # Standard Library Variables Database + stdlib_variables_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library Variables for PIM Modula-2 + 'm2pim' : ( + pim_stdlib_var_identifiers, + ), + + # Standard Library Variables for ISO Modula-2 + 'm2iso' : ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_var_identifiers, + ), + + # Standard Library Variables for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_var_identifiers, + ), + + # Standard Library Variables for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_var_identifiers, + ), + + # Standard Library Variables for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_var_identifiers, + ), + } + + # Standard Library Constants Database + stdlib_constants_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library Constants for PIM Modula-2 + 'm2pim' : ( + pim_stdlib_const_identifiers, + ), + + # Standard Library Constants for ISO Modula-2 + 'm2iso' : ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_const_identifiers, + ), + + # Standard Library Constants for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_const_identifiers, + ), + + # Standard Library Constants for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_const_identifiers, + ), + + # Standard Library Constants for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_const_identifiers, + ), + } + +# M e t h o d s + + # initialise a lexer instance + def __init__(self, **options): + # + # Alias for unknown dialect + global UNKNOWN + UNKNOWN = self.dialects[0] + # + # check dialect options + # + dialects = get_list_opt(options, 'dialect', []) + # + for dialect_option in dialects: + if dialect_option in self.dialects[1:-1]: + # valid dialect option found + self.set_dialect(dialect_option) + break + # + # Fallback Mode (DEFAULT) + else: + # no valid dialect option + self.set_dialect(UNKNOWN) + # + self.dialect_set_by_tag = False + # + # check style options + # + styles = get_list_opt(options, 'style', []) + # + # use lowercase mode for Algol style + if 'algol' in styles or 'algol_nu' in styles: + self.algol_publication_mode = True + else: + self.algol_publication_mode = False + # + # Check option flags + # + self.treat_stdlib_adts_as_builtins = \ + get_bool_opt(options, 'treat_stdlib_adts_as_builtins', True) + # + # call superclass initialiser + RegexLexer.__init__(self, **options) + + # Set lexer to a specified dialect + def set_dialect(self, dialect_id): + # + #if __debug__: + # print 'entered set_dialect with arg: ', dialect_id + # + # check dialect name against known dialects + if dialect_id not in self.dialects: + dialect = UNKNOWN # default + else: + dialect = dialect_id + # + # compose lexemes to reject set + lexemes_to_reject_set = set() + # add each list of reject lexemes for this dialect + for list in self.lexemes_to_reject_db[dialect]: + lexemes_to_reject_set.update(set(list)) + # + # compose reserved words set + reswords_set = set() + # add each list of reserved words for this dialect + for list in self.reserved_words_db[dialect]: + reswords_set.update(set(list)) + # + # compose builtins set + builtins_set = set() + # add each list of builtins for this dialect excluding reserved words + for list in self.builtins_db[dialect]: + builtins_set.update(set(list).difference(reswords_set)) + # + # compose pseudo-builtins set + pseudo_builtins_set = set() + # add each list of builtins for this dialect excluding reserved words + for list in self.pseudo_builtins_db[dialect]: + pseudo_builtins_set.update(set(list).difference(reswords_set)) + # + # compose ADTs set + adts_set = set() + # add each list of ADTs for this dialect excluding reserved words + for list in self.stdlib_adts_db[dialect]: + adts_set.update(set(list).difference(reswords_set)) + # + # compose modules set + modules_set = set() + # add each list of builtins for this dialect excluding builtins + for list in self.stdlib_modules_db[dialect]: + modules_set.update(set(list).difference(builtins_set)) + # + # compose types set + types_set = set() + # add each list of types for this dialect excluding builtins + for list in self.stdlib_types_db[dialect]: + types_set.update(set(list).difference(builtins_set)) + # + # compose procedures set + procedures_set = set() + # add each list of procedures for this dialect excluding builtins + for list in self.stdlib_procedures_db[dialect]: + procedures_set.update(set(list).difference(builtins_set)) + # + # compose variables set + variables_set = set() + # add each list of variables for this dialect excluding builtins + for list in self.stdlib_variables_db[dialect]: + variables_set.update(set(list).difference(builtins_set)) + # + # compose constants set + constants_set = set() + # add each list of constants for this dialect excluding builtins + for list in self.stdlib_constants_db[dialect]: + constants_set.update(set(list).difference(builtins_set)) + # + # update lexer state + self.dialect = dialect + self.lexemes_to_reject = lexemes_to_reject_set + self.reserved_words = reswords_set + self.builtins = builtins_set + self.pseudo_builtins = pseudo_builtins_set + self.adts = adts_set + self.modules = modules_set + self.types = types_set + self.procedures = procedures_set + self.variables = variables_set + self.constants = constants_set + # + #if __debug__: + # print 'exiting set_dialect' + # print ' self.dialect: ', self.dialect + # print ' self.lexemes_to_reject: ', self.lexemes_to_reject + # print ' self.reserved_words: ', self.reserved_words + # print ' self.builtins: ', self.builtins + # print ' self.pseudo_builtins: ', self.pseudo_builtins + # print ' self.adts: ', self.adts + # print ' self.modules: ', self.modules + # print ' self.types: ', self.types + # print ' self.procedures: ', self.procedures + # print ' self.variables: ', self.variables + # print ' self.types: ', self.types + # print ' self.constants: ', self.constants + + # Extracts a dialect name from a dialect tag comment string and checks + # the extracted name against known dialects. If a match is found, the + # matching name is returned, otherwise dialect id 'unknown' is returned + def get_dialect_from_dialect_tag(self, dialect_tag): + # + #if __debug__: + # print 'entered get_dialect_from_dialect_tag with arg: ', dialect_tag + # + # constants + left_tag_delim = '(*!' + right_tag_delim = '*)' + left_tag_delim_len = len(left_tag_delim) + right_tag_delim_len = len(right_tag_delim) + indicator_start = left_tag_delim_len + indicator_end = -(right_tag_delim_len) + # + # check comment string for dialect indicator + if len(dialect_tag) > (left_tag_delim_len + right_tag_delim_len) \ + and dialect_tag.startswith(left_tag_delim) \ + and dialect_tag.endswith(right_tag_delim): + # + #if __debug__: + # print 'dialect tag found' + # + # extract dialect indicator + indicator = dialect_tag[indicator_start:indicator_end] + # + #if __debug__: + # print 'extracted: ', indicator + # + # check against known dialects + for index in range(1, len(self.dialects)): + # + #if __debug__: + # print 'dialects[', index, ']: ', self.dialects[index] + # + if indicator == self.dialects[index]: + # + #if __debug__: + # print 'matching dialect found' + # + # indicator matches known dialect + return indicator + else: + # indicator does not match any dialect + return UNKNOWN # default + else: + # invalid indicator string + return UNKNOWN # default + + # intercept the token stream, modify token attributes and return them + def get_tokens_unprocessed(self, text): + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): + # + # check for dialect tag if dialect has not been set by tag + if not self.dialect_set_by_tag and token == Comment.Special: + indicated_dialect = self.get_dialect_from_dialect_tag(value) + if indicated_dialect != UNKNOWN: + # token is a dialect indicator + # reset reserved words and builtins + self.set_dialect(indicated_dialect) + self.dialect_set_by_tag = True + # + # check for reserved words, predefined and stdlib identifiers + if token is Name: + if value in self.reserved_words: + token = Keyword.Reserved + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.builtins: + token = Name.Builtin + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.pseudo_builtins: + token = Name.Builtin.Pseudo + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.adts: + if not self.treat_stdlib_adts_as_builtins: + token = Name.Namespace + else: + token = Name.Builtin.Pseudo + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.modules: + token = Name.Namespace + # + elif value in self.types: + token = Name.Class + # + elif value in self.procedures: + token = Name.Function + # + elif value in self.variables: + token = Name.Variable + # + elif value in self.constants: + token = Name.Constant + # + elif token in Number: + # + # mark prefix number literals as error for PIM and ISO dialects + if self.dialect not in (UNKNOWN, 'm2r10', 'objm2'): + if "'" in value or value[0:2] in ('0b', '0x', '0u'): + token = Error + # + elif self.dialect in ('m2r10', 'objm2'): + # mark base-8 number literals as errors for M2 R10 and ObjM2 + if token is Number.Oct: + token = Error + # mark suffix base-16 literals as errors for M2 R10 and ObjM2 + elif token is Number.Hex and 'H' in value: + token = Error + # mark real numbers with E as errors for M2 R10 and ObjM2 + elif token is Number.Float and 'E' in value: + token = Error + # + elif token in Comment: + # + # mark single line comment as error for PIM and ISO dialects + if token is Comment.Single: + if self.dialect not in [UNKNOWN, 'm2r10', 'objm2']: + token = Error + # + if token is Comment.Preproc: + # mark ISO pragma as error for PIM dialects + if value.startswith('<*') and \ + self.dialect.startswith('m2pim'): + token = Error + # mark PIM pragma as comment for other dialects + elif value.startswith('(*$') and \ + self.dialect != UNKNOWN and \ + not self.dialect.startswith('m2pim'): + token = Comment.Multiline + # + else: # token is neither Name nor Comment + # + # mark lexemes matching the dialect's error token set as errors + if value in self.lexemes_to_reject: + token = Error + # + # substitute lexemes when in Algol mode + if self.algol_publication_mode: + if value == '#': + value = u'≠' + elif value == '<=': + value = u'≤' + elif value == '>=': + value = u'≥' + elif value == '==': + value = u'≡' + elif value == '*.': + value = u'•' + + # return result + yield index, token, value + +# end of modula2.py diff --git a/pygments/lexers/pascal.py b/pygments/lexers/pascal.py index 2895fba7..d3ce6a3a 100644 --- a/pygments/lexers/pascal.py +++ b/pygments/lexers/pascal.py @@ -18,7 +18,9 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Error from pygments.scanner import Scanner -__all__ = ['DelphiLexer', 'Modula2Lexer', 'AdaLexer'] +from pygments.lexers.modula2 import Modula2Lexer + +__all__ = ['DelphiLexer', 'AdaLexer'] class DelphiLexer(Lexer): @@ -505,198 +507,6 @@ class DelphiLexer(Lexer): yield scanner.start_pos, token, scanner.match or '' -class Modula2Lexer(RegexLexer): - """ - For `Modula-2 <http://www.modula2.org/>`_ source code. - - Additional options that determine which keywords are highlighted: - - `pim` - Select PIM Modula-2 dialect (default: True). - `iso` - Select ISO Modula-2 dialect (default: False). - `objm2` - Select Objective Modula-2 dialect (default: False). - `gm2ext` - Also highlight GNU extensions (default: False). - - .. versionadded:: 1.3 - """ - name = 'Modula-2' - aliases = ['modula2', 'm2'] - filenames = ['*.def', '*.mod'] - mimetypes = ['text/x-modula2'] - - flags = re.MULTILINE | re.DOTALL - - tokens = { - 'whitespace': [ - (r'\n+', Text), # blank lines - (r'\s+', Text), # whitespace - ], - 'identifiers': [ - (r'([a-zA-Z_$][\w$]*)', Name), - ], - 'numliterals': [ - (r'[01]+B', Number.Bin), # binary number (ObjM2) - (r'[0-7]+B', Number.Oct), # octal number (PIM + ISO) - (r'[0-7]+C', Number.Oct), # char code (PIM + ISO) - (r'[0-9A-F]+C', Number.Hex), # char code (ObjM2) - (r'[0-9A-F]+H', Number.Hex), # hexadecimal number - (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number - (r'[0-9]+\.[0-9]+', Number.Float), # real number - (r'[0-9]+', Number.Integer), # decimal whole number - ], - 'strings': [ - (r"'(\\\\|\\'|[^'])*'", String), # single quoted string - (r'"(\\\\|\\"|[^"])*"', String), # double quoted string - ], - 'operators': [ - (r'[*/+=#~&<>\^-]', Operator), - (r':=', Operator), # assignment - (r'@', Operator), # pointer deref (ISO) - (r'\.\.', Operator), # ellipsis or range - (r'`', Operator), # Smalltalk message (ObjM2) - (r'::', Operator), # type conversion (ObjM2) - ], - 'punctuation': [ - (r'[()\[\]{},.:;|]', Punctuation), - ], - 'comments': [ - (r'//.*?\n', Comment.Single), # ObjM2 - (r'/\*(.*?)\*/', Comment.Multiline), # ObjM2 - (r'\(\*([^$].*?)\*\)', Comment.Multiline), - # TO DO: nesting of (* ... *) comments - ], - 'pragmas': [ - (r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM - (r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2 - ], - 'root': [ - include('whitespace'), - include('comments'), - include('pragmas'), - include('identifiers'), - include('numliterals'), - include('strings'), - include('operators'), - include('punctuation'), - ] - } - - pim_reserved_words = [ - # 40 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', - 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR', - 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', - 'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED', - 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', - 'UNTIL', 'VAR', 'WHILE', 'WITH', - ] - - pim_pervasives = [ - # 31 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC', - 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL', - 'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD', - 'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL', - ] - - iso_reserved_words = [ - # 46 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY', - 'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', - 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER', - 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY', - 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', - 'WITH', - ] - - iso_pervasives = [ - # 42 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX', - 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', - 'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH', - 'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', - 'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE', - 'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', - ] - - objm2_reserved_words = [ - # base language, 42 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF', - 'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD', - 'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE', - 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', - 'UNTIL', 'VAR', 'VARIADIC', 'WHILE', - # OO extensions, 16 reserved words - 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', - 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', - 'SUPER', 'TRY', - ] - - objm2_pervasives = [ - # base language, 38 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE', - 'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD', - 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL', - 'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX', - 'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF', - # OO extensions, 3 pervasives - 'OBJECT', 'NO', 'YES', - ] - - gnu_reserved_words = [ - # 10 additional reserved words - 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', - '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', - ] - - gnu_pervasives = [ - # 21 identifiers, actually from pseudo-module SYSTEM - # but we will highlight them as if they were pervasives - 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', - 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', - 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', - 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', - ] - - def __init__(self, **options): - self.reserved_words = set() - self.pervasives = set() - # ISO Modula-2 - if get_bool_opt(options, 'iso', False): - self.reserved_words.update(self.iso_reserved_words) - self.pervasives.update(self.iso_pervasives) - # Objective Modula-2 - elif get_bool_opt(options, 'objm2', False): - self.reserved_words.update(self.objm2_reserved_words) - self.pervasives.update(self.objm2_pervasives) - # PIM Modula-2 (DEFAULT) - else: - self.reserved_words.update(self.pim_reserved_words) - self.pervasives.update(self.pim_pervasives) - # GNU extensions - if get_bool_opt(options, 'gm2ext', False): - self.reserved_words.update(self.gnu_reserved_words) - self.pervasives.update(self.gnu_pervasives) - # initialise - RegexLexer.__init__(self, **options) - - def get_tokens_unprocessed(self, text): - for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): - # check for reserved words and pervasives - if token is Name: - if value in self.reserved_words: - token = Keyword.Reserved - elif value in self.pervasives: - token = Keyword.Pervasive - # return result - yield index, token, value - - class AdaLexer(RegexLexer): """ For Ada source code. diff --git a/tests/examplefiles/modula2_test_cases.def b/tests/examplefiles/modula2_test_cases.def new file mode 100644 index 00000000..ce86a55b --- /dev/null +++ b/tests/examplefiles/modula2_test_cases.def @@ -0,0 +1,354 @@ +(* Test Cases for Modula-2 Lexer *) + +(* Notes: + (1) Without dialect option nor embedded dialect tag, the lexer operates in + fallback mode, recognising the *combined* literals, punctuation symbols + and operators of all supported dialects, and the *combined* reserved + words and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10. + (1) If multiple embedded dialect tags are present, the lexer will use the + first valid tag and ignore any subsequent dialect tags in the file. + (2) An embedded dialect tag overrides any command line dialect option. *) + + +(* Testing command line dialect option *) + +(* for PIM Modula-2 : pygmentize -O full,dialect=m2pim ... + for ISO Modula-2 : pygmentize -O full,dialect=m2iso ... + for Modula-2 R10 : pygmentize -O full,dialect=m2r10 ... + for Objective Modula-2 : pygmentize -O full,dialect=objm2 ... *) + +(* for Aglet extensions : pygmentize -O full,dialect=m2iso+aglet ... + for GNU extensions : pygmentize -O full,dialect=m2pim+gm2 ... + for p1 extensions : pygmentize -O full,dialect=m2iso+p1 ... + for XDS extensions : pygmentize -O full,dialect=m2iso+xds ... + + +(* Testing embedded dialect tags *) + +(* !m2pim*) (* <-- remove whitespace before ! for PIM Modula-2 *) +(* !m2iso*) (* <-- remove whitespace before ! for ISO Modula-2 *) +(* !m2r10*) (* <-- remove whitespace before ! for Modula-2 R10 *) +(* !objm2*) (* <-- remove whitespace before ! for Objective Modula-2 *) + +(* !m2iso+aglet*) (* <-- remove whitespace before ! for Aglet extensions *) +(* !m2pim+gm2*) (* <-- remove whitespace before ! for GNU extensions *) +(* !m2iso+p1*) (* <-- remove whitespace before ! for p1 extensions *) +(* !m2iso+xds*) (* <-- remove whitespace before ! for XDS extensions *) + + +(* Dialect Indicating Names *) + +(* recognised names should be highlighted *) + +QUALIFIED (* PIM and ISO *) + +PACKEDSET (* ISO only *) + +ARGLIST (* M2 R10 and ObjM2 *) + +BYCOPY (* ObjM2 only *) + +BITSET8 (* Aglet, GNU and M2 R10 *) + +__FILE__ (* GNU only *) + +BCD (* p1 and M2 R10 *) + +SEQ (* XDS only *) + + +(* Literal Tests *) + +(* recognised literals should be rendered as one unit + unrecognised literals should be rendered as error *) + +ch := 'a'; ch := "a"; (* all dialects *) +ch := 0u20; unich := 0u2038 (* M2 R10 *) + +s := 'The cat said "meow!".'; +s := "It is eight O'clock."; + + +n := 123; n = 1000000; (* all dialects *) +n := 123; n = 1'000'000; (* M2 R10 *) + +n := 0b0110; n:= 0b0110'1100'0111; (* M2 R10 *) +n := 0xFF00; n:= 0xDEAD'BEEF'0F00; (* M2 R10 *) + +r := 1.23; r := 1000000.000001; (* all dialects *) +r := 1.23; r := 1'000'000.000'001; (* M2 R10 *) + +r := 1.234E6; r:= 1.234E-6; r := 1.234567E1000; (* PIM + ISO *) +r := 1.234e6; r:= 1.234e-6; r := 1.234'567e1'000; (* M2 R10 *) + +ch := 0377C; n := 0377B; n := 07FF0H; (* ISO + PIM *) + + +(* Non-Alphabetic Operator Tests *) + +(* supported operators should be rendered as one unit + unsupported operators should be rendered as errors *) + +a := b + c - d * e / f; (* all dialects *) + +SetDiff := A \ B; (* M2 R10 *) + +dotProduct := v1 *. v2; catArray := array1 +> array2; (* M2 R10 *) + +bool := a = b; bool := a > b; bool := a < b; +bool := a # b; bool := a >= b; bool := a <= b; + +bool := a <> b; (* PIM + ISO *) + +bool := a == b; (* M2 R10 *) + +(*&*) IF a & b THEN ... END; (* PIM + ISO *) + +(*~*) IF ~ b THEN ... END; (* PIM + ISO *) + +(*::*) int := real :: INTEGER; (* M2 R10 *) + +(*++*) FOR i++ IN range DO ... END; (* M2 R10 *) +(*--*) FOR i-- IN range DO ... END; (* M2 R10 *) + +(*^*) next := this^.next; (* all dialects *) +(*@*) next := this@.next; (* ISO *) + +(*`*) str := `NSString alloc init; (* ObjM2 *) + + +(* Punctuation Tests *) + +(* supported punctuation should be rendered as one unit + unsupported punctuation should be rendered as an error *) + +(*.*) Foo.Bar.Baz; (*..*) TYPE Sign = [-1..1] OF INTEGER; + +(*|:*) CASE foo OF | 1 : bar | 2 : bam | 3 : boo END; +(*!:*) CASE foo OF 1 : bar ! 2 : bam ! 3 : boo END; (* ISO *) + +(*[]()*) array[n] := foo(); + +(*{}*) CONST Bar = { 1, 2, 3 }; + +(*?*) TPROPERTIES = isCollection, isIndexed | isRigid?; (* M2 R10 *) + +(*~*) CONST ~ isFoobar = Foo AND Bar; (* M2 R10 *) +(*->*) isFoobar -> PROCEDURE [ABS]; (* M2 R10 *) + +(*<<>>*) GENLIB Foo FROM Template FOR Bar = <<ARRAY OF CHAR>> END; (* M2 R10 *) + + +(* Single Line Comment Test *) + +(* should be rendered as comment if supported, as error if unsupported *) + +// This is a single line comment (M2 R10 + ObjM2) + + +(* Pragma Delimiter Tests *) + +(* PIM style pragma should be rendered as pragma in PIM dialects, + as multiline comment in all other dialects. *) + +(*$INLINE*) (* PIM *) + +(* ISO style pragma should be rendered as error in PIM dialects, + as pragma in all other dialects. *) + +<*INLINE*> (* all other dialects *) + + +(* Operator Substitution Test When in Algol mode *) + +IF foo # bar THEN ... END; (* # should be rendered as not equal symbol *) + +IF foo >= bar THEN ... END; (* >= should be rendered as not less symbol *) + +IF foo <= bar THEN ... END; (* <= should be rendered as not greater symbol *) + +IF foo == bar THEN ... END; (* == should be rendered as identity symbol *) + +dotProduct := v1 *. v2; (* *. should be rendered as dot product symbol *) + + +(* Reserved Words and Builtins Test *) + +(* supported reserved words and builtins should be highlighted *) + +(* reserved words common to all dialects *) + +AND ARRAY BEGIN BY CASE CONST DEFINITION DIV DO ELSE ELSIF END EXIT FOR FROM +IF IMPLEMENTATION IMPORT IN LOOP MOD MODULE NOT OF OR POINTER PROCEDURE +RECORD REPEAT RETURN SET THEN TO TYPE UNTIL VAR WHILE + +(* builtins common to all dialects *) + +ABS BOOLEAN CARDINAL CHAR CHR FALSE INTEGER LONGINT LONGREAL +MAX MIN NIL ODD ORD REAL TRUE + +(* pseudo builtins common to all dialects *) + +ADDRESS BYTE WORD ADR + + +(* additional reserved words for PIM *) + +EXPORT QUALIFIED WITH + +(* additional builtins for PIM *) + +BITSET CAP DEC DISPOSE EXCL FLOAT HALT HIGH INC INCL NEW NIL PROC SIZE TRUNC VAL + +(* additional pseudo-builtins for PIM *) + +SYSTEM PROCESS TSIZE NEWPROCESS TRANSFER + + +(* additional reserved words for ISO 10514-1 *) + +EXCEPT EXPORT FINALLY FORWARD PACKEDSET QUALIFIED REM RETRY WITH + +(* additional reserved words for ISO 10514-2 & ISO 10514-3 *) + +ABSTRACT AS CLASS GUARD INHERIT OVERRIDE READONLY REVEAL TRACED UNSAFEGUARDED + +(* additional builtins for ISO 10514-1 *) + +BITSET CAP CMPLX COMPLEX DEC DISPOSE EXCL FLOAT HALT HIGH IM INC INCL INT +INTERRUPTIBLE LENGTH LFLOAT LONGCOMPLEX NEW PROC PROTECTION RE SIZE TRUNC +UNINTERRUBTIBLE VAL + +(* additional builtins for ISO 10514-2 & ISO 10514-3 *) + +CREATE DESTROY EMPTY ISMEMBER SELF + + +(* additional pseudo-builtins for ISO *) + +(* SYSTEM *) +SYSTEM BITSPERLOC LOCSPERBYTE LOCSPERWORD LOC ADDADR SUBADR DIFADR MAKEADR +ADR ROTATE SHIFT CAST TSIZE + +(* COROUTINES *) +COROUTINES ATTACH COROUTINE CURRENT DETACH HANDLER INTERRUPTSOURCE IOTRANSFER +IsATTACHED LISTEN NEWCOROUTINE PROT TRANSFER + +(* EXCEPTIONS *) +EXCEPTIONS AllocateSource CurrentNumber ExceptionNumber ExceptionSource +GetMessage IsCurrentSource IsExceptionalExecution RAISE + +(* TERMINATION *) +TERMINATION IsTerminating HasHalted + +(* M2EXCEPTION *) +M2EXCEPTION M2Exceptions M2Exception IsM2Exception indexException rangeException +caseSelectException invalidLocation functionException wholeValueException +wholeDivException realValueException realDivException complexValueException +complexDivException protException sysException coException exException + + +(* additional reserved words for M2 R10 *) + +ALIAS ARGLIST BLUEPRINT COPY GENLIB INDETERMINATE NEW NONE OPAQUE REFERENTIAL +RELEASE RETAIN + +(* with symbolic assembler language extension *) +ASM REG + +(* additional builtins for M2 R10 *) + +CARDINAL COUNT EMPTY EXISTS INSERT LENGTH LONGCARD OCTET PTR PRED READ READNEW +REMOVE RETRIEVE SORT STORE SUBSET SUCC TLIMIT TMAX TMIN TRUE TSIZE UNICHAR +WRITE WRITEF + +(* additional pseudo-builtins for M2 R10 *) + +(* TPROPERTIES *) +TPROPERTIES PROPERTY LITERAL TPROPERTY TLITERAL TBUILTIN TDYN TREFC TNIL +TBASE TPRECISION TMAXEXP TMINEXP + +(* CONVERSION *) +CONVERSION TSXFSIZE SXF VAL + +(* UNSAFE *) +UNSAFE CAST INTRINSIC AVAIL ADD SUB ADDC SUBC FETCHADD FETCHSUB SHL SHR ASHR +ROTL ROTR ROTLC ROTRC BWNOT BWAND BWOR BWXOR BWNAND BWNOR SETBIT TESTBIT +LSBIT MSBIT CSBITS BAIL HALT TODO FFI ADDR VARGLIST VARGC + +(* ATOMIC *) +ATOMIC INTRINSIC AVAIL SWAP CAS INC DEC BWAND BWNAND BWOR BWXOR + +(* COMPILER *) +COMPILER DEBUG MODNAME PROCNAME LINENUM DEFAULT HASH + +(* ASSEMBLER *) +ASSEMBLER REGISTER SETREG GETREG CODE + + +(* standard library ADT identifiers for M2 R10 *) + +(* rendered as builtins when dialect is set to Modula-2 R10, + this can be turned off by option treat_stdlib_adts_as_builtins=off *) +BCD LONGBCD BITSET SHORTBITSET LONGBITSET LONGLONGBITSET COMPLEX LONGCOMPLEX +SHORTCARD LONGLONGCARD SHORTINT LONGLONGINT POSINT SHORTPOSINT LONGPOSINT +LONGLONGPOSINT BITSET8 BITSET16 BITSET32 BITSET64 BITSET128 BS8 BS16 BS32 +BS64 BS128 CARDINAL8 CARDINAL16 CARDINAL32 CARDINAL64 CARDINAL128 CARD8 +CARD16 CARD32 CARD64 CARD128 INTEGER8 INTEGER16 INTEGER32 INTEGER64 +INTEGER128 INT8 INT16 INT32 INT64 INT128 STRING UNISTRING + + +(* additional reserved words for ObjM2 *) + +(* Note: ObjM2 is a superset of M2 R10 *) + +BYCOPY BYREF CLASS CONTINUE CRITICAL INOUT METHOD ON OPTIONAL OUT PRIVATE +PROTECTED PROTOCOL PUBLIC SUPER TRY + +(* additional builtins for ObjM2 *) + +OBJECT NO YES + + +(* additional builtins for Aglet Extensions to ISO *) + +BITSET8 BITSET16 BITSET32 CARDINAL8 CARDINAL16 CARDINAL32 INTEGER8 INTEGER16 +INTEGER32 + + +(* additional reserved words for GNU Extensions to PIM *) + +ASM __ATTRIBUTE__ __BUILTIN__ __COLUMN__ __DATE__ __FILE__ __FUNCTION__ +__LINE__ __MODULE__ VOLATILE + +(* additional builtins for GNU Extensions to PIM *) + +BITSET8 BITSET16 BITSET32 CARDINAL8 CARDINAL16 CARDINAL32 CARDINAL64 COMPLEX32 +COMPLEX64 COMPLEX96 COMPLEX128 INTEGER8 INTEGER16 INTEGER32 INTEGER64 REAL8 +REAL16 REAL32 REAL96 REAL128 THROW + + +(* additional pseudo-builtins for p1 Extensions to ISO *) + +BCD + + +(* additional reserved words for XDS Extensions to ISO *) + +SEQ + +(* additional builtins for XDS Extensions to ISO *) + +ASH ASSERT DIFFADR_TYPE ENTIER INDEX LEN LONGCARD SHORTCARD SHORTINT + +(* additional pseudo-builtins for XDS Extensions to ISO *) + +(* SYSTEM *) +PROCESS NEWPROCESS BOOL8 BOOL16 BOOL32 CARD8 CARD16 CARD32 INT8 INT16 INT32 +REF MOVE FILL GET PUT CC int unsigned size_t void + +(* COMPILER *) +COMPILER OPTION EQUATION + + +(* end of file *)
\ No newline at end of file |