summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2015-03-08 07:29:37 +0100
committerGeorg Brandl <georg@python.org>2015-03-08 07:29:37 +0100
commit48eb4ba2f472b5d3edaf8446f7bc127a5272c46f (patch)
tree54b50cd2c8a0450490aab94673aec76f6b295c3a
parent0064569c71402121242a09d5e5cb3eddf1ebd6d6 (diff)
downloadpygments-48eb4ba2f472b5d3edaf8446f7bc127a5272c46f.tar.gz
Add overhauled Modula-2 lexer from https://bitbucket.org/trijezdci/m2r10/src/tip/_GRAMMAR/
-rw-r--r--pygments/lexers/_mapping.py2
-rw-r--r--pygments/lexers/modula2.py1568
-rw-r--r--pygments/lexers/pascal.py196
-rw-r--r--tests/examplefiles/modula2_test_cases.def354
4 files changed, 1926 insertions, 194 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 99461c55..2b836ac6 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -221,7 +221,7 @@ LEXERS = {
'MatlabSessionLexer': ('pygments.lexers.matlab', 'Matlab session', ('matlabsession',), (), ()),
'MiniDLexer': ('pygments.lexers.d', 'MiniD', ('minid',), (), ('text/x-minidsrc',)),
'ModelicaLexer': ('pygments.lexers.modeling', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)),
- 'Modula2Lexer': ('pygments.lexers.pascal', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)),
+ 'Modula2Lexer': ('pygments.lexers.modula2', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)),
'MoinWikiLexer': ('pygments.lexers.markup', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)),
'MonkeyLexer': ('pygments.lexers.basic', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)),
'MoonScriptLexer': ('pygments.lexers.scripting', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')),
diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py
new file mode 100644
index 00000000..eec913b6
--- /dev/null
+++ b/pygments/lexers/modula2.py
@@ -0,0 +1,1568 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.modula2
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ Multi-Dialect Lexer for Modula-2.
+
+ :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, include
+from pygments.util import get_bool_opt, get_list_opt
+from pygments.token import Text, Comment, Operator, Keyword, Name, \
+ String, Number, Punctuation, Error
+
+__all__ = ['Modula2Lexer']
+
+
+# Multi-Dialect Modula-2 Lexer
+class Modula2Lexer(RegexLexer):
+ """
+ For `Modula-2 <http://www.modula2.org/>`_ source code.
+
+ The Modula-2 lexer supports several dialects. By default, it operates in
+ fallback mode, recognising the *combined* literals, punctuation symbols
+ and operators of all supported dialects, and the *combined* reserved words
+ and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
+ differentiating between library defined identifiers.
+
+ To select a specific dialect, a dialect option may be passed
+ or a dialect tag may be embedded into a source file.
+
+ Dialect Options:
+
+ `m2pim`
+ Select PIM Modula-2 dialect.
+ `m2iso`
+ Select ISO Modula-2 dialect.
+ `m2r10`
+ Select Modula-2 R10 dialect.
+ `objm2`
+ Select Objective Modula-2 dialect.
+
+ The PIM and ISO dialect options may be qualified with a language extension.
+
+ Language Extensions:
+
+ `+aglet`
+ Select Aglet Modula-2 extensions, available with m2iso.
+ `+gm2`
+ Select GNU Modula-2 extensions, available with m2pim.
+ `+p1`
+ Select p1 Modula-2 extensions, available with m2iso.
+ `+xds`
+ Select XDS Modula-2 extensions, available with m2iso.
+
+
+ Passing a Dialect Option via Unix Commandline Interface
+
+ Dialect options may be passed to the lexer using the `dialect` key.
+ Only one such option should be passed. If multiple dialect options are
+ passed, the first valid option is used, any subsequent options are ignored.
+
+ Examples:
+
+ `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
+ Use ISO dialect to render input to HTML output
+ `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
+ Use ISO dialect with p1 extensions to render input to RTF output
+
+
+ Embedding a Dialect Option within a source file
+
+ A dialect option may be embedded in a source file in form of a dialect
+ tag, a specially formatted comment that specifies a dialect option.
+
+ Dialect Tag EBNF:
+
+ dialectTag :
+ OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
+
+ dialectOption :
+ 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
+ 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
+
+ Prefix : '!' ;
+
+ OpeningCommentDelim : '(*' ;
+
+ ClosingCommentDelim : '*)' ;
+
+ No whitespace is permitted between the tokens of a dialect tag.
+
+ In the event that a source file contains multiple dialect tags, the first
+ tag that contains a valid dialect option will be used and any subsequent
+ dialect tags will be ignored. Ideally, a dialect tag should be placed
+ at the beginning of a source file.
+
+ An embedded dialect tag overrides a dialect option set via command line.
+
+ Examples:
+
+ `(*!m2r10*) DEFINITION MODULE Foobar; ...`
+ Use Modula-2 R10 dialect to render this source file.
+ `(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...`
+ Use PIM dialect with GNU extensions to render this source file.
+
+
+ Algol Publication Mode:
+
+ In Algol publication mode, source text is rendered for publication of
+ algorithms in scientific papers and academic texts, following the format
+ of the Revised Algol-60 Language Report. It is activated by passing
+ one of two corresponding styles as an option:
+
+ `algol`
+ render reserved words lowercase underline boldface
+ and builtins lowercase boldface italic
+ `algol_nu`
+ render reserved words lowercase boldface (no underlining)
+ and builtins lowercase boldface italic
+
+ The lexer automatically performs the required lowercase conversion when
+ this mode is activated.
+
+ Example:
+
+ `$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input`
+ Render input file in Algol publication mode to LaTeX output.
+
+
+ Rendering Mode of First Class ADT Identifiers:
+
+ The rendering of standard library first class ADT identifiers is controlled
+ by option flag "treat_stdlib_adts_as_builtins".
+
+ When this option is turned on, standard library ADT identifiers are rendered
+ as builtins. When it is turned off, they are rendered as ordinary library
+ identifiers.
+
+ `treat_stdlib_adts_as_builtins` (default: On)
+
+ The option is useful for dialects that support ADTs as first class objects
+ and provide ADTs in the standard library that would otherwise be built-in.
+
+ At present, only Modula-2 R10 supports library ADTs as first class objects
+ and therefore, no ADT identifiers are defined for any other dialects.
+
+ Example:
+
+ `$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...`
+ Render standard library ADTs as ordinary library types.
+
+ .. versionadded:: 1.3
+
+ .. versionchanged:: 2.1
+ Added multi-dialect support.
+ """
+ name = 'Modula-2'
+ aliases = ['modula2', 'm2']
+ filenames = ['*.def', '*.mod']
+ mimetypes = ['text/x-modula2']
+
+ flags = re.MULTILINE | re.DOTALL
+
+ # Lexer rule table: state name -> list of (regex, token type) rules.
+ # The rule groups below are only combined in 'root'; the include order
+ # there decides which rule is tried first at a given position.
+ tokens = {
+ 'whitespace': [
+ (r'\n+', Text), # blank lines
+ (r'\s+', Text), # whitespace
+ ],
+ 'dialecttags': [
+ # PIM Dialect Tag
+ (r'\(\*!m2pim\*\)', Comment.Special),
+ # ISO Dialect Tag
+ (r'\(\*!m2iso\*\)', Comment.Special),
+ # M2R10 Dialect Tag
+ (r'\(\*!m2r10\*\)', Comment.Special),
+ # ObjM2 Dialect Tag
+ (r'\(\*!objm2\*\)', Comment.Special),
+ # Aglet Extensions Dialect Tag
+ (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
+ # GNU Extensions Dialect Tag
+ (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
+ # p1 Extensions Dialect Tag
+ (r'\(\*!m2iso\+p1\*\)', Comment.Special),
+ # XDS Extensions Dialect Tag
+ (r'\(\*!m2iso\+xds\*\)', Comment.Special),
+ ],
+ 'identifiers': [
+ (r'([a-zA-Z_$][\w$]*)', Name),
+ ],
+ 'prefixed_number_literals': [
+ #
+ # Base-2, whole number
+ (r'0b[01]+(\'[01]+)*', Number.Bin),
+ #
+ # Base-16, whole number
+ (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
+ ],
+ 'plain_number_literals': [
+ # Each pattern below is built from adjacent raw string literals,
+ # which Python joins into a single regex.
+ #
+ # Base-10, real number with exponent
+ (r'[0-9]+(\'[0-9]+)*' # integral part
+ r'\.[0-9]+(\'[0-9]+)*' # fractional part
+ r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent
+ Number.Float),
+ #
+ # Base-10, real number without exponent
+ (r'[0-9]+(\'[0-9]+)*' # integral part
+ r'\.[0-9]+(\'[0-9]+)*', # fractional part
+ Number.Float),
+ #
+ # Base-10, whole number
+ (r'[0-9]+(\'[0-9]+)*', Number.Integer),
+ ],
+ 'suffixed_number_literals': [
+ #
+ # Base-8, whole number
+ (r'[0-7]+B', Number.Oct),
+ #
+ # Base-8, character code
+ (r'[0-7]+C', Number.Oct),
+ #
+ # Base-16, number
+ (r'[0-9A-F]+H', Number.Hex),
+ ],
+ 'string_literals': [
+ (r"'(\\\\|\\'|[^'])*'", String), # single quoted string
+ (r'"(\\\\|\\"|[^"])*"', String), # double quoted string
+ ],
+ 'digraph_operators': [
+ # Dot Product Operator
+ (r'\*\.', Operator),
+ # Array Concatenation Operator
+ (r'\+>', Operator), # M2R10 + ObjM2
+ # Inequality Operator
+ (r'<>', Operator), # ISO + PIM
+ # Less-Or-Equal, Subset
+ (r'<=', Operator),
+ # Greater-Or-Equal, Superset
+ (r'>=', Operator),
+ # Identity Operator
+ (r'==', Operator), # M2R10 + ObjM2
+ # Type Conversion Operator
+ (r'::', Operator), # M2R10 + ObjM2
+ # Assignment Symbol
+ (r':=', Operator),
+ # Postfix Increment Mutator
+ (r'\+\+', Operator), # M2R10 + ObjM2
+ # Postfix Decrement Mutator
+ (r'--', Operator), # M2R10 + ObjM2
+ ],
+ 'unigraph_operators': [
+ # Arithmetic Operators
+ (r'[+-]', Operator),
+ (r'[*/]', Operator),
+ # ISO 80000-2 compliant Set Difference Operator
+ (r'\\', Operator), # M2R10 + ObjM2
+ # Relational Operators
+ (r'[=#<>]', Operator),
+ # Dereferencing Operator
+ (r'\^', Operator),
+ # Dereferencing Operator Synonym
+ (r'@', Operator), # ISO
+ # Logical AND Operator Synonym
+ (r'&', Operator), # PIM + ISO
+ # Logical NOT Operator Synonym
+ (r'~', Operator), # PIM + ISO
+ # Smalltalk Message Prefix
+ (r'`', Operator), # ObjM2
+ ],
+ 'digraph_punctuation': [
+ # Range Constructor
+ (r'\.\.', Punctuation),
+ # Opening Chevron Bracket
+ (r'<<', Punctuation), # M2R10 + ISO
+ # Closing Chevron Bracket
+ (r'>>', Punctuation), # M2R10 + ISO
+ # Blueprint Punctuation
+ (r'->', Punctuation), # M2R10 + ISO
+ # Distinguish |# and # in M2 R10
+ (r'\|#', Punctuation),
+ # Distinguish ## and # in M2 R10
+ (r'##', Punctuation),
+ # Distinguish |* and * in M2 R10
+ (r'\|\*', Punctuation),
+ ],
+ 'unigraph_punctuation': [
+ # Common Punctuation
+ (r'[\(\)\[\]{},.:;\|]', Punctuation),
+ # Case Label Separator Synonym
+ (r'!', Punctuation), # ISO
+ # Blueprint Punctuation
+ (r'\?', Punctuation), # M2R10 + ObjM2
+ ],
+ 'comments': [
+ # Single Line Comment
+ (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2
+ # Block Comment
+ # The negative lookahead keeps '(*$' (the Pascal style pragma opener)
+ # out of this rule while still accepting the empty comment '(**)'.
+ # Requiring one non-'$' character instead would make '(**)' run on
+ # to the next '*)' in the input.
+ (r'\(\*(?!\$).*?\*\)', Comment.Multiline),
+ # Template Block Comment
+ (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2
+ ],
+ 'pragmas': [
+ # ISO Style Pragmas
+ (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2
+ # Pascal Style Pragmas
+ (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM
+ ],
+ 'root': [
+ # Dialect tags and pragmas are listed before 'comments' so that the
+ # more specific '(*!...*)' and '(*$...*)' forms are tried first.
+ include('whitespace'),
+ include('dialecttags'),
+ include('pragmas'),
+ include('comments'),
+ include('identifiers'),
+ include('suffixed_number_literals'), # PIM + ISO
+ include('prefixed_number_literals'), # M2R10 + ObjM2
+ include('plain_number_literals'),
+ include('string_literals'),
+ include('digraph_punctuation'),
+ include('digraph_operators'),
+ include('unigraph_punctuation'),
+ include('unigraph_operators'),
+ ]
+ }
+
+# C o m m o n D a t a s e t s
+
+ # Common Reserved Words Dataset
+ common_reserved_words = (
+ # 37 common reserved words
+ 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
+ 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
+ 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
+ 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
+ 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
+ )
+
+ # Common Builtins Dataset
+ common_builtins = (
+ # 16 common builtins
+ 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
+ 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
+ 'TRUE',
+ )
+
+ # Common Pseudo-Module Builtins Dataset
+ common_pseudo_builtins = (
+ # 4 common pseudo builtins
+ 'ADDRESS', 'BYTE', 'WORD', 'ADR'
+ )
+
+# P I M M o d u l a - 2 D a t a s e t s
+
+ # Lexemes to Mark as Error Tokens for PIM Modula-2
+ pim_lexemes_to_reject = (
+ '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
+ '+>', '->', '<<', '>>', '|#', '##',
+ )
+
+ # PIM Modula-2 Additional Reserved Words Dataset
+ pim_additional_reserved_words = (
+ # 3 additional reserved words
+ 'EXPORT', 'QUALIFIED', 'WITH',
+ )
+
+ # PIM Modula-2 Additional Builtins Dataset
+ pim_additional_builtins = (
+ # 16 additional builtins
+ 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
+ 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
+ )
+
+ # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
+ pim_additional_pseudo_builtins = (
+ # 5 additional pseudo builtins
+ 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
+ )
+
+# I S O M o d u l a - 2 D a t a s e t s
+
+ # Lexemes to Mark as Error Tokens for ISO Modula-2
+ iso_lexemes_to_reject = (
+ '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
+ '<<', '>>', '|#', '##',
+ )
+
+ # ISO Modula-2 Additional Reserved Words Dataset
+ iso_additional_reserved_words = (
+ # 9 additional reserved words (ISO 10514-1)
+ 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
+ 'REM', 'RETRY', 'WITH',
+ # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
+ 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
+ 'REVEAL', 'TRACED', 'UNSAFEGUARDED',
+ )
+
+ # ISO Modula-2 Additional Builtins Dataset
+ # Builtin identifiers ISO Modula-2 adds on top of the common builtins.
+ iso_additional_builtins = (
+ # 26 additional builtins (ISO 10514-1)
+ 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
+ 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
+ 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
+ # 'UNINTERRUPTIBLE' was misspelled 'UNINTERRUBTIBLE', so the real
+ # identifier was never recognised as a builtin.
+ 'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
+ # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
+ 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
+ )
+
+ # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
+ # Identifiers grouped by the ISO pseudo/system module named in each
+ # group comment; the first entry of each group is the module name itself.
+ iso_additional_pseudo_builtins = (
+ # 14 additional builtins (SYSTEM)
+ 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
+ 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
+ 'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
+ # 13 additional builtins (COROUTINES)
+ 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
+ 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
+ 'NEWCOROUTINE', 'PROT', 'TRANSFER',
+ # 9 additional builtins (EXCEPTIONS)
+ 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
+ 'ExceptionSource', 'GetMessage', 'IsCurrentSource',
+ 'IsExceptionalExecution', 'RAISE',
+ # 3 additional builtins (TERMINATION)
+ 'TERMINATION', 'IsTerminating', 'HasHalted',
+ # 19 additional builtins (M2EXCEPTION)
+ 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
+ 'indexException', 'rangeException', 'caseSelectException',
+ 'invalidLocation', 'functionException', 'wholeValueException',
+ 'wholeDivException', 'realValueException', 'realDivException',
+ 'complexValueException', 'complexDivException', 'protException',
+ 'sysException', 'coException', 'exException',
+ )
+
+# M o d u l a - 2 R 1 0 D a t a s e t s
+
+ # Lexemes to Mark as Error Tokens for Modula-2 R10
+ m2r10_lexemes_to_reject = (
+ '!', '`', '@', '$', '%', '&', '<>',
+ )
+
+ # Modula-2 R10 reserved words in addition to the common set
+ m2r10_additional_reserved_words = (
+ # 12 additional reserved words
+ 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
+ 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
+ # 2 additional reserved words with symbolic assembly option
+ 'ASM', 'REG',
+ )
+
+ # Modula-2 R10 builtins in addition to the common set
+ m2r10_additional_builtins = (
+ # 26 additional builtins
+ 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
+ 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
+ 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
+ 'UNICHAR', 'WRITE', 'WRITEF',
+ )
+
+ # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
+ m2r10_additional_pseudo_builtins = (
+ # 13 additional builtins (TPROPERTIES)
+ 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
+ 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
+ 'TMAXEXP', 'TMINEXP',
+ # 4 additional builtins (CONVERSION)
+ 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
+ # 35 additional builtins (UNSAFE)
+ 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
+ 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
+ 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
+ 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
+ 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
+ # 11 additional builtins (ATOMIC)
+ 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
+ 'BWNAND', 'BWOR', 'BWXOR',
+ # 7 additional builtins (COMPILER)
+ 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
+ 'HASH',
+ # 5 additional builtins (ASSEMBLER)
+ 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
+ )
+
+# O b j e c t i v e M o d u l a - 2 D a t a s e t s
+
+ # Lexemes to Mark as Error Tokens for Objective Modula-2
+ objm2_lexemes_to_reject = (
+ '!', '$', '%', '&', '<>',
+ )
+
+ # Objective Modula-2 Extensions
+ # reserved words in addition to Modula-2 R10
+ objm2_additional_reserved_words = (
+ # 16 additional reserved words
+ 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
+ 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
+ 'SUPER', 'TRY',
+ )
+
+ # Objective Modula-2 Extensions
+ # builtins in addition to Modula-2 R10
+ objm2_additional_builtins = (
+ # 3 additional builtins
+ 'OBJECT', 'NO', 'YES',
+ )
+
+ # Objective Modula-2 Extensions
+ # pseudo-module builtins in addition to Modula-2 R10
+ objm2_additional_pseudo_builtins = (
+ # None
+ )
+
+# A g l e t M o d u l a - 2 D a t a s e t s
+
+ # Aglet Extensions
+ # reserved words in addition to ISO Modula-2
+ aglet_additional_reserved_words = (
+ # None
+ )
+
+ # Aglet Extensions
+ # builtins in addition to ISO Modula-2
+ aglet_additional_builtins = (
+ # 9 additional builtins
+ 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
+ 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
+ )
+
+ # Aglet Modula-2 Extensions
+ # pseudo-module builtins in addition to ISO Modula-2
+ aglet_additional_pseudo_builtins = (
+ # None
+ )
+
+# G N U M o d u l a - 2 D a t a s e t s
+
+ # GNU Extensions
+ # reserved words in addition to PIM Modula-2
+ gm2_additional_reserved_words = (
+ # 10 additional reserved words
+ 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
+ '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
+ )
+
+ # GNU Extensions
+ # builtins in addition to PIM Modula-2
+ gm2_additional_builtins = (
+ # 21 additional builtins
+ 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
+ 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
+ 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
+ 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
+ )
+
+ # GNU Extensions
+ # pseudo-module builtins in addition to PIM Modula-2
+ gm2_additional_pseudo_builtins = (
+ # None
+ )
+
+# p 1 M o d u l a - 2 D a t a s e t s
+
+ # p1 Extensions
+ # reserved words in addition to ISO Modula-2
+ p1_additional_reserved_words = (
+ # None
+ )
+
+ # p1 Extensions
+ # builtins in addition to ISO Modula-2
+ p1_additional_builtins = (
+ # None
+ )
+
+ # p1 Modula-2 Extensions
+ # pseudo-module builtins in addition to ISO Modula-2
+ p1_additional_pseudo_builtins = (
+ # 1 additional builtin
+ 'BCD',
+ )
+
+# X D S M o d u l a - 2 D a t a s e t s
+
+ # XDS Extensions
+ # reserved words in addition to ISO Modula-2
+ xds_additional_reserved_words = (
+ # 1 additional reserved word
+ 'SEQ',
+ )
+
+ # XDS Extensions
+ # builtins in addition to ISO Modula-2
+ xds_additional_builtins = (
+ # 9 additional builtins
+ 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
+ 'LONGCARD', 'SHORTCARD', 'SHORTINT',
+ )
+
+ # XDS Modula-2 Extensions
+ # pseudo-module builtins in addition to ISO Modula-2
+ # Pseudo-module identifiers XDS Modula-2 adds on top of ISO Modula-2.
+ xds_additional_pseudo_builtins = (
+ # 21 additional builtins (SYSTEM)
+ 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
+ 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
+ # The comma after 'void' is required: without it Python joins the
+ # adjacent literals into the single bogus entry 'voidCOMPILER'.
+ 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
+ # 3 additional builtins (COMPILER)
+ 'COMPILER', 'OPTION', 'EQUATION',
+ )
+
+# P I M S t a n d a r d L i b r a r y D a t a s e t s
+
+ # PIM Modula-2 Standard Library Modules Dataset
+ pim_stdlib_module_identifiers = (
+ 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
+ )
+
+ # PIM Modula-2 Standard Library Types Dataset
+ pim_stdlib_type_identifiers = (
+ 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
+ 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
+ 'DirectoryCommand',
+ )
+
+ # PIM Modula-2 Standard Library Procedures Dataset
+ pim_stdlib_proc_identifiers = (
+ 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
+ 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
+ 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
+ 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
+ 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
+ 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
+ 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
+ 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
+ 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
+ 'ln', 'sin', 'cos', 'arctan', 'entier','ALLOCATE', 'DEALLOCATE',
+ )
+
+ # PIM Modula-2 Standard Library Variables Dataset
+ pim_stdlib_var_identifiers = (
+ 'Done', 'termCH', 'in', 'out'
+ )
+
+ # PIM Modula-2 Standard Library Constants Dataset
+ pim_stdlib_const_identifiers = (
+ 'EOL',
+ )
+
+# I S O S t a n d a r d L i b r a r y D a t a s e t s
+
+ # ISO Modula-2 Standard Library Modules Dataset
+ iso_stdlib_module_identifiers = (
+ # TO DO
+ )
+
+ # ISO Modula-2 Standard Library Types Dataset
+ iso_stdlib_type_identifiers = (
+ # TO DO
+ )
+
+ # ISO Modula-2 Standard Library Procedures Dataset
+ iso_stdlib_proc_identifiers = (
+ # TO DO
+ )
+
+ # ISO Modula-2 Standard Library Variables Dataset
+ iso_stdlib_var_identifiers = (
+ # TO DO
+ )
+
+ # ISO Modula-2 Standard Library Constants Dataset
+ iso_stdlib_const_identifiers = (
+ # TO DO
+ )
+
+# M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s
+
+ # Modula-2 R10 Standard Library ADTs Dataset
+ m2r10_stdlib_adt_identifiers = (
+ 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
+ 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
+ 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
+ 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
+ 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
+ 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
+ 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
+ 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
+ 'INT64', 'INT128', 'STRING', 'UNISTRING',
+ )
+
+ # Modula-2 R10 Standard Library Blueprints Dataset
+ m2r10_stdlib_blueprint_identifiers = (
+ 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
+ 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
+ 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
+ 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
+ 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
+ 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
+ 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
+ )
+
+ # Modula-2 R10 Standard Library Modules Dataset
+ m2r10_stdlib_module_identifiers = (
+ 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
+ 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
+ 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
+ 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
+ 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
+ )
+
+ # Modula-2 R10 Standard Library Types Dataset
+ m2r10_stdlib_type_identifiers = (
+ 'File', 'Status',
+ # TO BE COMPLETED
+ )
+
+ # Modula-2 R10 Standard Library Procedures Dataset
+ m2r10_stdlib_proc_identifiers = (
+ 'ALLOCATE', 'DEALLOCATE', 'SIZE',
+ # TO BE COMPLETED
+ )
+
+ # Modula-2 R10 Standard Library Variables Dataset
+ m2r10_stdlib_var_identifiers = (
+ 'stdIn', 'stdOut', 'stdErr',
+ )
+
+ # Modula-2 R10 Standard Library Constants Dataset
+ m2r10_stdlib_const_identifiers = (
+ 'pi', 'tau',
+ )
+
+# D i a l e c t s
+
+
+ # Dialect modes
+ dialects = (
+ 'unknown',
+ 'm2pim', 'm2iso', 'm2r10', 'objm2',
+ 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
+ )
+
+# D a t a b a s e s
+
+ # Lexemes to Mark as Errors Database
+ lexemes_to_reject_db = {
+ # Lexemes to reject for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Lexemes to reject for PIM Modula-2
+ 'm2pim' : (
+ pim_lexemes_to_reject,
+ ),
+ # Lexemes to reject for ISO Modula-2
+ 'm2iso' : (
+ iso_lexemes_to_reject,
+ ),
+ # Lexemes to reject for Modula-2 R10
+ 'm2r10' : (
+ m2r10_lexemes_to_reject,
+ ),
+ # Lexemes to reject for Objective Modula-2
+ 'objm2' : (
+ objm2_lexemes_to_reject,
+ ),
+ # Lexemes to reject for Aglet Modula-2
+ 'm2iso+aglet' : (
+ iso_lexemes_to_reject,
+ ),
+ # Lexemes to reject for GNU Modula-2
+ 'm2pim+gm2' : (
+ pim_lexemes_to_reject,
+ ),
+ # Lexemes to reject for p1 Modula-2
+ 'm2iso+p1' : (
+ iso_lexemes_to_reject,
+ ),
+ # Lexemes to reject for XDS Modula-2
+ 'm2iso+xds' : (
+ iso_lexemes_to_reject,
+ ),
+ }
+
+ # Reserved Words Database
+ # Maps each dialect name to a tuple of the reserved word datasets
+ # (themselves tuples of strings) that apply to that dialect.
+ reserved_words_db = {
+ # Reserved words for unknown dialect
+ 'unknown' : (
+ common_reserved_words,
+ pim_additional_reserved_words,
+ iso_additional_reserved_words,
+ m2r10_additional_reserved_words,
+ ),
+
+ # Reserved words for PIM Modula-2
+ 'm2pim' : (
+ common_reserved_words,
+ pim_additional_reserved_words,
+ ),
+
+ # Reserved words for ISO Modula-2
+ 'm2iso' : (
+ common_reserved_words,
+ iso_additional_reserved_words,
+ ),
+
+ # Reserved words for Modula-2 R10
+ 'm2r10' : (
+ common_reserved_words,
+ m2r10_additional_reserved_words,
+ ),
+
+ # Reserved words for Objective Modula-2
+ 'objm2' : (
+ common_reserved_words,
+ m2r10_additional_reserved_words,
+ objm2_additional_reserved_words,
+ ),
+
+ # Reserved words for Aglet Modula-2 Extensions
+ 'm2iso+aglet' : (
+ common_reserved_words,
+ iso_additional_reserved_words,
+ aglet_additional_reserved_words,
+ ),
+
+ # Reserved words for GNU Modula-2 Extensions
+ 'm2pim+gm2' : (
+ common_reserved_words,
+ pim_additional_reserved_words,
+ gm2_additional_reserved_words,
+ ),
+
+ # Reserved words for p1 Modula-2 Extensions
+ 'm2iso+p1' : (
+ common_reserved_words,
+ iso_additional_reserved_words,
+ p1_additional_reserved_words,
+ ),
+
+ # Reserved words for XDS Modula-2 Extensions
+ 'm2iso+xds' : (
+ common_reserved_words,
+ iso_additional_reserved_words,
+ xds_additional_reserved_words,
+ ),
+ }
+
+ # Builtins Database
+ # Maps each dialect name to a tuple of the builtin datasets
+ # (themselves tuples of strings) that apply to that dialect.
+ builtins_db = {
+ # Builtins for unknown dialect
+ 'unknown' : (
+ common_builtins,
+ pim_additional_builtins,
+ iso_additional_builtins,
+ m2r10_additional_builtins,
+ ),
+
+ # Builtins for PIM Modula-2
+ 'm2pim' : (
+ common_builtins,
+ pim_additional_builtins,
+ ),
+
+ # Builtins for ISO Modula-2
+ 'm2iso' : (
+ common_builtins,
+ iso_additional_builtins,
+ ),
+
+ # Builtins for Modula-2 R10
+ 'm2r10' : (
+ common_builtins,
+ m2r10_additional_builtins,
+ ),
+
+ # Builtins for Objective Modula-2
+ 'objm2' : (
+ common_builtins,
+ m2r10_additional_builtins,
+ objm2_additional_builtins,
+ ),
+
+ # Builtins for Aglet Modula-2 Extensions
+ 'm2iso+aglet' : (
+ common_builtins,
+ iso_additional_builtins,
+ aglet_additional_builtins,
+ ),
+
+ # Builtins for GNU Modula-2 Extensions
+ 'm2pim+gm2' : (
+ common_builtins,
+ pim_additional_builtins,
+ gm2_additional_builtins,
+ ),
+
+ # Builtins for p1 Modula-2 Extensions
+ 'm2iso+p1' : (
+ common_builtins,
+ iso_additional_builtins,
+ p1_additional_builtins,
+ ),
+
+ # Builtins for XDS Modula-2 Extensions
+ 'm2iso+xds' : (
+ common_builtins,
+ iso_additional_builtins,
+ xds_additional_builtins,
+ ),
+ }
+
+ # Pseudo-Module Builtins Database
+ # Maps each dialect name to a tuple of the pseudo-module builtin datasets
+ # (themselves tuples of strings) that apply to that dialect.
+ pseudo_builtins_db = {
+ # Pseudo-module builtins for unknown dialect
+ 'unknown' : (
+ common_pseudo_builtins,
+ pim_additional_pseudo_builtins,
+ iso_additional_pseudo_builtins,
+ m2r10_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for PIM Modula-2
+ 'm2pim' : (
+ common_pseudo_builtins,
+ pim_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for ISO Modula-2
+ 'm2iso' : (
+ common_pseudo_builtins,
+ iso_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for Modula-2 R10
+ 'm2r10' : (
+ common_pseudo_builtins,
+ m2r10_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for Objective Modula-2
+ 'objm2' : (
+ common_pseudo_builtins,
+ m2r10_additional_pseudo_builtins,
+ objm2_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for Aglet Modula-2 Extensions
+ 'm2iso+aglet' : (
+ common_pseudo_builtins,
+ iso_additional_pseudo_builtins,
+ aglet_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for GNU Modula-2 Extensions
+ 'm2pim+gm2' : (
+ common_pseudo_builtins,
+ pim_additional_pseudo_builtins,
+ gm2_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for p1 Modula-2 Extensions
+ 'm2iso+p1' : (
+ common_pseudo_builtins,
+ iso_additional_pseudo_builtins,
+ p1_additional_pseudo_builtins,
+ ),
+
+ # Pseudo-module builtins for XDS Modula-2 Extensions
+ 'm2iso+xds' : (
+ common_pseudo_builtins,
+ iso_additional_pseudo_builtins,
+ xds_additional_pseudo_builtins,
+ ),
+ }
+
+ # Standard Library ADTs Database
+ stdlib_adts_db = {
+ # Empty entry for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Standard Library ADTs for PIM Modula-2
+ 'm2pim' : (
+ # No first class library types
+ ),
+
+ # Standard Library ADTs for ISO Modula-2
+ 'm2iso' : (
+ # No first class library types
+ ),
+
+ # Standard Library ADTs for Modula-2 R10
+ 'm2r10' : (
+ m2r10_stdlib_adt_identifiers,
+ ),
+
+ # Standard Library ADTs for Objective Modula-2
+ 'objm2' : (
+ m2r10_stdlib_adt_identifiers,
+ ),
+
+ # Standard Library ADTs for Aglet Modula-2
+ 'm2iso+aglet' : (
+ # No first class library types
+ ),
+
+ # Standard Library ADTs for GNU Modula-2
+ 'm2pim+gm2' : (
+ # No first class library types
+ ),
+
+ # Standard Library ADTs for p1 Modula-2
+ 'm2iso+p1' : (
+ # No first class library types
+ ),
+
+ # Standard Library ADTs for XDS Modula-2
+ 'm2iso+xds' : (
+ # No first class library types
+ ),
+ }
+
+ # Standard Library Modules Database
+ # Maps each dialect name to a tuple of the standard library module
+ # identifier datasets (themselves tuples of strings) for that dialect.
+ stdlib_modules_db = {
+ # Empty entry for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Standard Library Modules for PIM Modula-2
+ 'm2pim' : (
+ pim_stdlib_module_identifiers,
+ ),
+
+ # Standard Library Modules for ISO Modula-2
+ 'm2iso' : (
+ iso_stdlib_module_identifiers,
+ ),
+
+ # Standard Library Modules for Modula-2 R10
+ # NOTE(review): this entry also lists the ADT identifiers, while the
+ # 'objm2' entry below does not -- confirm which of the two is intended.
+ 'm2r10' : (
+ m2r10_stdlib_blueprint_identifiers,
+ m2r10_stdlib_module_identifiers,
+ m2r10_stdlib_adt_identifiers,
+ ),
+
+ # Standard Library Modules for Objective Modula-2
+ 'objm2' : (
+ m2r10_stdlib_blueprint_identifiers,
+ m2r10_stdlib_module_identifiers,
+ ),
+
+ # Standard Library Modules for Aglet Modula-2
+ 'm2iso+aglet' : (
+ iso_stdlib_module_identifiers,
+ ),
+
+ # Standard Library Modules for GNU Modula-2
+ 'm2pim+gm2' : (
+ pim_stdlib_module_identifiers,
+ ),
+
+ # Standard Library Modules for p1 Modula-2
+ 'm2iso+p1' : (
+ iso_stdlib_module_identifiers,
+ ),
+
+ # Standard Library Modules for XDS Modula-2
+ 'm2iso+xds' : (
+ iso_stdlib_module_identifiers,
+ ),
+ }
+
+ # Standard Library Types Database
+ #
+ # Maps each dialect identifier to a tuple of identifier collections naming
+ # the standard library types of that dialect. set_dialect() merges the
+ # collections of the selected dialect, minus builtins, into self.types;
+ # get_tokens_unprocessed() renders matching names as Name.Class.
+ stdlib_types_db = {
+ # Empty entry for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Standard Library Types for PIM Modula-2
+ 'm2pim' : (
+ pim_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for ISO Modula-2
+ 'm2iso' : (
+ iso_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for Modula-2 R10
+ 'm2r10' : (
+ m2r10_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for Objective Modula-2
+ 'objm2' : (
+ m2r10_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for Aglet Modula-2
+ 'm2iso+aglet' : (
+ iso_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for GNU Modula-2
+ 'm2pim+gm2' : (
+ pim_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for p1 Modula-2
+ 'm2iso+p1' : (
+ iso_stdlib_type_identifiers,
+ ),
+
+ # Standard Library Types for XDS Modula-2
+ 'm2iso+xds' : (
+ iso_stdlib_type_identifiers,
+ ),
+ }
+
+ # Standard Library Procedures Database
+ #
+ # Maps each dialect identifier to a tuple of identifier collections naming
+ # the standard library procedures of that dialect. set_dialect() merges
+ # the collections of the selected dialect, minus builtins, into
+ # self.procedures; get_tokens_unprocessed() renders matching names as
+ # Name.Function.
+ stdlib_procedures_db = {
+ # Empty entry for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Standard Library Procedures for PIM Modula-2
+ 'm2pim' : (
+ pim_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for ISO Modula-2
+ 'm2iso' : (
+ iso_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for Modula-2 R10
+ 'm2r10' : (
+ m2r10_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for Objective Modula-2
+ 'objm2' : (
+ m2r10_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for Aglet Modula-2
+ 'm2iso+aglet' : (
+ iso_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for GNU Modula-2
+ 'm2pim+gm2' : (
+ pim_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for p1 Modula-2
+ 'm2iso+p1' : (
+ iso_stdlib_proc_identifiers,
+ ),
+
+ # Standard Library Procedures for XDS Modula-2
+ 'm2iso+xds' : (
+ iso_stdlib_proc_identifiers,
+ ),
+ }
+
+ # Standard Library Variables Database
+ #
+ # Maps each dialect identifier to a tuple of identifier collections naming
+ # the standard library variables of that dialect. set_dialect() merges the
+ # collections of the selected dialect, minus builtins, into
+ # self.variables; get_tokens_unprocessed() renders matching names as
+ # Name.Variable.
+ stdlib_variables_db = {
+ # Empty entry for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Standard Library Variables for PIM Modula-2
+ 'm2pim' : (
+ pim_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for ISO Modula-2
+ 'm2iso' : (
+ iso_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for Modula-2 R10
+ 'm2r10' : (
+ m2r10_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for Objective Modula-2
+ 'objm2' : (
+ m2r10_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for Aglet Modula-2
+ 'm2iso+aglet' : (
+ iso_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for GNU Modula-2
+ 'm2pim+gm2' : (
+ pim_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for p1 Modula-2
+ 'm2iso+p1' : (
+ iso_stdlib_var_identifiers,
+ ),
+
+ # Standard Library Variables for XDS Modula-2
+ 'm2iso+xds' : (
+ iso_stdlib_var_identifiers,
+ ),
+ }
+
+ # Standard Library Constants Database
+ #
+ # Maps each dialect identifier to a tuple of identifier collections naming
+ # the standard library constants of that dialect. set_dialect() merges the
+ # collections of the selected dialect, minus builtins, into
+ # self.constants; get_tokens_unprocessed() renders matching names as
+ # Name.Constant.
+ stdlib_constants_db = {
+ # Empty entry for unknown dialect
+ 'unknown' : (
+ # LEAVE THIS EMPTY
+ ),
+ # Standard Library Constants for PIM Modula-2
+ 'm2pim' : (
+ pim_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for ISO Modula-2
+ 'm2iso' : (
+ iso_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for Modula-2 R10
+ 'm2r10' : (
+ m2r10_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for Objective Modula-2
+ 'objm2' : (
+ m2r10_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for Aglet Modula-2
+ 'm2iso+aglet' : (
+ iso_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for GNU Modula-2
+ 'm2pim+gm2' : (
+ pim_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for p1 Modula-2
+ 'm2iso+p1' : (
+ iso_stdlib_const_identifiers,
+ ),
+
+ # Standard Library Constants for XDS Modula-2
+ 'm2iso+xds' : (
+ iso_stdlib_const_identifiers,
+ ),
+ }
+
+# M e t h o d s
+
+    def __init__(self, **options):
+        """Initialise a lexer instance from its options.
+
+        Options read here:
+
+        `dialect`
+            List of dialect names. The first entry that names a known
+            dialect is selected; without a valid entry the lexer runs in
+            fallback mode using the unknown dialect.
+        `style`
+            List of style names. If it contains 'algol' or 'algol_nu' the
+            lexer operates in Algol publication mode.
+        `treat_stdlib_adts_as_builtins`
+            Boolean flag (default: True).
+
+        All options are also passed on to the RegexLexer initialiser.
+        """
+        # Alias for the unknown dialect. Other methods of this class (for
+        # instance get_tokens_unprocessed) read this module-level name, so
+        # it must be bound before any of them runs.
+        global UNKNOWN
+        UNKNOWN = self.dialects[0]
+
+        # check dialect options
+        #
+        # Every entry after the leading unknown placeholder is selectable.
+        # This is the same range get_dialect_from_dialect_tag() accepts for
+        # embedded dialect tags, so the last dialect is no longer dropped.
+        selectable = self.dialects[1:]
+        requested = get_list_opt(options, 'dialect', [])
+        # first valid dialect option wins, fallback mode is the default
+        chosen = next(
+            (name for name in requested if name in selectable), UNKNOWN)
+        self.set_dialect(chosen)
+        self.dialect_set_by_tag = False
+
+        # check style options, use lowercase mode for Algol styles
+        styles = get_list_opt(options, 'style', [])
+        self.algol_publication_mode = (
+            'algol' in styles or 'algol_nu' in styles)
+
+        # check option flags
+        self.treat_stdlib_adts_as_builtins = get_bool_opt(
+            options, 'treat_stdlib_adts_as_builtins', True)
+
+        # call superclass initialiser
+        RegexLexer.__init__(self, **options)
+
+    def set_dialect(self, dialect_id):
+        """Set the lexer to the specified dialect.
+
+        ``dialect_id`` is looked up in ``self.dialects``; a name that is not
+        listed there selects the first entry (the unknown dialect) instead.
+        The identifier collections registered for the resulting dialect in
+        the class-level ``*_db`` tables are merged into sets and stored on
+        the instance as ``lexemes_to_reject``, ``reserved_words``,
+        ``builtins``, ``pseudo_builtins``, ``adts``, ``modules``, ``types``,
+        ``procedures``, ``variables`` and ``constants``, together with the
+        dialect name itself in ``dialect``.
+        """
+        # check dialect name against known dialects, default to unknown
+        if dialect_id in self.dialects:
+            dialect = dialect_id
+        else:
+            dialect = self.dialects[0]
+
+        def merged(database, excluded=frozenset()):
+            # union of all identifier collections the database holds for
+            # the dialect, minus the identifiers in `excluded`
+            identifiers = set()
+            for collection in database[dialect]:
+                identifiers.update(collection)
+            return identifiers - excluded
+
+        lexemes_to_reject_set = merged(self.lexemes_to_reject_db)
+        reswords_set = merged(self.reserved_words_db)
+
+        # builtins, pseudo-builtins and ADTs exclude reserved words
+        builtins_set = merged(self.builtins_db, reswords_set)
+        pseudo_builtins_set = merged(self.pseudo_builtins_db, reswords_set)
+        adts_set = merged(self.stdlib_adts_db, reswords_set)
+
+        # standard library identifiers exclude builtins
+        modules_set = merged(self.stdlib_modules_db, builtins_set)
+        types_set = merged(self.stdlib_types_db, builtins_set)
+        procedures_set = merged(self.stdlib_procedures_db, builtins_set)
+        variables_set = merged(self.stdlib_variables_db, builtins_set)
+        constants_set = merged(self.stdlib_constants_db, builtins_set)
+
+        # update lexer state
+        self.dialect = dialect
+        self.lexemes_to_reject = lexemes_to_reject_set
+        self.reserved_words = reswords_set
+        self.builtins = builtins_set
+        self.pseudo_builtins = pseudo_builtins_set
+        self.adts = adts_set
+        self.modules = modules_set
+        self.types = types_set
+        self.procedures = procedures_set
+        self.variables = variables_set
+        self.constants = constants_set
+
+    def get_dialect_from_dialect_tag(self, dialect_tag):
+        """Return the dialect named by a dialect tag comment string.
+
+        A dialect tag is a comment of the form ``(*!<name>*)``. The name
+        between the delimiters is checked against the known dialects, that
+        is every entry of ``self.dialects`` after the first. If a match is
+        found the matching name is returned; otherwise, or if the string is
+        not a well-formed tag, the first entry of ``self.dialects`` (the
+        unknown dialect) is returned.
+        """
+        # constants
+        left_tag_delim = '(*!'
+        right_tag_delim = '*)'
+        unknown = self.dialects[0]
+
+        # check comment string for dialect indicator; the length test
+        # rejects a tag with an empty indicator
+        min_len = len(left_tag_delim) + len(right_tag_delim)
+        if len(dialect_tag) <= min_len:
+            return unknown
+        if not dialect_tag.startswith(left_tag_delim):
+            return unknown
+        if not dialect_tag.endswith(right_tag_delim):
+            return unknown
+
+        # extract dialect indicator
+        indicator = dialect_tag[len(left_tag_delim):-len(right_tag_delim)]
+
+        # check against known dialects, the unknown placeholder excluded
+        if indicator in self.dialects[1:]:
+            return indicator
+        return unknown
+
+    def get_tokens_unprocessed(self, text):
+        """Intercept the token stream, adjust token attributes, yield them.
+
+        The tokens produced by RegexLexer are post-processed one by one:
+
+        * the first Comment.Special token that carries a valid dialect tag
+          switches the lexer to the tagged dialect;
+        * Name tokens are reclassified as reserved words, builtins,
+          pseudo-builtins, ADTs or standard library identifiers of the
+          current dialect;
+        * number literals, comments, pragmas and other lexemes that the
+          current dialect does not support are marked as Error;
+        * in Algol publication mode reserved words and builtins are
+          lowercased and some operators are replaced by symbols.
+        """
+        # lexeme substitutions applied in Algol publication mode
+        algol_lexemes = {
+            '#': u'≠',
+            '<=': u'≤',
+            '>=': u'≥',
+            '==': u'≡',
+            '*.': u'•',
+        }
+        # dialects that follow the Modula-2 R10 literal and comment rules
+        r10_family = ('m2r10', 'objm2')
+
+        stream = RegexLexer.get_tokens_unprocessed(self, text)
+        for index, token, value in stream:
+            #
+            # a dialect tag is honoured only until one has been applied
+            if token == Comment.Special and not self.dialect_set_by_tag:
+                tagged_dialect = self.get_dialect_from_dialect_tag(value)
+                if tagged_dialect != UNKNOWN:
+                    # reset reserved words and builtins for the new dialect
+                    self.set_dialect(tagged_dialect)
+                    self.dialect_set_by_tag = True
+            #
+            # reserved words, predefined and stdlib identifiers
+            if token is Name:
+                # whether the name is lowercased in Algol publication mode
+                fold_case = False
+                if value in self.reserved_words:
+                    token, fold_case = Keyword.Reserved, True
+                elif value in self.builtins:
+                    token, fold_case = Name.Builtin, True
+                elif value in self.pseudo_builtins:
+                    token, fold_case = Name.Builtin.Pseudo, True
+                elif value in self.adts:
+                    if self.treat_stdlib_adts_as_builtins:
+                        token, fold_case = Name.Builtin.Pseudo, True
+                    else:
+                        token = Name.Namespace
+                else:
+                    # remaining standard library identifiers, first match
+                    # wins; built per token as a tag may switch the dialect
+                    stdlib_kinds = (
+                        (self.modules, Name.Namespace),
+                        (self.types, Name.Class),
+                        (self.procedures, Name.Function),
+                        (self.variables, Name.Variable),
+                        (self.constants, Name.Constant),
+                    )
+                    for identifiers, kind in stdlib_kinds:
+                        if value in identifiers:
+                            token = kind
+                            break
+                if fold_case and self.algol_publication_mode:
+                    value = value.lower()
+            #
+            elif token in Number:
+                if self.dialect in r10_family:
+                    # M2 R10 and ObjM2 reject base-8 literals, suffix
+                    # base-16 literals and real numbers with E
+                    if token is Number.Oct \
+                            or (token is Number.Hex and 'H' in value) \
+                            or (token is Number.Float and 'E' in value):
+                        token = Error
+                elif self.dialect != UNKNOWN:
+                    # PIM and ISO dialects reject prefix number literals
+                    # and literals with digit separators
+                    if "'" in value or value[:2] in ('0b', '0x', '0u'):
+                        token = Error
+            #
+            elif token in Comment:
+                if token is Comment.Single:
+                    # single line comments are errors in PIM and ISO dialects
+                    if self.dialect != UNKNOWN \
+                            and self.dialect not in r10_family:
+                        token = Error
+                elif token is Comment.Preproc:
+                    pim_dialect = self.dialect.startswith('m2pim')
+                    if value.startswith('<*'):
+                        # ISO pragma is an error in PIM dialects
+                        if pim_dialect:
+                            token = Error
+                    elif value.startswith('(*$'):
+                        # PIM pragma is a plain comment in other dialects
+                        if self.dialect != UNKNOWN and not pim_dialect:
+                            token = Comment.Multiline
+            #
+            else:  # token is neither Name, Number nor Comment
+                #
+                # lexemes in the dialect's error token set become errors
+                if value in self.lexemes_to_reject:
+                    token = Error
+                #
+                # substitute lexemes when in Algol mode
+                if self.algol_publication_mode:
+                    value = algol_lexemes.get(value, value)
+
+            # return result
+            yield index, token, value
+
+# end of modula2.py
diff --git a/pygments/lexers/pascal.py b/pygments/lexers/pascal.py
index 2895fba7..d3ce6a3a 100644
--- a/pygments/lexers/pascal.py
+++ b/pygments/lexers/pascal.py
@@ -18,7 +18,9 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Error
from pygments.scanner import Scanner
-__all__ = ['DelphiLexer', 'Modula2Lexer', 'AdaLexer']
+from pygments.lexers.modula2 import Modula2Lexer
+
+__all__ = ['DelphiLexer', 'AdaLexer']
class DelphiLexer(Lexer):
@@ -505,198 +507,6 @@ class DelphiLexer(Lexer):
yield scanner.start_pos, token, scanner.match or ''
-class Modula2Lexer(RegexLexer):
- """
- For `Modula-2 <http://www.modula2.org/>`_ source code.
-
- Additional options that determine which keywords are highlighted:
-
- `pim`
- Select PIM Modula-2 dialect (default: True).
- `iso`
- Select ISO Modula-2 dialect (default: False).
- `objm2`
- Select Objective Modula-2 dialect (default: False).
- `gm2ext`
- Also highlight GNU extensions (default: False).
-
- .. versionadded:: 1.3
- """
- name = 'Modula-2'
- aliases = ['modula2', 'm2']
- filenames = ['*.def', '*.mod']
- mimetypes = ['text/x-modula2']
-
- flags = re.MULTILINE | re.DOTALL
-
- tokens = {
- 'whitespace': [
- (r'\n+', Text), # blank lines
- (r'\s+', Text), # whitespace
- ],
- 'identifiers': [
- (r'([a-zA-Z_$][\w$]*)', Name),
- ],
- 'numliterals': [
- (r'[01]+B', Number.Bin), # binary number (ObjM2)
- (r'[0-7]+B', Number.Oct), # octal number (PIM + ISO)
- (r'[0-7]+C', Number.Oct), # char code (PIM + ISO)
- (r'[0-9A-F]+C', Number.Hex), # char code (ObjM2)
- (r'[0-9A-F]+H', Number.Hex), # hexadecimal number
- (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number
- (r'[0-9]+\.[0-9]+', Number.Float), # real number
- (r'[0-9]+', Number.Integer), # decimal whole number
- ],
- 'strings': [
- (r"'(\\\\|\\'|[^'])*'", String), # single quoted string
- (r'"(\\\\|\\"|[^"])*"', String), # double quoted string
- ],
- 'operators': [
- (r'[*/+=#~&<>\^-]', Operator),
- (r':=', Operator), # assignment
- (r'@', Operator), # pointer deref (ISO)
- (r'\.\.', Operator), # ellipsis or range
- (r'`', Operator), # Smalltalk message (ObjM2)
- (r'::', Operator), # type conversion (ObjM2)
- ],
- 'punctuation': [
- (r'[()\[\]{},.:;|]', Punctuation),
- ],
- 'comments': [
- (r'//.*?\n', Comment.Single), # ObjM2
- (r'/\*(.*?)\*/', Comment.Multiline), # ObjM2
- (r'\(\*([^$].*?)\*\)', Comment.Multiline),
- # TO DO: nesting of (* ... *) comments
- ],
- 'pragmas': [
- (r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM
- (r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2
- ],
- 'root': [
- include('whitespace'),
- include('comments'),
- include('pragmas'),
- include('identifiers'),
- include('numliterals'),
- include('strings'),
- include('operators'),
- include('punctuation'),
- ]
- }
-
- pim_reserved_words = [
- # 40 reserved words
- 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION',
- 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR',
- 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD',
- 'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED',
- 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
- 'UNTIL', 'VAR', 'WHILE', 'WITH',
- ]
-
- pim_pervasives = [
- # 31 pervasives
- 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC',
- 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL',
- 'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD',
- 'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL',
- ]
-
- iso_reserved_words = [
- # 46 reserved words
- 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
- 'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY',
- 'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN',
- 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER',
- 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY',
- 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
- 'WITH',
- ]
-
- iso_pervasives = [
- # 42 pervasives
- 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX',
- 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH',
- 'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH',
- 'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW',
- 'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE',
- 'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL',
- ]
-
- objm2_reserved_words = [
- # base language, 42 reserved words
- 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
- 'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF',
- 'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD',
- 'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE',
- 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE',
- 'UNTIL', 'VAR', 'VARIADIC', 'WHILE',
- # OO extensions, 16 reserved words
- 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
- 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
- 'SUPER', 'TRY',
- ]
-
- objm2_pervasives = [
- # base language, 38 pervasives
- 'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE',
- 'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD',
- 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL',
- 'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX',
- 'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF',
- # OO extensions, 3 pervasives
- 'OBJECT', 'NO', 'YES',
- ]
-
- gnu_reserved_words = [
- # 10 additional reserved words
- 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
- '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
- ]
-
- gnu_pervasives = [
- # 21 identifiers, actually from pseudo-module SYSTEM
- # but we will highlight them as if they were pervasives
- 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
- 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
- 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
- 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
- ]
-
- def __init__(self, **options):
- self.reserved_words = set()
- self.pervasives = set()
- # ISO Modula-2
- if get_bool_opt(options, 'iso', False):
- self.reserved_words.update(self.iso_reserved_words)
- self.pervasives.update(self.iso_pervasives)
- # Objective Modula-2
- elif get_bool_opt(options, 'objm2', False):
- self.reserved_words.update(self.objm2_reserved_words)
- self.pervasives.update(self.objm2_pervasives)
- # PIM Modula-2 (DEFAULT)
- else:
- self.reserved_words.update(self.pim_reserved_words)
- self.pervasives.update(self.pim_pervasives)
- # GNU extensions
- if get_bool_opt(options, 'gm2ext', False):
- self.reserved_words.update(self.gnu_reserved_words)
- self.pervasives.update(self.gnu_pervasives)
- # initialise
- RegexLexer.__init__(self, **options)
-
- def get_tokens_unprocessed(self, text):
- for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
- # check for reserved words and pervasives
- if token is Name:
- if value in self.reserved_words:
- token = Keyword.Reserved
- elif value in self.pervasives:
- token = Keyword.Pervasive
- # return result
- yield index, token, value
-
-
class AdaLexer(RegexLexer):
"""
For Ada source code.
diff --git a/tests/examplefiles/modula2_test_cases.def b/tests/examplefiles/modula2_test_cases.def
new file mode 100644
index 00000000..ce86a55b
--- /dev/null
+++ b/tests/examplefiles/modula2_test_cases.def
@@ -0,0 +1,354 @@
+(* Test Cases for Modula-2 Lexer *)
+
+(* Notes:
+ (1) Without dialect option nor embedded dialect tag, the lexer operates in
+ fallback mode, recognising the *combined* literals, punctuation symbols
+ and operators of all supported dialects, and the *combined* reserved
+ words and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10.
+ (2) If multiple embedded dialect tags are present, the lexer will use the
+ first valid tag and ignore any subsequent dialect tags in the file.
+ (3) An embedded dialect tag overrides any command line dialect option. *)
+
+
+(* Testing command line dialect option *)
+
+(* for PIM Modula-2 : pygmentize -O full,dialect=m2pim ...
+ for ISO Modula-2 : pygmentize -O full,dialect=m2iso ...
+ for Modula-2 R10 : pygmentize -O full,dialect=m2r10 ...
+ for Objective Modula-2 : pygmentize -O full,dialect=objm2 ... *)
+
+(* for Aglet extensions : pygmentize -O full,dialect=m2iso+aglet ...
+ for GNU extensions : pygmentize -O full,dialect=m2pim+gm2 ...
+ for p1 extensions : pygmentize -O full,dialect=m2iso+p1 ...
+ for XDS extensions : pygmentize -O full,dialect=m2iso+xds ... *)
+
+
+(* Testing embedded dialect tags *)
+
+(* !m2pim*) (* <-- remove whitespace before ! for PIM Modula-2 *)
+(* !m2iso*) (* <-- remove whitespace before ! for ISO Modula-2 *)
+(* !m2r10*) (* <-- remove whitespace before ! for Modula-2 R10 *)
+(* !objm2*) (* <-- remove whitespace before ! for Objective Modula-2 *)
+
+(* !m2iso+aglet*) (* <-- remove whitespace before ! for Aglet extensions *)
+(* !m2pim+gm2*) (* <-- remove whitespace before ! for GNU extensions *)
+(* !m2iso+p1*) (* <-- remove whitespace before ! for p1 extensions *)
+(* !m2iso+xds*) (* <-- remove whitespace before ! for XDS extensions *)
+
+
+(* Dialect Indicating Names *)
+
+(* recognised names should be highlighted *)
+
+QUALIFIED (* PIM and ISO *)
+
+PACKEDSET (* ISO only *)
+
+ARGLIST (* M2 R10 and ObjM2 *)
+
+BYCOPY (* ObjM2 only *)
+
+BITSET8 (* Aglet, GNU and M2 R10 *)
+
+__FILE__ (* GNU only *)
+
+BCD (* p1 and M2 R10 *)
+
+SEQ (* XDS only *)
+
+
+(* Literal Tests *)
+
+(* recognised literals should be rendered as one unit
+ unrecognised literals should be rendered as error *)
+
+ch := 'a'; ch := "a"; (* all dialects *)
+ch := 0u20; unich := 0u2038 (* M2 R10 *)
+
+s := 'The cat said "meow!".';
+s := "It is eight O'clock.";
+
+
+n := 123; n = 1000000; (* all dialects *)
+n := 123; n = 1'000'000; (* M2 R10 *)
+
+n := 0b0110; n:= 0b0110'1100'0111; (* M2 R10 *)
+n := 0xFF00; n:= 0xDEAD'BEEF'0F00; (* M2 R10 *)
+
+r := 1.23; r := 1000000.000001; (* all dialects *)
+r := 1.23; r := 1'000'000.000'001; (* M2 R10 *)
+
+r := 1.234E6; r:= 1.234E-6; r := 1.234567E1000; (* PIM + ISO *)
+r := 1.234e6; r:= 1.234e-6; r := 1.234'567e1'000; (* M2 R10 *)
+
+ch := 0377C; n := 0377B; n := 07FF0H; (* ISO + PIM *)
+
+
+(* Non-Alphabetic Operator Tests *)
+
+(* supported operators should be rendered as one unit
+ unsupported operators should be rendered as errors *)
+
+a := b + c - d * e / f; (* all dialects *)
+
+SetDiff := A \ B; (* M2 R10 *)
+
+dotProduct := v1 *. v2; catArray := array1 +> array2; (* M2 R10 *)
+
+bool := a = b; bool := a > b; bool := a < b;
+bool := a # b; bool := a >= b; bool := a <= b;
+
+bool := a <> b; (* PIM + ISO *)
+
+bool := a == b; (* M2 R10 *)
+
+(*&*) IF a & b THEN ... END; (* PIM + ISO *)
+
+(*~*) IF ~ b THEN ... END; (* PIM + ISO *)
+
+(*::*) int := real :: INTEGER; (* M2 R10 *)
+
+(*++*) FOR i++ IN range DO ... END; (* M2 R10 *)
+(*--*) FOR i-- IN range DO ... END; (* M2 R10 *)
+
+(*^*) next := this^.next; (* all dialects *)
+(*@*) next := this@.next; (* ISO *)
+
+(*`*) str := `NSString alloc init; (* ObjM2 *)
+
+
+(* Punctuation Tests *)
+
+(* supported punctuation should be rendered as one unit
+ unsupported punctuation should be rendered as an error *)
+
+(*.*) Foo.Bar.Baz; (*..*) TYPE Sign = [-1..1] OF INTEGER;
+
+(*|:*) CASE foo OF | 1 : bar | 2 : bam | 3 : boo END;
+(*!:*) CASE foo OF 1 : bar ! 2 : bam ! 3 : boo END; (* ISO *)
+
+(*[]()*) array[n] := foo();
+
+(*{}*) CONST Bar = { 1, 2, 3 };
+
+(*?*) TPROPERTIES = isCollection, isIndexed | isRigid?; (* M2 R10 *)
+
+(*~*) CONST ~ isFoobar = Foo AND Bar; (* M2 R10 *)
+(*->*) isFoobar -> PROCEDURE [ABS]; (* M2 R10 *)
+
+(*<<>>*) GENLIB Foo FROM Template FOR Bar = <<ARRAY OF CHAR>> END; (* M2 R10 *)
+
+
+(* Single Line Comment Test *)
+
+(* should be rendered as comment if supported, as error if unsupported *)
+
+// This is a single line comment (M2 R10 + ObjM2)
+
+
+(* Pragma Delimiter Tests *)
+
+(* PIM style pragma should be rendered as pragma in PIM dialects,
+ as multiline comment in all other dialects. *)
+
+(*$INLINE*) (* PIM *)
+
+(* ISO style pragma should be rendered as error in PIM dialects,
+ as pragma in all other dialects. *)
+
+<*INLINE*> (* all other dialects *)
+
+
+(* Operator Substitution Test When in Algol mode *)
+
+IF foo # bar THEN ... END; (* # should be rendered as not equal symbol *)
+
+IF foo >= bar THEN ... END; (* >= should be rendered as not less symbol *)
+
+IF foo <= bar THEN ... END; (* <= should be rendered as not greater symbol *)
+
+IF foo == bar THEN ... END; (* == should be rendered as identity symbol *)
+
+dotProduct := v1 *. v2; (* *. should be rendered as dot product symbol *)
+
+
+(* Reserved Words and Builtins Test *)
+
+(* supported reserved words and builtins should be highlighted *)
+
+(* reserved words common to all dialects *)
+
+AND ARRAY BEGIN BY CASE CONST DEFINITION DIV DO ELSE ELSIF END EXIT FOR FROM
+IF IMPLEMENTATION IMPORT IN LOOP MOD MODULE NOT OF OR POINTER PROCEDURE
+RECORD REPEAT RETURN SET THEN TO TYPE UNTIL VAR WHILE
+
+(* builtins common to all dialects *)
+
+ABS BOOLEAN CARDINAL CHAR CHR FALSE INTEGER LONGINT LONGREAL
+MAX MIN NIL ODD ORD REAL TRUE
+
+(* pseudo builtins common to all dialects *)
+
+ADDRESS BYTE WORD ADR
+
+
+(* additional reserved words for PIM *)
+
+EXPORT QUALIFIED WITH
+
+(* additional builtins for PIM *)
+
+BITSET CAP DEC DISPOSE EXCL FLOAT HALT HIGH INC INCL NEW NIL PROC SIZE TRUNC VAL
+
+(* additional pseudo-builtins for PIM *)
+
+SYSTEM PROCESS TSIZE NEWPROCESS TRANSFER
+
+
+(* additional reserved words for ISO 10514-1 *)
+
+EXCEPT EXPORT FINALLY FORWARD PACKEDSET QUALIFIED REM RETRY WITH
+
+(* additional reserved words for ISO 10514-2 & ISO 10514-3 *)
+
+ABSTRACT AS CLASS GUARD INHERIT OVERRIDE READONLY REVEAL TRACED UNSAFEGUARDED
+
+(* additional builtins for ISO 10514-1 *)
+
+BITSET CAP CMPLX COMPLEX DEC DISPOSE EXCL FLOAT HALT HIGH IM INC INCL INT
+INTERRUPTIBLE LENGTH LFLOAT LONGCOMPLEX NEW PROC PROTECTION RE SIZE TRUNC
+UNINTERRUBTIBLE VAL
+
+(* additional builtins for ISO 10514-2 & ISO 10514-3 *)
+
+CREATE DESTROY EMPTY ISMEMBER SELF
+
+
+(* additional pseudo-builtins for ISO *)
+
+(* SYSTEM *)
+SYSTEM BITSPERLOC LOCSPERBYTE LOCSPERWORD LOC ADDADR SUBADR DIFADR MAKEADR
+ADR ROTATE SHIFT CAST TSIZE
+
+(* COROUTINES *)
+COROUTINES ATTACH COROUTINE CURRENT DETACH HANDLER INTERRUPTSOURCE IOTRANSFER
+IsATTACHED LISTEN NEWCOROUTINE PROT TRANSFER
+
+(* EXCEPTIONS *)
+EXCEPTIONS AllocateSource CurrentNumber ExceptionNumber ExceptionSource
+GetMessage IsCurrentSource IsExceptionalExecution RAISE
+
+(* TERMINATION *)
+TERMINATION IsTerminating HasHalted
+
+(* M2EXCEPTION *)
+M2EXCEPTION M2Exceptions M2Exception IsM2Exception indexException rangeException
+caseSelectException invalidLocation functionException wholeValueException
+wholeDivException realValueException realDivException complexValueException
+complexDivException protException sysException coException exException
+
+
+(* additional reserved words for M2 R10 *)
+
+ALIAS ARGLIST BLUEPRINT COPY GENLIB INDETERMINATE NEW NONE OPAQUE REFERENTIAL
+RELEASE RETAIN
+
+(* with symbolic assembler language extension *)
+ASM REG
+
+(* additional builtins for M2 R10 *)
+
+CARDINAL COUNT EMPTY EXISTS INSERT LENGTH LONGCARD OCTET PTR PRED READ READNEW
+REMOVE RETRIEVE SORT STORE SUBSET SUCC TLIMIT TMAX TMIN TRUE TSIZE UNICHAR
+WRITE WRITEF
+
+(* additional pseudo-builtins for M2 R10 *)
+
+(* TPROPERTIES *)
+TPROPERTIES PROPERTY LITERAL TPROPERTY TLITERAL TBUILTIN TDYN TREFC TNIL
+TBASE TPRECISION TMAXEXP TMINEXP
+
+(* CONVERSION *)
+CONVERSION TSXFSIZE SXF VAL
+
+(* UNSAFE *)
+UNSAFE CAST INTRINSIC AVAIL ADD SUB ADDC SUBC FETCHADD FETCHSUB SHL SHR ASHR
+ROTL ROTR ROTLC ROTRC BWNOT BWAND BWOR BWXOR BWNAND BWNOR SETBIT TESTBIT
+LSBIT MSBIT CSBITS BAIL HALT TODO FFI ADDR VARGLIST VARGC
+
+(* ATOMIC *)
+ATOMIC INTRINSIC AVAIL SWAP CAS INC DEC BWAND BWNAND BWOR BWXOR
+
+(* COMPILER *)
+COMPILER DEBUG MODNAME PROCNAME LINENUM DEFAULT HASH
+
+(* ASSEMBLER *)
+ASSEMBLER REGISTER SETREG GETREG CODE
+
+
+(* standard library ADT identifiers for M2 R10 *)
+
+(* rendered as builtins when dialect is set to Modula-2 R10,
+ this can be turned off by option treat_stdlib_adts_as_builtins=off *)
+BCD LONGBCD BITSET SHORTBITSET LONGBITSET LONGLONGBITSET COMPLEX LONGCOMPLEX
+SHORTCARD LONGLONGCARD SHORTINT LONGLONGINT POSINT SHORTPOSINT LONGPOSINT
+LONGLONGPOSINT BITSET8 BITSET16 BITSET32 BITSET64 BITSET128 BS8 BS16 BS32
+BS64 BS128 CARDINAL8 CARDINAL16 CARDINAL32 CARDINAL64 CARDINAL128 CARD8
+CARD16 CARD32 CARD64 CARD128 INTEGER8 INTEGER16 INTEGER32 INTEGER64
+INTEGER128 INT8 INT16 INT32 INT64 INT128 STRING UNISTRING
+
+
+(* additional reserved words for ObjM2 *)
+
+(* Note: ObjM2 is a superset of M2 R10 *)
+
+BYCOPY BYREF CLASS CONTINUE CRITICAL INOUT METHOD ON OPTIONAL OUT PRIVATE
+PROTECTED PROTOCOL PUBLIC SUPER TRY
+
+(* additional builtins for ObjM2 *)
+
+OBJECT NO YES
+
+
+(* additional builtins for Aglet Extensions to ISO *)
+
+BITSET8 BITSET16 BITSET32 CARDINAL8 CARDINAL16 CARDINAL32 INTEGER8 INTEGER16
+INTEGER32
+
+
+(* additional reserved words for GNU Extensions to PIM *)
+
+ASM __ATTRIBUTE__ __BUILTIN__ __COLUMN__ __DATE__ __FILE__ __FUNCTION__
+__LINE__ __MODULE__ VOLATILE
+
+(* additional builtins for GNU Extensions to PIM *)
+
+BITSET8 BITSET16 BITSET32 CARDINAL8 CARDINAL16 CARDINAL32 CARDINAL64 COMPLEX32
+COMPLEX64 COMPLEX96 COMPLEX128 INTEGER8 INTEGER16 INTEGER32 INTEGER64 REAL8
+REAL16 REAL32 REAL96 REAL128 THROW
+
+
+(* additional pseudo-builtins for p1 Extensions to ISO *)
+
+BCD
+
+
+(* additional reserved words for XDS Extensions to ISO *)
+
+SEQ
+
+(* additional builtins for XDS Extensions to ISO *)
+
+ASH ASSERT DIFFADR_TYPE ENTIER INDEX LEN LONGCARD SHORTCARD SHORTINT
+
+(* additional pseudo-builtins for XDS Extensions to ISO *)
+
+(* SYSTEM *)
+PROCESS NEWPROCESS BOOL8 BOOL16 BOOL32 CARD8 CARD16 CARD32 INT8 INT16 INT32
+REF MOVE FILL GET PUT CC int unsigned size_t void
+
+(* COMPILER *)
+COMPILER OPTION EQUATION
+
+
+(* end of file *) \ No newline at end of file