diff options
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | CHANGES | 4 | ||||
-rw-r--r-- | docs/src/lexerdevelopment.txt | 2 | ||||
-rw-r--r-- | docs/src/styles.txt | 2 | ||||
-rw-r--r-- | pygments/cmdline.py | 2 | ||||
-rw-r--r-- | pygments/formatters/latex.py | 2 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 4 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 42 | ||||
-rw-r--r-- | pygments/lexers/functional.py | 329 | ||||
-rw-r--r-- | pygments/lexers/math.py | 407 | ||||
-rw-r--r-- | pygments/lexers/other.py | 317 | ||||
-rw-r--r-- | pygments/lexers/postgres.py | 2 | ||||
-rw-r--r-- | pygments/lexers/pypylog.py | 10 | ||||
-rw-r--r-- | pygments/lexers/web.py | 53 | ||||
-rw-r--r-- | tests/examplefiles/example.sml | 156 | ||||
-rw-r--r-- | tests/examplefiles/intsyn.fun | 675 | ||||
-rw-r--r-- | tests/examplefiles/intsyn.sig | 286 | ||||
-rw-r--r-- | tests/examplefiles/psql_session.txt | 1 |
18 files changed, 2079 insertions, 217 deletions
@@ -39,6 +39,7 @@ Other contributors, listed alphabetically, are: * Matthew Harrison -- SVG formatter * Steven Hazel -- Tcl lexer * Aslak Hellesøy -- Gherkin lexer +* Jordi Gutiérrez Hermoso -- Octave lexer * David Hess, Fish Software, Inc. -- Objective-J lexer * Varun Hiremath -- Debian control lexer * Ben Hollis -- Mason lexer @@ -78,6 +79,7 @@ Other contributors, listed alphabetically, are: * Ken Schutte -- Matlab lexers * Tassilo Schweyer -- Io, MOOCode lexers * Joerg Sieker -- ABAP lexer +* Robert Simmons -- Standard ML lexer * Kirill Simonov -- YAML lexer * Steve Spigarelli -- XQuery lexer * Jerome St-Louis -- eC lexer @@ -21,6 +21,8 @@ Version 1.5 * PostgreSQL (#660) * DTD * Gosu + * Octave (PR#22) + * Standard ML (PR#14) - In the LaTeX formatter, escape special &, < and > chars (#648). @@ -41,6 +43,8 @@ Version 1.5 - Fix generic type highlighting in ActionScript 3 (#666). +- Fixes to the Clojure lexer (PR#9). + Version 1.4 ----------- diff --git a/docs/src/lexerdevelopment.txt b/docs/src/lexerdevelopment.txt index b67d6c87..6ffc4b72 100644 --- a/docs/src/lexerdevelopment.txt +++ b/docs/src/lexerdevelopment.txt @@ -121,7 +121,7 @@ sections, comments and key = value pairs: } The lexer first looks for whitespace, comments and section names. And later it -looks for a line that looks like a key, value pair, seperated by an ``'='`` +looks for a line that looks like a key, value pair, separated by an ``'='`` sign, and optional whitespace. The `bygroups` helper makes sure that each group is yielded with a different diff --git a/docs/src/styles.txt b/docs/src/styles.txt index acb70c37..e3e9cfb3 100644 --- a/docs/src/styles.txt +++ b/docs/src/styles.txt @@ -85,7 +85,7 @@ Here a small overview of all allowed styles: ``bold`` render text as bold ``nobold`` - don't render text as bold (to prevent subtokens behing highlighted bold) + don't render text as bold (to prevent subtokens being highlighted bold) ``italic`` render text italic ``noitalic`` diff --git a/pygments/cmdline.py b/pygments/cmdline.py index 16a0d1a8..7db761ca 100644 --- a/pygments/cmdline.py +++ b/pygments/cmdline.py @@ -219,7 +219,7 @@ def main(args=sys.argv): return 0 if opts.pop('-V', None) is not None: - print 'Pygments version %s, (c) 2006-2008 by Georg Brandl.' % __version__ + print 'Pygments version %s, (c) 2006-2011 by Georg Brandl.' % __version__ return 0 # handle ``pygmentize -L`` diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py index d658e7ca..e109ebdd 100644 --- a/pygments/formatters/latex.py +++ b/pygments/formatters/latex.py @@ -286,7 +286,7 @@ class LatexFormatter(Formatter): cp = self.commandprefix styles = [] for name, definition in self.cmd2def.iteritems(): - styles.append(r'\def\%s@tok@%s{%s}' % (cp, name, definition)) + styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % (cp, name, definition)) return STYLE_TEMPLATE % {'cp': self.commandprefix, 'styles': '\n'.join(styles)} diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 5878e135..4772a9a0 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -136,7 +136,7 @@ LEXERS = { 'MakoXmlLexer': ('pygments.lexers.templates', 'XML+Mako', ('xml+mako',), (), ('application/xml+mako',)), 'MaqlLexer': ('pygments.lexers.other', 'MAQL', ('maql',), ('*.maql',), ('text/x-gooddata-maql', 'application/x-gooddata-maql')), 'MasonLexer': ('pygments.lexers.templates', 'Mason', ('mason',), ('*.m', '*.mhtml', '*.mc', '*.mi', 'autohandler', 'dhandler'), ('application/x-mason',)), - 'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab', 'octave'), ('*.m',), ('text/matlab',)), + 'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',), ('*.m',), ('text/matlab',)), 'MatlabSessionLexer': ('pygments.lexers.math', 'Matlab session', ('matlabsession',), (), ()), 'MiniDLexer': ('pygments.lexers.agile', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)), 'ModelicaLexer': ('pygments.lexers.other', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)), @@ -161,6 +161,7 @@ LEXERS = { 'ObjectiveJLexer': ('pygments.lexers.web', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)), 'OcamlLexer': ('pygments.lexers.compiled', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)), 'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)), + 'OctaveLexer': ('pygments.lexers.math', 'Octave', ('octave',), ('*.m',), ('text/octave',)), 'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)), 'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')), 'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)), @@ -195,6 +196,7 @@ LEXERS = { 'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)), 'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('rb', 'ruby', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')), 'SLexer': ('pygments.lexers.math', 'S', ('splus', 's', 'r'), ('*.S', '*.R'), ('text/S-plus', 'text/S', 'text/R')), + 'SMLLexer': ('pygments.lexers.functional', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')), 'SassLexer': ('pygments.lexers.web', 'Sass', ('sass', 'SASS'), ('*.sass',), ('text/x-sass',)), 'ScalaLexer': ('pygments.lexers.compiled', 'Scala', ('scala',), ('*.scala',), ('text/x-scala',)), 'ScamlLexer': ('pygments.lexers.web', 'Scaml', ('scaml', 'SCAML'), ('*.scaml',), ('text/x-scaml',)), diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 12b914c2..64929f51 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -13,7 +13,7 @@ import re from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ LexerContext, include, combined, do_insertions, bygroups, using, this -from pygments.token import Error, Text, Other, \ +from pygments.token import Error, Text, Whitespace, Other, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches from pygments import unistring as uni @@ -1373,7 +1373,7 @@ class ClojureLexer(RegexLexer): ] builtins = [ '.', '..', - '*', '+', '-', '->', '..', '/', '<', '<=', '=', '==', '>', '>=', + '*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=', 'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns', 'alter', 'and', 'append-child', 'apply', 'array-map', 'aset', 'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float', @@ -1426,7 +1426,16 @@ class ClojureLexer(RegexLexer): # valid names for identifiers # well, names can only not consist fully of numbers # but this should be good enough for now - valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+' + + # TODO / should divide keywords/symbols into namespace/rest + # but that's hard, so just pretend / is part of the name + valid_name = r'[\w!$%*+,<=>?/-]+' + + prefix_operators = ['`', "'", '#', '^', '~', '~@'] + + def _multi_escape(entries): + return '(%s)' % ('|'.join([ + re.escape(entry) + ' ' for entry in entries])) tokens = { 'root' : [ @@ -1435,40 +1444,33 @@ class ClojureLexer(RegexLexer): (r';.*$', Comment.Single), # whitespaces - usually not relevant - (r'\s+', Text), + (r'[,\s]+', Whitespace), # numbers (r'-?\d+\.\d+', Number.Float), (r'-?\d+', Number.Integer), - # support for uncommon kinds of numbers - - # have to figure out what the characters mean - #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), + (r'0x-?[abcdef\d]+', Number.Hex), # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), - (r"\\([()/'\".'_!§$%& ?;=#+-]{1}|[a-zA-Z0-9]+)", String.Char), + (r"\\(.|[a-z]+)", String.Char), - # constants - (r'(#t|#f)', Name.Constant), + # keywords + (r':' + valid_name, Name.Constant), # special operators - (r"('|#|`|,@|,|\.)", Operator), + (_multi_escape(prefix_operators), + Operator + ), # highlight the keywords - ('(%s)' % '|'.join([ - re.escape(entry) + ' ' for entry in keywords]), + (_multi_escape(keywords), Keyword ), - # first variable in a quoted string like - # '(this is syntactic sugar) - (r"(?<='\()" + valid_name, Name.Variable), - (r"(?<=#\()" + valid_name, Name.Variable), - # highlight the builtins - ("(?<=\()(%s)" % '|'.join([ - re.escape(entry) + ' ' for entry in builtins]), + (_multi_escape(builtins), Name.Builtin ), diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 5d4c62e3..204f44c5 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -13,11 +13,12 @@ import re from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions from pygments.token import Text, Comment, Operator, Keyword, Name, \ - String, Number, Punctuation, Literal, Generic + String, Number, Punctuation, Literal, Generic, Error -__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer', - 'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer'] +__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', + 'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer', + 'ErlangShellLexer'] class SchemeLexer(RegexLexer): @@ -515,6 +516,328 @@ class LiterateHaskellLexer(Lexer): yield item +class SMLLexer(RegexLexer): + """ + For the Standard ML language. + + *New in Pygments 1.5.* + """ + + name = 'Standard ML' + aliases = ['sml'] + filenames = ['*.sml', '*.sig', '*.fun',] + mimetypes = ['text/x-standardml', 'application/x-standardml'] + + alphanumid_reserved = [ + # Core + 'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else', + 'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', + 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse', + 'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while', + # Modules + 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', + 'struct', 'structure', 'where', + ] + + symbolicid_reserved = [ + # Core + ':', '\|', '=', '=>', '->', '#', + # Modules + ':>', + ] + + nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ] + + alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*" + symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+" + + # A character constant is a sequence of the form #s, where s is a string + # constant denoting a string of size one character. This setup just parses + # the entire string as either a String.Double or a String.Char (depending + # on the argument), even if the String.Char is an erronous + # multiple-character string. + def stringy (whatkind): + return [ + (r'[^"\\]', whatkind), + (r'\\[\\\"abtnvfr]', String.Escape), + (r'\\\^[@-^]', String.Escape), + (r'\\[0-9]{3}', String.Escape), + (r'\\u[0-9a-fA-F]{4}', String.Escape), + (r'\\\s+\\', String.Interpol), + (r'"', whatkind, '#pop'), + ] + + # Callbacks for distinguishing tokens and reserved words + def long_id_callback(self, match): + if match.group(1) in self.alphanumid_reserved: token = Error + else: token = Name.Namespace + yield match.start(1), token, match.group(1) + yield match.start(2), Punctuation, match.group(2) + + def end_id_callback(self, match): + if match.group(1) in self.alphanumid_reserved: token = Error + elif match.group(1) in self.symbolicid_reserved: token = Error + else: token = Name + yield match.start(1), token, match.group(1) + + def id_callback(self, match): + str = match.group(1) + if str in self.alphanumid_reserved: token = Keyword.Reserved + elif str in self.symbolicid_reserved: token = Punctuation + else: token = Name + yield match.start(1), token, str + + tokens = { + # Whitespace and comments are (almost) everywhere + 'whitespace': [ + (r'\s+', Text), + (r'\(\*', Comment.Multiline, 'comment'), + ], + + 'delimiters': [ + # This lexer treats these delimiters specially: + # Delimiters define scopes, and the scope is how the meaning of + # the `|' is resolved - is it a case/handle expression, or function + # definition by cases? (This is not how the Definition works, but + # it's how MLton behaves, see http://mlton.org/SMLNJDeviations) + (r'\(|\[|{', Punctuation, 'main'), + (r'\)|\]|}', Punctuation, '#pop'), + (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')), + (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'), + (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'), + ], + + 'core': [ + # Punctuation that doesn't overlap symbolic identifiers + (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]), + Punctuation), + + # Special constants: strings, floats, numbers in decimal and hex + (r'#"', String.Char, 'char'), + (r'"', String.Double, 'string'), + (r'~?0x[0-9a-fA-F]+', Number.Hex), + (r'0wx[0-9a-fA-F]+', Number.Hex), + (r'0w\d+', Number.Integer), + (r'~?\d+\.\d+[eE]~?\d+', Number.Float), + (r'~?\d+\.\d+', Number.Float), + (r'~?\d+[eE]~?\d+', Number.Float), + (r'~?\d+', Number.Integer), + + # Labels + (r'#\s*[1-9][0-9]*', Name.Label), + (r'#\s*(%s)' % alphanumid_re, Name.Label), + (r'#\s+(%s)' % symbolicid_re, Name.Label), + # Some reserved words trigger a special, local lexer state change + (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'), + (r'(?=\b(exception)\b(?!\'))', Text, ('ename')), + (r'\b(functor|include|open|signature|structure)\b(?!\')', + Keyword.Reserved, 'sname'), + (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'), + + # Regular identifiers, long and otherwise + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"), + (r'(%s)' % alphanumid_re, id_callback), + (r'(%s)' % symbolicid_re, id_callback), + ], + 'dotted': [ + (r'(%s)(\.)' % alphanumid_re, long_id_callback), + (r'(%s)' % alphanumid_re, end_id_callback, "#pop"), + (r'(%s)' % symbolicid_re, end_id_callback, "#pop"), + (r'\s+', Error), + (r'\S+', Error), + ], + + + # Main parser (prevents errors in files that have scoping errors) + 'root': [ (r'', Text, 'main') ], + + # In this scope, I expect '|' to not be followed by a function name, + # and I expect 'and' to be followed by a binding site + 'main': [ + include('whitespace'), + + # Special behavior of val/and/fun + (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'), + (r'\b(fun)\b(?!\')', Keyword.Reserved, + ('#pop', 'main-fun', 'fname')), + + include('delimiters'), + include('core'), + (r'\S+', Error), + ], + + # In this scope, I expect '|' and 'and' to be followed by a function + 'main-fun': [ + include('whitespace'), + + (r'\s', Text), + (r'\(\*', Comment.Multiline, 'comment'), + + # Special behavior of val/and/fun + (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'), + (r'\b(val)\b(?!\')', Keyword.Reserved, + ('#pop', 'main', 'vname')), + + # Special behavior of '|' and '|'-manipulating keywords + (r'\|', Punctuation, 'fname'), + (r'\b(case|handle)\b(?!\')', Keyword.Reserved, + ('#pop', 'main')), + + include('delimiters'), + include('core'), + (r'\S+', Error), + ], + + # Character and string parsers + 'char': stringy(String.Char), + 'string': stringy(String.Double), + + 'breakout': [ + (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'), + ], + + # Dealing with what comes after module system keywords + 'sname': [ + include('whitespace'), + include('breakout'), + + (r'(%s)' % alphanumid_re, Name.Namespace), + (r'', Text, '#pop'), + ], + + # Dealing with what comes after the 'fun' (or 'and' or '|') keyword + 'fname': [ + include('whitespace'), + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + + (r'(%s)' % alphanumid_re, Name.Function, '#pop'), + (r'(%s)' % symbolicid_re, Name.Function, '#pop'), + + # Ignore interesting function declarations like "fun (x + y) = ..." + (r'', Text, '#pop'), + ], + + # Dealing with what comes after the 'val' (or 'and') keyword + 'vname': [ + include('whitespace'), + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + + (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re), + bygroups(Name.Variable, Text, Punctuation), '#pop'), + (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re), + bygroups(Name.Variable, Text, Punctuation), '#pop'), + (r'(%s)' % alphanumid_re, Name.Variable, '#pop'), + (r'(%s)' % symbolicid_re, Name.Variable, '#pop'), + + # Ignore interesting patterns like 'val (x, y)' + (r'', Text, '#pop'), + ], + + # Dealing with what comes after the 'type' (or 'and') keyword + 'tname': [ + include('whitespace'), + include('breakout'), + + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')), + + (r'(%s)' % alphanumid_re, Keyword.Type), + (r'(%s)' % symbolicid_re, Keyword.Type), + (r'\S+', Error, '#pop'), + ], + + # A type binding includes most identifiers + 'typbind': [ + include('whitespace'), + + (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')), + + include('breakout'), + include('core'), + (r'\S+', Error, '#pop'), + ], + + # Dealing with what comes after the 'datatype' (or 'and') keyword + 'dname': [ + include('whitespace'), + include('breakout'), + + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r'\(', Punctuation, 'tyvarseq'), + (r'(=)(\s*)(datatype)', + bygroups(Punctuation, Text, Keyword.Reserved), '#pop'), + (r'=(?!%s)' % symbolicid_re, Punctuation, + ('#pop', 'datbind', 'datcon')), + + (r'(%s)' % alphanumid_re, Keyword.Type), + (r'(%s)' % symbolicid_re, Keyword.Type), + (r'\S+', Error, '#pop'), + ], + + # common case - A | B | C of int + 'datbind': [ + include('whitespace'), + + (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')), + (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')), + (r'\b(of)\b(?!\')', Keyword.Reserved), + + (r'(\|)(\s*)(%s)' % alphanumid_re, + bygroups(Punctuation, Text, Name.Class)), + (r'(\|)(\s+)(%s)' % symbolicid_re, + bygroups(Punctuation, Text, Name.Class)), + + include('breakout'), + include('core'), + (r'\S+', Error), + ], + + # Dealing with what comes after an exception + 'ename': [ + include('whitespace'), + + (r'(exception|and)\b(\s+)(%s)' % alphanumid_re, + bygroups(Keyword.Reserved, Text, Name.Class)), + (r'(exception|and)\b(\s*)(%s)' % symbolicid_re, + bygroups(Keyword.Reserved, Text, Name.Class)), + (r'\b(of)\b(?!\')', Keyword.Reserved), + + include('breakout'), + include('core'), + (r'\S+', Error), + ], + + 'datcon': [ + include('whitespace'), + (r'(%s)' % alphanumid_re, Name.Class, '#pop'), + (r'(%s)' % symbolicid_re, Name.Class, '#pop'), + (r'\S+', Error, '#pop'), + ], + + # Series of type variables + 'tyvarseq': [ + (r'\s', Text), + (r'\(\*', Comment.Multiline, 'comment'), + + (r'\'[0-9a-zA-Z_\']*', Name.Decorator), + (r',', Punctuation), + (r'\)', Punctuation, '#pop'), + (r'', Error, '#pop'), + ], + + 'comment': [ + (r'[^(*)]', Comment.Multiline), + (r'\(\*', Comment.Multiline, '#push'), + (r'\*\)', Comment.Multiline, '#pop'), + (r'[(*)]', Comment.Multiline), + ], + } + + class OcamlLexer(RegexLexer): """ For the OCaml language. diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py index 57af657c..de804aa9 100644 --- a/pygments/lexers/math.py +++ b/pygments/lexers/math.py @@ -17,8 +17,8 @@ from pygments.token import Comment, String, Punctuation, Keyword, Name, \ from pygments.lexers.agile import PythonLexer -__all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'NumPyLexer', - 'RConsoleLexer', 'SLexer'] +__all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer', + 'NumPyLexer', 'RConsoleLexer', 'SLexer'] class MuPADLexer(RegexLexer): @@ -94,13 +94,13 @@ class MuPADLexer(RegexLexer): class MatlabLexer(RegexLexer): """ - For Matlab (or GNU Octave) source code. + For Matlab source code. Contributed by Ken Schutte <kschutte@csail.mit.edu>. *New in Pygments 0.10.* """ name = 'Matlab' - aliases = ['matlab', 'octave'] + aliases = ['matlab'] filenames = ['*.m'] mimetypes = ['text/matlab'] @@ -195,11 +195,12 @@ class MatlabLexer(RegexLexer): return 0.9 return 0.1 + line_re = re.compile('.*?\n') class MatlabSessionLexer(Lexer): """ - For Matlab (or GNU Octave) sessions. Modeled after PythonConsoleLexer. + For Matlab sessions. Modeled after PythonConsoleLexer. Contributed by Ken Schutte <kschutte@csail.mit.edu>. *New in Pygments 0.10.* @@ -246,12 +247,404 @@ class MatlabSessionLexer(Lexer): yield item +class OctaveLexer(RegexLexer): + """ + For GNU Octave source code. + + *New in Pygments 1.5.* + """ + name = 'Octave' + aliases = ['octave'] + filenames = ['*.m'] + mimetypes = ['text/octave'] + + # These lists are generated automatically. + # Run the following in bash shell: + # + # First dump all of the Octave manual into a plain text file: + # + # $ info octave --subnodes -o octave-manual + # + # Now grep through it: + + # for i in \ + # "Built-in Function" "Command" "Function File" \ + # "Loadable Function" "Mapping Function"; + # do + # perl -e '@name = qw('"$i"'); + # print lc($name[0]),"_kw = [\n"'; + # + # perl -n -e 'print "\"$1\",\n" if /-- '"$i"': .* (\w*) \(/;' \ + # octave-manual | sort | uniq ; + # echo "]" ; + # echo; + # done + + # taken from Octave Mercurial changeset 8cc154f45e37 (30-jan-2011) + + builtin_kw = [ "addlistener", "addpath", "addproperty", "all", + "and", "any", "argnames", "argv", "assignin", + "atexit", "autoload", + "available_graphics_toolkits", "beep_on_error", + "bitand", "bitmax", "bitor", "bitshift", "bitxor", + "cat", "cell", "cellstr", "char", "class", "clc", + "columns", "command_line_path", + "completion_append_char", "completion_matches", + "complex", "confirm_recursive_rmdir", "cputime", + "crash_dumps_octave_core", "ctranspose", "cumprod", + "cumsum", "debug_on_error", "debug_on_interrupt", + "debug_on_warning", "default_save_options", + "dellistener", "diag", "diff", "disp", + "doc_cache_file", "do_string_escapes", "double", + "drawnow", "e", "echo_executing_commands", "eps", + "eq", "errno", "errno_list", "error", "eval", + "evalin", "exec", "exist", "exit", "eye", "false", + "fclear", "fclose", "fcntl", "fdisp", "feof", + "ferror", "feval", "fflush", "fgetl", "fgets", + "fieldnames", "file_in_loadpath", "file_in_path", + "filemarker", "filesep", "find_dir_in_path", + "fixed_point_format", "fnmatch", "fopen", "fork", + "formula", "fprintf", "fputs", "fread", "freport", + "frewind", "fscanf", "fseek", "fskipl", "ftell", + "functions", "fwrite", "ge", "genpath", "get", + "getegid", "getenv", "geteuid", "getgid", + "getpgrp", "getpid", "getppid", "getuid", "glob", + "gt", "gui_mode", "history_control", + "history_file", "history_size", + "history_timestamp_format_string", "home", + "horzcat", "hypot", "ifelse", + "ignore_function_time_stamp", "inferiorto", + "info_file", "info_program", "inline", "input", + "intmax", "intmin", "ipermute", + "is_absolute_filename", "isargout", "isbool", + "iscell", "iscellstr", "ischar", "iscomplex", + "isempty", "isfield", "isfloat", "isglobal", + "ishandle", "isieee", "isindex", "isinteger", + "islogical", "ismatrix", "ismethod", "isnull", + "isnumeric", "isobject", "isreal", + "is_rooted_relative_filename", "issorted", + "isstruct", "isvarname", "kbhit", "keyboard", + "kill", "lasterr", "lasterror", "lastwarn", + "ldivide", "le", "length", "link", "linspace", + "logical", "lstat", "lt", "make_absolute_filename", + "makeinfo_program", "max_recursion_depth", "merge", + "methods", "mfilename", "minus", "mislocked", + "mkdir", "mkfifo", "mkstemp", "mldivide", "mlock", + "mouse_wheel_zoom", "mpower", "mrdivide", "mtimes", + "munlock", "nargin", "nargout", + "native_float_format", "ndims", "ne", "nfields", + "nnz", "norm", "not", "numel", "nzmax", + "octave_config_info", "octave_core_file_limit", + "octave_core_file_name", + "octave_core_file_options", "ones", "or", + "output_max_field_width", "output_precision", + "page_output_immediately", "page_screen_output", + "path", "pathsep", "pause", "pclose", "permute", + "pi", "pipe", "plus", "popen", "power", + "print_empty_dimensions", "printf", + "print_struct_array_contents", "prod", + "program_invocation_name", "program_name", + "putenv", "puts", "pwd", "quit", "rats", "rdivide", + "readdir", "readlink", "read_readline_init_file", + "realmax", "realmin", "rehash", "rename", + "repelems", "re_read_readline_init_file", "reset", + "reshape", "resize", "restoredefaultpath", + "rethrow", "rmdir", "rmfield", "rmpath", "rows", + "save_header_format_string", "save_precision", + "saving_history", "scanf", "set", "setenv", + "shell_cmd", "sighup_dumps_octave_core", + "sigterm_dumps_octave_core", "silent_functions", + "single", "size", "size_equal", "sizemax", + "sizeof", "sleep", "source", "sparse_auto_mutate", + "split_long_rows", "sprintf", "squeeze", "sscanf", + "stat", "stderr", "stdin", "stdout", "strcmp", + "strcmpi", "string_fill_char", "strncmp", + "strncmpi", "struct", "struct_levels_to_print", + "strvcat", "subsasgn", "subsref", "sum", "sumsq", + "superiorto", "suppress_verbose_help_message", + "symlink", "system", "tic", "tilde_expand", + "times", "tmpfile", "tmpnam", "toc", "toupper", + "transpose", "true", "typeinfo", "umask", "uminus", + "uname", "undo_string_escapes", "unlink", "uplus", + "upper", "usage", "usleep", "vec", "vectorize", + "vertcat", "waitpid", "warning", "warranty", + "whos_line_format", "yes_or_no", "zeros", + "inf", "Inf", "nan", "NaN"] + + command_kw = [ "close", "load", "who", "whos", ] + + function_kw = [ "accumarray", "accumdim", "acosd", "acotd", + "acscd", "addtodate", "allchild", "ancestor", + "anova", "arch_fit", "arch_rnd", "arch_test", + "area", "arma_rnd", "arrayfun", "ascii", "asctime", + "asecd", "asind", "assert", "atand", + "autoreg_matrix", "autumn", "axes", "axis", "bar", + "barh", "bartlett", "bartlett_test", "beep", + "betacdf", "betainv", "betapdf", "betarnd", + "bicgstab", "bicubic", "binary", "binocdf", + "binoinv", "binopdf", "binornd", "bitcmp", + "bitget", "bitset", "blackman", "blanks", + "blkdiag", "bone", "box", "brighten", "calendar", + "cast", "cauchy_cdf", "cauchy_inv", "cauchy_pdf", + "cauchy_rnd", "caxis", "celldisp", "center", "cgs", + "chisquare_test_homogeneity", + "chisquare_test_independence", "circshift", "cla", + "clabel", "clf", "clock", "cloglog", "closereq", + "colon", "colorbar", "colormap", "colperm", + "comet", "common_size", "commutation_matrix", + "compan", "compare_versions", "compass", + "computer", "cond", "condest", "contour", + "contourc", "contourf", "contrast", "conv", + "convhull", "cool", "copper", "copyfile", "cor", + "corrcoef", "cor_test", "cosd", "cotd", "cov", + "cplxpair", "cross", "cscd", "cstrcat", "csvread", + "csvwrite", "ctime", "cumtrapz", "curl", "cut", + "cylinder", "date", "datenum", "datestr", + "datetick", "datevec", "dblquad", "deal", + "deblank", "deconv", "delaunay", "delaunayn", + "delete", "demo", "detrend", "diffpara", "diffuse", + "dir", "discrete_cdf", "discrete_inv", + "discrete_pdf", "discrete_rnd", "display", + "divergence", "dlmwrite", "dos", "dsearch", + "dsearchn", "duplication_matrix", "durbinlevinson", + "ellipsoid", "empirical_cdf", "empirical_inv", + "empirical_pdf", "empirical_rnd", "eomday", + "errorbar", "etime", "etreeplot", "example", + "expcdf", "expinv", "expm", "exppdf", "exprnd", + "ezcontour", "ezcontourf", "ezmesh", "ezmeshc", + "ezplot", "ezpolar", "ezsurf", "ezsurfc", "factor", + "factorial", "fail", "fcdf", "feather", "fftconv", + "fftfilt", "fftshift", "figure", "fileattrib", + "fileparts", "fill", "findall", "findobj", + "findstr", "finv", "flag", "flipdim", "fliplr", + "flipud", "fpdf", "fplot", "fractdiff", "freqz", + "freqz_plot", "frnd", "fsolve", + "f_test_regression", "ftp", "fullfile", "fzero", + "gamcdf", "gaminv", "gampdf", "gamrnd", "gca", + "gcbf", "gcbo", "gcf", "genvarname", "geocdf", + "geoinv", "geopdf", "geornd", "getfield", "ginput", + "glpk", "gls", "gplot", "gradient", + "graphics_toolkit", "gray", "grid", "griddata", + "griddatan", "gtext", "gunzip", "gzip", "hadamard", + "hamming", "hankel", "hanning", "hggroup", + "hidden", "hilb", "hist", "histc", "hold", "hot", + "hotelling_test", "housh", "hsv", "hurst", + "hygecdf", "hygeinv", "hygepdf", "hygernd", + "idivide", "ifftshift", "image", "imagesc", + "imfinfo", "imread", "imshow", "imwrite", "index", + "info", "inpolygon", "inputname", "interpft", + "interpn", "intersect", "invhilb", "iqr", "isa", + "isdefinite", "isdir", "is_duplicate_entry", + "isequal", "isequalwithequalnans", "isfigure", + "ishermitian", "ishghandle", "is_leap_year", + "isletter", "ismac", "ismember", "ispc", "isprime", + "isprop", "isscalar", "issquare", "isstrprop", + "issymmetric", "isunix", "is_valid_file_id", + "isvector", "jet", "kendall", + "kolmogorov_smirnov_cdf", + "kolmogorov_smirnov_test", "kruskal_wallis_test", + "krylov", "kurtosis", "laplace_cdf", "laplace_inv", + "laplace_pdf", "laplace_rnd", "legend", "legendre", + "license", "line", "linkprop", "list_primes", + "loadaudio", "loadobj", "logistic_cdf", + "logistic_inv", "logistic_pdf", "logistic_rnd", + "logit", "loglog", "loglogerr", "logm", "logncdf", + "logninv", "lognpdf", "lognrnd", "logspace", + "lookfor", "ls_command", "lsqnonneg", "magic", + "mahalanobis", "manova", "matlabroot", + "mcnemar_test", "mean", "meansq", "median", "menu", + "mesh", "meshc", "meshgrid", "meshz", "mexext", + "mget", "mkpp", "mode", "moment", "movefile", + "mpoles", "mput", "namelengthmax", "nargchk", + "nargoutchk", "nbincdf", "nbininv", "nbinpdf", + "nbinrnd", "nchoosek", "ndgrid", "newplot", "news", + "nonzeros", "normcdf", "normest", "norminv", + "normpdf", "normrnd", "now", "nthroot", "null", + "ocean", "ols", "onenormest", "optimget", + "optimset", "orderfields", "orient", "orth", + "pack", "pareto", "parseparams", "pascal", "patch", + "pathdef", "pcg", "pchip", "pcolor", "pcr", + "peaks", "periodogram", "perl", "perms", "pie", + "pink", "planerot", "playaudio", "plot", + "plotmatrix", "plotyy", "poisscdf", "poissinv", + "poisspdf", "poissrnd", "polar", "poly", + "polyaffine", "polyarea", "polyderiv", "polyfit", + "polygcd", "polyint", "polyout", "polyreduce", + "polyval", "polyvalm", "postpad", "powerset", + "ppder", "ppint", "ppjumps", "ppplot", "ppval", + "pqpnonneg", "prepad", "primes", "print", + "print_usage", "prism", "probit", "qp", "qqplot", + "quadcc", "quadgk", "quadl", "quadv", "quiver", + "qzhess", "rainbow", "randi", "range", "rank", + "ranks", "rat", "reallog", "realpow", "realsqrt", + "record", "rectangle_lw", "rectangle_sw", + "rectint", "refresh", "refreshdata", + "regexptranslate", "repmat", "residue", "ribbon", + "rindex", "roots", "rose", "rosser", "rotdim", + "rref", "run", "run_count", "rundemos", "run_test", + "runtests", "saveas", "saveaudio", "saveobj", + "savepath", "scatter", "secd", "semilogx", + "semilogxerr", "semilogy", "semilogyerr", + "setaudio", "setdiff", "setfield", "setxor", + "shading", "shift", "shiftdim", "sign_test", + "sinc", "sind", "sinetone", "sinewave", "skewness", + "slice", "sombrero", "sortrows", "spaugment", + "spconvert", "spdiags", "spearman", "spectral_adf", + "spectral_xdf", "specular", "speed", "spencer", + "speye", "spfun", "sphere", "spinmap", "spline", + "spones", "sprand", "sprandn", "sprandsym", + "spring", "spstats", "spy", "sqp", "stairs", + "statistics", "std", "stdnormal_cdf", + "stdnormal_inv", "stdnormal_pdf", "stdnormal_rnd", + "stem", "stft", "strcat", "strchr", "strjust", + "strmatch", "strread", "strsplit", "strtok", + "strtrim", "strtrunc", "structfun", "studentize", + "subplot", "subsindex", "subspace", "substr", + "substruct", "summer", "surf", "surface", "surfc", + "surfl", "surfnorm", "svds", "swapbytes", + "sylvester_matrix", "symvar", "synthesis", "table", + "tand", "tar", "tcdf", "tempdir", "tempname", + "test", "text", "textread", "textscan", "tinv", + "title", "toeplitz", "tpdf", "trace", "trapz", + "treelayout", "treeplot", "triangle_lw", + "triangle_sw", "tril", "trimesh", "triplequad", + "triplot", "trisurf", "triu", "trnd", "tsearchn", + "t_test", "t_test_regression", "type", "unidcdf", + "unidinv", "unidpdf", "unidrnd", "unifcdf", + "unifinv", "unifpdf", "unifrnd", "union", "unique", + "unix", "unmkpp", "unpack", "untabify", "untar", + "unwrap", "unzip", "u_test", "validatestring", + "vander", "var", "var_test", "vech", "ver", + "version", "view", "voronoi", "voronoin", + "waitforbuttonpress", "wavread", "wavwrite", + "wblcdf", "wblinv", "wblpdf", "wblrnd", "weekday", + "welch_test", "what", "white", "whitebg", + "wienrnd", "wilcoxon_test", "wilkinson", "winter", + "xlabel", "xlim", "ylabel", "yulewalker", "zip", + "zlabel", "z_test", ] + + loadable_kw = [ "airy", "amd", "balance", "besselh", "besseli", + "besselj", "besselk", "bessely", "bitpack", + "bsxfun", "builtin", "ccolamd", "cellfun", + "cellslices", "chol", "choldelete", "cholinsert", + "cholinv", "cholshift", "cholupdate", "colamd", + "colloc", "convhulln", "convn", "csymamd", + "cummax", "cummin", "daspk", "daspk_options", + "dasrt", "dasrt_options", "dassl", "dassl_options", + "dbclear", "dbdown", "dbstack", "dbstatus", + "dbstop", "dbtype", "dbup", "dbwhere", "det", + "dlmread", "dmperm", "dot", "eig", "eigs", + "endgrent", "endpwent", "etree", "fft", "fftn", + "fftw", "filter", "find", "full", "gcd", + "getgrent", "getgrgid", "getgrnam", "getpwent", + "getpwnam", "getpwuid", "getrusage", "givens", + "gmtime", "gnuplot_binary", "hess", "ifft", + "ifftn", "inv", "isdebugmode", "issparse", "kron", + "localtime", "lookup", "lsode", "lsode_options", + "lu", "luinc", "luupdate", "matrix_type", "max", + "min", "mktime", "pinv", "qr", "qrdelete", + "qrinsert", "qrshift", "qrupdate", "quad", + "quad_options", "qz", "rand", "rande", "randg", + "randn", "randp", "randperm", "rcond", "regexp", + "regexpi", "regexprep", "schur", "setgrent", + "setpwent", "sort", "spalloc", "sparse", "spparms", + "sprank", "sqrtm", "strfind", "strftime", + "strptime", "strrep", "svd", "svd_driver", "syl", + "symamd", "symbfact", "symrcm", "time", "tsearch", + "typecast", "urlread", "urlwrite", ] + + mapping_kw = [ "abs", "acos", "acosh", "acot", "acoth", "acsc", + "acsch", "angle", "arg", "asec", "asech", "asin", + "asinh", "atan", "atanh", "beta", "betainc", + "betaln", "bincoeff", "cbrt", "ceil", "conj", "cos", + "cosh", "cot", "coth", "csc", "csch", "erf", "erfc", + "erfcx", "erfinv", "exp", "finite", "fix", "floor", + "fmod", "gamma", "gammainc", "gammaln", "imag", + "isalnum", "isalpha", "isascii", "iscntrl", + "isdigit", "isfinite", "isgraph", "isinf", + "islower", "isna", "isnan", "isprint", "ispunct", + "isspace", "isupper", "isxdigit", "lcm", "lgamma", + "log", "lower", "mod", "real", "rem", "round", + "roundb", "sec", "sech", "sign", "sin", "sinh", + "sqrt", "tan", "tanh", "toascii", "tolower", "xor", + ] + + builtin_consts = [ "EDITOR", "EXEC_PATH", "I", "IMAGE_PATH", "NA", + "OCTAVE_HOME", "OCTAVE_VERSION", "PAGER", + "PAGER_FLAGS", "SEEK_CUR", "SEEK_END", "SEEK_SET", + "SIG", "S_ISBLK", "S_ISCHR", "S_ISDIR", "S_ISFIFO", + "S_ISLNK", "S_ISREG", "S_ISSOCK", "WCONTINUE", + "WCOREDUMP", "WEXITSTATUS", "WIFCONTINUED", + "WIFEXITED", "WIFSIGNALED", "WIFSTOPPED", "WNOHANG", + "WSTOPSIG", "WTERMSIG", "WUNTRACED", ] + + tokens = { + 'root': [ + #We should look into multiline comments + (r'[%#].*$', Comment), + (r'^\s*function', Keyword, 'deffunc'), + + # from 'iskeyword' on hg changeset 8cc154f45e37 + (r'(__FILE__|__LINE__|break|case|catch|classdef|continue|do|else|' + r'elseif|end|end_try_catch|end_unwind_protect|endclassdef|' + r'endevents|endfor|endfunction|endif|endmethods|endproperties|' + r'endswitch|endwhile|events|for|function|get|global|if|methods|' + r'otherwise|persistent|properties|return|set|static|switch|try|' + r'until|unwind_protect|unwind_protect_cleanup|while)\b', Keyword), + + ("(" + "|".join( builtin_kw + command_kw + + function_kw + loadable_kw + + mapping_kw) + r')\b', Name.Builtin), + + ("(" + "|".join(builtin_consts) + r')\b', Name.Constant), + + # operators in Octave but not Matlab: + (r'-=|!=|!|/=|--', Operator), + # operators: + (r'-|==|~=|<|>|<=|>=|&&|&|~|\|\|?', Operator), + # operators in Octave but not Matlab requiring escape for re: + (r'\*=|\+=|\^=|\/=|\\=|\*\*|\+\+|\.\*\*',Operator), + # operators requiring escape for re: + (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator), + + + # punctuation: + (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation), + (r'=|:|;', Punctuation), + + (r'"[^"]*"', String), + + # quote can be transpose, instead of string: + # (not great, but handles common cases...) + (r'(?<=[\w\)\]])\'', Operator), + (r'(?<![\w\)\]])\'', String, 'string'), + + ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + (r'.', Text), + ], + 'string': [ + (r"[^']*'", String, '#pop'), + ], + 'deffunc': [ + (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)', + bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation, + Text.Whitespace, Name.Function, Punctuation, Text, + Punctuation, Text.Whitespace), '#pop'), + ], + } + + def analyse_text(text): + if re.match('^\s*[%#]', text, re.M): #Comment + return 0.9 + return 0.1 + + class NumPyLexer(PythonLexer): - ''' + """ A Python lexer recognizing Numerical Python builtins. *New in Pygments 0.10.* - ''' + """ name = 'NumPy' aliases = ['numpy'] diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 52ae3841..390576c7 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - this, do_insertions + this, do_insertions, combined from pygments.token import Error, Punctuation, Literal, Token, \ Text, Comment, Operator, Keyword, Name, String, Number, Generic from pygments.util import shebang_matches @@ -2429,198 +2429,177 @@ class AutohotkeyLexer(RegexLexer): filenames = ['*.ahk', '*.ahkl'] mimetypes = ['text/x-autohotkey'] - flags = re.IGNORECASE | re.DOTALL | re.MULTILINE - tokens = { 'root': [ - include('whitespace'), - (r'^\(', String, 'continuation'), - include('comments'), - (r'(^\s*)(\w+)(\s*)(=)', - bygroups(Text.Whitespace, Name, Text.Whitespace, Operator), - 'command'), - (r'([\w#@$?\[\]]+)(\s*)(\()', - bygroups(Name.Function, Text.Whitespace, Punctuation), - 'parameters'), - include('directives'), - include('labels'), + (r'^(\s*)(/\*)', bygroups(Text, Comment.Multiline), + 'incomment'), + (r'^(\s*)(\()', bygroups(Text, Generic), 'incontinuation'), + (r'\s+;.*?$', Comment.Singleline), + (r'^;.*?$', Comment.Singleline), + (r'[]{}(),;[]', Punctuation), + (r'(in|is|and|or|not)\b', Operator.Word), + (r'\%[a-zA-Z_#@$][a-zA-Z0-9_#@$]*\%', Name.Variable), + (r'!=|==|:=|\.=|<<|>>|[-~+/*%=<>&^|?:!.]', Operator), include('commands'), - include('expressions'), + include('labels'), + include('builtInFunctions'), + include('builtInVariables'), + (r'"', String, combined('stringescape', 'dqs')), include('numbers'), - include('literals'), - include('keynames'), - include('keywords'), + (r'[a-zA-Z_#@$][a-zA-Z0-9_#@$]*', Name), + (r'\\|\'', Text), + (r'\`([\,\%\`abfnrtv\-\+;])', String.Escape), + include('garbage'), ], - 'command': [ - include('comments'), - include('whitespace'), - (r'^\(', String, 'continuation'), - (r'[^\n]*?(?=;*|$)', String, '#pop'), - include('numbers'), - include('literals'), + 'incomment': [ + (r'^\s*\*/', Comment.Multiline, '#pop'), + (r'[^*/]', Comment.Multiline), + (r'[*/]', Comment.Multiline) ], - - 'expressions': [ - include('comments'), - include('whitespace'), - include('numbers'), - include('literals'), - (r'([]\w#@$?[]+)(\s*)(\()', - bygroups(Name.Function, Text.Whitespace, Punctuation), - 'parameters'), - (r'A_\w+', Name.Builtin), - (r'%[]\w#@$?[]+?%', Name.Variable), - # blocks: if, else, function definitions - (r'{', Punctuation, 'block'), - # parameters in function calls - ], - 'literals': [ - (r'"', String, 'string'), - (r'A_\w+', Name.Builtin), - (r'%[]\w#@$?[]+?%', Name.Variable), - (r'[-~!%^&*+|?:<>/=]=?', Operator, 'expressions'), - (r'==', Operator, 'expressions'), - ('[{()},.%#`;]', Punctuation), - (r'\\', Punctuation), - include('keywords'), - (r'\w+', Text), - ], - 'string': [ - (r'"', String, '#pop'), - (r'""|`.', String.Escape), - (r'[^\`"\n]+', String), # all other characters + 'incontinuation': [ + (r'^\s*\)', Generic, '#pop'), + (r'[^)]', Generic), + (r'[)]', Generic), ], - 'block': [ - include('root'), - ('{', Punctuation, '#push'), - ('}', Punctuation, '#pop'), - ], - 'parameters': [ - (r'\)', Punctuation, '#pop'), - (r'\(', Punctuation, '#push'), - include('numbers'), - include('literals'), - include('whitespace'), + 'commands': [ + (r'(?i)^(\s*)(global|local|static|' + r'#AllowSameLineComments|#ClipboardTimeout|#CommentFlag|' + r'#ErrorStdOut|#EscapeChar|#HotkeyInterval|#HotkeyModifierTimeout|' + r'#Hotstring|#IfWinActive|#IfWinExist|#IfWinNotActive|' + r'#IfWinNotExist|#IncludeAgain|#Include|#InstallKeybdHook|' + r'#InstallMouseHook|#KeyHistory|#LTrim|#MaxHotkeysPerInterval|' + r'#MaxMem|#MaxThreads|#MaxThreadsBuffer|#MaxThreadsPerHotkey|' + r'#NoEnv|#NoTrayIcon|#Persistent|#SingleInstance|#UseHook|' + r'#WinActivateForce|AutoTrim|BlockInput|Break|Click|ClipWait|' + r'Continue|Control|ControlClick|ControlFocus|ControlGetFocus|' + r'ControlGetPos|ControlGetText|ControlGet|ControlMove|ControlSend|' + r'ControlSendRaw|ControlSetText|CoordMode|Critical|' + r'DetectHiddenText|DetectHiddenWindows|Drive|DriveGet|' + r'DriveSpaceFree|Edit|Else|EnvAdd|EnvDiv|EnvGet|EnvMult|EnvSet|' + r'EnvSub|EnvUpdate|Exit|ExitApp|FileAppend|' + r'FileCopy|FileCopyDir|FileCreateDir|FileCreateShortcut|' + r'FileDelete|FileGetAttrib|FileGetShortcut|FileGetSize|' + r'FileGetTime|FileGetVersion|FileInstall|FileMove|FileMoveDir|' + r'FileRead|FileReadLine|FileRecycle|FileRecycleEmpty|' + r'FileRemoveDir|FileSelectFile|FileSelectFolder|FileSetAttrib|' + r'FileSetTime|FormatTime|GetKeyState|Gosub|Goto|GroupActivate|' + r'GroupAdd|GroupClose|GroupDeactivate|Gui|GuiControl|' + r'GuiControlGet|Hotkey|IfEqual|IfExist|IfGreaterOrEqual|IfGreater|' + r'IfInString|IfLess|IfLessOrEqual|IfMsgBox|IfNotEqual|IfNotExist|' + r'IfNotInString|IfWinActive|IfWinExist|IfWinNotActive|' + r'IfWinNotExist|If |ImageSearch|IniDelete|IniRead|IniWrite|' + r'InputBox|Input|KeyHistory|KeyWait|ListHotkeys|ListLines|' + r'ListVars|Loop|Menu|MouseClickDrag|MouseClick|MouseGetPos|' + r'MouseMove|MsgBox|OnExit|OutputDebug|Pause|PixelGetColor|' + r'PixelSearch|PostMessage|Process|Progress|Random|RegDelete|' + r'RegRead|RegWrite|Reload|Repeat|Return|RunAs|RunWait|Run|' + r'SendEvent|SendInput|SendMessage|SendMode|SendPlay|SendRaw|Send|' + r'SetBatchLines|SetCapslockState|SetControlDelay|' + r'SetDefaultMouseSpeed|SetEnv|SetFormat|SetKeyDelay|' + r'SetMouseDelay|SetNumlockState|SetScrollLockState|' + r'SetStoreCapslockMode|SetTimer|SetTitleMatchMode|' + r'SetWinDelay|SetWorkingDir|Shutdown|Sleep|Sort|SoundBeep|' + r'SoundGet|SoundGetWaveVolume|SoundPlay|SoundSet|' + r'SoundSetWaveVolume|SplashImage|SplashTextOff|SplashTextOn|' + r'SplitPath|StatusBarGetText|StatusBarWait|StringCaseSense|' + r'StringGetPos|StringLeft|StringLen|StringLower|StringMid|' + r'StringReplace|StringRight|StringSplit|StringTrimLeft|' + r'StringTrimRight|StringUpper|Suspend|SysGet|Thread|ToolTip|' + r'Transform|TrayTip|URLDownloadToFile|While|WinActivate|' + r'WinActivateBottom|WinClose|WinGetActiveStats|WinGetActiveTitle|' + r'WinGetClass|WinGetPos|WinGetText|WinGetTitle|WinGet|WinHide|' + r'WinKill|WinMaximize|WinMenuSelectItem|WinMinimizeAllUndo|' + r'WinMinimizeAll|WinMinimize|WinMove|WinRestore|WinSetTitle|' + r'WinSet|WinShow|WinWaitActive|WinWaitClose|WinWaitNotActive|' + r'WinWait)\b', bygroups(Text, Name.Builtin)), + ], + 'builtInFunctions': [ + (r'(?i)(Abs|ACos|Asc|ASin|ATan|Ceil|Chr|Cos|DllCall|Exp|FileExist|' + r'Floor|GetKeyState|IL_Add|IL_Create|IL_Destroy|InStr|IsFunc|' + r'IsLabel|Ln|Log|LV_Add|LV_Delete|LV_DeleteCol|LV_GetCount|' + r'LV_GetNext|LV_GetText|LV_Insert|LV_InsertCol|LV_Modify|' + r'LV_ModifyCol|LV_SetImageList|Mod|NumGet|NumPut|OnMessage|' + r'RegExMatch|RegExReplace|RegisterCallback|Round|SB_SetIcon|' + r'SB_SetParts|SB_SetText|Sin|Sqrt|StrLen|SubStr|Tan|TV_Add|' + r'TV_Delete|TV_GetChild|TV_GetCount|TV_GetNext|TV_Get|' + r'TV_GetParent|TV_GetPrev|TV_GetSelection|TV_GetText|TV_Modify|' + r'VarSetCapacity|WinActive|WinExist|Object|ComObjActive|' + r'ComObjArray|ComObjEnwrap|ComObjUnwrap|ComObjParameter|' + r'ComObjType|ComObjConnect|ComObjCreate|ComObjGet|ComObjError|' + r'ComObjValue|Insert|MinIndex|MaxIndex|Remove|SetCapacity|' + r'GetCapacity|GetAddress|_NewEnum|FileOpen|Read|Write|ReadLine|' + r'WriteLine|ReadNumType|WriteNumType|RawRead|RawWrite|Seek|Tell|' + r'Close|Next|IsObject|StrPut|StrGet|Trim|LTrim|RTrim)\b', + Name.Function), + ], + 'builtInVariables': [ + (r'(?i)(A_AhkPath|A_AhkVersion|A_AppData|A_AppDataCommon|' + r'A_AutoTrim|A_BatchLines|A_CaretX|A_CaretY|A_ComputerName|' + r'A_ControlDelay|A_Cursor|A_DDDD|A_DDD|A_DD|A_DefaultMouseSpeed|' + r'A_Desktop|A_DesktopCommon|A_DetectHiddenText|' + r'A_DetectHiddenWindows|A_EndChar|A_EventInfo|A_ExitReason|' + r'A_FormatFloat|A_FormatInteger|A_Gui|A_GuiEvent|A_GuiControl|' + r'A_GuiControlEvent|A_GuiHeight|A_GuiWidth|A_GuiX|A_GuiY|A_Hour|' + r'A_IconFile|A_IconHidden|A_IconNumber|A_IconTip|A_Index|' + r'A_IPAddress1|A_IPAddress2|A_IPAddress3|A_IPAddress4|A_ISAdmin|' + r'A_IsCompiled|A_IsCritical|A_IsPaused|A_IsSuspended|A_KeyDelay|' + r'A_Language|A_LastError|A_LineFile|A_LineNumber|A_LoopField|' + r'A_LoopFileAttrib|A_LoopFileDir|A_LoopFileExt|A_LoopFileFullPath|' + r'A_LoopFileLongPath|A_LoopFileName|A_LoopFileShortName|' + r'A_LoopFileShortPath|A_LoopFileSize|A_LoopFileSizeKB|' + r'A_LoopFileSizeMB|A_LoopFileTimeAccessed|A_LoopFileTimeCreated|' + r'A_LoopFileTimeModified|A_LoopReadLine|A_LoopRegKey|' + r'A_LoopRegName|A_LoopRegSubkey|A_LoopRegTimeModified|' + r'A_LoopRegType|A_MDAY|A_Min|A_MM|A_MMM|A_MMMM|A_Mon|A_MouseDelay|' + r'A_MSec|A_MyDocuments|A_Now|A_NowUTC|A_NumBatchLines|A_OSType|' + r'A_OSVersion|A_PriorHotkey|A_ProgramFiles|A_Programs|' + r'A_ProgramsCommon|A_ScreenHeight|A_ScreenWidth|A_ScriptDir|' + r'A_ScriptFullPath|A_ScriptName|A_Sec|A_Space|A_StartMenu|' + r'A_StartMenuCommon|A_Startup|A_StartupCommon|A_StringCaseSense|' + r'A_Tab|A_Temp|A_ThisFunc|A_ThisHotkey|A_ThisLabel|A_ThisMenu|' + r'A_ThisMenuItem|A_ThisMenuItemPos|A_TickCount|A_TimeIdle|' + r'A_TimeIdlePhysical|A_TimeSincePriorHotkey|A_TimeSinceThisHotkey|' + r'A_TitleMatchMode|A_TitleMatchModeSpeed|A_UserName|A_WDay|' + r'A_WinDelay|A_WinDir|A_WorkingDir|A_YDay|A_YEAR|A_YWeek|A_YYYY|' + r'Clipboard|ClipboardAll|ComSpec|ErrorLevel|ProgramFiles|True|' + r'False|A_IsUnicode|A_FileEncoding|A_OSVersion|A_PtrSize)\b', + Name.Variable), ], - 'keywords': [ - (r'(static|global|local)\b', Keyword.Type), - (r'(if|else|and|or)\b', Keyword.Reserved), - ], - 'directives': [ - (r'#\w+?\s', Keyword), - ], 'labels': [ # hotkeys and labels # technically, hotkey names are limited to named keys and buttons - (r'(^\s*)([^:\s]+?:{1,2})', bygroups(Text.Whitespace, Name.Label)), - # hotstrings - (r'(^\s*)(::[]\w#@$?[]+?::)', bygroups(Text.Whitespace, Name.Label)), - ], - 'comments': [ - (r'^;+.*?$', Comment.Single), # beginning of line comments - (r'(?<=\s);+.*?$', Comment.Single), # end of line comments - (r'^/\*.*?\n\*/', Comment.Multiline), - (r'(?<!\n)/\*.*?\n\*/', Error), # must be at start of line - ], - 'whitespace': [ - (r'[ \t]+', Text.Whitespace), - ], + (r'(^\s*)([^:\s\(\"]+?:{1,2})', bygroups(Text, Name.Label)), + (r'(^\s*)(::[^:\s]+?::)', bygroups(Text, Name.Label)), + ], 'numbers': [ (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), (r'\d+[eE][+-]?[0-9]+', Number.Float), - (r'0[0-7]+', Number.Oct), + (r'0\d+', Number.Oct), (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer) ], - 'continuation': [ - (r'\n\)', Punctuation, '#pop'), - (r'\s[^\n\)]+', String), + 'stringescape': [ + (r'\"\"|\`([\,\%\`abfnrtv])', String.Escape), ], - 'keynames': [ - (r'\[[^\]]+\]', Keyword, 'keynames') + 'strings': [ + (r'[^"\n]+', String), ], - 'commands': [ - (r'(autotrim|blockinput|break|click|' - r'clipwait|continue|control|' - r'controlclick|controlfocus|controlget|' - r'controlgetfocus|controlgetpos|controlgettext|' - r'controlmove|controlsend|controlsendraw|' - r'controlsettext|coordmode|critical|' - r'detecthiddentext|detecthiddenwindows|' - r'dllcall|drive|' - r'driveget|drivespacefree|' - r'else|envadd|envdiv|' - r'envget|envmult|envset|' - r'envsub|envupdate|exit|' - r'exitapp|fileappend|filecopy|' - r'filecopydir|filecreatedir|filecreateshortcut|' - r'filedelete|filegetattrib|filegetshortcut|' - r'filegetsize|filegettime|filegetversion|' - r'fileinstall|filemove|filemovedir|' - r'fileread|filereadline|filerecycle|' - r'filerecycleempty|fileremovedir|fileselectfile|' - r'fileselectfolder|filesetattrib|filesettime|' - r'formattime|gosub|' - r'goto|groupactivate|groupadd|' - r'groupclose|groupdeactivate|gui|' - r'guicontrol|guicontrolget|hotkey|' - r'ifexist|ifgreater|ifgreaterorequal|' - r'ifinstring|ifless|iflessorequal|' - r'ifmsgbox|ifnotequal|ifnotexist|' - r'ifnotinstring|ifwinactive|ifwinexist|' - r'ifwinnotactive|ifwinnotexist|imagesearch|' - r'inidelete|iniread|iniwrite|' - r'input|inputbox|keyhistory|' - r'keywait|listhotkeys|listlines|' - r'listvars|loop|' - r'menu|mouseclick|mouseclickdrag|' - r'mousegetpos|mousemove|msgbox|' - r'onmessage|onexit|outputdebug|' - r'pixelgetcolor|pixelsearch|postmessage|' - r'process|progress|random|' - r'regexmatch|regexreplace|registercallback|' - r'regdelete|regread|regwrite|' - r'reload|repeat|return|' - r'run|runas|runwait|' - r'send|sendevent|sendinput|' - r'sendmessage|sendmode|sendplay|' - r'sendraw|setbatchlines|setcapslockstate|' - r'setcontroldelay|setdefaultmousespeed|setenv|' - r'setformat|setkeydelay|setmousedelay|' - r'setnumlockstate|setscrolllockstate|' - r'setstorecapslockmode|' - r'settimer|settitlematchmode|setwindelay|' - r'setworkingdir|shutdown|sleep|' - r'sort|soundbeep|soundget|' - r'soundgetwavevolume|soundplay|soundset|' - r'soundsetwavevolume|splashimage|splashtextoff|' - r'splashtexton|splitpath|statusbargettext|' - r'statusbarwait|stringcasesense|stringgetpos|' - r'stringleft|stringlen|stringlower|' - r'stringmid|stringreplace|stringright|' - r'stringsplit|stringtrimleft|stringtrimright|' - r'stringupper|suspend|sysget|' - r'thread|tooltip|transform|' - r'traytip|urldownloadtofile|while|' - r'varsetcapacity|' - r'winactivate|winactivatebottom|winclose|' - r'winget|wingetactivestats|wingetactivetitle|' - r'wingetclass|wingetpos|wingettext|' - r'wingettitle|winhide|winkill|' - r'winmaximize|winmenuselectitem|winminimize|' - r'winminimizeall|winminimizeallundo|winmove|' - r'winrestore|winset|winsettitle|' - r'winshow|winwait|winwaitactive|' - r'winwaitclose|winwaitnotactive' - r'true|false|NULL)\b', Keyword, 'command'), - ], + 'dqs': [ + (r'"', String, '#pop'), + include('strings') + ], + 'garbage': [ + (r'[^\S\n]', Text), + # (r'.', Text), # no cheating + ], + } - } class MaqlLexer(RegexLexer): """ - Lexer for `GoodData MAQL <https://secure.gooddata.com/docs/html/advanced.metric.tutorial.html>`_ + Lexer for `GoodData MAQL + <https://secure.gooddata.com/docs/html/advanced.metric.tutorial.html>`_ scripts. *New in Pygments 1.4.* diff --git a/pygments/lexers/postgres.py b/pygments/lexers/postgres.py index f3f69c86..08bc92f9 100644 --- a/pygments/lexers/postgres.py +++ b/pygments/lexers/postgres.py @@ -234,7 +234,7 @@ re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$') re_error = re.compile(r'(ERROR|FATAL):') re_message = re.compile( r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|' - r'FATAL|HINT|DETAIL|LINE [0-9]+):)(.*?\n)') + r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)') def lookahead(x): """Wrap an iterator and allow pushing back an item.""" diff --git a/pygments/lexers/pypylog.py b/pygments/lexers/pypylog.py index 07811c2f..c3af3387 100644 --- a/pygments/lexers/pypylog.py +++ b/pygments/lexers/pypylog.py @@ -49,14 +49,16 @@ class PyPyLogLexer(RegexLexer): (r"<.*?>", Name.Builtin), (r"(debug_merge_point|jump|finish)", Name.Class), (r"(int_add_ovf|int_add|int_sub_ovf|int_sub|int_mul_ovf|int_mul|" - r"int_mod|int_rshift|int_and|int_or|int_xor|int_eq|int_ne|int_ge|" - r"int_gt|int_le|int_lt|int_is_zero|int_is_true|" + r"int_floordiv|int_mod|int_lshift|int_rshift|int_and|int_or|" + r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|" + r"int_is_true|" r"uint_floordiv|uint_ge|uint_lt|" r"float_add|float_sub|float_mul|float_truediv|" - r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|" + r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|" r"ptr_eq|" + r"cast_int_to_float|cast_float_to_int|cast_opaque_ptr|" r"force_token|quasiimmut_field|same_as|virtual_ref_finish|virtual_ref|" - r"call_may_force|call_assembler|call_loopinvariant|call_pure|call|" + r"call_may_force|call_assembler|call_loopinvariant|call_release_gil|call_pure|call|" r"new_with_vtable|new_array|newstr|newunicode|new|" r"arraylen_gc|" r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|" diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index 17a42d04..148762fd 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -68,7 +68,7 @@ class JavascriptLexer(RegexLexer): (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|' r'throw|try|catch|finally|new|delete|typeof|instanceof|void|' r'this)\b', Keyword, 'slashstartsregex'), - (r'(var|with|function)\b', Keyword.Declaration, 'slashstartsregex'), + (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|' r'extends|final|float|goto|implements|import|int|interface|long|native|' r'package|private|protected|public|short|static|super|synchronized|throws|' @@ -1716,18 +1716,22 @@ class CoffeeScriptLexer(RegexLexer): tokens = { 'commentsandwhitespace': [ (r'\s+', Text), + (r'###.*?###', Comment.Multiline), (r'#.*?\n', Comment.Single), ], + 'multilineregex': [ + include('commentsandwhitespace'), + (r'///([gim]+\b|\B)', String.Regex, '#pop'), + (r'/', String.Regex), + (r'[^/#]+', String.Regex) + ], 'slashstartsregex': [ include('commentsandwhitespace'), + (r'///', String.Regex, ('#pop', 'multilineregex')), (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), - (r'(?=/)', Text, ('#pop', 'badregex')), (r'', Text, '#pop'), ], - 'badregex': [ - ('\n', Text, '#pop'), - ], 'root': [ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), @@ -1751,13 +1755,46 @@ class CoffeeScriptLexer(RegexLexer): 'slashstartsregex'), (r'@[$a-zA-Z_][a-zA-Z0-9_\.:]*\s*[:=]\s', Name.Variable.Instance, 'slashstartsregex'), + (r'@', Name.Other, 'slashstartsregex'), (r'@?[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other, 'slashstartsregex'), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), - (r'"(\\\\|\\"|[^"])*"', String.Double), - (r"'(\\\\|\\'|[^'])*'", String.Single), - ] + ('"""', String, 'tdqs'), + ("'''", String, 'tsqs'), + ('"', String, 'dqs'), + ("'", String, 'sqs'), + ], + 'strings': [ + (r'[^#\\\'"]+', String) # note that all coffee script strings are multi-line. + # hashmarks, quotes and backslashes must be parsed one at a time + ], + 'interpoling_string' : [ + (r'}', String.Interpol, "#pop"), + include('root') + ], + 'dqs': [ + (r'"', String, '#pop'), + (r'\\.|\'', String), # double-quoted string don't need ' escapes + (r'#{', String.Interpol, "interpoling_string"), + include('strings') + ], + 'sqs': [ + (r"'", String, '#pop'), + (r'#|\\.|"', String), # single quoted strings don't need " escapses + include('strings') + ], + 'tdqs': [ + (r'"""', String, '#pop'), + (r'\\.|\'|"', String), # no need to escape quotes in triple-string + (r'#{', String.Interpol, "interpoling_string"), + include('strings'), + ], + 'tsqs': [ + (r"'''", String, '#pop'), + (r'#|\\.|\'|"', String), # no need to escape quotes in triple-strings + include('strings') + ], } class DuelLexer(RegexLexer): diff --git a/tests/examplefiles/example.sml b/tests/examplefiles/example.sml new file mode 100644 index 00000000..31db47d2 --- /dev/null +++ b/tests/examplefiles/example.sml @@ -0,0 +1,156 @@ +structure C = struct + val a = 12 + fun f x = x + 5 +end + +(*(*(*(*(* This file is all pretty strange Standard ML *)*)*)*) (**)*) +(* Robert J. Simmons *) + +(* Comments (* can be nested *) *) +structure S = struct + val x = (1, 2, "three") +end + +structure Sv = struct + (* These look good *) + val x = (1, 2, "three") + val z = #2 x + + (* Although these look bad (not all the numbers are constants), * + * they never occur in practice, as they are equivalent to the above. *) + val x = {1 = 1, 3 = "three", 2 = 2} + val z = # + 2 x + + val || = 12 +end + +signature S = sig end + +structure S = struct + val x = (1, 2, "three") + datatype 'a t = T of 'a + and u = U of v * v + withtype v = {left: int t, right: int t} + exception E1 of int and E2 + fun 'a id (x: 'a) : 'a = x + + val + 'a id = fn (x : 'a) => x +end + +signature R = sig + type t + val x : t + val f : t * int -> int +end +structure R : R = struct + datatype t = T of int + val x : t = T 0 + fun f (T x, i : int) : int = x + i + fun 'a id (x: 'a) : 'a = x +end + +signature BA_Z = sig + val s: int + include S R +end + +structure b______ = struct (* What (* A * strange * name *) for ) a ( struct *) + +val !%&$#+-/:<=>?@\~`^|* = 3 + +type struct' = int list +and 'a sig' = 'a list +and ('a, 'b) end' = 'b option * 'a list + +structure baz = struct + structure Bar = struct + val foo = !%&$#+-/:<=>?@\~`^|* + end +end + +infixr +!+ +fun (a +!+ b) = (op +) (a, b) + +open baz S R + +val$$$ = fn x => fn y => fn z => fn w => w +val (foo, ++, bar, ||) = (4, baz.Bar.foo, !%&$#+-/:<=>?@\~`^|*, Bar.foo) +val _ = $$$foo++bar|| + +val val'ue : ' list = [] +val struct3 : (' -> ') = fn x => x +val end_struct_' : ('a -> 'a) = fn x => x +val x : (''a -> ''a) = fn x => x +val x : ('''' -> '''') = fn x => x +val x : unit = print "Weird, huh?\n" +val w = {x=1,y=2,##= =3,4=3} +val {##=, x, 4=a,...} = w +val z = #4 w +val z = # ##= w + +fun f x y 0 = 4 + | f x y z = 4 + Sv.|| + +exception Foo of int +datatype ('0, 'b, '_, ') f'o'o = Bar | baZ12' | dsfa_fad | #@$ | Bug +and (', ''', '''', ''''') bar = + Bee of unit + | Ben of (', ''', '''', ''''') f'o'o * int + | X of ''' list + +fun q x = raise Foo x +and h x = raise Foo (~x) + +val x = 4 +and y = 5 + +fun q 0 = 4 + | q 1 = (case 1 of 1 => 2 | 3 => 4 | x => y) + | q y = case y of 1 => 2 | 3 => 4 | x => y + +val x = ref true +fun q 0 = 4 + | q 1 = if false then case 1 of 1 => 2 | 3 => 4 | x => y else 19 + | q 2 = (while !x handle Match => !x | Fail _ => !x do () ; 2) + | q x = (raise Match) handle Domain => 9 | Match => 3 + +fun p 0 = 12 + | p 1 = 8 + | p 2 = r false + | p x = r true +and r true = 19 + | r false = 12 + +val _ = 123 +val _ = 0001 +val _ = ~123 +val _ = ~0001 +val _ = 0w12412 +val _ = 0w12412 +val _ = 0xfA0 +val _ = ~0xfA0 +val _ = 0wxfA0 +val _ = 1.4 +val _ = ~1.4 +val _ = 1e~2 +val _ = 1E~2 +val _ = 1e2 +val _ = 1E2 +val _ = 1.4e~2 +val _ = 1.4E~2 +val _ = 1.4e2 +val _ = 1.4E2 + +val c = #"\000" +val st = "foo \ + \ bar" ^ "baz \ + \ and \ + \ such\n" + +val () = print st + +val _ = foo::bar::4::[++] + +end diff --git a/tests/examplefiles/intsyn.fun b/tests/examplefiles/intsyn.fun new file mode 100644 index 00000000..777b0fdb --- /dev/null +++ b/tests/examplefiles/intsyn.fun @@ -0,0 +1,675 @@ +(* Internal Syntax *) +(* Author: Frank Pfenning, Carsten Schuermann *) +(* Modified: Roberto Virga *) + +functor IntSyn (structure Global : GLOBAL) :> INTSYN = +struct + + type cid = int (* Constant identifier *) + type name = string (* Variable name *) + type mid = int (* Structure identifier *) + type csid = int (* CS module identifier *) + + + (* Contexts *) + datatype 'a Ctx = (* Contexts *) + Null (* G ::= . *) + | Decl of 'a Ctx * 'a (* | G, D *) + + (* ctxPop (G) => G' + Invariant: G = G',D + *) + fun ctxPop (Decl (G, D)) = G + + exception Error of string (* raised if out of space *) + (* ctxLookup (G, k) = D, kth declaration in G from right to left + Invariant: 1 <= k <= |G|, where |G| is length of G + *) + + fun ctxLookup (Decl (G', D), 1) = D + | ctxLookup (Decl (G', _), k') = ctxLookup (G', k'-1) +(* | ctxLookup (Null, k') = (print ("Looking up k' = " ^ Int.toString k' ^ "\n"); raise Error "Out of Bounce\n")*) + (* ctxLookup (Null, k') should not occur by invariant *) + + (* ctxLength G = |G|, the number of declarations in G *) + fun ctxLength G = + let + fun ctxLength' (Null, n) = n + | ctxLength' (Decl(G, _), n)= ctxLength' (G, n+1) + in + ctxLength' (G, 0) + end + + type FgnExp = exn (* foreign expression representation *) + exception UnexpectedFgnExp of FgnExp + (* raised by a constraint solver + if passed an incorrect arg *) + + type FgnCnstr = exn (* foreign unification constraint + representation *) + exception UnexpectedFgnCnstr of FgnCnstr + (* raised by a constraint solver + if passed an incorrect arg *) + + datatype Depend = (* Dependency information *) + No (* P ::= No *) + | Maybe (* | Maybe *) + | Meta (* | Meta *) + + (* Expressions *) + + datatype Uni = (* Universes: *) + Kind (* L ::= Kind *) + | Type (* | Type *) + + datatype Exp = (* Expressions: *) + Uni of Uni (* U ::= L *) + | Pi of (Dec * Depend) * Exp (* | bPi (D, P). V *) + | Root of Head * Spine (* | C @ S *) + | Redex of Exp * Spine (* | U @ S *) + | Lam of Dec * Exp (* | lam D. U *) + | EVar of Exp option ref * Dec Ctx * Exp * (Cnstr ref) list ref + (* | X<I> : G|-V, Cnstr *) + + | EClo of Exp * Sub (* | U[s] *) + | AVar of Exp option ref (* | A<I> *) + | NVar of int (* | n (linear, fully applied) *) + (* grafting variable *) + + | FgnExp of csid * FgnExp + (* | (foreign expression) *) + + and Head = (* Heads: *) + BVar of int (* H ::= k *) + | Const of cid (* | c *) + | Proj of Block * int (* | #k(b) *) + | Skonst of cid (* | c# *) + | Def of cid (* | d *) + | NSDef of cid (* | d (non strict) *) + | FVar of name * Exp * Sub (* | F[s] *) + | FgnConst of csid * ConDec (* | (foreign constant) *) + + and Spine = (* Spines: *) + Nil (* S ::= Nil *) + | App of Exp * Spine (* | U ; S *) + | SClo of Spine * Sub (* | S[s] *) + + and Sub = (* Explicit substitutions: *) + Shift of int (* s ::= ^n *) + | Dot of Front * Sub (* | Ft.s *) + + and Front = (* Fronts: *) + Idx of int (* Ft ::= k *) + | Exp of Exp (* | U *) + | Axp of Exp (* | U (assignable) *) + | Block of Block (* | _x *) + | Undef (* | _ *) + + and Dec = (* Declarations: *) + Dec of name option * Exp (* D ::= x:V *) + | BDec of name option * (cid * Sub) (* | v:l[s] *) + | ADec of name option * int (* | v[^-d] *) + | NDec of name option + + and Block = (* Blocks: *) + Bidx of int (* b ::= v *) + | LVar of Block option ref * Sub * (cid * Sub) + (* | L(l[^k],t) *) + | Inst of Exp list (* | u1, ..., Un *) + + + (* Constraints *) + + and Cnstr = (* Constraint: *) + Solved (* Cnstr ::= solved *) + | Eqn of Dec Ctx * Exp * Exp (* | G|-(U1 == U2) *) + | FgnCnstr of csid * FgnCnstr (* | (foreign) *) + + and Status = (* Status of a constant: *) + Normal (* inert *) + | Constraint of csid * (Dec Ctx * Spine * int -> Exp option) + (* acts as constraint *) + | Foreign of csid * (Spine -> Exp) (* is converted to foreign *) + + and FgnUnify = (* Result of foreign unify *) + Succeed of FgnUnifyResidual list + (* succeed with a list of residual operations *) + | Fail + + and FgnUnifyResidual = (* Residual of foreign unify *) + Assign of Dec Ctx * Exp * Exp * Sub + (* perform the assignment G |- X = U [ss] *) + | Delay of Exp * Cnstr ref + (* delay cnstr, associating it with all the rigid EVars in U *) + + (* Global signature *) + + and ConDec = (* Constant declaration *) + ConDec of string * mid option * int * Status + (* a : K : kind or *) + * Exp * Uni (* c : A : type *) + | ConDef of string * mid option * int (* a = A : K : kind or *) + * Exp * Exp * Uni (* d = M : A : type *) + * Ancestor (* Ancestor info for d or a *) + | AbbrevDef of string * mid option * int + (* a = A : K : kind or *) + * Exp * Exp * Uni (* d = M : A : type *) + | BlockDec of string * mid option (* %block l : SOME G1 PI G2 *) + * Dec Ctx * Dec list + + | BlockDef of string * mid option * cid list + (* %block l = (l1 | ... | ln) *) + + | SkoDec of string * mid option * int (* sa: K : kind or *) + * Exp * Uni (* sc: A : type *) + + and Ancestor = (* Ancestor of d or a *) + Anc of cid option * int * cid option (* head(expand(d)), height, head(expand[height](d)) *) + (* NONE means expands to {x:A}B *) + + datatype StrDec = (* Structure declaration *) + StrDec of string * mid option + + (* Form of constant declaration *) + datatype ConDecForm = + FromCS (* from constraint domain *) + | Ordinary (* ordinary declaration *) + | Clause (* %clause declaration *) + + (* Type abbreviations *) + type dctx = Dec Ctx (* G = . | G,D *) + type eclo = Exp * Sub (* Us = U[s] *) + type bclo = Block * Sub (* Bs = B[s] *) + type cnstr = Cnstr ref + +(* exception Error of string (* raised if out of space *) *) + + + structure FgnExpStd = struct + + structure ToInternal = FgnOpnTable (type arg = unit + type result = Exp) + + structure Map = FgnOpnTable (type arg = Exp -> Exp + type result = Exp) + + structure App = FgnOpnTable (type arg = Exp -> unit + type result = unit) + + structure EqualTo = FgnOpnTable (type arg = Exp + type result = bool) + + structure UnifyWith = FgnOpnTable (type arg = Dec Ctx * Exp + type result = FgnUnify) + + + + fun fold csfe f b = let + val r = ref b + fun g U = r := f (U,!r) + in + App.apply csfe g ; !r + end + + end + + structure FgnCnstrStd = struct + + structure ToInternal = FgnOpnTable (type arg = unit + type result = (Dec Ctx * Exp) list) + + structure Awake = FgnOpnTable (type arg = unit + type result = bool) + + structure Simplify = FgnOpnTable (type arg = unit + type result = bool) + + end + + fun conDecName (ConDec (name, _, _, _, _, _)) = name + | conDecName (ConDef (name, _, _, _, _, _, _)) = name + | conDecName (AbbrevDef (name, _, _, _, _, _)) = name + | conDecName (SkoDec (name, _, _, _, _)) = name + | conDecName (BlockDec (name, _, _, _)) = name + | conDecName (BlockDef (name, _, _)) = name + + fun conDecParent (ConDec (_, parent, _, _, _, _)) = parent + | conDecParent (ConDef (_, parent, _, _, _, _, _)) = parent + | conDecParent (AbbrevDef (_, parent, _, _, _, _)) = parent + | conDecParent (SkoDec (_, parent, _, _, _)) = parent + | conDecParent (BlockDec (_, parent, _, _)) = parent + | conDecParent (BlockDef (_, parent, _)) = parent + + + (* conDecImp (CD) = k + + Invariant: + If CD is either a declaration, definition, abbreviation, or + a Skolem constant + then k stands for the number of implicit elements. + *) + fun conDecImp (ConDec (_, _, i, _, _, _)) = i + | conDecImp (ConDef (_, _, i, _, _, _, _)) = i + | conDecImp (AbbrevDef (_, _, i, _, _, _)) = i + | conDecImp (SkoDec (_, _, i, _, _)) = i + | conDecImp (BlockDec (_, _, _, _)) = 0 (* watch out -- carsten *) + + fun conDecStatus (ConDec (_, _, _, status, _, _)) = status + | conDecStatus _ = Normal + + (* conDecType (CD) = V + + Invariant: + If CD is either a declaration, definition, abbreviation, or + a Skolem constant + then V is the respective type + *) + fun conDecType (ConDec (_, _, _, _, V, _)) = V + | conDecType (ConDef (_, _, _, _, V, _, _)) = V + | conDecType (AbbrevDef (_, _, _, _, V, _)) = V + | conDecType (SkoDec (_, _, _, V, _)) = V + + + (* conDecBlock (CD) = (Gsome, Lpi) + + Invariant: + If CD is block definition + then Gsome is the context of some variables + and Lpi is the list of pi variables + *) + fun conDecBlock (BlockDec (_, _, Gsome, Lpi)) = (Gsome, Lpi) + + (* conDecUni (CD) = L + + Invariant: + If CD is either a declaration, definition, abbreviation, or + a Skolem constant + then L is the respective universe + *) + fun conDecUni (ConDec (_, _, _, _, _, L)) = L + | conDecUni (ConDef (_, _, _, _, _, L, _)) = L + | conDecUni (AbbrevDef (_, _, _, _, _, L)) = L + | conDecUni (SkoDec (_, _, _, _, L)) = L + + + fun strDecName (StrDec (name, _)) = name + + fun strDecParent (StrDec (_, parent)) = parent + + local + val maxCid = Global.maxCid + val dummyEntry = ConDec("", NONE, 0, Normal, Uni (Kind), Kind) + val sgnArray = Array.array (maxCid+1, dummyEntry) + : ConDec Array.array + val nextCid = ref(0) + + val maxMid = Global.maxMid + val sgnStructArray = Array.array (maxMid+1, StrDec("", NONE)) + : StrDec Array.array + val nextMid = ref (0) + + in + (* Invariants *) + (* Constant declarations are all well-typed *) + (* Constant declarations are stored in beta-normal form *) + (* All definitions are strict in all their arguments *) + (* If Const(cid) is valid, then sgnArray(cid) = ConDec _ *) + (* If Def(cid) is valid, then sgnArray(cid) = ConDef _ *) + + fun sgnClean (i) = if i >= !nextCid then () + else (Array.update (sgnArray, i, dummyEntry); + sgnClean (i+1)) + + fun sgnReset () = ((* Fri Dec 20 12:04:24 2002 -fp *) + (* this circumvents a space leak *) + sgnClean (0); + nextCid := 0; nextMid := 0) + fun sgnSize () = (!nextCid, !nextMid) + + fun sgnAdd (conDec) = + let + val cid = !nextCid + in + if cid > maxCid + then raise Error ("Global signature size " ^ Int.toString (maxCid+1) ^ " exceeded") + else (Array.update (sgnArray, cid, conDec) ; + nextCid := cid + 1; + cid) + end + + (* 0 <= cid < !nextCid *) + fun sgnLookup (cid) = Array.sub (sgnArray, cid) + + fun sgnApp (f) = + let + fun sgnApp' (cid) = + if cid = !nextCid then () else (f cid; sgnApp' (cid+1)) + in + sgnApp' (0) + end + + fun sgnStructAdd (strDec) = + let + val mid = !nextMid + in + if mid > maxMid + then raise Error ("Global signature size " ^ Int.toString (maxMid+1) ^ " exceeded") + else (Array.update (sgnStructArray, mid, strDec) ; + nextMid := mid + 1; + mid) + end + + (* 0 <= mid < !nextMid *) + fun sgnStructLookup (mid) = Array.sub (sgnStructArray, mid) + + (* A hack used in Flit - jcreed 6/05 *) + fun rename (cid, new) = + let + val newConDec = case sgnLookup cid of + ConDec (n,m,i,s,e,u) => ConDec(new,m,i,s,e,u) + | ConDef (n,m,i,e,e',u,a) => ConDef(new,m,i,e,e',u,a) + | AbbrevDef (n,m,i,e,e',u) => AbbrevDef (new,m,i,e,e',u) + | BlockDec (n,m,d,d') => BlockDec (new,m,d,d') + | SkoDec (n,m,i,e,u) => SkoDec (new,m,i,e,u) + in + Array.update (sgnArray, cid, newConDec) + end + + end + + fun constDef (d) = + (case sgnLookup (d) + of ConDef(_, _, _, U,_, _, _) => U + | AbbrevDef (_, _, _, U,_, _) => U) + + fun constType (c) = conDecType (sgnLookup c) + fun constImp (c) = conDecImp (sgnLookup c) + fun constUni (c) = conDecUni (sgnLookup c) + fun constBlock (c) = conDecBlock (sgnLookup c) + + fun constStatus (c) = + (case sgnLookup (c) + of ConDec (_, _, _, status, _, _) => status + | _ => Normal) + + + (* Explicit Substitutions *) + + (* id = ^0 + + Invariant: + G |- id : G id is patsub + *) + val id = Shift(0) + + (* shift = ^1 + + Invariant: + G, V |- ^ : G ^ is patsub + *) + val shift = Shift(1) + + (* invShift = ^-1 = _.^0 + Invariant: + G |- ^-1 : G, V ^-1 is patsub + *) + val invShift = Dot(Undef, id) + + + (* comp (s1, s2) = s' + + Invariant: + If G' |- s1 : G + and G'' |- s2 : G' + then s' = s1 o s2 + and G'' |- s1 o s2 : G + + If s1, s2 patsub + then s' patsub + *) + fun comp (Shift (0), s) = s + (* next line is an optimization *) + (* roughly 15% on standard suite for Twelf 1.1 *) + (* Sat Feb 14 10:15:16 1998 -fp *) + | comp (s, Shift (0)) = s + | comp (Shift (n), Dot (Ft, s)) = comp (Shift (n-1), s) + | comp (Shift (n), Shift (m)) = Shift (n+m) + | comp (Dot (Ft, s), s') = Dot (frontSub (Ft, s'), comp (s, s')) + + (* bvarSub (n, s) = Ft' + + Invariant: + If G |- s : G' G' |- n : V + then Ft' = Ftn if s = Ft1 .. Ftn .. ^k + or Ft' = ^(n+k) if s = Ft1 .. Ftm ^k and m<n + and G |- Ft' : V [s] + *) + and bvarSub (1, Dot(Ft, s)) = Ft + | bvarSub (n, Dot(Ft, s)) = bvarSub (n-1, s) + | bvarSub (n, Shift(k)) = Idx (n+k) + + (* blockSub (B, s) = B' + + Invariant: + If G |- s : G' + and G' |- B block + then G |- B' block + and B [s] == B' + *) + (* in front of substitutions, first case is irrelevant *) + (* Sun Dec 2 11:56:41 2001 -fp *) + and blockSub (Bidx k, s) = + (case bvarSub (k, s) + of Idx k' => Bidx k' + | Block B => B) + | blockSub (LVar (ref (SOME B), sk, _), s) = + blockSub (B, comp (sk, s)) + (* -fp Sun Dec 1 21:18:30 2002 *) + (* --cs Sun Dec 1 11:25:41 2002 *) + (* Since always . |- t : Gsome, discard s *) + (* where is this needed? *) + (* Thu Dec 6 20:30:26 2001 -fp !!! *) + | blockSub (LVar (r as ref NONE, sk, (l, t)), s) = + LVar(r, comp(sk, s), (l, t)) + (* was: + LVar (r, comp(sk, s), (l, comp (t, s))) + July 22, 2010 -fp -cs + *) + (* comp(^k, s) = ^k' for some k' by invariant *) + | blockSub (L as Inst ULs, s') = Inst (map (fn U => EClo (U, s')) ULs) + (* this should be right but somebody should verify *) + + (* frontSub (Ft, s) = Ft' + + Invariant: + If G |- s : G' G' |- Ft : V + then Ft' = Ft [s] + and G |- Ft' : V [s] + + NOTE: EClo (U, s) might be undefined, so if this is ever + computed eagerly, we must introduce an "Undefined" exception, + raise it in whnf and handle it here so Exp (EClo (U, s)) => Undef + *) + and frontSub (Idx (n), s) = bvarSub (n, s) + | frontSub (Exp (U), s) = Exp (EClo (U, s)) + | frontSub (Undef, s) = Undef + | frontSub (Block (B), s) = Block (blockSub (B, s)) + + (* decSub (x:V, s) = D' + + Invariant: + If G |- s : G' G' |- V : L + then D' = x:V[s] + and G |- V[s] : L + *) + (* First line is an optimization suggested by cs *) + (* D[id] = D *) + (* Sat Feb 14 18:37:44 1998 -fp *) + (* seems to have no statistically significant effect *) + (* undo for now Sat Feb 14 20:22:29 1998 -fp *) + (* + fun decSub (D, Shift(0)) = D + | decSub (Dec (x, V), s) = Dec (x, EClo (V, s)) + *) + fun decSub (Dec (x, V), s) = Dec (x, EClo (V, s)) + | decSub (NDec x, s) = NDec x + | decSub (BDec (n, (l, t)), s) = BDec (n, (l, comp (t, s))) + + (* dot1 (s) = s' + + Invariant: + If G |- s : G' + then s' = 1. (s o ^) + and for all V s.t. G' |- V : L + G, V[s] |- s' : G', V + + If s patsub then s' patsub + *) + (* first line is an optimization *) + (* roughly 15% on standard suite for Twelf 1.1 *) + (* Sat Feb 14 10:16:16 1998 -fp *) + fun dot1 (s as Shift (0)) = s + | dot1 s = Dot (Idx(1), comp(s, shift)) + + (* invDot1 (s) = s' + invDot1 (1. s' o ^) = s' + + Invariant: + s = 1 . s' o ^ + If G' |- s' : G + (so G',V[s] |- s : G,V) + *) + fun invDot1 (s) = comp (comp(shift, s), invShift) + + + (* Declaration Contexts *) + + (* ctxDec (G, k) = x:V + Invariant: + If |G| >= k, where |G| is size of G, + then G |- k : V and G |- V : L + *) + fun ctxDec (G, k) = + let (* ctxDec' (G'', k') = x:V + where G |- ^(k-k') : G'', 1 <= k' <= k + *) + fun ctxDec' (Decl (G', Dec (x, V')), 1) = Dec (x, EClo (V', Shift (k))) + | ctxDec' (Decl (G', BDec (n, (l, s))), 1) = BDec (n, (l, comp (s, Shift (k)))) + | ctxDec' (Decl (G', _), k') = ctxDec' (G', k'-1) + (* ctxDec' (Null, k') should not occur by invariant *) + in + ctxDec' (G, k) + end + + (* blockDec (G, v, i) = V + + Invariant: + If G (v) = l[s] + and Sigma (l) = SOME Gsome BLOCK Lblock + and G |- s : Gsome + then G |- pi (v, i) : V + *) + + fun blockDec (G, v as (Bidx k), i) = + let + val BDec (_, (l, s)) = ctxDec (G, k) + (* G |- s : Gsome *) + val (Gsome, Lblock) = conDecBlock (sgnLookup l) + fun blockDec' (t, D :: L, 1, j) = decSub (D, t) + | blockDec' (t, _ :: L, n, j) = + blockDec' (Dot (Exp (Root (Proj (v, j), Nil)), t), + L, n-1, j+1) + in + blockDec' (s, Lblock, i, 1) + end + + + (* EVar related functions *) + + (* newEVar (G, V) = newEVarCnstr (G, V, nil) *) + fun newEVar (G, V) = EVar(ref NONE, G, V, ref nil) + + (* newAVar G = new AVar (assignable variable) *) + (* AVars carry no type, ctx, or cnstr *) + fun newAVar () = AVar(ref NONE) + + (* newTypeVar (G) = X, X new + where G |- X : type + *) + fun newTypeVar (G) = EVar(ref NONE, G, Uni(Type), ref nil) + + (* newLVar (l, s) = (l[s]) *) + fun newLVar (sk, (cid, t)) = LVar (ref NONE, sk, (cid, t)) + + (* Definition related functions *) + (* headOpt (U) = SOME(H) or NONE, U should be strict, normal *) + fun headOpt (Root (H, _)) = SOME(H) + | headOpt (Lam (_, U)) = headOpt U + | headOpt _ = NONE + + fun ancestor' (NONE) = Anc(NONE, 0, NONE) + | ancestor' (SOME(Const(c))) = Anc(SOME(c), 1, SOME(c)) + | ancestor' (SOME(Def(d))) = + (case sgnLookup(d) + of ConDef(_, _, _, _, _, _, Anc(_, height, cOpt)) + => Anc(SOME(d), height+1, cOpt)) + | ancestor' (SOME _) = (* FgnConst possible, BVar impossible by strictness *) + Anc(NONE, 0, NONE) + (* ancestor(U) = ancestor info for d = U *) + fun ancestor (U) = ancestor' (headOpt U) + + (* defAncestor(d) = ancestor of d, d must be defined *) + fun defAncestor (d) = + (case sgnLookup(d) + of ConDef(_, _, _, _, _, _, anc) => anc) + + (* Type related functions *) + + (* targetHeadOpt (V) = SOME(H) or NONE + where H is the head of the atomic target type of V, + NONE if V is a kind or object or have variable type. + Does not expand type definitions. + *) + (* should there possibly be a FgnConst case? also targetFamOpt -kw *) + fun targetHeadOpt (Root (H, _)) = SOME(H) + | targetHeadOpt (Pi(_, V)) = targetHeadOpt V + | targetHeadOpt (Redex (V, S)) = targetHeadOpt V + | targetHeadOpt (Lam (_, V)) = targetHeadOpt V + | targetHeadOpt (EVar (ref (SOME(V)),_,_,_)) = targetHeadOpt V + | targetHeadOpt (EClo (V, s)) = targetHeadOpt V + | targetHeadOpt _ = NONE + (* Root(Bvar _, _), Root(FVar _, _), Root(FgnConst _, _), + EVar(ref NONE,..), Uni, FgnExp _ + *) + (* Root(Skonst _, _) can't occur *) + (* targetHead (A) = a + as in targetHeadOpt, except V must be a valid type + *) + fun targetHead (A) = valOf (targetHeadOpt A) + + (* targetFamOpt (V) = SOME(cid) or NONE + where cid is the type family of the atomic target type of V, + NONE if V is a kind or object or have variable type. + Does expand type definitions. + *) + fun targetFamOpt (Root (Const(cid), _)) = SOME(cid) + | targetFamOpt (Pi(_, V)) = targetFamOpt V + | targetFamOpt (Root (Def(cid), _)) = targetFamOpt (constDef cid) + | targetFamOpt (Redex (V, S)) = targetFamOpt V + | targetFamOpt (Lam (_, V)) = targetFamOpt V + | targetFamOpt (EVar (ref (SOME(V)),_,_,_)) = targetFamOpt V + | targetFamOpt (EClo (V, s)) = targetFamOpt V + | targetFamOpt _ = NONE + (* Root(Bvar _, _), Root(FVar _, _), Root(FgnConst _, _), + EVar(ref NONE,..), Uni, FgnExp _ + *) + (* Root(Skonst _, _) can't occur *) + (* targetFam (A) = a + as in targetFamOpt, except V must be a valid type + *) + fun targetFam (A) = valOf (targetFamOpt A) + +end; (* functor IntSyn *) + +structure IntSyn :> INTSYN = + IntSyn (structure Global = Global); diff --git a/tests/examplefiles/intsyn.sig b/tests/examplefiles/intsyn.sig new file mode 100644 index 00000000..ea505362 --- /dev/null +++ b/tests/examplefiles/intsyn.sig @@ -0,0 +1,286 @@ +(* Internal Syntax *) +(* Author: Frank Pfenning, Carsten Schuermann *) +(* Modified: Roberto Virga *) + +signature INTSYN = +sig + + type cid = int (* Constant identifier *) + type mid = int (* Structure identifier *) + type csid = int (* CS module identifier *) + + + type FgnExp = exn (* foreign expression representation *) + exception UnexpectedFgnExp of FgnExp + (* raised by a constraint solver + if passed an incorrect arg *) + type FgnCnstr = exn (* foreign constraint representation *) + exception UnexpectedFgnCnstr of FgnCnstr + (* raised by a constraint solver + if passed an incorrect arg *) + + (* Contexts *) + + datatype 'a Ctx = (* Contexts *) + Null (* G ::= . *) + | Decl of 'a Ctx * 'a (* | G, D *) + + val ctxPop : 'a Ctx -> 'a Ctx + val ctxLookup: 'a Ctx * int -> 'a + val ctxLength: 'a Ctx -> int + + datatype Depend = (* Dependency information *) + No (* P ::= No *) + | Maybe (* | Maybe *) + | Meta (* | Meta *) + + (* expressions *) + + datatype Uni = (* Universes: *) + Kind (* L ::= Kind *) + | Type (* | Type *) + + datatype Exp = (* Expressions: *) + Uni of Uni (* U ::= L *) + | Pi of (Dec * Depend) * Exp (* | Pi (D, P). V *) + | Root of Head * Spine (* | H @ S *) + | Redex of Exp * Spine (* | U @ S *) + | Lam of Dec * Exp (* | lam D. U *) + | EVar of Exp option ref * Dec Ctx * Exp * (Cnstr ref) list ref + (* | X<I> : G|-V, Cnstr *) + | EClo of Exp * Sub (* | U[s] *) + | AVar of Exp option ref (* | A<I> *) + + | FgnExp of csid * FgnExp (* | (foreign expression) *) + + | NVar of int (* | n (linear, + fully applied variable + used in indexing *) + + and Head = (* Head: *) + BVar of int (* H ::= k *) + | Const of cid (* | c *) + | Proj of Block * int (* | #k(b) *) + | Skonst of cid (* | c# *) + | Def of cid (* | d (strict) *) + | NSDef of cid (* | d (non strict) *) + | FVar of string * Exp * Sub (* | F[s] *) + | FgnConst of csid * ConDec (* | (foreign constant) *) + + and Spine = (* Spines: *) + Nil (* S ::= Nil *) + | App of Exp * Spine (* | U ; S *) + | SClo of Spine * Sub (* | S[s] *) + + and Sub = (* Explicit substitutions: *) + Shift of int (* s ::= ^n *) + | Dot of Front * Sub (* | Ft.s *) + + and Front = (* Fronts: *) + Idx of int (* Ft ::= k *) + | Exp of Exp (* | U *) + | Axp of Exp (* | U *) + | Block of Block (* | _x *) + | Undef (* | _ *) + + and Dec = (* Declarations: *) + Dec of string option * Exp (* D ::= x:V *) + | BDec of string option * (cid * Sub) (* | v:l[s] *) + | ADec of string option * int (* | v[^-d] *) + | NDec of string option + + and Block = (* Blocks: *) + Bidx of int (* b ::= v *) + | LVar of Block option ref * Sub * (cid * Sub) + (* | L(l[^k],t) *) + | Inst of Exp list (* | U1, ..., Un *) + (* It would be better to consider having projections count + like substitutions, then we could have Inst of Sub here, + which would simplify a lot of things. + + I suggest however to wait until the next big overhaul + of the system -- cs *) + + +(* | BClo of Block * Sub (* | b[s] *) *) + + (* constraints *) + + and Cnstr = (* Constraint: *) + Solved (* Cnstr ::= solved *) + | Eqn of Dec Ctx * Exp * Exp (* | G|-(U1 == U2) *) + | FgnCnstr of csid * FgnCnstr (* | (foreign) *) + + and Status = (* Status of a constant: *) + Normal (* inert *) + | Constraint of csid * (Dec Ctx * Spine * int -> Exp option) + (* acts as constraint *) + | Foreign of csid * (Spine -> Exp) (* is converted to foreign *) + + and FgnUnify = (* Result of foreign unify *) + Succeed of FgnUnifyResidual list + (* succeed with a list of residual operations *) + | Fail + + and FgnUnifyResidual = + Assign of Dec Ctx * Exp * Exp * Sub + (* perform the assignment G |- X = U [ss] *) + | Delay of Exp * Cnstr ref + (* delay cnstr, associating it with all the rigid EVars in U *) + + (* Global signature *) + + and ConDec = (* Constant declaration *) + ConDec of string * mid option * int * Status + (* a : K : kind or *) + * Exp * Uni (* c : A : type *) + | ConDef of string * mid option * int (* a = A : K : kind or *) + * Exp * Exp * Uni (* d = M : A : type *) + * Ancestor (* Ancestor info for d or a *) + | AbbrevDef of string * mid option * int + (* a = A : K : kind or *) + * Exp * Exp * Uni (* d = M : A : type *) + | BlockDec of string * mid option (* %block l : SOME G1 PI G2 *) + * Dec Ctx * Dec list + | BlockDef of string * mid option * cid list + (* %block l = (l1 | ... | ln) *) + | SkoDec of string * mid option * int (* sa: K : kind or *) + * Exp * Uni (* sc: A : type *) + + and Ancestor = (* Ancestor of d or a *) + Anc of cid option * int * cid option (* head(expand(d)), height, head(expand[height](d)) *) + (* NONE means expands to {x:A}B *) + + datatype StrDec = (* Structure declaration *) + StrDec of string * mid option + + (* Form of constant declaration *) + datatype ConDecForm = + FromCS (* from constraint domain *) + | Ordinary (* ordinary declaration *) + | Clause (* %clause declaration *) + + (* Type abbreviations *) + type dctx = Dec Ctx (* G = . | G,D *) + type eclo = Exp * Sub (* Us = U[s] *) + type bclo = Block * Sub (* Bs = B[s] *) + type cnstr = Cnstr ref + + exception Error of string (* raised if out of space *) + + (* standard operations on foreign expressions *) + structure FgnExpStd : sig + (* convert to internal syntax *) + structure ToInternal : FGN_OPN where type arg = unit + where type result = Exp + + (* apply function to subterms *) + structure Map : FGN_OPN where type arg = Exp -> Exp + where type result = Exp + + (* apply function to subterms, for effect *) + structure App : FGN_OPN where type arg = Exp -> unit + where type result = unit + + (* test for equality *) + structure EqualTo : FGN_OPN where type arg = Exp + where type result = bool + + (* unify with another term *) + structure UnifyWith : FGN_OPN where type arg = Dec Ctx * Exp + where type result = FgnUnify + + (* fold a function over the subterms *) + val fold : (csid * FgnExp) -> (Exp * 'a -> 'a) -> 'a -> 'a + end + + (* standard operations on foreign constraints *) + structure FgnCnstrStd : sig + (* convert to internal syntax *) + structure ToInternal : FGN_OPN where type arg = unit + where type result = (Dec Ctx * Exp) list + + (* awake *) + structure Awake : FGN_OPN where type arg = unit + where type result = bool + + (* simplify *) + structure Simplify : FGN_OPN where type arg = unit + where type result = bool + end + + val conDecName : ConDec -> string + val conDecParent : ConDec -> mid option + val conDecImp : ConDec -> int + val conDecStatus : ConDec -> Status + val conDecType : ConDec -> Exp + val conDecBlock : ConDec -> dctx * Dec list + val conDecUni : ConDec -> Uni + + val strDecName : StrDec -> string + val strDecParent : StrDec -> mid option + + val sgnReset : unit -> unit + val sgnSize : unit -> cid * mid + + val sgnAdd : ConDec -> cid + val sgnLookup: cid -> ConDec + val sgnApp : (cid -> unit) -> unit + + val sgnStructAdd : StrDec -> mid + val sgnStructLookup : mid -> StrDec + + val constType : cid -> Exp (* type of c or d *) + val constDef : cid -> Exp (* definition of d *) + val constImp : cid -> int + val constStatus : cid -> Status + val constUni : cid -> Uni + val constBlock : cid -> dctx * Dec list + + (* Declaration Contexts *) + + val ctxDec : dctx * int -> Dec (* get variable declaration *) + val blockDec : dctx * Block * int -> Dec + + (* Explicit substitutions *) + + val id : Sub (* id *) + val shift : Sub (* ^ *) + val invShift : Sub (* ^-1 *) + + val bvarSub : int * Sub -> Front (* k[s] *) + val frontSub : Front * Sub -> Front (* H[s] *) + val decSub : Dec * Sub -> Dec (* x:V[s] *) + val blockSub : Block * Sub -> Block (* B[s] *) + + val comp : Sub * Sub -> Sub (* s o s' *) + val dot1 : Sub -> Sub (* 1 . (s o ^) *) + val invDot1 : Sub -> Sub (* (^ o s) o ^-1) *) + + (* EVar related functions *) + + val newEVar : dctx * Exp -> Exp (* creates X:G|-V, [] *) + val newAVar : unit -> Exp (* creates A (bare) *) + val newTypeVar : dctx -> Exp (* creates X:G|-type, [] *) + val newLVar : Sub * (cid * Sub) -> Block + (* creates B:(l[^k],t) *) + + (* Definition related functions *) + val headOpt : Exp -> Head option + val ancestor : Exp -> Ancestor + val defAncestor : cid -> Ancestor + + (* Type related functions *) + + (* Not expanding type definitions *) + val targetHeadOpt : Exp -> Head option (* target type family or NONE *) + val targetHead : Exp -> Head (* target type family *) + + (* Expanding type definitions *) + val targetFamOpt : Exp -> cid option (* target type family or NONE *) + val targetFam : Exp -> cid (* target type family *) + + (* Used in Flit *) + val rename : cid * string -> unit + +end; (* signature INTSYN *) diff --git a/tests/examplefiles/psql_session.txt b/tests/examplefiles/psql_session.txt index fd87a133..7096072b 100644 --- a/tests/examplefiles/psql_session.txt +++ b/tests/examplefiles/psql_session.txt @@ -1,5 +1,6 @@ regression=# select foo; ERROR: column "foo" does not exist +CONTEXT: PL/pgSQL function "test1" while casting return value to function's return type LINE 1: select foo; ^ regression=# \q |