Diffstat (limited to 'pygments/lexers/functional.py')
-rw-r--r--  pygments/lexers/functional.py  341
1 file changed, 332 insertions(+), 9 deletions(-)
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index e6d16b19..204f44c5 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -5,7 +5,7 @@
Lexers for functional languages.
- :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2011 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@@ -13,11 +13,12 @@ import re
from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
from pygments.token import Text, Comment, Operator, Keyword, Name, \
- String, Number, Punctuation, Literal, Generic
+ String, Number, Punctuation, Literal, Generic, Error
-__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer',
- 'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer']
+__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
+ 'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
+ 'ErlangShellLexer']
class SchemeLexer(RegexLexer):
@@ -36,7 +37,7 @@ class SchemeLexer(RegexLexer):
"""
name = 'Scheme'
aliases = ['scheme', 'scm']
- filenames = ['*.scm']
+ filenames = ['*.scm', '*.ss', '*.rkt']
mimetypes = ['text/x-scheme', 'application/x-scheme']
# list of known keywords and builtins taken from vim 6.4 scheme.vim
@@ -406,7 +407,7 @@ class HaskellLexer(RegexLexer):
'funclist': [
(r'\s+', Text),
(r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
- (r'[_a-z][\w\']+', Name.Function),
+ (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
(r'--.*$', Comment.Single),
(r'{-', Comment.Multiline, 'comment'),
(r',', Punctuation),
@@ -515,6 +516,328 @@ class LiterateHaskellLexer(Lexer):
yield item
+class SMLLexer(RegexLexer):
+ """
+ For the Standard ML language.
+
+ *New in Pygments 1.5.*
+ """
+
+ name = 'Standard ML'
+ aliases = ['sml']
+ filenames = ['*.sml', '*.sig', '*.fun',]
+ mimetypes = ['text/x-standardml', 'application/x-standardml']
+
+ alphanumid_reserved = [
+ # Core
+ 'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
+ 'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
+ 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
+ 'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
+ # Modules
+ 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
+ 'struct', 'structure', 'where',
+ ]
+
+ symbolicid_reserved = [
+ # Core
+ ':', '|', '=', '=>', '->', '#',
+ # Modules
+ ':>',
+ ]
+
+ nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ]
+
+ alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
+ symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
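+ # e.g. "foo", "Foo'", "x_1" match the alphanumeric form, while "+", ":=",
+ # and ">>=" match the symbolic form.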
+
+ # A character constant is a sequence of the form #s, where s is a string
+ # constant denoting a string of size one character. This setup just parses
+ # the entire string as either a String.Double or a String.Char (depending
+ # on the argument), even if the String.Char is an erroneous
+ # multiple-character string.
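+ # e.g. #"a" is a character constant and "a\nb" a string constant; the
+ # '\\\s+\\' rule covers SML's "\ ... \" gap, which lets a string constant
+ # span several lines.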
+ def stringy (whatkind):
+ return [
+ (r'[^"\\]', whatkind),
+ (r'\\[\\\"abtnvfr]', String.Escape),
+ (r'\\\^[@-^]', String.Escape),
+ (r'\\[0-9]{3}', String.Escape),
+ (r'\\u[0-9a-fA-F]{4}', String.Escape),
+ (r'\\\s+\\', String.Interpol),
+ (r'"', whatkind, '#pop'),
+ ]
+
+ # Callbacks for distinguishing tokens and reserved words
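+ # e.g. for "Foo.Bar.baz", long_id_callback yields "Foo" and "Bar" as
+ # Name.Namespace (and each dot as Punctuation), end_id_callback yields
+ # "baz" as Name, and a reserved word in either position is flagged Error.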
+ def long_id_callback(self, match):
+ if match.group(1) in self.alphanumid_reserved: token = Error
+ else: token = Name.Namespace
+ yield match.start(1), token, match.group(1)
+ yield match.start(2), Punctuation, match.group(2)
+
+ def end_id_callback(self, match):
+ if match.group(1) in self.alphanumid_reserved: token = Error
+ elif match.group(1) in self.symbolicid_reserved: token = Error
+ else: token = Name
+ yield match.start(1), token, match.group(1)
+
+ def id_callback(self, match):
+ s = match.group(1)
+ if s in self.alphanumid_reserved: token = Keyword.Reserved
+ elif s in self.symbolicid_reserved: token = Punctuation
+ else: token = Name
+ yield match.start(1), token, s
+
+ tokens = {
+ # Whitespace and comments are (almost) everywhere
+ 'whitespace': [
+ (r'\s+', Text),
+ (r'\(\*', Comment.Multiline, 'comment'),
+ ],
+
+ 'delimiters': [
+ # This lexer treats these delimiters specially:
+ # Delimiters define scopes, and the scope is how the meaning of
+ # the `|' is resolved - is it a case/handle expression, or function
+ # definition by cases? (This is not how the Definition works, but
+ # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
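+ # e.g. in "case x of A => 1 | B => 2" the '|' separates match rules,
+ # whereas in "fun f 0 = 1 | f _ = 0" it introduces another clause of f.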
+ (r'\(|\[|{', Punctuation, 'main'),
+ (r'\)|\]|}', Punctuation, '#pop'),
+ (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
+ (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
+ (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
+ ],
+
+ 'core': [
+ # Punctuation that doesn't overlap symbolic identifiers
+ (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]),
+ Punctuation),
+
+ # Special constants: strings, floats, numbers in decimal and hex
+ (r'#"', String.Char, 'char'),
+ (r'"', String.Double, 'string'),
+ (r'~?0x[0-9a-fA-F]+', Number.Hex),
+ (r'0wx[0-9a-fA-F]+', Number.Hex),
+ (r'0w\d+', Number.Integer),
+ (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
+ (r'~?\d+\.\d+', Number.Float),
+ (r'~?\d+[eE]~?\d+', Number.Float),
+ (r'~?\d+', Number.Integer),
+
+ # Labels
+ (r'#\s*[1-9][0-9]*', Name.Label),
+ (r'#\s*(%s)' % alphanumid_re, Name.Label),
+ (r'#\s+(%s)' % symbolicid_re, Name.Label),
+ # Some reserved words trigger a special, local lexer state change
+ (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
+ (r'(?=\b(exception)\b(?!\'))', Text, 'ename'),
+ (r'\b(functor|include|open|signature|structure)\b(?!\')',
+ Keyword.Reserved, 'sname'),
+ (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
+
+ # Regular identifiers, long and otherwise
+ (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+ (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
+ (r'(%s)' % alphanumid_re, id_callback),
+ (r'(%s)' % symbolicid_re, id_callback),
+ ],
+ 'dotted': [
+ (r'(%s)(\.)' % alphanumid_re, long_id_callback),
+ (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
+ (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
+ (r'\s+', Error),
+ (r'\S+', Error),
+ ],
+
+
+ # Main parser (prevents errors in files that have scoping errors)
+ 'root': [ (r'', Text, 'main') ],
+
+ # In this scope, I expect '|' to not be followed by a function name,
+ # and I expect 'and' to be followed by a binding site
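+ # (e.g. "val x = 1 and y = 2": here 'and' starts another binding, while in
+ # 'main-fun' it starts another function clause)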
+ 'main': [
+ include('whitespace'),
+
+ # Special behavior of val/and/fun
+ (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
+ (r'\b(fun)\b(?!\')', Keyword.Reserved,
+ ('#pop', 'main-fun', 'fname')),
+
+ include('delimiters'),
+ include('core'),
+ (r'\S+', Error),
+ ],
+
+ # In this scope, I expect '|' and 'and' to be followed by a function
+ 'main-fun': [
+ include('whitespace'),
+
+ (r'\s', Text),
+ (r'\(\*', Comment.Multiline, 'comment'),
+
+ # Special behavior of val/and/fun
+ (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
+ (r'\b(val)\b(?!\')', Keyword.Reserved,
+ ('#pop', 'main', 'vname')),
+
+ # Special behavior of '|' and '|'-manipulating keywords
+ (r'\|', Punctuation, 'fname'),
+ (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
+ ('#pop', 'main')),
+
+ include('delimiters'),
+ include('core'),
+ (r'\S+', Error),
+ ],
+
+ # Character and string parsers
+ 'char': stringy(String.Char),
+ 'string': stringy(String.Double),
+
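+ # Zero-width lookahead: pop as soon as a top-level reserved word such as
+ # "end" or "and" comes next, without consuming it.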
+ 'breakout': [
+ (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
+ ],
+
+ # Dealing with what comes after module system keywords
+ 'sname': [
+ include('whitespace'),
+ include('breakout'),
+
+ (r'(%s)' % alphanumid_re, Name.Namespace),
+ (r'', Text, '#pop'),
+ ],
+
+ # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
+ 'fname': [
+ include('whitespace'),
+ (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+ (r'\(', Punctuation, 'tyvarseq'),
+
+ (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
+ (r'(%s)' % symbolicid_re, Name.Function, '#pop'),
+
+ # Ignore interesting function declarations like "fun (x + y) = ..."
+ (r'', Text, '#pop'),
+ ],
+
+ # Dealing with what comes after the 'val' (or 'and') keyword
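+ # (e.g. in "val x = 3", x becomes Name.Variable; the (?!...) lookahead keeps
+ # a longer operator such as "==" from being taken for the binding "=")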
+ 'vname': [
+ include('whitespace'),
+ (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+ (r'\(', Punctuation, 'tyvarseq'),
+
+ (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
+ bygroups(Name.Variable, Text, Punctuation), '#pop'),
+ (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
+ bygroups(Name.Variable, Text, Punctuation), '#pop'),
+ (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
+ (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
+
+ # Ignore interesting patterns like 'val (x, y)'
+ (r'', Text, '#pop'),
+ ],
+
+ # Dealing with what comes after the 'type' (or 'and') keyword
+ 'tname': [
+ include('whitespace'),
+ include('breakout'),
+
+ (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+ (r'\(', Punctuation, 'tyvarseq'),
+ (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
+
+ (r'(%s)' % alphanumid_re, Keyword.Type),
+ (r'(%s)' % symbolicid_re, Keyword.Type),
+ (r'\S+', Error, '#pop'),
+ ],
+
+ # A type binding includes most identifiers
+ 'typbind': [
+ include('whitespace'),
+
+ (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
+
+ include('breakout'),
+ include('core'),
+ (r'\S+', Error, '#pop'),
+ ],
+
+ # Dealing with what comes after the 'datatype' (or 'and') keyword
+ 'dname': [
+ include('whitespace'),
+ include('breakout'),
+
+ (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+ (r'\(', Punctuation, 'tyvarseq'),
+ (r'(=)(\s*)(datatype)',
+ bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
+ (r'=(?!%s)' % symbolicid_re, Punctuation,
+ ('#pop', 'datbind', 'datcon')),
+
+ (r'(%s)' % alphanumid_re, Keyword.Type),
+ (r'(%s)' % symbolicid_re, Keyword.Type),
+ (r'\S+', Error, '#pop'),
+ ],
+
+ # common case - A | B | C of int
+ 'datbind': [
+ include('whitespace'),
+
+ (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
+ (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
+ (r'\b(of)\b(?!\')', Keyword.Reserved),
+
+ (r'(\|)(\s*)(%s)' % alphanumid_re,
+ bygroups(Punctuation, Text, Name.Class)),
+ (r'(\|)(\s+)(%s)' % symbolicid_re,
+ bygroups(Punctuation, Text, Name.Class)),
+
+ include('breakout'),
+ include('core'),
+ (r'\S+', Error),
+ ],
+
+ # Dealing with what comes after an exception
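+ # (e.g. "exception Fail of string" highlights Fail as Name.Class)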
+ 'ename': [
+ include('whitespace'),
+
+ (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
+ bygroups(Keyword.Reserved, Text, Name.Class)),
+ (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
+ bygroups(Keyword.Reserved, Text, Name.Class)),
+ (r'\b(of)\b(?!\')', Keyword.Reserved),
+
+ include('breakout'),
+ include('core'),
+ (r'\S+', Error),
+ ],
+
+ 'datcon': [
+ include('whitespace'),
+ (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
+ (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
+ (r'\S+', Error, '#pop'),
+ ],
+
+ # Series of type variables
+ 'tyvarseq': [
+ (r'\s', Text),
+ (r'\(\*', Comment.Multiline, 'comment'),
+
+ (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+ (r',', Punctuation),
+ (r'\)', Punctuation, '#pop'),
+ (r'', Error, '#pop'),
+ ],
+
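+ # SML comments nest, so a nested "(*" pushes this state and "*)" pops it.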
+ 'comment': [
+ (r'[^(*)]', Comment.Multiline),
+ (r'\(\*', Comment.Multiline, '#push'),
+ (r'\*\)', Comment.Multiline, '#pop'),
+ (r'[(*)]', Comment.Multiline),
+ ],
+ }
+
+
class OcamlLexer(RegexLexer):
"""
For the OCaml language.
@@ -570,11 +893,11 @@ class OcamlLexer(RegexLexer):
(r"[^\W\d][\w']*", Name),
- (r'\d[\d_]*', Number.Integer),
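+ # Match exponent floats (e.g. 1.5e3) before the plain-integer rule so the
+ # whole literal is emitted as a single Number.Float token.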
+ (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
(r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
(r'0[oO][0-7][0-7_]*', Number.Oct),
(r'0[bB][01][01_]*', Number.Binary),
- (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
+ (r'\d[\d_]*', Number.Integer),
(r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
String.Char),
@@ -656,7 +979,7 @@ class ErlangLexer(RegexLexer):
'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
]
- operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
+ operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!|\?)'
word_operators = [
'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
'div', 'not', 'or', 'orelse', 'rem', 'xor'