1 files changed, 595 insertions, 4 deletions
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index a8102cc8..5f710837 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -5,7 +5,7 @@
 
     Lexers for functional languages.
 
-    :copyright: Copyright 2006-2011 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
@@ -15,10 +15,9 @@ from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
 from pygments.token import Text, Comment, Operator, Keyword, Name, \
      String, Number, Punctuation, Literal, Generic, Error
 
-
 __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
            'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
-           'ErlangShellLexer']
+           'ErlangShellLexer', 'OpaLexer', 'CoqLexer', 'NewLispLexer']
 
 
 class SchemeLexer(RegexLexer):
@@ -942,7 +941,7 @@ class ErlangLexer(RegexLexer):
 
     name = 'Erlang'
     aliases = ['erlang']
-    filenames = ['*.erl', '*.hrl']
+    filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
     mimetypes = ['text/x-erlang']
 
     keywords = [
@@ -1079,3 +1078,595 @@ class ErlangShellLexer(Lexer):
                                       erlexer.get_tokens_unprocessed(curcode)):
                 yield item
 
+
+class OpaLexer(RegexLexer):
+    """
+    Lexer for the Opa language (http://opalang.org).
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'Opa'
+    aliases = ['opa']
+    filenames = ['*.opa']
+    mimetypes = ['text/x-opa']
+
+    # most of these aren't strictly keywords
+    # but if you color only real keywords, you might just
+    # as well not color anything
+    keywords = [
+        'and', 'as', 'begin', 'css', 'database', 'db', 'do', 'else', 'end',
+        'external', 'forall', 'if', 'import', 'match', 'package', 'parser',
+        'rec', 'server', 'then', 'type', 'val', 'with', 'xml_parser'
+    ]
+
+    # matches both stuff and `stuff`
+    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'
+
+    op_re = r'[.=\-<>,@~%/+?*&^!]'
+    punc_re = r'[()\[\],;|]' # '{' and '}' are treated elsewhere
+                               # because they are also used for inserts
+
+    tokens = {
+        # copied from the caml lexer, should be adapted
+        'escape-sequence': [
+            (r'\\[\\\"\'ntr}]', String.Escape),
+            (r'\\[0-9]{3}', String.Escape),
+            (r'\\x[0-9a-fA-F]{2}', String.Escape),
+        ],
+
+        # factorizing these rules, because they are inserted many times
+        'comments': [
+            (r'/\*', Comment, 'nested-comment'),
+            (r'//.*?$', Comment),
+        ],
+        'comments-and-spaces': [
+            include('comments'),
+            (r'\s+', Text),
+        ],
+
+        'root': [
+            include('comments-and-spaces'),
+            # keywords
+            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
+            # directives
+            # we could parse the actual set of directives instead of anything
+            # starting with @, but this is troublesome
+            # because it needs to be adjusted all the time
+            # and assuming we parse only sources that compile, it is useless
+            (r'@'+ident_re+r'\b', Name.Builtin.Pseudo),
+
+            # number literals (the dot is escaped so 'a1e5' stays an identifier)
+            (r'-?\.[\d]+([eE][+\-]?\d+)', Number.Float),
+            (r'-?\d+\.\d*([eE][+\-]?\d+)', Number.Float),
+            (r'-?\d+[eE][+\-]?\d+', Number.Float),
+            (r'0[xX][\da-fA-F]+', Number.Hex),
+            (r'0[oO][0-7]+', Number.Oct),
+            (r'0[bB][01]+', Number.Binary),
+            (r'\d+', Number.Integer),
+            # color literals
+            (r'#[\da-fA-F]{3,6}', Number.Integer),
+
+            # string literals
+            (r'"', String.Double, 'string'),
+            # char literal, should be checked because this is the regexp from
+            # the caml lexer
+            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
+             String.Char),
+
+            # this is meant to deal with embedded exprs in strings
+            # every time we find a '}' we pop a state so that if we were
+            # inside a string, we are back in the string state
+            # as a consequence, we must also push a state every time we find a
+            # '{' or else we will have errors when parsing {} for instance
+            (r'{', Operator, '#push'),
+            (r'}', Operator, '#pop'),
+
+            # html literals
+            # this is much more strict than the actual parser,
+            # since a<b would not be parsed as html
+            # but then again, the parser is way too lax, and we can't hope
+            # to have something as tolerant
+            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),
+
+            # db path
+            # matching the '[_]' in '/a[_]' because it is a part
+            # of the syntax of the db path definition
+            # unfortunately, i don't know how to match the ']' in
+            # /a[1], so this is somewhat inconsistent
+            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
+            # putting the same color on <- as on db path, since
+            # it can be used only to mean Db.write
+            (r'<-(?!'+op_re+r')', Name.Variable),
+
+            # 'modules'
+            # although modules are not distinguished by their names as in caml
+            # the standard library seems to follow the convention that modules
+            # only are capitalized
+            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),
+
+            # operators
+            # = has a special role because this is the only
+            # way to syntactically distinguish binding constructions
+            # unfortunately, this colors the equal in {x=2} too
+            (r'=(?!'+op_re+r')', Keyword),
+            (r'(%s)+' % op_re, Operator),
+            (r'(%s)+' % punc_re, Operator),
+
+            # coercions
+            (r':', Operator, 'type'),
+            # type variables
+            # we need this rule because we don't parse specially type
+            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
+            ("'"+ident_re, Keyword.Type),
+
+            # id literal, #something, or #{expr}
+            (r'#'+ident_re, String.Single),
+            (r'#(?={)', String.Single),
+
+            # identifiers
+            # this avoids coloring '2' in 'a2' as an integer
+            (ident_re, Text),
+
+            # default, not sure if that is needed or not
+            # (r'.', Text),
+        ],
+
+        # it is quite painful to have to parse types to know where they end
+        # this is the general rule for a type
+        # a type is either:
+        # * -> ty
+        # * type-with-slash
+        # * type-with-slash -> ty
+        # * type-with-slash (, type-with-slash)+ -> ty
+        #
+        # the code is pretty funky in here, but this code would roughly
+        # translate in caml to:
+        # let rec type stream =
+        # match stream with
+        # | [< "->";  stream >] -> type stream
+        # | [< "";  stream >] ->
+        #   type_with_slash stream
+        #   type_lhs_1 stream;
+        # and type_1 stream = ...
+        'type': [
+            include('comments-and-spaces'),
+            (r'->', Keyword.Type),
+            (r'', Keyword.Type, ('#pop', 'type-lhs-1', 'type-with-slash')),
+        ],
+
+        # parses all the atomic or closed constructions in the syntax of type
+        # expressions: record types, tuple types, type constructors, basic type
+        # and type variables
+        'type-1': [
+            include('comments-and-spaces'),
+            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
+            (r'~?{', Keyword.Type, ('#pop', 'type-record')),
+            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
+            (ident_re, Keyword.Type, '#pop'),
+            ("'"+ident_re, Keyword.Type),
+            # this case is not in the syntax but sometimes
+            # we think we are parsing types when in fact we are parsing
+            # some css, so we just pop the states until we get back into
+            # the root state
+            (r'', Keyword.Type, '#pop'),
+        ],
+
+        # type-with-slash is either:
+        # * type-1
+        # * type-1 (/ type-1)+
+        'type-with-slash': [
+            include('comments-and-spaces'),
+            (r'', Keyword.Type, ('#pop', 'slash-type-1', 'type-1')),
+        ],
+        'slash-type-1': [
+            include('comments-and-spaces'),
+            ('/', Keyword.Type, ('#pop', 'type-1')),
+            # same remark as above
+            (r'', Keyword.Type, '#pop'),
+        ],
+
+        # we go in this state after having parsed a type-with-slash
+        # while trying to parse a type
+        # and at this point we must determine if we are parsing an arrow
+        # type (in which case we must continue parsing) or not (in which
+        # case we stop)
+        'type-lhs-1': [
+            include('comments-and-spaces'),
+            (r'->', Keyword.Type, ('#pop', 'type')),
+            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
+            (r'', Keyword.Type, '#pop'),
+        ],
+        'type-arrow': [
+            include('comments-and-spaces'),
+            # the look ahead here allows to parse f(x : int, y : float -> truc)
+            # correctly
+            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
+            (r'->', Keyword.Type, ('#pop', 'type')),
+            # same remark as above
+            (r'', Keyword.Type, '#pop'),
+        ],
+
+        # no need to do precise parsing for tuples and records
+        # because they are closed constructions, so we can simply
+        # find the closing delimiter
+        # note that this function would not work if the source
+        # contained identifiers like `{)` (although it could be patched
+        # to support it)
+        'type-tuple': [
+            include('comments-and-spaces'),
+            (r'[^\(\)/*]+', Keyword.Type),
+            (r'[/*]', Keyword.Type),
+            (r'\(', Keyword.Type, '#push'),
+            (r'\)', Keyword.Type, '#pop'),
+        ],
+        'type-record': [
+            include('comments-and-spaces'),
+            (r'[^{}/*]+', Keyword.Type),
+            (r'[/*]', Keyword.Type),
+            (r'{', Keyword.Type, '#push'),
+            (r'}', Keyword.Type, '#pop'),
+        ],
+
+#        'type-tuple': [
+#            include('comments-and-spaces'),
+#            (r'\)', Keyword.Type, '#pop'),
+#            (r'', Keyword.Type, ('#pop', 'type-tuple-1', 'type-1')),
+#        ],
+#        'type-tuple-1': [
+#            include('comments-and-spaces'),
+#            (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,)
+#            (r',', Keyword.Type, 'type-1'),
+#        ],
+#        'type-record':[
+#            include('comments-and-spaces'),
+#            (r'}', Keyword.Type, '#pop'),
+#            (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
+#        ],
+#        'type-record-field-expr': [
+#
+#        ],
+
+        'nested-comment': [
+            (r'[^/*]+', Comment),
+            (r'/\*', Comment, '#push'),
+            (r'\*/', Comment, '#pop'),
+            (r'[/*]', Comment),
+        ],
+
+        # the copy pasting between string and single-string
+        # is kinda sad. Is there a way to avoid that??
+        'string': [
+            (r'[^\\"{]+', String.Double),
+            (r'"', String.Double, '#pop'),
+            (r'{', Operator, 'root'),
+            include('escape-sequence'),
+        ],
+        'single-string': [
+            (r'[^\\\'{]+', String.Double),
+            (r'\'', String.Double, '#pop'),
+            (r'{', Operator, 'root'),
+            include('escape-sequence'),
+        ],
+
+        # all the html stuff
+        # can't really reuse some existing html parser
+        # because we must be able to parse embedded expressions
+
+        # we are in this state after someone parsed the '<' that
+        # started the html literal
+        'html-open-tag': [
+            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
+            (r'>', String.Single, ('#pop', 'html-content')),
+        ],
+
+        # we are in this state after someone parsed the '</' that
+        # started the end of the closing tag
+        'html-end-tag': [
+            # this is a star, because </> is allowed
+            (r'[\w\-:]*>', String.Single, '#pop'),
+        ],
+
+        # we are in this state after having parsed '<ident(:ident)?'
+        # we thus parse a possibly empty list of attributes
+        'html-attr': [
+            (r'\s+', Text),
+            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
+            (r'/>', String.Single, '#pop'),
+            (r'>', String.Single, ('#pop', 'html-content')),
+        ],
+
+        'html-attr-value': [
+            (r"'", String.Single, ('#pop', 'single-string')),
+            (r'"', String.Single, ('#pop', 'string')),
+            (r'#'+ident_re, String.Single, '#pop'),
+            (r'#(?={)', String.Single, ('#pop', 'root')),
+            (r'{', Operator, ('#pop', 'root')), # this is a tail call!
+        ],
+
+        # we should probably deal with '\' escapes here
+        'html-content': [
+            (r'<!--', Comment, 'html-comment'),
+            (r'</', String.Single, ('#pop', 'html-end-tag')),
+            (r'<', String.Single, 'html-open-tag'),
+            (r'{', Operator, 'root'),
+            (r'.|\s+', String.Single),
+        ],
+
+        'html-comment': [
+            (r'-->', Comment, '#pop'),
+            (r'[^\-]+|-', Comment),
+        ],
+    }
+
+
+class CoqLexer(RegexLexer):
+    """
+    For the `Coq <http://coq.inria.fr/>`_ theorem prover.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'Coq'
+    aliases = ['coq']
+    filenames = ['*.v']
+    mimetypes = ['text/x-coq']
+
+    keywords1 = [
+        # Vernacular commands
+        'Section', 'Module', 'End', 'Require', 'Import', 'Export', 'Variable',
+        'Variables', 'Parameter', 'Parameters', 'Axiom', 'Hypothesis',
+        'Hypotheses', 'Notation', 'Local', 'Tactic', 'Reserved', 'Scope',
+        'Open', 'Close', 'Bind', 'Delimit', 'Definition', 'Let', 'Ltac',
+        'Fixpoint', 'CoFixpoint', 'Morphism', 'Relation', 'Implicit',
+        'Arguments', 'Set', 'Unset', 'Contextual', 'Strict', 'Prenex',
+        'Implicits', 'Inductive', 'CoInductive', 'Record', 'Structure',
+        'Canonical', 'Coercion', 'Theorem', 'Lemma', 'Corollary',
+        'Proposition', 'Fact', 'Remark', 'Example', 'Proof', 'Goal', 'Save',
+        'Qed', 'Defined', 'Hint', 'Resolve', 'Rewrite', 'View', 'Search',
+        'Show', 'Print', 'Printing', 'All', 'Graph', 'Projections', 'inside',
+        'outside',
+    ]
+    keywords2 = [
+        # Gallina
+        'forall', 'exists', 'exists2', 'fun', 'fix', 'cofix', 'struct',
+        'match', 'end',  'in', 'return', 'let', 'if', 'is', 'then', 'else',
+        'for', 'of', 'nosimpl', 'with', 'as',
+    ]
+    keywords3 = [
+        # Sorts
+        'Type', 'Prop',
+    ]
+    keywords4 = [
+        # Tactics
+        'pose', 'set', 'move', 'case', 'elim', 'apply', 'clear', 'hnf', 'intro',
+        'intros', 'generalize', 'rename', 'pattern', 'after', 'destruct',
+        'induction', 'using', 'refine', 'inversion', 'injection', 'rewrite',
+        'congr', 'unlock', 'compute', 'ring', 'field', 'replace', 'fold',
+        'unfold', 'change', 'cutrewrite', 'simpl', 'have', 'suff', 'wlog',
+        'suffices', 'without', 'loss', 'nat_norm', 'assert', 'cut', 'trivial',
+        'revert', 'bool_congr', 'nat_congr', 'symmetry', 'transitivity', 'auto',
+        'split', 'left', 'right', 'autorewrite',
+    ]
+    keywords5 = [
+        # Terminators
+        'by', 'done', 'exact', 'reflexivity', 'tauto', 'romega', 'omega',
+        'assumption', 'solve', 'contradiction', 'discriminate',
+    ]
+    keywords6 = [
+        # Control
+        'do', 'last', 'first', 'try', 'idtac', 'repeat',
+    ]
+    # symbolic operators and punctuation, written as regex alternatives
+    keyopts = [
+        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
+        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
+        '<-', '=', '>', '>]', '>}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
+        r'\[\|', ']', '_', '`', '{', '{<', r'\|', r'\|]', '}', '~', '=>',
+        r'/\\', r'\\/',
+        u'Π', u'λ',
+    ]
+    operators = r'[!$%&*+\./:<=>?@^|~-]'
+    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
+    prefix_syms = r'[!?~]'
+    infix_syms = r'[=<>@^|&+\*/$%-]'
+    primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list',
+                  'array']
+
+    tokens = {
+        'root': [
+            (r'\s+', Text),
+            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
+            (r'\(\*', Comment, 'comment'),
+            (r'\b(%s)\b' % '|'.join(keywords1), Keyword.Namespace),
+            (r'\b(%s)\b' % '|'.join(keywords2), Keyword),
+            (r'\b(%s)\b' % '|'.join(keywords3), Keyword.Type),
+            (r'\b(%s)\b' % '|'.join(keywords4), Keyword),
+            (r'\b(%s)\b' % '|'.join(keywords5), Keyword.Pseudo),
+            (r'\b(%s)\b' % '|'.join(keywords6), Keyword.Reserved),
+            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
+             Name.Namespace, 'dotted'),
+            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
+            (r'(%s)' % '|'.join(keyopts), Operator),
+            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
+            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
+            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
+
+            (r"[^\W\d][\w']*", Name),
+
+            # radix-prefixed and float literals come before the plain integer
+            # rule, which would otherwise consume their leading digits first
+            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
+            (r'0[oO][0-7][0-7_]*', Number.Oct),
+            (r'0[bB][01][01_]*', Number.Binary),
+            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
+            (r'\d[\d_]*', Number.Integer),
+
+            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
+             String.Char),
+            (r"'.'", String.Char),
+            (r"'", Keyword), # a stray quote is another syntax element
+
+            (r'"', String.Double, 'string'),
+
+            (r'[~?][a-z][\w\']*:', Name.Variable),
+        ],
+        'comment': [
+            # '(*' inside a comment pushes again, so comments nest
+            (r'[^(*)]+', Comment),
+            (r'\(\*', Comment, '#push'),
+            (r'\*\)', Comment, '#pop'),
+            (r'[(*)]', Comment),
+        ],
+        'string': [
+            # a doubled quote does not end the string
+            (r'[^"]+', String.Double),
+            (r'""', String.Double),
+            (r'"', String.Double, '#pop'),
+        ],
+        'dotted': [
+            # entered after a capitalized name that is followed by a dot
+            (r'\s+', Text),
+            (r'\.', Punctuation),
+            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
+            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
+            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
+            (r'', Text, '#pop')
+        ],
+    }
+
+    def analyse_text(text):
+        """Claim text that opens with a ``(*`` comment (else return None)."""
+        if text.startswith('(*'):
+            return True
+
+
+class NewLispLexer(RegexLexer):
+    """
+    For `newLISP <http://www.newlisp.org>`_ source code (version 10.3.0).
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'NewLisp'
+    aliases = ['newlisp']
+    filenames = ['*.lsp', '*.nl']
+    mimetypes = ['text/x-newlisp', 'application/x-newlisp']
+
+    flags = re.IGNORECASE | re.MULTILINE | re.UNICODE
+
+    # list of built-in functions for newLISP version 10.3
+    builtins = [
+        '^', '--', '-', ':', '!', '!=', '?', '@', '*', '/', '&', '%', '+', '++',
+        '<', '<<', '<=', '=', '>', '>=', '>>', '|', '~', '$', '$0', '$1', '$10',
+        '$11', '$12', '$13', '$14', '$15', '$2', '$3', '$4', '$5', '$6', '$7',
+        '$8', '$9', '$args', '$idx', '$it', '$main-args', 'abort', 'abs',
+        'acos', 'acosh', 'add', 'address', 'amb', 'and', 'append-file',
+        'append', 'apply', 'args', 'array-list', 'array?', 'array', 'asin',
+        'asinh', 'assoc', 'atan', 'atan2', 'atanh', 'atom?', 'base64-dec',
+        'base64-enc', 'bayes-query', 'bayes-train', 'begin',
+        'beta', 'betai', 'bind', 'binomial', 'bits', 'callback', 'case',
+        'catch', 'ceil', 'change-dir', 'char', 'chop', 'Class', 'clean',
+        'close', 'command-event', 'cond', 'cons', 'constant',
+        'context?', 'context', 'copy-file', 'copy', 'cos', 'cosh', 'count',
+        'cpymem', 'crc32', 'crit-chi2', 'crit-z', 'current-line', 'curry',
+        'date-list', 'date-parse', 'date-value', 'date', 'debug', 'dec',
+        'def-new', 'default', 'define-macro', 'define',
+        'delete-file', 'delete-url', 'delete', 'destroy', 'det', 'device',
+        'difference', 'directory?', 'directory', 'div', 'do-until', 'do-while',
+        'doargs',  'dolist',  'dostring', 'dotimes',  'dotree', 'dump', 'dup',
+        'empty?', 'encrypt', 'ends-with', 'env', 'erf', 'error-event',
+        'eval-string', 'eval', 'exec', 'exists', 'exit', 'exp', 'expand',
+        'explode', 'extend', 'factor', 'fft', 'file-info', 'file?', 'filter',
+        'find-all', 'find', 'first', 'flat', 'float?', 'float', 'floor', 'flt',
+        'fn', 'for-all', 'for', 'fork', 'format', 'fv', 'gammai', 'gammaln',
+        'gcd', 'get-char', 'get-float', 'get-int', 'get-long', 'get-string',
+        'get-url', 'global?', 'global', 'if-not', 'if', 'ifft', 'import', 'inc',
+        'index', 'inf?', 'int', 'integer?', 'integer', 'intersect', 'invert',
+        'irr', 'join', 'lambda-macro', 'lambda?', 'lambda', 'last-error',
+        'last', 'legal?', 'length', 'let', 'letex', 'letn',
+        'list?', 'list', 'load', 'local', 'log', 'lookup',
+        'lower-case', 'macro?', 'main-args', 'MAIN', 'make-dir', 'map', 'mat',
+        'match', 'max', 'member', 'min', 'mod', 'module', 'mul', 'multiply',
+        'NaN?', 'net-accept', 'net-close', 'net-connect', 'net-error',
+        'net-eval', 'net-interface', 'net-ipv', 'net-listen', 'net-local',
+        'net-lookup', 'net-packet', 'net-peek', 'net-peer', 'net-ping',
+        'net-receive-from', 'net-receive-udp', 'net-receive', 'net-select',
+        'net-send-to', 'net-send-udp', 'net-send', 'net-service',
+        'net-sessions', 'new', 'nil?', 'nil', 'normal', 'not', 'now', 'nper',
+        'npv', 'nth', 'null?', 'number?', 'open', 'or', 'ostype', 'pack',
+        'parse-date', 'parse', 'peek', 'pipe', 'pmt', 'pop-assoc', 'pop',
+        'post-url', 'pow', 'prefix', 'pretty-print', 'primitive?', 'print',
+        'println', 'prob-chi2', 'prob-z', 'process', 'prompt-event',
+        'protected?', 'push', 'put-url', 'pv', 'quote?', 'quote', 'rand',
+        'random', 'randomize', 'read', 'read-char', 'read-expr', 'read-file',
+        'read-key', 'read-line', 'read-utf8', 'reader-event',
+        'real-path', 'receive', 'ref-all', 'ref', 'regex-comp', 'regex',
+        'remove-dir', 'rename-file', 'replace', 'reset', 'rest', 'reverse',
+        'rotate', 'round', 'save', 'search', 'seed', 'seek', 'select', 'self',
+        'semaphore', 'send', 'sequence', 'series', 'set-locale', 'set-ref-all',
+        'set-ref', 'set', 'setf',  'setq', 'sgn', 'share', 'signal', 'silent',
+        'sin', 'sinh', 'sleep', 'slice', 'sort', 'source', 'spawn', 'sqrt',
+        'starts-with', 'string?', 'string', 'sub', 'swap', 'sym', 'symbol?',
+        'symbols', 'sync', 'sys-error', 'sys-info', 'tan', 'tanh', 'term',
+        'throw-error', 'throw', 'time-of-day', 'time', 'timer', 'title-case',
+        'trace-highlight', 'trace', 'transpose', 'Tree', 'trim', 'true?',
+        'true', 'unicode', 'unify', 'unique', 'unless', 'unpack', 'until',
+        'upper-case', 'utf8', 'utf8len', 'uuid', 'wait-pid', 'when', 'while',
+        'write', 'write-char', 'write-file', 'write-line',
+        'xfer-event', 'xml-error', 'xml-parse', 'xml-type-tags', 'zero?',
+    ]
+
+    # valid names
+    valid_name = r'([a-zA-Z0-9!$%&*+.,/<=>?@^_~|-])+|(\[.*?\])+'
+
+    tokens = {
+        'root': [
+            # shebang
+            (r'#!(.*?)$', Comment.Preproc),
+            # comments starting with semicolon
+            (r';.*$', Comment.Single),
+            # comments starting with #
+            (r'#.*$', Comment.Single),
+
+            # whitespace
+            (r'\s+', Text),
+
+            # strings, symbols and characters
+            (r'"(\\\\|\\"|[^"])*"', String),
+
+            # braces
+            (r"{", String, "bracestring"),
+
+            # [text] ... [/text] delimited strings
+            (r'\[text\]', String, "tagstring"),
+
+            # 'special' operators...
+            (r"('|:)", Operator),
+
+            # highlight the builtins, but only when not a prefix of a longer symbol
+            ('(%s)(?![a-zA-Z0-9!$%%&*+.,/<=>?@^_~|-])'
+             % '|'.join(re.escape(entry) for entry in builtins), Keyword),
+
+            # the remaining functions
+            (r'(?<=\()' + valid_name, Name.Variable),
+
+            # the remaining variables
+            (valid_name, String.Symbol),
+
+            # parentheses
+            (r'(\(|\))', Punctuation),
+        ],
+
+        # braced strings...
+        'bracestring': [
+             ("{", String, "#push"),
+             ("}", String, "#pop"),
+             ("[^{}]+", String),
+        ],
+
+        # tagged [text]...[/text] delimited strings...
+        'tagstring': [
+            (r'(?s)(.*?)(\[/text\])', String, '#pop'),
+        ],
+    }