# -*- coding: utf-8 -*- """ pygments.lexers.functional ~~~~~~~~~~~~~~~~~~~~~~~~~~ Lexers for functional languages. :copyright: 2006-2007 by Georg Brandl, Marek Kubica, Adam Blinkinsop . :license: BSD, see LICENSE for more details. """ import re try: set except NameError: from sets import Set as set from pygments.lexer import RegexLexer, bygroups, using, this, include from pygments.token import Text, Comment, Operator, Keyword, Name, \ String, Number, Punctuation __all__ = ['SchemeLexer', 'HaskellLexer', 'OcamlLexer'] class SchemeLexer(RegexLexer): """ A Scheme lexer, parsing a stream and outputting the tokens needed to highlight scheme code. This lexer could be most probably easily subclassed to parse other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp. This parser is checked with pastes from the LISP pastebin at http://paste.lisp.org/ to cover as much syntax as possible. It supports the full Scheme syntax as defined in R5RS. *New in Pygments 0.6.* """ name = 'Scheme' aliases = ['scheme', 'scm'] filenames = ['*.scm'] mimetypes = ['text/x-scheme', 'application/x-scheme'] # list of known keywords and builtins taken form vim 6.4 scheme.vim # syntax file. keywords = [ 'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let', 'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote', 'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax', 'let-syntax', 'letrec-syntax', 'syntax-rules' ] builtins = [ '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle', 'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan', 'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr', 'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr', 'cadr', 'call-with-current-continuation', 'call-with-input-file', 'call-with-output-file', 'call-with-values', 'call/cc', 'car', 'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar', 'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr', 'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?', 'char-ci=?', 'char-ci>?', 'char-downcase', 'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase', 'char-upper-case?', 'char-whitespace?', 'char<=?', 'char=?', 'char>?', 'char?', 'close-input-port', 'close-output-port', 'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port', 'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?', 'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp', 'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part', 'inexact->exact', 'inexact?', 'input-port?', 'integer->char', 'integer?', 'interaction-environment', 'lcm', 'length', 'list', 'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?', 'load', 'log', 'magnitude', 'make-polar', 'make-rectangular', 'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv', 'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment', 'null?', 'number->string', 'number?', 'numerator', 'odd?', 'open-input-file', 'open-output-file', 'output-port?', 'pair?', 'peek-char', 'port?', 'positive?', 'procedure?', 'quotient', 'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?', 'remainder', 'reverse', 'round', 'scheme-report-environment', 'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list', 'string->number', 'string->symbol', 'string-append', 'string-ci<=?', 'string-ci=?', 'string-ci>?', 'string-copy', 'string-fill!', 'string-length', 'string-ref', 'string-set!', 'string<=?', 'string=?', 'string>?', 'string?', 'substring', 'symbol->string', 'symbol?', 'tan', 'transcript-off', 'transcript-on', 'truncate', 'values', 'vector', 'vector->list', 'vector-fill!', 'vector-length', 'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file', 'with-output-to-file', 'write', 'write-char', 'zero?' ] # valid names for identifiers # well, names can only not consist fully of numbers # but this should be good enough for now valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+' tokens = { 'root' : [ # the comments - always starting with semicolon # and going to the end of the line (r';.*$', Comment.Single), # whitespaces - usually not relevant (r'\s+', Text), # numbers (r'-?\d+\.\d+', Number.Float), (r'-?\d+', Number.Integer), # support for uncommon kinds of numbers - # have to figure out what the characters mean #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), (r"#\\([()/'\".'_!ยง$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), # constants (r'(#t|#f)', Name.Constant), # special operators (r"('|#|`|,@|,|\.)", Operator), # highlight the keywords ('(%s)' % '|'.join([ re.escape(entry) + ' ' for entry in keywords]), Keyword ), # first variable in a quoted string like # '(this is syntactic sugar) (r"(?<='\()" + valid_name, Name.Variable), (r"(?<=#\()" + valid_name, Name.Variable), # highlight the builtins ("(?<=\()(%s)" % '|'.join([ re.escape(entry) + ' ' for entry in builtins]), Name.Builtin ), # the remaining functions (r'(?<=\()' + valid_name, Name.Function), # find the remaining variables (valid_name, Name.Variable), # the famous parentheses! (r'(\(|\))', Punctuation), ], } class HaskellLexer(RegexLexer): """ A Haskell lexer based on the lexemes defined in the Haskell 98 Report. *New in Pygments 0.8.* """ name = 'Haskell' aliases = ['haskell', 'hs'] filenames = ['*.hs'] reserved = ['case','class','data','default','deriving','do','else', 'if','in','infix[lr]?','instance', 'let','newtype','of','then','type','where','_'] ascii = ['NUL','SOH','[SE]TX','EOT','ENQ','ACK', 'BEL','BS','HT','LF','VT','FF','CR','S[OI]','DLE', 'DC[1-4]','NAK','SYN','ETB','CAN', 'EM','SUB','ESC','[FGRU]S','SP','DEL'] tokens = { 'root': [ # Whitespace: (r'\s+', Text), (r'--.*$', Comment.Single), (r'{-', Comment.Multiline, 'comment'), # Lexemes: # Identifiers (r'\bimport\b', Keyword.Reserved, 'import'), (r'\bmodule\b', Keyword.Reserved, 'module'), (r'\berror\b', Name.Exception), (r'\b(%s)\b' % '|'.join(reserved), Keyword.Reserved), (r'^[_a-z][\w\']*', Name.Function), (r'[_a-z][\w\']*', Name), (r'[A-Z][\w\']*', Keyword.Type), # Operators (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Numbers (r'\d+[eE][+-]?\d+', Number.Float), (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float), (r'0[oO][0-7]+', Number.Oct), (r'0[xX][\da-fA-F]+', Number.Hex), (r'\d+', Number.Integer), # Character/String Literals (r"'", String.Char, 'character'), (r'"', String, 'string'), # Special (r'\[\]', Keyword.Type), (r'\(\)', Name.Builtin), (r'[][(),;`{}]', Punctuation), ], 'import': [ # Import statements (r'\s+', Text), # after "funclist" state (r'\)', Punctuation, '#pop'), (r'qualified\b', Keyword), # import X as Y (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)', bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'), # import X hiding (functions) (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()', bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'), # import X (functions) (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), # import X (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'), ], 'module': [ (r'\s+', Text), (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', bygroups(Name.Namespace, Text, Punctuation), 'funclist'), (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'), ], 'funclist': [ (r'\s+', Text), (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), (r'[a-zA-Z0-9_]+', Name.Function), (r',', Punctuation), (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # (HACK, but it makes sense to push two instances, believe me) (r'\(', Punctuation, ('funclist', 'funclist')), (r'\)', Punctuation, '#pop:2'), ], 'comment': [ # Multiline Comments (r'[^-{}]', Comment.Multiline), (r'{-', Comment.Multiline, '#push'), (r'-}', Comment.Multiline, '#pop'), (r'[-{}]', Comment.Multiline), ], 'character': [ # Allows multi-chars, incorrectly. (r"[^\']", String.Char), (r"\\", String.Escape, 'escape'), ("'", String.Char, '#pop'), ], 'string': [ (r'[^\\"]', String), (r"\\", String.Escape, 'escape'), ('"', String, '#pop'), ], 'escape': [ ('[abfnrtv\\"\'&]', String.Escape, '#pop'), ('\\^[][A-Z@\\^_]', String.Escape, '#pop'), ('|'.join(ascii), String.Escape, '#pop'), (r'o[0-7]+', String.Escape, '#pop'), (r'x[\da-fA-F]+', String.Escape, '#pop'), (r'\d+', String.Escape, '#pop'), ], } class OcamlLexer(RegexLexer): """ For the OCaml language. *New in Pygments 0.7.* """ name = 'OCaml' aliases = ['ocaml'] filenames = ['*.ml', '*.mli', '*.mll', '*.mly'] mimetypes = ['text/x-ocaml'] keywords = [ 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto', 'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy', 'let', 'match', 'method', 'module', 'mutable', 'new', 'object', 'of', 'open', 'private', 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'virtual', 'when', 'while', 'with' ] keyopts = [ '!=','#','&','&&','\(','\)','\*','\+',',','-', '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<', '<-','=','>','>]','>}','\?','\?\?','\[','\[<','\[>','\[\|', ']','_','`','{','{<','\|','\|]','}','~' ] operators = r'[!$%&*+\./:<=>?@^|~-]' word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or'] prefix_syms = r'[!?~]' infix_syms = r'[=<>@^|&+\*/$%-]' primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array'] tokens = { 'escape-sequence': [ (r'\\[\"\'ntbr]', String.Escape), (r'\\[0-9]{3}', String.Escape), (r'\\x[0-9a-fA-F]{2}', String.Escape), ], 'root': [ (r'\s+', Text), (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo), (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', Name.Namespace, 'dotted'), (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), (r'\(\*', Comment, 'comment'), (r'\b(%s)\b' % '|'.join(keywords), Keyword), (r'(%s)' % '|'.join(keyopts), Operator), (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), (r"[^\W\d][\w']*", Name), (r'\d[\d_]*', Number.Integer), (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex), (r'0[oO][0-7][0-7_]*', Number.Oct), (r'0[bB][01][01_]*', Number.Binary), (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'", String.Char), (r"'.'", String.Char), (r"'", Keyword), # a stray quote is another syntax element (r'"', String.Double, 'string'), (r'[~?][a-z][\w\']*:', Name.Variable), ], 'comment': [ (r'[^(*)]', Comment), (r'\(\*', Comment, '#push'), (r'\*\)', Comment, '#pop'), (r'[(*)]', Comment), ], 'string': [ (r'[^\\"]', String.Double), include('escape-sequence'), (r'\\\n', String.Double), (r'"', String.Double, '#pop'), ], 'dotted': [ (r'\s+', Text), (r'\.', Punctuation), (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace), (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'), (r'[a-z][a-z0-9_\']*', Name, '#pop'), ], }