diff options
author | Jean Abou Samra <jean@abou-samra.fr> | 2022-02-01 13:33:38 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2022-02-08 16:36:49 +0100 |
commit | 93d4cce817553c4bc2694747cfa6b3a04c6e5ddf (patch) | |
tree | 8fb49ebe6ab07a1edf20ec1848a09a8f81ac168b /pygments/lexers/lisp.py | |
parent | 88f36b5a27ab6b086018a2bb339e17a018345850 (diff) | |
download | pygments-git-93d4cce817553c4bc2694747cfa6b3a04c6e5ddf.tar.gz |
Scheme: autogenerate lists of builtins
This enlarges the set of recognized keywords and builtins.
Note that this also fixes a bug where the space following a builtin was
included in the builtin's token.
Diffstat (limited to 'pygments/lexers/lisp.py')
-rw-r--r-- | pygments/lexers/lisp.py | 84 |
1 file changed, 20 insertions, 64 deletions
```diff
diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py
index 7e5dadb2..7d457d3c 100644
--- a/pygments/lexers/lisp.py
+++ b/pygments/lexers/lisp.py
@@ -16,17 +16,15 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
 
 from pygments.lexers.python import PythonLexer
 
+from pygments.lexers._scheme_builtins import scheme_keywords, scheme_builtins
+
 __all__ = ['SchemeLexer', 'CommonLispLexer', 'HyLexer', 'RacketLexer',
            'NewLispLexer', 'EmacsLispLexer', 'ShenLexer', 'CPSALexer',
            'XtlangLexer', 'FennelLexer']
 
-
 class SchemeLexer(RegexLexer):
     """
-    A Scheme lexer, parsing a stream and outputting the tokens
-    needed to highlight scheme code.
-    This lexer could be most probably easily subclassed to parse
-    other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp.
+    A Scheme lexer.
 
     This parser is checked with pastes from the LISP pastebin
     at http://paste.lisp.org/ to cover as much syntax as possible.
@@ -41,60 +39,25 @@ class SchemeLexer(RegexLexer):
     mimetypes = ['text/x-scheme', 'application/x-scheme']
 
     flags = re.DOTALL | re.MULTILINE
-    # list of known keywords and builtins taken form vim 6.4 scheme.vim
-    # syntax file.
-    keywords = (
-        'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
-        'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
-        'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
-        'let-syntax', 'letrec-syntax', 'syntax-rules'
-    )
-    builtins = (
-        '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
-        'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
-        'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
-        'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
-        'cadr', 'call-with-current-continuation', 'call-with-input-file',
-        'call-with-output-file', 'call-with-values', 'call/cc', 'car',
-        'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
-        'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
-        'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
-        'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
-        'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
-        'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
-        'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
-        'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
-        'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
-        'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
-        'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
-        'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
-        'integer?', 'interaction-environment', 'lcm', 'length', 'list',
-        'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
-        'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
-        'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
-        'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
-        'null?', 'number->string', 'number?', 'numerator', 'odd?',
-        'open-input-file', 'open-output-file', 'output-port?', 'pair?',
-        'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
-        'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
-        'remainder', 'reverse', 'round', 'scheme-report-environment',
-        'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
-        'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
-        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
-        'string-copy', 'string-fill!', 'string-length', 'string-ref',
-        'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
-        'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
-        'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
-        'vector', 'vector->list', 'vector-fill!', 'vector-length',
-        'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
-        'with-output-to-file', 'write', 'write-char', 'zero?'
-    )
 
     # valid names for identifiers
     # well, names can only not consist fully of numbers
     # but this should be good enough for now
     valid_name = r'[\w!$%&*+,/:<=>?@^~|-]+'
 
+    # Recognizing builtins.
+    def get_tokens_unprocessed(self, text):
+        for index, token, value in super().get_tokens_unprocessed(text):
+            if token is Name.Function or token is Name.Variable:
+                if value in scheme_keywords:
+                    yield index, Keyword, value
+                elif value in scheme_builtins:
+                    yield index, Name.Builtin, value
+                else:
+                    yield index, token, value
+            else:
+                yield index, token, value
+
     # Scheme has funky syntactic rules for numbers. These are all
     # valid number literals: 5.0e55|14, 14/13, -1+5j, +1@5, #b110,
     # #o#Iinf.0-nan.0i. This is adapted from the formal grammar given
@@ -263,23 +226,16 @@ class SchemeLexer(RegexLexer):
             # special operators
             (r"('|#|`|,@|,|\.)", Operator),
 
-            # highlight the keywords
-            ('(%s)' % '|'.join(re.escape(entry) + ' ' for entry in keywords),
-             Keyword,
-             '#pop'),
-
             # first variable in a quoted string like
             # '(this is syntactic sugar)
             (r"(?<='\()" + valid_name, Name.Variable, '#pop'),
             (r"(?<=#\()" + valid_name, Name.Variable, '#pop'),
 
-            # highlight the builtins
-            (r"(?<=\()(%s)" % '|'.join(re.escape(entry) + ' ' for entry in builtins),
-             Name.Builtin,
-             '#pop'),
-
-            # the remaining functions
+            # Functions -- note that this also catches variables
+            # defined in let/let*, but there is little that can
+            # be done about it.
             (r'(?<=\()' + valid_name, Name.Function, '#pop'),
+
             # find the remaining variables
             (valid_name, Name.Variable, '#pop'),
 
```