summary | refs | log | tree | commit | diff
path: root/pygments/lexers/lisp.py
diff options
context:
space:
mode:
author: Jean Abou Samra <jean@abou-samra.fr> 2022-02-01 13:33:38 +0100
committer: Georg Brandl <georg@python.org> 2022-02-08 16:36:49 +0100
commit: 93d4cce817553c4bc2694747cfa6b3a04c6e5ddf (patch)
tree: 8fb49ebe6ab07a1edf20ec1848a09a8f81ac168b /pygments/lexers/lisp.py
parent: 88f36b5a27ab6b086018a2bb339e17a018345850 (diff)
download: pygments-git-93d4cce817553c4bc2694747cfa6b3a04c6e5ddf.tar.gz
Scheme: autogenerate lists of builtins
This enlarges the set of recognized builtins. Note that this also fixes a bug where the space following a builtin was included in the token.
Diffstat (limited to 'pygments/lexers/lisp.py')
-rw-r--r-- pygments/lexers/lisp.py 84
1 files changed, 20 insertions, 64 deletions
diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py
index 7e5dadb2..7d457d3c 100644
--- a/pygments/lexers/lisp.py
+++ b/pygments/lexers/lisp.py
@@ -16,17 +16,15 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
from pygments.lexers.python import PythonLexer
+from pygments.lexers._scheme_builtins import scheme_keywords, scheme_builtins
+
__all__ = ['SchemeLexer', 'CommonLispLexer', 'HyLexer', 'RacketLexer',
'NewLispLexer', 'EmacsLispLexer', 'ShenLexer', 'CPSALexer',
'XtlangLexer', 'FennelLexer']
-
class SchemeLexer(RegexLexer):
"""
- A Scheme lexer, parsing a stream and outputting the tokens
- needed to highlight scheme code.
- This lexer could be most probably easily subclassed to parse
- other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp.
+ A Scheme lexer.
This parser is checked with pastes from the LISP pastebin
at http://paste.lisp.org/ to cover as much syntax as possible.
@@ -41,60 +39,25 @@ class SchemeLexer(RegexLexer):
mimetypes = ['text/x-scheme', 'application/x-scheme']
flags = re.DOTALL | re.MULTILINE
- # list of known keywords and builtins taken form vim 6.4 scheme.vim
- # syntax file.
- keywords = (
- 'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
- 'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
- 'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
- 'let-syntax', 'letrec-syntax', 'syntax-rules'
- )
- builtins = (
- '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
- 'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
- 'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
- 'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
- 'cadr', 'call-with-current-continuation', 'call-with-input-file',
- 'call-with-output-file', 'call-with-values', 'call/cc', 'car',
- 'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
- 'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
- 'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
- 'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
- 'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
- 'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
- 'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
- 'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
- 'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
- 'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
- 'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
- 'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
- 'integer?', 'interaction-environment', 'lcm', 'length', 'list',
- 'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
- 'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
- 'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
- 'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
- 'null?', 'number->string', 'number?', 'numerator', 'odd?',
- 'open-input-file', 'open-output-file', 'output-port?', 'pair?',
- 'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
- 'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
- 'remainder', 'reverse', 'round', 'scheme-report-environment',
- 'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
- 'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
- 'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
- 'string-copy', 'string-fill!', 'string-length', 'string-ref',
- 'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
- 'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
- 'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
- 'vector', 'vector->list', 'vector-fill!', 'vector-length',
- 'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
- 'with-output-to-file', 'write', 'write-char', 'zero?'
- )
# valid names for identifiers
# well, names can only not consist fully of numbers
# but this should be good enough for now
valid_name = r'[\w!$%&*+,/:<=>?@^~|-]+'
+ # Recognizing builtins.
+ def get_tokens_unprocessed(self, text):  # post-filter: re-tag names that are known Scheme keywords/builtins
+ for index, token, value in super().get_tokens_unprocessed(text):  # consume the parent lexer's (index, token, value) triples
+ if token is Name.Function or token is Name.Variable:  # only these two name token types are candidates for re-tagging
+ if value in scheme_keywords:  # keywords are checked first, so they win over builtins
+ yield index, Keyword, value
+ elif value in scheme_builtins:  # otherwise a known builtin name
+ yield index, Name.Builtin, value
+ else:  # an ordinary identifier: keep the token type the parent assigned
+ yield index, token, value
+ else:  # every other token type passes through unchanged
+ yield index, token, value
+
# Scheme has funky syntactic rules for numbers. These are all
# valid number literals: 5.0e55|14, 14/13, -1+5j, +1@5, #b110,
# #o#Iinf.0-nan.0i. This is adapted from the formal grammar given
@@ -263,23 +226,16 @@ class SchemeLexer(RegexLexer):
# special operators
(r"('|#|`|,@|,|\.)", Operator),
- # highlight the keywords
- ('(%s)' % '|'.join(re.escape(entry) + ' ' for entry in keywords),
- Keyword,
- '#pop'),
-
# first variable in a quoted string like
# '(this is syntactic sugar)
(r"(?<='\()" + valid_name, Name.Variable, '#pop'),
(r"(?<=#\()" + valid_name, Name.Variable, '#pop'),
- # highlight the builtins
- (r"(?<=\()(%s)" % '|'.join(re.escape(entry) + ' ' for entry in builtins),
- Name.Builtin,
- '#pop'),
-
- # the remaining functions
+ # Functions -- note that this also catches variables
+ # defined in let/let*, but there is little that can
+ # be done about it.
(r'(?<=\()' + valid_name, Name.Function, '#pop'),
+
# find the remaining variables
(valid_name, Name.Variable, '#pop'),