diff options
author | Bruce Mitchener, Jr. <bruce.mitchener@gmail.com> | 2013-01-09 16:16:34 +0700 |
---|---|---|
committer | Bruce Mitchener, Jr. <bruce.mitchener@gmail.com> | 2013-01-09 16:16:34 +0700 |
commit | f3e614b2a8ae54b7bf5a7ee7751c36fd952e6a30 (patch) | |
tree | f011f2a6558830c804e725344666511065ebb4d1 | |
parent | 6e145a03531db07ed5bad04ad6f894559067e2eb (diff) | |
download | pygments-f3e614b2a8ae54b7bf5a7ee7751c36fd952e6a30.tar.gz |
Rewrite the Dylan Lexer and provide a lexer for Dylan LID files.
Fixes include:
* Dylan is not case sensitive.
* Support for other number literal formats (hex, octal, binary).
* Support nested multi-line comments.
* Correct various issues with matching of 'names'.
* Improve highlighting of macro definitions.
* Support keyword / required keyword syntax in class definitions.
* Support each-subclass slot allocation.
* Handle the | operator.
* Change which tokens are mapped to which types, reducing the "Christmas tree" look of the output a bit.
* Explicitly list and handle the various builtins defined in the DRM.
-rw-r--r-- | pygments/lexers/_mapping.py | 3 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 206 | ||||
-rw-r--r-- | tests/examplefiles/classes.dylan | 90 | ||||
-rw-r--r-- | tests/examplefiles/unix-io.lid | 37 |
4 files changed, 304 insertions, 32 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index b4706848..c0cbeae9 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -83,7 +83,8 @@ LEXERS = { 'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')), 'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)), 'DuelLexer': ('pygments.lexers.web', 'Duel', ('duel', 'Duel Engine', 'Duel View', 'JBST', 'jbst', 'JsonML+BST'), ('*.duel', '*.jbst'), ('text/x-duel', 'text/x-jbst')), - 'DylanLexer': ('pygments.lexers.compiled', 'Dylan', ('dylan',), ('*.dylan', '*.dyl'), ('text/x-dylan',)), + 'DylanLexer': ('pygments.lexers.compiled', 'Dylan', ('dylan',), ('*.dylan', '*.dyl', '*.intr'), ('text/x-dylan',)), + 'DylanLidLexer': ('pygments.lexers.compiled', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)), 'ECLLexer': ('pygments.lexers.other', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)), 'ECLexer': ('pygments.lexers.compiled', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')), 'ElixirConsoleLexer': ('pygments.lexers.functional', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)), diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index 5d09f4c8..385aca14 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -27,7 +27,8 @@ __all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer', 'DylanLexer', 'ObjectiveCLexer', 'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer', 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer', 'Modula2Lexer', 'BlitzMaxLexer', - 'NimrodLexer', 'FantomLexer', 'RustLexer', 'CudaLexer', 'MonkeyLexer'] + 'NimrodLexer', 'FantomLexer', 'RustLexer', 'CudaLexer', 'MonkeyLexer', + 'DylanLidLexer'] class CLexer(RegexLexer): @@ -1057,40 +1058,164 @@ class DylanLexer(RegexLexer): name = 'Dylan' aliases 
= ['dylan'] - filenames = ['*.dylan', '*.dyl'] + filenames = ['*.dylan', '*.dyl', '*.intr'] mimetypes = ['text/x-dylan'] - flags = re.DOTALL + flags = re.DOTALL | re.IGNORECASE + + builtins = set([ + 'subclass', 'abstract', 'block', 'concrete', 'constant', 'class', + 'compiler-open', 'compiler-sideways', 'domain', 'dynamic', + 'each-subclass', 'exception', 'exclude', 'function', 'generic', + 'handler', 'inherited', 'inline', 'inline-only', 'instance', + 'interface', 'import', 'keyword', 'library', 'macro', 'method', + 'module', 'open', 'primary', 'required', 'sealed', 'sideways', + 'singleton', 'slot', 'thread', 'variable', 'virtual']) + + keywords = set([ + 'above', 'afterwards', 'begin', 'below', 'by', 'case', 'cleanup', + 'create', 'define', 'else', 'elseif', 'end', 'export', 'finally', + 'for', 'from', 'if', 'in', 'let', 'local', 'otherwise', 'rename', + 'select', 'signal', 'then', 'to', 'unless', 'until', 'use', 'when', + 'while']) + + operators = set([ + '~', '+', '-', '*', '|', '^', '=', '==', '~=', '~==', '<', '<=', + '>', '>=', '&', '|']) + + functions = set([ + 'abort', 'abs', 'add', 'add!', 'add-method', 'add-new', 'add-new!', + 'all-superclasses', 'always', 'any?', 'applicable-method?', 'apply', + 'aref', 'aref-setter', 'as', 'as-lowercase', 'as-lowercase!', + 'as-uppercase', 'as-uppercase!', 'ash', 'backward-iteration-protocol', + 'break', 'ceiling', 'ceiling/', 'cerror', 'check-type', 'choose', + 'choose-by', 'complement', 'compose', 'concatenate', 'concatenate-as', + 'condition-format-arguments', 'condition-format-string', 'conjoin', + 'copy-sequence', 'curry', 'default-handler', 'dimension', 'dimensions', + 'direct-subclasses', 'direct-superclasses', 'disjoin', 'do', + 'do-handlers', 'element', 'element-setter', 'empty?', 'error', 'even?', + 'every?', 'false-or', 'fill!', 'find-key', 'find-method', 'first', + 'first-setter', 'floor', 'floor/', 'forward-iteration-protocol', + 'function-arguments', 'function-return-values', + 'function-specializers', 
'gcd', 'generic-function-mandatory-keywords', + 'generic-function-methods', 'head', 'head-setter', 'identity', + 'initialize', 'instance?', 'integral?', 'intersection', + 'key-sequence', 'key-test', 'last', 'last-setter', 'lcm', 'limited', + 'list', 'logand', 'logbit?', 'logior', 'lognot', 'logxor', 'make', + 'map', 'map-as', 'map-into', 'max', 'member?', 'merge-hash-codes', + 'min', 'modulo', 'negative', 'negative?', 'next-method', + 'object-class', 'object-hash', 'odd?', 'one-of', 'pair', 'pop', + 'pop-last', 'positive?', 'push', 'push-last', 'range', 'rank', + 'rcurry', 'reduce', 'reduce1', 'remainder', 'remove', 'remove!', + 'remove-duplicates', 'remove-duplicates!', 'remove-key!', + 'remove-method', 'replace-elements!', 'replace-subsequence!', + 'restart-query', 'return-allowed?', 'return-description', + 'return-query', 'reverse', 'reverse!', 'round', 'round/', + 'row-major-index', 'second', 'second-setter', 'shallow-copy', + 'signal', 'singleton', 'size', 'size-setter', 'slot-initialized?', + 'sort', 'sort!', 'sorted-applicable-methods', 'subsequence-position', + 'subtype?', 'table-protocol', 'tail', 'tail-setter', 'third', + 'third-setter', 'truncate', 'truncate/', 'type-error-expected-type', + 'type-error-value', 'type-for-copy', 'type-union', 'union', 'values', + 'vector', 'zero?']) + + valid_name = '\\\\?[a-zA-Z0-9' + re.escape('!&*<>|^$%@_-+~?/=') + ']+' + + def get_tokens_unprocessed(self, text): + stack = ['root'] + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack): + if token is Name: + if value in self.builtins: + yield index, Name.Builtin, value + continue + if value in self.keywords: + yield index, Keyword, value + continue + if value in self.functions: + yield index, Name.Builtin, value + continue + if value in self.operators: + yield index, Operator, value + continue + yield index, token, value tokens = { 'root': [ - (r'\b(subclass|abstract|block|c(on(crete|stant)|lass)|domain' - 
r'|ex(c(eption|lude)|port)|f(unction(al)?)|generic|handler' - r'|i(n(herited|line|stance|terface)|mport)|library|m(acro|ethod)' - r'|open|primary|sealed|si(deways|ngleton)|slot' - r'|v(ariable|irtual))\b', Name.Builtin), - (r'<\w+>', Keyword.Type), + # Whitespace + (r'\s+', Text), + + # single line comment (r'//.*?\n', Comment.Single), - (r'/\*[\w\W]*?\*/', Comment.Multiline), + + # multi-line comment + (r'/\*', Comment.Multiline, 'comment'), + + # strings and characters (r'"', String, 'string'), (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), - (r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)' - r'|define|else(if)?|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise' - r'|rename|s(elect|ignal)|t(hen|o)|u(n(less|til)|se)|wh(en|ile))\b', - Keyword), - (r'([ \t])([!\$%&\*\/:<=>\?~_^a-zA-Z0-9.+\-]*:)', - bygroups(Text, Name.Variable)), - (r'([ \t]*)(\S+[^:])([ \t]*)(\()([ \t]*)', - bygroups(Text, Name.Function, Text, Punctuation, Text)), - (r'-?[0-9.]+', Number), - (r'[(),;]', Punctuation), - (r'\$[a-zA-Z0-9-]+', Name.Constant), - (r'[!$%&*/:<>=?~^.+\[\]{}-]+', Operator), - (r'\s+', Text), - (r'#"[a-zA-Z0-9-]+"', Keyword), + + # binary integer + (r'#[bB][01]+', Number), + + # octal integer + (r'#[oO][0-7]+', Number.Oct), + + # floating point + (r'[-+]?(\d*\.\d+(e[-+]?\d+)?|\d+(\.\d*)?e[-+]?\d+)', Number.Float), + + # decimal integer + (r'[-+]?\d+', Number.Integer), + + # hex integer + (r'#[xX][0-9a-fA-F]+', Number.Hex), + + # Macro parameters + (r'(\?' + valid_name + ')(:)(token|name|variable|expression|body|case-body|\*)', + bygroups(Name.Tag, Operator, Name.Builtin)), + (r'(\?)(:)(token|name|variable|expression|body|case-body|\*)', + bygroups(Name.Tag, Operator, Name.Builtin)), + (r'\?' + valid_name, Name.Tag), + + # Punctuation + (r'(=>|::|#\(|#\[|##|\?|\?\?|\?=|[(){}\[\],\.;])', Punctuation), + + # Most operators are picked up as names and then re-flagged. + # This one isn't valid in a name though, so we pick it up now. 
+ (r':=', Operator), + + # Pick up #t / #f before we match other stuff with #. + (r'#[tf]', Literal), + + # #"foo" style keywords + (r'#"', String.Symbol, 'keyword'), + + # #rest, #key, #all-keys, etc. (r'#[a-zA-Z0-9-]+', Keyword), - (r'#(\(|\[)', Punctuation), - (r'[a-zA-Z0-9-_]+', Name.Variable), + + # required-init-keyword: style keywords. + (valid_name + ':', Keyword), + + # class names + (r'<' + valid_name + '>', Name.Class), + + # define variable forms. + (r'\*' + valid_name + '\*', Name.Variable.Global), + + # define constant forms. + (r'\$' + valid_name, Name.Constant), + + # everything else. We re-flag some of these in the method above. + (valid_name, Name), + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ], + 'keyword': [ + (r'"', String.Symbol, '#pop'), + (r'[^\\"]+', String.Symbol), # all other characters ], 'string': [ (r'"', String, '#pop'), @@ -1098,7 +1223,34 @@ class DylanLexer(RegexLexer): (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash - ], + ] + } + + +class DylanLidLexer(RegexLexer): + """ + For Dylan LID (Library Interchange Definition) files. + """ + + name = 'DylanLID' + aliases = ['dylan-lid', 'lid'] + filenames = ['*.lid', '*.hdp'] + mimetypes = ['text/x-dylan-lid'] + + flags = re.IGNORECASE + + tokens = { + 'root': [ + # Whitespace + (r'\s+', Text), + + # single line comment + (r'//.*?\n', Comment.Single), + + # lid header + (r'(.*?)(:)([ \t]*)(.*(?:\n[ \t].+)*)', + bygroups(Name.Attribute, Operator, Text, String)), + ] } diff --git a/tests/examplefiles/classes.dylan b/tests/examplefiles/classes.dylan index 6dd55ff2..90cafdf6 100644 --- a/tests/examplefiles/classes.dylan +++ b/tests/examplefiles/classes.dylan @@ -1,12 +1,25 @@ +module: sample +comment: for make sure that does not highlight per word. 
+ define class <car> (<object>) slot serial-number :: <integer> = unique-serial-number(); - slot model-name :: <string>, + constant slot model-name :: <string>, required-init-keyword: model:; - slot has-sunroof? :: <boolean>, + each-subclass slot has-sunroof? :: <boolean>, init-keyword: sunroof?:, init-value: #f; + keyword foo:; + required keyword bar:; end class <car>; +define class <flying-car> (<car>) +end class <flying-car>; + +let flying-car = make(<flying-car>); +let car? :: <car?> = #f; +let prefixed-car :: <vehicles/car> = #f; +let model :: <car-911> = #f; + define constant $empty-string = ""; define constant $escaped-backslash = '\\'; define constant $escaped-single-quote = '\''; @@ -31,10 +44,79 @@ define method foo() => _ :: <boolean>; #t end method; -define method \+() -end; +define method \+ + (offset1 :: <time-offset>, offset2 :: <time-offset>) + => (sum :: <time-offset>) + let sum = offset1.total-seconds + offset2.total-seconds; + make(<time-offset>, total-seconds: sum); +end method \+; + +define method bar () + 1 | 2 & 3 +end + +if (bar) + 1 +elseif (foo) + 2 +else + 3 +end if; + +select (foo by instance?) + <integer> => 1 + otherwise => 3 +end select; + +/* multi + line + comment +*/ + +/* multi line comments + /* can be */ + nested */ define constant $symbol = #"hello"; define variable *vector* = #[3.5, 5] define constant $list = #(1, 2); define constant $pair = #(1 . "foo") + +let octal-number = #o238; +let hex-number = #x3890ADEF; +let binary-number = #b1010; +let float-exponent = 3.5e10; + +block (return) + with-lock (lock) + return(); + end; +exception (e :: <error>) + format-out("Oh no"); +cleanup + return(); +afterwards + format-out("Hello"); +end; + +define macro repeat + { repeat ?:body end } + => { block (?=stop!) 
+ local method again() ?body; again() end; + again(); + end } +end macro repeat; + +define macro with-decoded-seconds + { + with-decoded-seconds + (?max:variable, ?min:variable, ?sec:variable = ?time:expression) + ?:body + end + } + => { + let (?max, ?min, ?sec) = decode-total-seconds(?time); + ?body + } +end macro; + diff --git a/tests/examplefiles/unix-io.lid b/tests/examplefiles/unix-io.lid new file mode 100644 index 00000000..617fcaa4 --- /dev/null +++ b/tests/examplefiles/unix-io.lid @@ -0,0 +1,37 @@ +Library: io +Synopsis: A portable IO library +Author: Gail Zacharias +Files: library + streams/defs + streams/stream + streams/sequence-stream + streams/native-buffer + streams/buffer + streams/typed-stream + streams/external-stream + streams/buffered-stream + streams/convenience + streams/wrapper-stream + streams/cleanup-streams + streams/native-speed + streams/async-writes + streams/file-stream + streams/multi-buffered-streams + pprint + print + print-double-integer-kludge + format + buffered-format + format-condition + unix-file-accessor + unix-standard-io + unix-interface + format-out +C-Source-Files: unix-portability.c +Major-Version: 2 +Minor-Version: 1 +Target-Type: dll +Copyright: Original Code is Copyright (c) 1995-2004 Functional Objects, Inc. + All rights reserved. +License: See License.txt in this distribution for details. +Warranty: Distributed WITHOUT WARRANTY OF ANY KIND |