1 files changed, 218 insertions, 45 deletions
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index a5650da8..a2536951 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -5,7 +5,7 @@
 
     Lexers for compiled languages.
 
-    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
@@ -23,12 +23,13 @@ from pygments.scanner import Scanner
 from pygments.lexers.functional import OcamlLexer
 from pygments.lexers.jvm import JavaLexer, ScalaLexer
 
-__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer',
-           'DylanLexer', 'ObjectiveCLexer', 'ObjectiveCppLexer',
-           'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer',
-           'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer',
-           'Modula2Lexer', 'BlitzMaxLexer', 'NimrodLexer', 'FantomLexer',
-           'RustLexer', 'CudaLexer', 'MonkeyLexer']
+__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer', 'DylanLexer',
+           'ObjectiveCLexer', 'ObjectiveCppLexer', 'FortranLexer', 'GLShaderLexer',
+           'PrologLexer', 'CythonLexer', 'ValaLexer', 'OocLexer', 'GoLexer',
+           'FelixLexer', 'AdaLexer', 'Modula2Lexer', 'BlitzMaxLexer',
+           'NimrodLexer', 'FantomLexer', 'RustLexer', 'CudaLexer', 'MonkeyLexer',
+           'DylanLidLexer']
+
 
 class CFamilyLexer(RegexLexer):
     """
@@ -173,6 +174,7 @@ class CFamilyLexer(RegexLexer):
                     token = Keyword.Type
             yield index, token, value
 
+
 class CLexer(CFamilyLexer):
     """
     For C source code with preprocessor directives.
@@ -186,6 +188,7 @@ class CLexer(CFamilyLexer):
     def analyse_text(text):
         return 0.1
 
+
 class CppLexer(CFamilyLexer):
     """
     For C++ source code with preprocessor directives.
@@ -226,6 +229,7 @@ class CppLexer(CFamilyLexer):
     def analyse_text(text):
         return 0.1
 
+
 class ECLexer(CLexer):
     """
     For eC source code with preprocessor directives.
@@ -260,6 +264,7 @@ class ECLexer(CLexer):
         ],
     }
 
+
 class DLexer(RegexLexer):
     """
     For D source.
@@ -903,40 +908,176 @@ class DylanLexer(RegexLexer):
 
     name = 'Dylan'
     aliases = ['dylan']
-    filenames = ['*.dylan', '*.dyl']
+    filenames = ['*.dylan', '*.dyl', '*.intr']
     mimetypes = ['text/x-dylan']
 
-    flags = re.DOTALL
+    flags = re.IGNORECASE
+
+    builtins = set([
+        'subclass', 'abstract', 'block', 'concrete', 'constant', 'class',
+        'compiler-open', 'compiler-sideways', 'domain', 'dynamic',
+        'each-subclass', 'exception', 'exclude', 'function', 'generic',
+        'handler', 'inherited', 'inline', 'inline-only', 'instance',
+        'interface', 'import', 'keyword', 'library', 'macro', 'method',
+        'module', 'open', 'primary', 'required', 'sealed', 'sideways',
+        'singleton', 'slot', 'thread', 'variable', 'virtual'])
+
+    keywords = set([
+        'above', 'afterwards', 'begin', 'below', 'by', 'case', 'cleanup',
+        'create', 'define', 'else', 'elseif', 'end', 'export', 'finally',
+        'for', 'from', 'if', 'in', 'let', 'local', 'otherwise', 'rename',
+        'select', 'signal', 'then', 'to', 'unless', 'until', 'use', 'when',
+        'while'])
+
+    operators = set([
+        '~', '+', '-', '*', '|', '^', '=', '==', '~=', '~==', '<', '<=',
+        '>', '>=', '&', '|'])
+
+    functions = set([
+        'abort', 'abs', 'add', 'add!', 'add-method', 'add-new', 'add-new!',
+        'all-superclasses', 'always', 'any?', 'applicable-method?', 'apply',
+        'aref', 'aref-setter', 'as', 'as-lowercase', 'as-lowercase!',
+        'as-uppercase', 'as-uppercase!', 'ash', 'backward-iteration-protocol',
+        'break', 'ceiling', 'ceiling/', 'cerror', 'check-type', 'choose',
+        'choose-by', 'complement', 'compose', 'concatenate', 'concatenate-as',
+        'condition-format-arguments', 'condition-format-string', 'conjoin',
+        'copy-sequence', 'curry', 'default-handler', 'dimension', 'dimensions',
+        'direct-subclasses', 'direct-superclasses', 'disjoin', 'do',
+        'do-handlers', 'element', 'element-setter', 'empty?', 'error', 'even?',
+        'every?', 'false-or', 'fill!', 'find-key', 'find-method', 'first',
+        'first-setter', 'floor', 'floor/', 'forward-iteration-protocol',
+        'function-arguments', 'function-return-values',
+        'function-specializers', 'gcd', 'generic-function-mandatory-keywords',
+        'generic-function-methods', 'head', 'head-setter', 'identity',
+        'initialize', 'instance?', 'integral?', 'intersection',
+        'key-sequence', 'key-test', 'last', 'last-setter', 'lcm', 'limited',
+        'list', 'logand', 'logbit?', 'logior', 'lognot', 'logxor', 'make',
+        'map', 'map-as', 'map-into', 'max', 'member?', 'merge-hash-codes',
+        'min', 'modulo', 'negative', 'negative?', 'next-method',
+        'object-class', 'object-hash', 'odd?', 'one-of', 'pair', 'pop',
+        'pop-last', 'positive?', 'push', 'push-last', 'range', 'rank',
+        'rcurry', 'reduce', 'reduce1', 'remainder', 'remove', 'remove!',
+        'remove-duplicates', 'remove-duplicates!', 'remove-key!',
+        'remove-method', 'replace-elements!', 'replace-subsequence!',
+        'restart-query', 'return-allowed?', 'return-description',
+        'return-query', 'reverse', 'reverse!', 'round', 'round/',
+        'row-major-index', 'second', 'second-setter', 'shallow-copy',
+        'signal', 'singleton', 'size', 'size-setter', 'slot-initialized?',
+        'sort', 'sort!', 'sorted-applicable-methods', 'subsequence-position',
+        'subtype?', 'table-protocol', 'tail', 'tail-setter', 'third',
+        'third-setter', 'truncate', 'truncate/', 'type-error-expected-type',
+        'type-error-value', 'type-for-copy', 'type-union', 'union', 'values',
+        'vector', 'zero?'])
+
+    valid_name = '\\\\?[a-zA-Z0-9' + re.escape('!&*<>|^$%@_-+~?/=') + ']+'
+
+    def get_tokens_unprocessed(self, text):
+        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
+            if token is Name:
+                if value in self.builtins:
+                    yield index, Name.Builtin, value
+                    continue
+                if value in self.keywords:
+                    yield index, Keyword, value
+                    continue
+                if value in self.functions:
+                    yield index, Name.Builtin, value
+                    continue
+                if value in self.operators:
+                    yield index, Operator, value
+                    continue
+            yield index, token, value
 
     tokens = {
         'root': [
-            (r'\b(subclass|abstract|block|c(on(crete|stant)|lass)|domain'
-             r'|ex(c(eption|lude)|port)|f(unction(al)?)|generic|handler'
-             r'|i(n(herited|line|stance|terface)|mport)|library|m(acro|ethod)'
-             r'|open|primary|sealed|si(deways|ngleton)|slot'
-             r'|v(ariable|irtual))\b', Name.Builtin),
-            (r'<\w+>', Keyword.Type),
+            # Whitespace
+            (r'\s+', Text),
+
+            # single line comment
+            (r'//.*?\n', Comment.Single),
+
+            # lid header
+            (r'([A-Za-z0-9-]+)(:)([ \t]*)(.*(?:\n[ \t].+)*)',
+                bygroups(Name.Attribute, Operator, Text, String)),
+
+            ('', Text, 'code') # no header match, switch to code
+        ],
+        'code': [
+            # Whitespace
+            (r'\s+', Text),
+
+            # single line comment
             (r'//.*?\n', Comment.Single),
-            (r'/\*[\w\W]*?\*/', Comment.Multiline),
+
+            # multi-line comment
+            (r'/\*', Comment.Multiline, 'comment'),
+
+            # strings and characters
             (r'"', String, 'string'),
             (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
-            (r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)'
-             r'|define|else(if)?|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise'
-             r'|rename|s(elect|ignal)|t(hen|o)|u(n(less|til)|se)|wh(en|ile))\b',
-             Keyword),
-            (r'([ \t])([!\$%&\*\/:<=>\?~_^a-zA-Z0-9.+\-]*:)',
-             bygroups(Text, Name.Variable)),
-            (r'([ \t]*)(\S+[^:])([ \t]*)(\()([ \t]*)',
-             bygroups(Text, Name.Function, Text, Punctuation, Text)),
-            (r'-?[0-9.]+', Number),
-            (r'[(),;]', Punctuation),
-            (r'\$[a-zA-Z0-9-]+', Name.Constant),
-            (r'[!$%&*/:<>=?~^.+\[\]{}-]+', Operator),
-            (r'\s+', Text),
-            (r'#"[a-zA-Z0-9-]+"', Keyword),
+
+            # binary integer
+            (r'#[bB][01]+', Number),
+
+            # octal integer
+            (r'#[oO][0-7]+', Number.Oct),
+
+            # floating point
+            (r'[-+]?(\d*\.\d+(e[-+]?\d+)?|\d+(\.\d*)?e[-+]?\d+)', Number.Float),
+
+            # decimal integer
+            (r'[-+]?\d+', Number.Integer),
+
+            # hex integer
+            (r'#[xX][0-9a-fA-F]+', Number.Hex),
+
+            # Macro parameters
+            (r'(\?' + valid_name + ')(:)(token|name|variable|expression|body|case-body|\*)',
+                bygroups(Name.Tag, Operator, Name.Builtin)),
+            (r'(\?)(:)(token|name|variable|expression|body|case-body|\*)',
+                bygroups(Name.Tag, Operator, Name.Builtin)),
+            (r'\?' + valid_name, Name.Tag),
+
+            # Punctuation
+            (r'(=>|::|#\(|#\[|##|\?|\?\?|\?=|[(){}\[\],\.;])', Punctuation),
+
+            # Most operators are picked up as names and then re-flagged.
+            # This one isn't valid in a name though, so we pick it up now.
+            (r':=', Operator),
+
+            # Pick up #t / #f before we match other stuff with #.
+            (r'#[tf]', Literal),
+
+            # #"foo" style keywords
+            (r'#"', String.Symbol, 'keyword'),
+
+            # #rest, #key, #all-keys, etc.
             (r'#[a-zA-Z0-9-]+', Keyword),
-            (r'#(\(|\[)', Punctuation),
-            (r'[a-zA-Z0-9-_]+', Name.Variable),
+
+            # required-init-keyword: style keywords.
+            (valid_name + ':', Keyword),
+
+            # class names
+            (r'<' + valid_name + '>', Name.Class),
+
+            # define variable forms.
+            (r'\*' + valid_name + '\*', Name.Variable.Global),
+
+            # define constant forms.
+            (r'\$' + valid_name, Name.Constant),
+
+            # everything else. We re-flag some of these in the method above.
+            (valid_name, Name),
+        ],
+        'comment': [
+            (r'[^*/]', Comment.Multiline),
+            (r'/\*', Comment.Multiline, '#push'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[*/]', Comment.Multiline)
+        ],
+        'keyword': [
+            (r'"', String.Symbol, '#pop'),
+            (r'[^\\"]+', String.Symbol), # all other characters
         ],
         'string': [
             (r'"', String, '#pop'),
@@ -944,7 +1085,36 @@ class DylanLexer(RegexLexer):
             (r'[^\\"\n]+', String), # all other characters
             (r'\\\n', String), # line continuation
             (r'\\', String), # stray backslash
-        ],
+        ]
+    }
+
+
+class DylanLidLexer(RegexLexer):
+    """
+    For Dylan LID (Library Interchange Definition) files.
+
+    *New in Pygments 1.6.*
+    """
+
+    name = 'DylanLID'
+    aliases = ['dylan-lid', 'lid']
+    filenames = ['*.lid', '*.hdp']
+    mimetypes = ['text/x-dylan-lid']
+
+    flags = re.IGNORECASE
+
+    tokens = {
+        'root': [
+            # Whitespace
+            (r'\s+', Text),
+
+            # single line comment
+            (r'//.*?\n', Comment.Single),
+
+            # lid header
+            (r'(.*?)(:)([ \t]*)(.*(?:\n[ \t].+)*)',
+             bygroups(Name.Attribute, Operator, Text, String)),
+        ]
     }
 
 def objective(baselexer):
@@ -1046,6 +1216,7 @@ def objective(baselexer):
 
     return GeneratedObjectiveCVariant
 
+
 class ObjectiveCLexer(objective(CLexer)):
     """
     For Objective-C source code with preprocessor directives.
@@ -1057,6 +1228,7 @@ class ObjectiveCLexer(objective(CLexer)):
     mimetypes = ['text/x-objective-c']
     priority = 0.05    # Lower than C
 
+
 class ObjectiveCppLexer(objective(CppLexer)):
     """
     For Objective-C++ source code with preprocessor directives.
@@ -1068,6 +1240,7 @@ class ObjectiveCppLexer(objective(CppLexer)):
     mimetypes = ['text/x-objective-c++']
     priority = 0.05    # Lower than C++
 
+
 class FortranLexer(RegexLexer):
     """
     Lexer for FORTRAN 90 code.
@@ -2740,13 +2913,13 @@ class RustLexer(RegexLexer):
             (r'/[*](.|\n)*?[*]/', Comment.Multiline),
 
             # Keywords
-            (r'(alt|as|assert|be|break|check|claim|class|const'
-             r'|cont|copy|crust|do|else|enum|export|fail'
-             r'|false|fn|for|if|iface|impl|import|let|log'
-             r'|loop|mod|mut|native|pure|resource|ret|true'
-             r'|type|unsafe|use|white|note|bind|prove|unchecked'
-             r'|with|syntax|u8|u16|u32|u64|i8|i16|i32|i64|uint'
-             r'|int|f32|f64)\b', Keyword),
+            (r'(as|assert|break|const'
+             r'|copy|do|else|enum|extern|fail'
+             r'|false|fn|for|if|impl|let|log'
+             r'|loop|match|mod|move|mut|once|priv|pub|pure'
+             r'|ref|return|static|struct|trait|true|type|unsafe|use|while'
+             r'|u8|u16|u32|u64|i8|i16|i32|i64|uint'
+             r'|int|float|f32|f64|str)\b', Keyword),
 
             # Character Literal
             (r"""'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}"""
@@ -2775,8 +2948,8 @@ class RustLexer(RegexLexer):
             (r'#\[', Comment.Preproc, 'attribute['),
             (r'#\(', Comment.Preproc, 'attribute('),
             # Macros
-            (r'#[A-Za-z_][A-Za-z0-9_]*\[', Comment.Preproc, 'attribute['),
-            (r'#[A-Za-z_][A-Za-z0-9_]*\(', Comment.Preproc, 'attribute('),
+            (r'[A-Za-z_][A-Za-z0-9_]*!\[', Comment.Preproc, 'attribute['),
+            (r'[A-Za-z_][A-Za-z0-9_]*!\(', Comment.Preproc, 'attribute('),
         ],
         'number_lit': [
             (r'(([ui](8|16|32|64)?)|(f(32|64)?))?', Keyword, '#pop'),
@@ -2867,7 +3040,7 @@ class MonkeyLexer(RegexLexer):
     name = 'Monkey'
     aliases = ['monkey']
     filenames = ['*.monkey']
-    mimetypes = ['text/x-monkey'] 
+    mimetypes = ['text/x-monkey']
 
     name_variable = r'[a-z_][a-zA-Z0-9_]*'
     name_function = r'[A-Z][a-zA-Z0-9_]*'
@@ -2941,10 +3114,10 @@ class MonkeyLexer(RegexLexer):
             (r':', Punctuation, 'classname'),
             (r'\s+', Text),
             (r'\(', Punctuation, 'variables'),
-            (r'\)', Punctuation, '#pop') 
+            (r'\)', Punctuation, '#pop')
         ],
         'classname': [
-            (r'%s\.' % name_module, Name.Namespace), 
+            (r'%s\.' % name_module, Name.Namespace),
             (r'%s\b' % keyword_type, Keyword.Type),
             (r'%s\b' % name_class, Name.Class),
             # array (of given size)