split up text lexers

author: Georg Brandl <georg@python.org> 2014-09-19 23:02:18 +0200
committer: Georg Brandl <georg@python.org> 2014-09-19 23:02:18 +0200
commit: 731527e9183d17d0f9eaf35bc0dd263ba84ae5c1 (patch)
tree: e80c9918f3efb9862c4fce6c45fb8790e97534c1 /pygments/lexers/textedit.py
parent: 9cb037001991ce4ef3444820d695ddd202dd3b26 (diff)
download: pygments-731527e9183d17d0f9eaf35bc0dd263ba84ae5c1.tar.gz
1 files changed, 101 insertions, 2 deletions
diff --git a/pygments/lexers/textedit.py b/pygments/lexers/textedit.py
index 66255fae..1f6d3fee 100644
--- a/pygments/lexers/textedit.py
+++ b/pygments/lexers/textedit.py
@@ -9,11 +9,16 @@
     :license: BSD, see LICENSE for details.
 """
 
-from pygments.lexer import RegexLexer, include, default
+import re
+from bisect import bisect
+
+from pygments.lexer import RegexLexer, include, default, bygroups, using, this
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
     Number, Punctuation
 
-__all__ = ['AwkLexer']
+from pygments.lexers.python import PythonLexer
+
+__all__ = ['AwkLexer', 'VimLexer']
 
 
 class AwkLexer(RegexLexer):
@@ -68,3 +73,97 @@ class AwkLexer(RegexLexer):
             (r"'(\\\\|\\'|[^'])*'", String.Single),
         ]
     }
+
+
+class VimLexer(RegexLexer):
+    """
+    Lexer for VimL script files.
+
+    .. versionadded:: 0.8
+    """
+    name = 'VimL'
+    aliases = ['vim']
+    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
+                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
+    mimetypes = ['text/x-vim']
+    flags = re.MULTILINE
+
+    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'
+
+    tokens = {
+        'root': [
+            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
+             bygroups(using(this), Keyword, Text, Operator, Text, Text,
+                      using(PythonLexer), Text)),
+            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
+             bygroups(using(this), Keyword, Text, using(PythonLexer))),
+
+            (r'^\s*".*', Comment),
+
+            (r'[ \t]+', Text),
+            # TODO: regexes can have other delims
+            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
+            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
+            (r"'(''|[^\n'])*'", String.Single),
+
+            # Who decided that doublequote was a good comment character??
+            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
+            (r'-?\d+', Number),
+            (r'#[0-9a-f]{6}', Number.Hex),
+            (r'^:', Punctuation),
+            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
+            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
+             Keyword),
+            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
+            (r'\b\w+\b', Name.Other),  # These are postprocessed below
+            (r'.', Text),
+        ],
+    }
+
+    def __init__(self, **options):
+        from pygments.lexers._vimbuiltins import command, option, auto
+        self._cmd = command
+        self._opt = option
+        self._aut = auto
+
+        RegexLexer.__init__(self, **options)
+
+    def is_in(self, w, mapping):
+        r"""
+        It's kind of difficult to decide if something might be a keyword
+        in VimL because it allows you to abbreviate them.  In fact,
+        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
+        valid ways to call it so rather than making really awful regexps
+        like::
+
+            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b
+
+        we match `\b\w+\b` and then call is_in() on those tokens.  See
+        `scripts/get_vimkw.py` for how the lists are extracted.
+        """
+        p = bisect(mapping, (w,))
+        if p > 0:
+            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
+               mapping[p-1][1][:len(w)] == w:
+                return True
+        if p < len(mapping):
+            return mapping[p][0] == w[:len(mapping[p][0])] and \
+                mapping[p][1][:len(w)] == w
+        return False
+
+    def get_tokens_unprocessed(self, text):
+        # TODO: builtins are only subsequent tokens on lines
+        #       and 'keywords' only happen at the beginning except
+        #       for :au ones
+        for index, token, value in \
+                RegexLexer.get_tokens_unprocessed(self, text):
+            if token is Name.Other:
+                if self.is_in(value, self._cmd):
+                    yield index, Keyword, value
+                elif self.is_in(value, self._opt) or \
+                        self.is_in(value, self._aut):
+                    yield index, Name.Builtin, value
+                else:
+                    yield index, Text, value
+            else:
+                yield index, token, value
author	Georg Brandl <georg@python.org>	2014-09-19 23:02:18 +0200
committer	Georg Brandl <georg@python.org>	2014-09-19 23:02:18 +0200
commit	731527e9183d17d0f9eaf35bc0dd263ba84ae5c1 (patch)
tree	e80c9918f3efb9862c4fce6c45fb8790e97534c1 /pygments/lexers/textedit.py
parent	9cb037001991ce4ef3444820d695ddd202dd3b26 (diff)
download	pygments-731527e9183d17d0f9eaf35bc0dd263ba84ae5c1.tar.gz