diff options
author | Georg Brandl <georg@python.org> | 2014-09-19 23:02:18 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-09-19 23:02:18 +0200 |
commit | 731527e9183d17d0f9eaf35bc0dd263ba84ae5c1 (patch) | |
tree | e80c9918f3efb9862c4fce6c45fb8790e97534c1 /pygments/lexers/textedit.py | |
parent | 9cb037001991ce4ef3444820d695ddd202dd3b26 (diff) | |
download | pygments-731527e9183d17d0f9eaf35bc0dd263ba84ae5c1.tar.gz |
split up text lexers
Diffstat (limited to 'pygments/lexers/textedit.py')
-rw-r--r-- | pygments/lexers/textedit.py | 103 |
1 files changed, 101 insertions, 2 deletions
diff --git a/pygments/lexers/textedit.py b/pygments/lexers/textedit.py index 66255fae..1f6d3fee 100644 --- a/pygments/lexers/textedit.py +++ b/pygments/lexers/textedit.py @@ -9,11 +9,16 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer, include, default +import re +from bisect import bisect + +from pygments.lexer import RegexLexer, include, default, bygroups, using, this from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation -__all__ = ['AwkLexer'] +from pygments.lexers.python import PythonLexer + +__all__ = ['AwkLexer', 'VimLexer'] class AwkLexer(RegexLexer): @@ -68,3 +73,97 @@ class AwkLexer(RegexLexer): (r"'(\\\\|\\'|[^'])*'", String.Single), ] } + + +class VimLexer(RegexLexer): + """ + Lexer for VimL script files. + + .. versionadded:: 0.8 + """ + name = 'VimL' + aliases = ['vim'] + filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc', + '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'] + mimetypes = ['text/x-vim'] + flags = re.MULTILINE + + _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?' + + tokens = { + 'root': [ + (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)', + bygroups(using(this), Keyword, Text, Operator, Text, Text, + using(PythonLexer), Text)), + (r'^([ \t:]*)(' + _python + r')([ \t])(.*)', + bygroups(using(this), Keyword, Text, using(PythonLexer))), + + (r'^\s*".*', Comment), + + (r'[ \t]+', Text), + # TODO: regexes can have other delims + (r'/(\\\\|\\/|[^\n/])*/', String.Regex), + (r'"(\\\\|\\"|[^\n"])*"', String.Double), + (r"'(''|[^\n'])*'", String.Single), + + # Who decided that doublequote was a good comment character?? + (r'(?<=\s)"[^\-:.%#=*].*', Comment), + (r'-?\d+', Number), + (r'#[0-9a-f]{6}', Number.Hex), + (r'^:', Punctuation), + (r'[()<>+=!|,~-]', Punctuation), # Inexact list. Looks decent. + (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b', + Keyword), + (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin), + (r'\b\w+\b', Name.Other), # These are postprocessed below + (r'.', Text), + ], + } + + def __init__(self, **options): + from pygments.lexers._vimbuiltins import command, option, auto + self._cmd = command + self._opt = option + self._aut = auto + + RegexLexer.__init__(self, **options) + + def is_in(self, w, mapping): + r""" + It's kind of difficult to decide if something might be a keyword + in VimL because it allows you to abbreviate them. In fact, + 'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are + valid ways to call it so rather than making really awful regexps + like:: + + \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b + + we match `\b\w+\b` and then call is_in() on those tokens. See + `scripts/get_vimkw.py` for how the lists are extracted. + """ + p = bisect(mapping, (w,)) + if p > 0: + if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \ + mapping[p-1][1][:len(w)] == w: + return True + if p < len(mapping): + return mapping[p][0] == w[:len(mapping[p][0])] and \ + mapping[p][1][:len(w)] == w + return False + + def get_tokens_unprocessed(self, text): + # TODO: builtins are only subsequent tokens on lines + # and 'keywords' only happen at the beginning except + # for :au ones + for index, token, value in \ + RegexLexer.get_tokens_unprocessed(self, text): + if token is Name.Other: + if self.is_in(value, self._cmd): + yield index, Keyword, value + elif self.is_in(value, self._opt) or \ + self.is_in(value, self._aut): + yield index, Name.Builtin, value + else: + yield index, Text, value + else: + yield index, token, value |