summaryrefslogtreecommitdiff
path: root/pygments/lexers/special.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/special.py')
-rw-r--r--pygments/lexers/special.py75
1 file changed, 75 insertions, 0 deletions
diff --git a/pygments/lexers/special.py b/pygments/lexers/special.py
new file mode 100644
index 00000000..9076ca51
--- /dev/null
+++ b/pygments/lexers/special.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.special
+    ~~~~~~~~~~~~~~~~~~~~~~~
+
+ Special lexers.
+
+ :copyright: 2006 by Georg Brandl.
+ :license: GNU LGPL, see LICENSE for more details.
+"""
+
+import re
+import cStringIO
+
+from pygments.lexer import Lexer, RegexLexer
+from pygments.token import Token, \
+ Text, Comment, Operator, Keyword, Name, String, Number
+
+
+__all__ = ['TextLexer', 'RawTokenLexer']
+
+
class TextLexer(Lexer):
    """Fallback lexer: emit the entire input unchanged as plain text."""
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']

    def get_tokens_unprocessed(self, text):
        # The whole input is one Text token starting at offset 0.
        yield 0, Text, text
+
+
# Module-level cache mapping dotted token-type strings (e.g.
# "Token.Name.Class") to their resolved token type objects; shared by
# all RawTokenLexer instances so each string is resolved only once.
_ttype_cache = {}

# Matches a single input line including its trailing newline.
line_re = re.compile('.*?\n')
+
class RawTokenLexer(Lexer):
    """
    Turn a token dump produced by the RawTokenFormatter back into a
    token stream.

    Additional options accepted:

    ``compress``
        If set to "gz" or "bz2", decompress the token stream with
        the given compression algorithm (default: '').
    """
    name = 'Raw token data'
    aliases = ['raw']
    filenames = ['*.raw']

    def __init__(self, **options):
        # Remember which decompression (if any) get_tokens() must apply.
        self.compress = options.get('compress', '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        # Undo the optional compression first, then hand the plain dump
        # to the normal Lexer entry point.
        if self.compress == 'gz':
            import gzip
            text = gzip.GzipFile('', 'rb', 9, cStringIO.StringIO(text)).read()
        elif self.compress == 'bz2':
            import bz2
            text = bz2.decompress(text)
        return Lexer.get_tokens(self, text)

    def get_tokens_unprocessed(self, text):
        # Each dump line has the form "Token.Kind.Sub\t'escaped value'\n".
        for match in line_re.finditer(text):
            ttypestr, val = match.group().split('\t', 1)
            ttype = _ttype_cache.get(ttypestr)
            if not ttype:
                # Resolve the dotted path (minus the leading "Token")
                # by attribute access down the token hierarchy, and
                # remember the result for subsequent lines.
                ttype = Token
                for part in ttypestr.split('.')[1:]:
                    ttype = getattr(ttype, part)
                _ttype_cache[ttypestr] = ttype
            # val[1:-2] drops the surrounding quotes and the trailing
            # newline; string-escape then undoes the repr()-style escaping.
            yield 0, ttype, val[1:-2].decode('string-escape')