summaryrefslogtreecommitdiff
path: root/pygments/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r--  pygments/lexer.py  36
1 file changed, 17 insertions, 19 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index ce851437..36f2f4a8 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -14,18 +14,18 @@ from pygments.filter import apply_filters, Filter
from pygments.filters import get_filter_by_name
from pygments.token import Error, Text, Other, _TokenType
from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
- make_analysator
+ make_analysator, text_type, add_metaclass, iteritems
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
-_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
- ('\xff\xfe\0\0', 'utf-32'),
- ('\0\0\xfe\xff', 'utf-32be'),
- ('\xff\xfe', 'utf-16'),
- ('\xfe\xff', 'utf-16be')]
+_encoding_map = [(b'\xef\xbb\xbf', 'utf-8'),
+ (b'\xff\xfe\0\0', 'utf-32'),
+ (b'\0\0\xfe\xff', 'utf-32be'),
+ (b'\xff\xfe', 'utf-16'),
+ (b'\xfe\xff', 'utf-16be')]
_default_analyse = staticmethod(lambda x: 0.0)
@@ -42,6 +42,7 @@ class LexerMeta(type):
return type.__new__(cls, name, bases, d)
+@add_metaclass(LexerMeta)
class Lexer(object):
"""
Lexer for a specific language.
@@ -84,8 +85,6 @@ class Lexer(object):
#: Priority, should multiple lexers match and no content is provided
priority = 0
- __metaclass__ = LexerMeta
-
def __init__(self, **options):
self.options = options
self.stripnl = get_bool_opt(options, 'stripnl', True)
@@ -136,7 +135,7 @@ class Lexer(object):
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
"""
- if not isinstance(text, unicode):
+ if not isinstance(text, text_type):
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
@@ -155,14 +154,13 @@ class Lexer(object):
decoded = None
for bom, encoding in _encoding_map:
if text.startswith(bom):
- decoded = unicode(text[len(bom):], encoding,
- errors='replace')
+ decoded = text[len(bom):].decode(encoding, 'replace')
break
# no BOM found, so use chardet
if decoded is None:
enc = chardet.detect(text[:1024]) # Guess using first 1KB
- decoded = unicode(text, enc.get('encoding') or 'utf-8',
- errors='replace')
+ decoded = text.decode(enc.get('encoding') or 'utf-8',
+ 'replace')
text = decoded
else:
text = text.decode(self.encoding)
@@ -476,7 +474,7 @@ class RegexLexerMeta(LexerMeta):
"""Preprocess a dictionary of token definitions."""
processed = cls._all_tokens[name] = {}
tokendefs = tokendefs or cls.tokens[name]
- for state in tokendefs.keys():
+ for state in list(tokendefs):
cls._process_state(tokendefs, processed, state)
return processed
@@ -497,7 +495,7 @@ class RegexLexerMeta(LexerMeta):
for c in itertools.chain((cls,), cls.__mro__):
toks = c.__dict__.get('tokens', {})
- for state, items in toks.iteritems():
+ for state, items in iteritems(toks):
curitems = tokens.get(state)
if curitems is None:
tokens[state] = items
@@ -537,13 +535,13 @@ class RegexLexerMeta(LexerMeta):
return type.__call__(cls, *args, **kwds)
+@add_metaclass(RegexLexerMeta)
class RegexLexer(Lexer):
"""
Base for simple stateful regular expression-based lexers.
Simplifies the lexing process so that you need only
provide a list of states and regular expressions.
"""
- __metaclass__ = RegexLexerMeta
#: Flags for compiling the regular expressions.
#: Defaults to MULTILINE.
@@ -722,7 +720,7 @@ def do_insertions(insertions, tokens):
"""
insertions = iter(insertions)
try:
- index, itokens = insertions.next()
+ index, itokens = next(insertions)
except StopIteration:
# no insertions
for item in tokens:
@@ -748,7 +746,7 @@ def do_insertions(insertions, tokens):
realpos += len(it_value)
oldi = index - i
try:
- index, itokens = insertions.next()
+ index, itokens = next(insertions)
except StopIteration:
insleft = False
break # not strictly necessary
@@ -763,7 +761,7 @@ def do_insertions(insertions, tokens):
yield realpos, t, v
realpos += len(v)
try:
- index, itokens = insertions.next()
+ index, itokens = next(insertions)
except StopIteration:
insleft = False
break # not strictly necessary