Finalize single-source port for Py2.[67] and Py3.3+.

author: Georg Brandl <georg@python.org> 2014-01-18 16:44:49 +0100
committer: Georg Brandl <georg@python.org> 2014-01-18 16:44:49 +0100
commit: 97703d63f39e6086d497a6a749c9eee3293dcbeb (patch)
tree: c970bf2a7bc17aa7053f3621e299a01fb9695342 /pygments/lexer.py
parent: 5500fd3a6d0c5ece01826606fcf2d684407b9cc6 (diff)
download: pygments-97703d63f39e6086d497a6a749c9eee3293dcbeb.tar.gz
1 file changed, 17 insertions, 19 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index ce851437..36f2f4a8 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -14,18 +14,18 @@ from pygments.filter import apply_filters, Filter
 from pygments.filters import get_filter_by_name
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
-     make_analysator
+     make_analysator, text_type, add_metaclass, iteritems
 
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
            'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
 
 
-_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
-                 ('\xff\xfe\0\0', 'utf-32'),
-                 ('\0\0\xfe\xff', 'utf-32be'),
-                 ('\xff\xfe', 'utf-16'),
-                 ('\xfe\xff', 'utf-16be')]
+_encoding_map = [(b'\xef\xbb\xbf', 'utf-8'),
+                 (b'\xff\xfe\0\0', 'utf-32'),
+                 (b'\0\0\xfe\xff', 'utf-32be'),
+                 (b'\xff\xfe', 'utf-16'),
+                 (b'\xfe\xff', 'utf-16be')]
 
 _default_analyse = staticmethod(lambda x: 0.0)
 
@@ -42,6 +42,7 @@ class LexerMeta(type):
         return type.__new__(cls, name, bases, d)
 
 
+@add_metaclass(LexerMeta)
 class Lexer(object):
     """
     Lexer for a specific language.
@@ -84,8 +85,6 @@ class Lexer(object):
     #: Priority, should multiple lexers match and no content is provided
     priority = 0
 
-    __metaclass__ = LexerMeta
-
     def __init__(self, **options):
         self.options = options
         self.stripnl = get_bool_opt(options, 'stripnl', True)
@@ -136,7 +135,7 @@ class Lexer(object):
         Also preprocess the text, i.e. expand tabs and strip it if
         wanted and applies registered filters.
         """
-        if not isinstance(text, unicode):
+        if not isinstance(text, text_type):
             if self.encoding == 'guess':
                 try:
                     text = text.decode('utf-8')
@@ -155,14 +154,13 @@ class Lexer(object):
                 decoded = None
                 for bom, encoding in _encoding_map:
                     if text.startswith(bom):
-                        decoded = unicode(text[len(bom):], encoding,
-                                          errors='replace')
+                        decoded = text[len(bom):].decode(encoding, 'replace')
                         break
                 # no BOM found, so use chardet
                 if decoded is None:
                     enc = chardet.detect(text[:1024]) # Guess using first 1KB
-                    decoded = unicode(text, enc.get('encoding') or 'utf-8',
-                                      errors='replace')
+                    decoded = text.decode(enc.get('encoding') or 'utf-8',
+                                          'replace')
                 text = decoded
             else:
                 text = text.decode(self.encoding)
@@ -476,7 +474,7 @@ class RegexLexerMeta(LexerMeta):
         """Preprocess a dictionary of token definitions."""
         processed = cls._all_tokens[name] = {}
         tokendefs = tokendefs or cls.tokens[name]
-        for state in tokendefs.keys():
+        for state in list(tokendefs):
             cls._process_state(tokendefs, processed, state)
         return processed
 
@@ -497,7 +495,7 @@ class RegexLexerMeta(LexerMeta):
         for c in itertools.chain((cls,), cls.__mro__):
             toks = c.__dict__.get('tokens', {})
 
-            for state, items in toks.iteritems():
+            for state, items in iteritems(toks):
                 curitems = tokens.get(state)
                 if curitems is None:
                     tokens[state] = items
@@ -537,13 +535,13 @@ class RegexLexerMeta(LexerMeta):
         return type.__call__(cls, *args, **kwds)
 
 
+@add_metaclass(RegexLexerMeta)
 class RegexLexer(Lexer):
     """
     Base for simple stateful regular expression-based lexers.
     Simplifies the lexing process so that you need only
     provide a list of states and regular expressions.
     """
-    __metaclass__ = RegexLexerMeta
 
     #: Flags for compiling the regular expressions.
     #: Defaults to MULTILINE.
@@ -722,7 +720,7 @@ def do_insertions(insertions, tokens):
     """
     insertions = iter(insertions)
     try:
-        index, itokens = insertions.next()
+        index, itokens = next(insertions)
     except StopIteration:
         # no insertions
         for item in tokens:
@@ -748,7 +746,7 @@ def do_insertions(insertions, tokens):
                 realpos += len(it_value)
             oldi = index - i
             try:
-                index, itokens = insertions.next()
+                index, itokens = next(insertions)
             except StopIteration:
                 insleft = False
                 break  # not strictly necessary
@@ -763,7 +761,7 @@ def do_insertions(insertions, tokens):
             yield realpos, t, v
             realpos += len(v)
         try:
-            index, itokens = insertions.next()
+            index, itokens = next(insertions)
         except StopIteration:
             insleft = False
             break  # not strictly necessary
author	Georg Brandl <georg@python.org>	2014-01-18 16:44:49 +0100
committer	Georg Brandl <georg@python.org>	2014-01-18 16:44:49 +0100
commit	97703d63f39e6086d497a6a749c9eee3293dcbeb (patch)
tree	c970bf2a7bc17aa7053f3621e299a01fb9695342 /pygments/lexer.py
parent	5500fd3a6d0c5ece01826606fcf2d684407b9cc6 (diff)
download	pygments-97703d63f39e6086d497a6a749c9eee3293dcbeb.tar.gz