Simplify handling of encoding in lexer.py

author: Victor Uriarte <victor.m.uriarte@intel.com> 2016-05-28 23:32:41 -0700
committer: Victor Uriarte <victor.m.uriarte@intel.com> 2016-05-29 11:34:37 -0700
commit: faacd60c2769008cf1cf439c969e6183cdb754fc (patch)
tree: b14edca1c8865a37ff8b3b54de7369102109dd93 /sqlparse/lexer.py
parent: ce1374796a6dca53f44f1bd3fe09c6aa17574652 (diff)
download: sqlparse-faacd60c2769008cf1cf439c969e6183cdb754fc.tar.gz
1 file changed, 10 insertions, 22 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index a371af7..a93f7a7 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -13,7 +13,6 @@
 # and to allow some customizations.
 
 import re
-import sys
 
 from sqlparse import tokens
 from sqlparse.keywords import SQL_REGEX
@@ -42,14 +41,6 @@ class Lexer(object):
                         new_state = (tdef[2],)
                 self._tokens[state].append((rex, tdef[1], new_state))
 
-    def _decode(self, text):
-        if not isinstance(text, text_type):
-            try:
-                text = text.decode(self.encoding)
-            except UnicodeDecodeError:
-                text = text.decode('unicode-escape')
-        return text
-
     def get_tokens(self, text):
         """
         Return an iterable of (tokentype, value) pairs generated from
@@ -58,18 +49,7 @@ class Lexer(object):
 
         Also preprocess the text, i.e. expand tabs and strip it if
         wanted and applies registered filters.
-        """
-        if isinstance(text, string_types):
-            if sys.version_info[0] < 3 and isinstance(text, text_type):
-                text = StringIO(text.encode('utf-8'))
-                self.encoding = 'utf-8'
-            else:
-                text = StringIO(text)
-
-        return self.get_tokens_unprocessed(text)
 
-    def get_tokens_unprocessed(self, stream):
-        """
         Split ``text`` into (tokentype, text) pairs.
 
         ``stack`` is the inital stack (default: ``['root']``)
@@ -77,8 +57,16 @@ class Lexer(object):
         statestack = ['root', ]
         statetokens = self._tokens['root']
 
-        text = stream.read()
-        text = self._decode(text)
+        if isinstance(text, string_types):
+            text = StringIO(text)
+
+        text = text.read()
+        if not isinstance(text, text_type):
+            try:
+                text = text.decode(self.encoding)
+            except UnicodeDecodeError:
+                text = text.decode('unicode-escape')
+
         iterable = iter(range(len(text)))
 
         for pos in iterable:
author	Victor Uriarte <victor.m.uriarte@intel.com>	2016-05-28 23:32:41 -0700
committer	Victor Uriarte <victor.m.uriarte@intel.com>	2016-05-29 11:34:37 -0700
commit	faacd60c2769008cf1cf439c969e6183cdb754fc (patch)
tree	b14edca1c8865a37ff8b3b54de7369102109dd93 /sqlparse/lexer.py
parent	ce1374796a6dca53f44f1bd3fe09c6aa17574652 (diff)
download	sqlparse-faacd60c2769008cf1cf439c969e6183cdb754fc.tar.gz