summary refs log tree commit diff
path: root/sqlparse/lexer.py
diff options
context:
space:
mode:
author Victor Uriarte <victor.m.uriarte@intel.com> 2016-05-28 23:32:41 -0700
committer Victor Uriarte <victor.m.uriarte@intel.com> 2016-05-29 11:34:37 -0700
commit faacd60c2769008cf1cf439c969e6183cdb754fc (patch)
tree b14edca1c8865a37ff8b3b54de7369102109dd93 /sqlparse/lexer.py
parent ce1374796a6dca53f44f1bd3fe09c6aa17574652 (diff)
download sqlparse-faacd60c2769008cf1cf439c969e6183cdb754fc.tar.gz
Simplify handling of encoding in lexer.py
Diffstat (limited to 'sqlparse/lexer.py')
-rw-r--r-- sqlparse/lexer.py 32
1 files changed, 10 insertions, 22 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index a371af7..a93f7a7 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -13,7 +13,6 @@
# and to allow some customizations.
import re
-import sys
from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
@@ -42,14 +41,6 @@ class Lexer(object):
new_state = (tdef[2],)
self._tokens[state].append((rex, tdef[1], new_state))
- def _decode(self, text):
- if not isinstance(text, text_type):
- try:
- text = text.decode(self.encoding)
- except UnicodeDecodeError:
- text = text.decode('unicode-escape')
- return text
-
def get_tokens(self, text):
"""
Return an iterable of (tokentype, value) pairs generated from
@@ -58,18 +49,7 @@ class Lexer(object):
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
- """
- if isinstance(text, string_types):
- if sys.version_info[0] < 3 and isinstance(text, text_type):
- text = StringIO(text.encode('utf-8'))
- self.encoding = 'utf-8'
- else:
- text = StringIO(text)
-
- return self.get_tokens_unprocessed(text)
- def get_tokens_unprocessed(self, stream):
- """
Split ``text`` into (tokentype, text) pairs.
``stack`` is the initial stack (default: ``['root']``)
@@ -77,8 +57,16 @@ class Lexer(object):
statestack = ['root', ]
statetokens = self._tokens['root']
- text = stream.read()
- text = self._decode(text)
+ if isinstance(text, string_types):
+ text = StringIO(text)
+
+ text = text.read()
+ if not isinstance(text, text_type):
+ try:
+ text = text.decode(self.encoding)
+ except UnicodeDecodeError:
+ text = text.decode('unicode-escape')
+
iterable = iter(range(len(text)))
for pos in iterable: