diff options
author | Victor Uriarte <victor.m.uriarte@intel.com> | 2016-05-28 23:32:41 -0700 |
---|---|---|
committer | Victor Uriarte <victor.m.uriarte@intel.com> | 2016-05-29 11:34:37 -0700 |
commit | faacd60c2769008cf1cf439c969e6183cdb754fc (patch) | |
tree | b14edca1c8865a37ff8b3b54de7369102109dd93 /sqlparse/lexer.py | |
parent | ce1374796a6dca53f44f1bd3fe09c6aa17574652 (diff) | |
download | sqlparse-faacd60c2769008cf1cf439c969e6183cdb754fc.tar.gz |
Simplify handling of encoding in lexer.py
Diffstat (limited to 'sqlparse/lexer.py')
-rw-r--r-- | sqlparse/lexer.py | 32 |
1 file changed, 10 insertions, 22 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index a371af7..a93f7a7 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -13,7 +13,6 @@ # and to allow some customizations. import re -import sys from sqlparse import tokens from sqlparse.keywords import SQL_REGEX @@ -42,14 +41,6 @@ class Lexer(object): new_state = (tdef[2],) self._tokens[state].append((rex, tdef[1], new_state)) - def _decode(self, text): - if not isinstance(text, text_type): - try: - text = text.decode(self.encoding) - except UnicodeDecodeError: - text = text.decode('unicode-escape') - return text - def get_tokens(self, text): """ Return an iterable of (tokentype, value) pairs generated from @@ -58,18 +49,7 @@ class Lexer(object): Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. - """ - if isinstance(text, string_types): - if sys.version_info[0] < 3 and isinstance(text, text_type): - text = StringIO(text.encode('utf-8')) - self.encoding = 'utf-8' - else: - text = StringIO(text) - - return self.get_tokens_unprocessed(text) - def get_tokens_unprocessed(self, stream): - """ Split ``text`` into (tokentype, text) pairs. ``stack`` is the inital stack (default: ``['root']``) @@ -77,8 +57,16 @@ class Lexer(object): statestack = ['root', ] statetokens = self._tokens['root'] - text = stream.read() - text = self._decode(text) + if isinstance(text, string_types): + text = StringIO(text) + + text = text.read() + if not isinstance(text, text_type): + try: + text = text.decode(self.encoding) + except UnicodeDecodeError: + text = text.decode('unicode-escape') + iterable = iter(range(len(text))) for pos in iterable: |