summary | refs | log | tree | commit | diff
path: root/sqlparse/lexer.py
diff options
context:
space:
mode:
author Victor Uriarte <victor.m.uriarte@intel.com> 2016-05-28 13:13:40 -0700
committer Victor Uriarte <victor.m.uriarte@intel.com> 2016-05-29 00:08:58 -0700
commit 5aa809137f9e65c56e0b34c15c0fe42be36894d7 (patch)
tree 19f2caa7c26781fd23da155a999963b483477167 /sqlparse/lexer.py
parent a767c88b008d407d91b9118d124e2a9b579a7f12 (diff)
download sqlparse-5aa809137f9e65c56e0b34c15c0fe42be36894d7.tar.gz
Refactor var Lexer.tokens & func is_keyword
Diffstat (limited to 'sqlparse/lexer.py')
-rw-r--r-- sqlparse/lexer.py 71
1 files changed, 3 insertions, 68 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 6bc49ee..a908989 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -16,10 +16,8 @@ import re
import sys
from sqlparse import tokens
-from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
+from sqlparse.keywords import SQL_REGEX
from sqlparse.compat import StringIO, string_types, with_metaclass, text_type
-
-
class include(str):
pass
@@ -35,9 +33,6 @@ class combined(tuple):
pass
-def is_keyword(value):
- test = value.upper()
- return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
def apply_filters(stream, filters, lexer=None):
@@ -134,9 +129,8 @@ class LexerMeta(type):
cls._all_tokens = {}
cls._tmpname = 0
processed = cls._all_tokens[cls.__name__] = {}
- # tokendefs = tokendefs or cls.tokens[name]
- for state in cls.tokens.keys():
- cls._process_state(cls.tokens, processed, state)
+ for state in SQL_REGEX:
+ cls._process_state(SQL_REGEX, processed, state)
return processed
def __call__(cls, *args, **kwds):
@@ -160,65 +154,6 @@ class _Lexer(object):
tabsize = 0
flags = re.IGNORECASE | re.UNICODE
- tokens = {
- 'root': [
- (r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single),
- # $ matches *before* newline, therefore we have two patterns
- # to match Comment.Single
- (r'(--|# ).*?$', tokens.Comment.Single),
- (r'(\r\n|\r|\n)', tokens.Newline),
- (r'\s+', tokens.Whitespace),
- (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
- (r':=', tokens.Assignment),
- (r'::', tokens.Punctuation),
- (r'[*]', tokens.Wildcard),
- (r'CASE\b', tokens.Keyword), # extended CASE(foo)
- (r"`(``|[^`])*`", tokens.Name),
- (r"´(´´|[^´])*´", tokens.Name),
- (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
- (r'\?{1}', tokens.Name.Placeholder),
- (r'%\(\w+\)s', tokens.Name.Placeholder),
- (r'%s', tokens.Name.Placeholder),
- (r'[$:?]\w+', tokens.Name.Placeholder),
- # FIXME(andi): VALUES shouldn't be listed here
- # see https://github.com/andialbrecht/sqlparse/pull/64
- (r'VALUES', tokens.Keyword),
- (r'(@|##|#)[^\W\d_]\w+', tokens.Name),
- # IN is special, it may be followed by a parenthesis, but
- # is never a functino, see issue183
- (r'in\b(?=[ (])?', tokens.Keyword),
- (r'USING(?=\()', tokens.Keyword),
- (r'[^\W\d_]\w*(?=[.(])', tokens.Name), # see issue39
- (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
- (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
- (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
- (r'[-]?[0-9]+', tokens.Number.Integer),
- (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
- # not a real string literal in ANSI SQL:
- (r'(""|".*?[^\\]")', tokens.String.Symbol),
- # sqlite names can be escaped with [square brackets]. left bracket
- # cannot be preceded by word character or a right bracket --
- # otherwise it's probably an array index
- (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
- (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
- r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
- (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
- (r'NOT NULL\b', tokens.Keyword),
- (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
- (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
- (r'(?<=\.)[^\W\d_]\w*', tokens.Name),
- (r'[^\W\d]\w*', is_keyword),
- (r'[;:()\[\],\.]', tokens.Punctuation),
- (r'[<>=~!]+', tokens.Operator.Comparison),
- (r'[+/@#%^&|`?^-]+', tokens.Operator),
- ],
- 'multiline-comments': [
- (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
- (r'\*/', tokens.Comment.Multiline, '#pop'),
- (r'[^/\*]+', tokens.Comment.Multiline),
- (r'[/*]', tokens.Comment.Multiline),
- ]}
-
def __init__(self):
self.filters = []