# -*- coding: utf-8 -*- import itertools from sqlparse import tokens as T from sqlparse.sql import * def _group_left_right(tlist, ttype, value, cls, check_right=lambda t: True, check_left = lambda t: True, include_semicolon=False): [_group_left_right(sgroup, ttype, value, cls, check_right, include_semicolon) for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] idx = 0 token = tlist.token_next_match(idx, ttype, value) while token: right = tlist.token_next(tlist.token_index(token)) left = tlist.token_prev(tlist.token_index(token)) if right is None or not check_right(right): token = tlist.token_next_match(tlist.token_index(token)+1, ttype, value) elif left is None or not check_right(left): token = tlist.token_next_match(tlist.token_index(token)+1, ttype, value) else: if include_semicolon: sright = tlist.token_next_match(tlist.token_index(right), T.Punctuation, ';') if sright is not None: # only overwrite "right" if a semicolon is actually # present. right = sright tokens = tlist.tokens_between(left, right)[1:] if not isinstance(left, cls): new = cls([left]) new_idx = tlist.token_index(left) tlist.tokens.remove(left) tlist.tokens.insert(new_idx, new) left = new left.tokens.extend(tokens) for t in tokens: tlist.tokens.remove(t) token = tlist.token_next_match(tlist.token_index(left)+1, ttype, value) def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, cls, include_semicolon=False, recurse=False): def _find_matching(i, tl, stt, sva, ett, eva): depth = 1 for t in tl.tokens[i:]: if t.match(stt, sva): depth += 1 elif t.match(ett, eva): depth -= 1 if depth == 1: return t return None [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, cls, include_semicolon) for sgroup in tlist.get_sublists() if recurse] if isinstance(tlist, cls): idx = 1 else: idx = 0 token = tlist.token_next_match(idx, start_ttype, start_value) while token: tidx = tlist.token_index(token) end = _find_matching(tidx, tlist, start_ttype, start_value, end_ttype, end_value) if end is None: idx = tidx+1 else: if include_semicolon: next_ = tlist.token_next(tlist.token_index(end)) if next_ and next_.match(T.Punctuation, ';'): end = next_ group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) _group_matching(group, start_ttype, start_value, end_ttype, end_value, cls, include_semicolon) idx = tlist.token_index(group)+1 token = tlist.token_next_match(idx, start_ttype, start_value) def group_if(tlist): _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True) def group_for(tlist): _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True) def group_as(tlist): def _right_valid(token): # Currently limited to DML/DDL. Maybe additional more non SQL reserved # keywords should appear here (see issue8). return not token.ttype in (T.DML, T.DDL) _group_left_right(tlist, T.Keyword, 'AS', Identifier, check_right=_right_valid) def group_assignment(tlist): _group_left_right(tlist, T.Assignment, ':=', Assignment, include_semicolon=True) def group_comparsion(tlist): def _parts_valid(token): return (token.ttype in (T.String.Symbol, T.Name, T.Number, T.Number.Integer, T.Literal) or isinstance(token, (Identifier,))) _group_left_right(tlist, T.Operator.Comparsion, None, Comparsion, check_left=_parts_valid, check_right=_parts_valid) def group_case(tlist): _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, include_semicolon=True, recurse=True) def group_identifier(tlist): def _consume_cycle(tl, i): x = itertools.cycle(( lambda y: (y.match(T.Punctuation, '.') or y.ttype is T.Operator), lambda y: (y.ttype in (T.String.Symbol, T.Name, T.Wildcard)) )) for t in tl.tokens[i:]: if x.next()(t): yield t else: raise StopIteration # bottom up approach: group subgroups first [group_identifier(sgroup) for sgroup in tlist.get_sublists() if not isinstance(sgroup, Identifier)] # real processing idx = 0 token = tlist.token_next_by_instance(idx, Function) if token is None: token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) while token: identifier_tokens = [token]+list( _consume_cycle(tlist, tlist.token_index(token)+1)) if not (len(identifier_tokens) == 1 and isinstance(identifier_tokens[0], Function)): group = tlist.group_tokens(Identifier, identifier_tokens) idx = tlist.token_index(group)+1 else: idx += 1 token = tlist.token_next_by_instance(idx, Function) if token is None: token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) def group_identifier_list(tlist): [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() if not isinstance(sgroup, (Identifier, IdentifierList))] idx = 0 # Allowed list items fend1_funcs = [lambda t: isinstance(t, Identifier), lambda t: t.is_whitespace(), lambda t: t.ttype == T.Wildcard, lambda t: t.match(T.Keyword, 'null'), lambda t: t.ttype == T.Number.Integer, lambda t: t.ttype == T.String.Single, lambda t: isinstance(t, Comparsion), ] tcomma = tlist.token_next_match(idx, T.Punctuation, ',') start = None while tcomma is not None: before = tlist.token_prev(tcomma) after = tlist.token_next(tcomma) # Check if the tokens around tcomma belong to a list bpassed = apassed = False for func in fend1_funcs: if before is not None and func(before): bpassed = True if after is not None and func(after): apassed = True if not bpassed or not apassed: # Something's wrong here, skip ahead to next "," start = None tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1, T.Punctuation, ',') else: if start is None: start = before next_ = tlist.token_next(after) if next_ is None or not next_.match(T.Punctuation, ','): # Reached the end of the list tokens = tlist.tokens_between(start, after) group = tlist.group_tokens(IdentifierList, tokens) start = None tcomma = tlist.token_next_match(tlist.token_index(group)+1, T.Punctuation, ',') else: tcomma = next_ def group_parenthesis(tlist): _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis) def group_comments(tlist): [group_comments(sgroup) for sgroup in tlist.get_sublists() if not isinstance(sgroup, Comment)] idx = 0 token = tlist.token_next_by_type(idx, T.Comment) while token: tidx = tlist.token_index(token) end = tlist.token_not_matching(tidx+1, [lambda t: t.ttype in T.Comment, lambda t: t.is_whitespace()]) if end is None: idx = tidx + 1 else: eidx = tlist.token_index(end) grp_tokens = tlist.tokens_between(token, tlist.token_prev(eidx, False)) group = tlist.group_tokens(Comment, grp_tokens) idx = tlist.token_index(group) token = tlist.token_next_by_type(idx, T.Comment) def group_where(tlist): [group_where(sgroup) for sgroup in tlist.get_sublists() if not isinstance(sgroup, Where)] idx = 0 token = tlist.token_next_match(idx, T.Keyword, 'WHERE') stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION') while token: tidx = tlist.token_index(token) end = tlist.token_next_match(tidx+1, T.Keyword, stopwords) if end is None: end = tlist._groupable_tokens[-1] else: end = tlist.tokens[tlist.token_index(end)-1] group = tlist.group_tokens(Where, tlist.tokens_between(token, end)) idx = tlist.token_index(group) token = tlist.token_next_match(idx, T.Keyword, 'WHERE') def group_aliased(tlist): [group_aliased(sgroup) for sgroup in tlist.get_sublists() if not isinstance(sgroup, Identifier)] idx = 0 token = tlist.token_next_by_instance(idx, Identifier) while token: next_ = tlist.token_next(tlist.token_index(token)) if next_ is not None and isinstance(next_, Identifier): grp = tlist.tokens_between(token, next_)[1:] token.tokens.extend(grp) for t in grp: tlist.tokens.remove(t) idx = tlist.token_index(token)+1 token = tlist.token_next_by_instance(idx, Identifier) def group_typecasts(tlist): _group_left_right(tlist, T.Punctuation, '::', Identifier) def group_functions(tlist): [group_functions(sgroup) for sgroup in tlist.get_sublists() if not isinstance(sgroup, Function)] idx = 0 token = tlist.token_next_by_type(idx, T.Name) while token: next_ = tlist.token_next(token) if not isinstance(next_, Parenthesis): idx = tlist.token_index(token)+1 else: func = tlist.group_tokens(Function, tlist.tokens_between(token, next_)) idx = tlist.token_index(func)+1 token = tlist.token_next_by_type(idx, T.Name) def group(tlist): for func in [group_parenthesis, group_functions, group_comments, group_where, group_case, group_identifier, group_typecasts, group_as, group_aliased, group_assignment, group_comparsion, group_identifier_list, group_if, group_for,]: func(tlist)