diff options
Diffstat (limited to 'sqlparse/engine/statement_splitter.py')
-rw-r--r-- | sqlparse/engine/statement_splitter.py | 123 |
1 files changed, 123 insertions, 0 deletions
# -*- coding: utf-8 -*-
#
# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php

from sqlparse import sql, tokens as T


class StatementSplitter(object):
    """Filter that splits a token stream into individual statements.

    The splitter keeps a "split level" while it walks the stream. Block
    constructs (dollar-quoted bodies, DECLARE/BEGIN inside CREATE, IF/FOR/
    WHILE inside BEGIN) raise the level and their terminators lower it.
    A ``;`` only terminates a statement while the level is <= 0.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Set the filter attributes to their default values."""
        # Set when a DECLARE is seen inside a CREATE statement. It is only
        # written by this class, never read.
        self._in_declare = False
        # True while inside a PostgreSQL dollar-quoted string.
        self._in_dbldollar = False
        # The exact delimiter ("$$", "$body$", ...) that opened the current
        # dollar-quoted string, or None when not inside one.
        self._dbldollar_tag = None
        # True once a DDL keyword starting with CREATE was seen.
        self._is_create = False
        # Number of BEGIN keywords not yet matched by an END.
        self._begin_depth = 0

        # True after a terminating ';' was appended: trailing whitespace and
        # single-line comments are still attached to the finished statement.
        self.consume_ws = False
        # Tokens collected for the statement currently being built.
        self.tokens = []
        # Current split level, see class docstring.
        self.level = 0

    def _change_splitlevel(self, ttype, value):
        """Get the split level change caused by one token.

        :param ttype: token type of the current token.
        :param value: text of the current token.
        :returns: ``1`` to increase the level, ``-1`` to decrease it and
            ``0`` to leave it unchanged.
        """
        # PostgreSQL dollar quoting: $tag$ ... $tag$
        if (ttype == T.Name.Builtin and len(value) >= 2 and
                value.startswith('$') and value.endswith('$')):

            if not self._in_dbldollar:
                # Opening delimiter: remember its tag and increase level.
                self._in_dbldollar = True
                self._dbldollar_tag = value
                return 1

            if value == self._dbldollar_tag:
                # Matching closing delimiter. $quote$ completed,
                # decrease level.
                self._in_dbldollar = False
                self._dbldollar_tag = None
                return -1

            # A delimiter with a different tag is a nested dollar-quoted
            # string; it is part of the quoted text and must not close
            # the outer quote.
            return 0

        # If inside $$ everything is part of the quoted function body.
        # Nothing inside can create a new statement.
        elif self._in_dbldollar:
            return 0

        # ANSI
        # If it is a normal (non-keyword) token return.
        # Wouldn't parenthesis increase/decrease a level?
        # No, a new statement can't start inside a parenthesis.
        if ttype not in T.Keyword:
            return 0

        # Everything after here is ttype = T.Keyword
        # Also to note, once entered an If statement you are done and basically
        # returning
        unified = value.upper()

        # three keywords begin with CREATE, but only one of them is DDL
        # DDL Create though can contain more words such as "or replace"
        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
            self._is_create = True
            return 0

        # can have nested declare inside of begin...
        if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
            self._in_declare = True
            return 1

        if unified == 'BEGIN':
            self._begin_depth += 1
            if self._is_create:
                # FIXME(andi): This makes no sense.
                return 1
            return 0

        # Should this respect a preceding BEGIN?
        # In CASE ... WHEN ... END this results in a split level -1.
        # Would having multiple CASE WHEN END and an Assignment Operator
        # cause the statement to cut off prematurely?
        if unified == 'END':
            # Never let the BEGIN counter drop below zero.
            self._begin_depth = max(0, self._begin_depth - 1)
            return -1

        if (unified in ('IF', 'FOR', 'WHILE') and
                self._is_create and self._begin_depth > 0):
            return 1

        if unified in ('END IF', 'END FOR', 'END WHILE'):
            return -1

        # Default
        return 0

    def process(self, stream):
        """Process the stream and yield one statement at a time.

        :param stream: iterable of ``(ttype, value)`` pairs.
        :returns: generator of ``sql.Statement`` objects, each built from
            the ``sql.Token`` instances collected for that statement.
        """
        # Token types that may trail a finished statement and are still
        # attached to it.
        EOS_TTYPE = T.Whitespace, T.Comment.Single

        # Run over all stream tokens
        for ttype, value in stream:
            # Yield the statement if we finished one and the current token
            # is neither whitespace nor a single-line comment.
            # Note from the original author: a newline token counts as
            # non-whitespace here; in this context whitespace ignores
            # newlines. Why don't multi-line comments also count?
            if self.consume_ws and ttype not in EOS_TTYPE:
                yield sql.Statement(self.tokens)

                # Reset filter and prepare to process next statement
                self._reset()

            # Change current split level (increase, decrease or remain equal)
            self.level += self._change_splitlevel(ttype, value)

            # Append the token to the current statement
            self.tokens.append(sql.Token(ttype, value))

            # Check if we get the end of a statement
            if self.level <= 0 and ttype is T.Punctuation and value == ';':
                self.consume_ws = True

        # Yield pending statement (if any)
        if self.tokens:
            yield sql.Statement(self.tokens)