summaryrefslogtreecommitdiff
path: root/sqlparse/engine/statement_splitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlparse/engine/statement_splitter.py')
-rw-r--r--sqlparse/engine/statement_splitter.py123
1 files changed, 123 insertions, 0 deletions
diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
new file mode 100644
index 0000000..1d1d842
--- /dev/null
+++ b/sqlparse/engine/statement_splitter.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from sqlparse import sql, tokens as T
+
+
+class StatementSplitter(object):
+ """Filter that split stream at individual statements"""
+
+ def __init__(self):
+ self._reset()
+
+ def _reset(self):
+ """Set the filter attributes to its default values"""
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+ self._begin_depth = 0
+
+ self.consume_ws = False
+ self.tokens = []
+ self.level = 0
+
+ def _change_splitlevel(self, ttype, value):
+ """Get the new split level (increase, decrease or remain equal)"""
+ # PostgreSQL
+ if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$':
+
+ # 2nd dbldollar found. $quote$ completed
+ # decrease level
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+
+ # if inside $$ everything inside is defining function character.
+ # Nothing inside can create a new statement
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ # if normal token return
+ # wouldn't parenthesis increase/decrease a level?
+ # no, inside a paranthesis can't start new statement
+ if ttype not in T.Keyword:
+ return 0
+
+ # Everything after here is ttype = T.Keyword
+ # Also to note, once entered an If statement you are done and basically
+ # returning
+ unified = value.upper()
+
+ # three keywords begin with CREATE, but only one of them is DDL
+ # DDL Create though can contain more words such as "or replace"
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+ return 0
+
+ # can have nested declare inside of being...
+ if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ self._begin_depth += 1
+ if self._is_create:
+ # FIXME(andi): This makes no sense.
+ return 1
+ return 0
+
+ # Should this respect a preceeding BEGIN?
+ # In CASE ... WHEN ... END this results in a split level -1.
+ # Would having multiple CASE WHEN END and a Assigment Operator
+ # cause the statement to cut off prematurely?
+ if unified == 'END':
+ self._begin_depth = max(0, self._begin_depth - 1)
+ return -1
+
+ if (unified in ('IF', 'FOR', 'WHILE') and
+ self._is_create and self._begin_depth > 0):
+ return 1
+
+ if unified in ('END IF', 'END FOR', 'END WHILE'):
+ return -1
+
+ # Default
+ return 0
+
+ def process(self, stream):
+ """Process the stream"""
+ EOS_TTYPE = T.Whitespace, T.Comment.Single
+
+ # Run over all stream tokens
+ for ttype, value in stream:
+ # Yield token if we finished a statement and there's no whitespaces
+ # It will count newline token as a non whitespace. In this context
+ # whitespace ignores newlines.
+ # why don't multi line comments also count?
+ if self.consume_ws and ttype not in EOS_TTYPE:
+ yield sql.Statement(self.tokens)
+
+ # Reset filter and prepare to process next statement
+ self._reset()
+
+ # Change current split level (increase, decrease or remain equal)
+ self.level += self._change_splitlevel(ttype, value)
+
+ # Append the token to the current statement
+ self.tokens.append(sql.Token(ttype, value))
+
+ # Check if we get the end of a statement
+ if self.level <= 0 and ttype is T.Punctuation and value == ';':
+ self.consume_ws = True
+
+ # Yield pending statement (if any)
+ if self.tokens:
+ yield sql.Statement(self.tokens)