1 files changed, 123 insertions, 0 deletions
diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
new file mode 100644
index 0000000..1d1d842
--- /dev/null
+++ b/sqlparse/engine/statement_splitter.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from sqlparse import sql, tokens as T
+
+
+class StatementSplitter(object):
+    """Filter that split stream at individual statements"""
+
+    def __init__(self):
+        self._reset()
+
+    def _reset(self):
+        """Set the filter attributes to its default values"""
+        self._in_declare = False
+        self._in_dbldollar = False
+        self._is_create = False
+        self._begin_depth = 0
+
+        self.consume_ws = False
+        self.tokens = []
+        self.level = 0
+
+    def _change_splitlevel(self, ttype, value):
+        """Get the new split level (increase, decrease or remain equal)"""
+        # PostgreSQL
+        if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$':
+
+            # 2nd dbldollar found. $quote$ completed
+            # decrease level
+            if self._in_dbldollar:
+                self._in_dbldollar = False
+                return -1
+            else:
+                self._in_dbldollar = True
+                return 1
+
+        # if inside $$ everything inside is defining function character.
+        # Nothing inside can create a new statement
+        elif self._in_dbldollar:
+            return 0
+
+        # ANSI
+        # if normal token return
+        # wouldn't parenthesis increase/decrease a level?
+        # no, inside a paranthesis can't start new statement
+        if ttype not in T.Keyword:
+            return 0
+
+        # Everything after here is ttype = T.Keyword
+        # Also to note, once entered an If statement you are done and basically
+        # returning
+        unified = value.upper()
+
+        # three keywords begin with CREATE, but only one of them is DDL
+        # DDL Create though can contain more words such as "or replace"
+        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+            self._is_create = True
+            return 0
+
+        # can have nested declare inside of being...
+        if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
+            self._in_declare = True
+            return 1
+
+        if unified == 'BEGIN':
+            self._begin_depth += 1
+            if self._is_create:
+                # FIXME(andi): This makes no sense.
+                return 1
+            return 0
+
+        # Should this respect a preceeding BEGIN?
+        # In CASE ... WHEN ... END this results in a split level -1.
+        # Would having multiple CASE WHEN END and a Assigment Operator
+        # cause the statement to cut off prematurely?
+        if unified == 'END':
+            self._begin_depth = max(0, self._begin_depth - 1)
+            return -1
+
+        if (unified in ('IF', 'FOR', 'WHILE') and
+                self._is_create and self._begin_depth > 0):
+            return 1
+
+        if unified in ('END IF', 'END FOR', 'END WHILE'):
+            return -1
+
+        # Default
+        return 0
+
+    def process(self, stream):
+        """Process the stream"""
+        EOS_TTYPE = T.Whitespace, T.Comment.Single
+
+        # Run over all stream tokens
+        for ttype, value in stream:
+            # Yield token if we finished a statement and there's no whitespaces
+            # It will count newline token as a non whitespace. In this context
+            # whitespace ignores newlines.
+            # why don't multi line comments also count?
+            if self.consume_ws and ttype not in EOS_TTYPE:
+                yield sql.Statement(self.tokens)
+
+                # Reset filter and prepare to process next statement
+                self._reset()
+
+            # Change current split level (increase, decrease or remain equal)
+            self.level += self._change_splitlevel(ttype, value)
+
+            # Append the token to the current statement
+            self.tokens.append(sql.Token(ttype, value))
+
+            # Check if we get the end of a statement
+            if self.level <= 0 and ttype is T.Punctuation and value == ';':
+                self.consume_ws = True
+
+        # Yield pending statement (if any)
+        if self.tokens:
+            yield sql.Statement(self.tokens)