author     Andi Albrecht <albrecht.andi@gmail.com>   2009-04-03 21:26:42 +0200
committer  Andi Albrecht <albrecht.andi@gmail.com>   2009-04-03 21:26:42 +0200
commit     361122eb22d5681c58dac731009e4814b3dd5fa5 (patch)
tree       b096496bc9c6b8febe092d0aefd56de1a4f8f4a0 /sqlparse
download   sqlparse-361122eb22d5681c58dac731009e4814b3dd5fa5.tar.gz
Initial import.
Diffstat (limited to 'sqlparse')
-rw-r--r--   sqlparse/__init__.py          65
-rw-r--r--   sqlparse/dialects.py          88
-rw-r--r--   sqlparse/engine/__init__.py   81
-rw-r--r--   sqlparse/engine/_grouping.py  499
-rw-r--r--   sqlparse/engine/filter.py     98
-rw-r--r--   sqlparse/engine/grouping.py   537
-rw-r--r--   sqlparse/filters.py           432
-rw-r--r--   sqlparse/formatter.py         163
-rw-r--r--   sqlparse/keywords.py          589
-rw-r--r--   sqlparse/lexer.py             310
-rw-r--r--   sqlparse/tokens.py            131
11 files changed, 2993 insertions, 0 deletions
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
new file mode 100644
index 0000000..01b3bd8
--- /dev/null
+++ b/sqlparse/__init__.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""Parse SQL statements."""
+
+__version__ = '0.1.0'
+
+import logging
+import os
+
+
+if 'SQLPARSE_DEBUG' in os.environ:
+ logging.basicConfig(level=logging.DEBUG)
+
+
+class SQLParseError(Exception):
+ """Base class for exceptions in this module."""
+
+
+# Setup namespace
+from sqlparse import engine
+from sqlparse import filters
+from sqlparse import formatter
+
+
+def parse(sql):
+ """Parse sql and return a list of statements.
+
+    *sql* is a single string containing one or more SQL statements.
+
+    The returned :class:`~sqlparse.engine.grouping.Statement` instances are
+    fully analyzed.
+
+    Returns a tuple of :class:`~sqlparse.engine.grouping.Statement` instances.
+ """
+ stack = engine.FilterStack()
+ stack.full_analyze()
+ return tuple(stack.run(sql))
+
+
+def format(sql, **options):
+ """Format *sql* according to *options*.
+
+    In contrast to :meth:`parse` this function returns the formatted
+    statements as a single string, formatted according to *options*.
+
+    Available options are documented in the :mod:`~sqlparse.formatter` module.
+ """
+ stack = engine.FilterStack()
+ options = formatter.validate_options(options)
+ stack = formatter.build_filter_stack(stack, options)
+ stack.postprocess.append(filters.SerializerUnicode())
+ return ''.join(stack.run(sql))
+
+
+def split(sql):
+ """Split *sql* into separate statements.
+
+ Returns a list of strings.
+ """
+ stack = engine.FilterStack()
+ stack.split_statements = True
+ return [unicode(stmt) for stmt in stack.run(sql)]
+
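
A minimal usage sketch of the three helpers above (split, parse, format). It
assumes this initial import is importable as `sqlparse`; the SQL text and the
option values are only illustrative:

    import sqlparse

    sql = "select id, name from foo where id = 1; update foo set name = 'x';"

    # split() returns the statements as a list of unicode strings.
    for raw in sqlparse.split(sql):
        print raw

    # parse() returns fully analyzed Statement instances.
    statements = sqlparse.parse(sql)
    print statements[0].get_type()   # 'SELECT' if the lexer marks it as DML

    # format() returns a single formatted string.
    print sqlparse.format(sql, reindent=True, keyword_case='upper')
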
diff --git a/sqlparse/dialects.py b/sqlparse/dialects.py
new file mode 100644
index 0000000..cabe503
--- /dev/null
+++ b/sqlparse/dialects.py
@@ -0,0 +1,88 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""This module contains classes that represent SQL dialects."""
+
+from tokens import *
+
+
+class Dialect(object):
+ """Base class for SQL dialect implementations."""
+
+    def handle_token(self, tokentype, text):
+        """Handle a token.
+
+        Arguments:
+          tokentype: A token type.
+          text: Text representation of the token.
+
+        Returns:
+          A tuple of three items: tokentype, text, splitlevel.
+          splitlevel is either -1, 0 or 1 and describes a change in the
+          statement split level.
+        """
+ raise NotImplementedError
+
+ def reset(self):
+ """Reset Dialect state."""
+ pass
+
+
+class DefaultDialect(Dialect):
+
+ def __init__(self):
+ self._in_declare = False
+ self._stmt_type = None
+
+ def get_statement_type(self):
+ return self._stmt_type
+
+ def set_statement_type(self, type_):
+ self._stmt_type = type_
+
+ def handle_token(self, tokentype, text):
+ if not tokentype == Keyword:
+ return tokentype, text, 0
+ unified = text.upper()
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return tokentype, text, 1
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return tokentype, text, 0
+ return tokentype, text, 0
+ if unified == 'END':
+ return tokentype, text, -1
+ # TODO: Use a constant here
+ if unified in ('IF', 'FOR') and self._stmt_type == 6:
+ return tokentype, text, 1
+ return tokentype, text, 0
+
+ def reset(self):
+ self._in_declare = False
+
+
+class PSQLDialect(DefaultDialect):
+
+ def __init__(self):
+ super(PSQLDialect, self).__init__()
+ self._in_dbldollar = False
+
+ def handle_token(self, tokentype, text):
+ if (tokentype == Name.Builtin
+ and text.startswith('$') and text.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return tokentype, text, -1
+ else:
+ self._in_dbldollar = True
+ return tokentype, text, 1
+ elif self._in_dbldollar:
+ return tokentype, text, 0
+ else:
+ return super(PSQLDialect, self).handle_token(tokentype, text)
+
+ def reset(self):
+ self._dollar_started = False
+ self._in_dbldollar = False
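
The handle_token() protocol described above can be exercised directly. A
small, illustrative sketch (token types come from sqlparse.tokens; the
comments only note the expected split level):

    from sqlparse.dialects import DefaultDialect
    from sqlparse.tokens import Keyword, Name

    d = DefaultDialect()
    print d.handle_token(Keyword, 'DECLARE')   # splitlevel +1
    print d.handle_token(Keyword, 'BEGIN')     # splitlevel  0
    print d.handle_token(Keyword, 'END')       # splitlevel -1
    print d.handle_token(Name, 'foo')          # non-keywords pass through (0)
    d.reset()
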
diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py
new file mode 100644
index 0000000..5cac528
--- /dev/null
+++ b/sqlparse/engine/__init__.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""Filter stack for splitting, grouping and processing SQL statements."""
+
+import logging
+import re
+
+from sqlparse import lexer, SQLParseError
+from sqlparse.engine import grouping
+from sqlparse.engine.filter import StatementFilter
+
+# XXX remove this when cleanup is complete
+Filter = object
+
+
+class FilterStack(object):
+
+ def __init__(self):
+ self.preprocess = []
+ self.stmtprocess = []
+ self.postprocess = []
+ self.split_statements = False
+ self._grouping = False
+
+ def _flatten(self, stream):
+ for token in stream:
+ if token.is_group():
+ for t in self._flatten(token.tokens):
+ yield t
+ else:
+ yield token
+
+ def enable_grouping(self):
+ self._grouping = True
+
+ def full_analyze(self):
+ self.enable_grouping()
+
+ def run(self, sql):
+ stream = lexer.tokenize(sql)
+ # Process token stream
+ if self.preprocess:
+ for filter_ in self.preprocess:
+ stream = filter_.process(self, stream)
+
+ if (self.stmtprocess or self.postprocess or self.split_statements
+ or self._grouping):
+ splitter = StatementFilter()
+ stream = splitter.process(self, stream)
+
+ if self._grouping:
+ def _group(stream):
+ for stmt in stream:
+ grouping.group(stmt)
+ yield stmt
+ stream = _group(stream)
+
+ if self.stmtprocess:
+ def _run(stream):
+ ret = []
+ for stmt in stream:
+ for filter_ in self.stmtprocess:
+ filter_.process(self, stmt)
+ ret.append(stmt)
+ return ret
+ stream = _run(stream)
+
+ if self.postprocess:
+ def _run(stream):
+ for stmt in stream:
+ stmt.tokens = list(self._flatten(stmt.tokens))
+ for filter_ in self.postprocess:
+ stmt = filter_.process(self, stmt)
+ yield stmt
+ stream = _run(stream)
+
+ return stream
+
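
A rough sketch of driving the FilterStack directly instead of going through
the helpers in sqlparse/__init__.py. The filters used here are defined in
sqlparse/filters.py further down in this commit; the SQL is illustrative:

    from sqlparse import engine, filters

    stack = engine.FilterStack()
    stack.preprocess.append(filters.KeywordCaseFilter('upper'))
    stack.enable_grouping()            # also triggers statement splitting

    for stmt in stack.run('select a from b; select c from d;'):
        print unicode(stmt)
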
diff --git a/sqlparse/engine/_grouping.py b/sqlparse/engine/_grouping.py
new file mode 100644
index 0000000..512c590
--- /dev/null
+++ b/sqlparse/engine/_grouping.py
@@ -0,0 +1,499 @@
+# -*- coding: utf-8 -*-
+
+import re
+import types
+
+from sqlparse.engine.filter import TokenFilter
+from sqlparse import tokens as T
+
+class _Base(object):
+
+    __slots__ = ()
+
+ def __unicode__(self):
+        return 'Unknown _Base object'
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def __repr__(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ short = raw[:6]+u'...'
+ else:
+ short = raw
+ short = re.sub('\s+', ' ', short)
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def to_unicode(self):
+ return unicode(self)
+
+ def to_str(self):
+ return str(self)
+
+
+class Token(_Base):
+
+ __slots__ = ('value', 'ttype')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ self.ttype = ttype
+
+ def __unicode__(self):
+ return self.value
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def match(self, ttype, values):
+ if self.ttype is not ttype:
+ return False
+ if isinstance(values, basestring):
+ values = [values]
+ if self.ttype is T.Keyword:
+ return self.value.upper() in [v.upper() for v in values]
+ else:
+ return self.value in values
+
+ def is_group(self):
+ return False
+
+ def is_whitespace(self):
+ return self.ttype and self.ttype is T.Whitespace
+
+
+class _Group(Token):
+
+    __slots__ = ('_tokens',)  # 'tokens' itself is exposed as a property below
+
+ def __init__(self, tokens=None):
+ super(_Group, self).__init__(None, None)
+ if tokens is None:
+ tokens = []
+ self._tokens = tokens
+
+ def _set_tokens(self, tokens):
+ self._tokens = tokens
+ def _get_tokens(self):
+ if type(self._tokens) is not types.TupleType:
+ self._tokens = tuple(self._tokens)
+ return self._tokens
+ tokens = property(fget=_get_tokens, fset=_set_tokens)
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' '*(depth*2)
+ for token in self.tokens:
+ print '%s%r' % (indent, token)
+ if token.is_group():
+ token._pprint_tree(depth+1)
+
+ def __unicode__(self):
+ return u''.join(unicode(t) for t in self.tokens)
+
+ @property
+ def subgroups(self):
+ #return [x for x in self.tokens if isinstance(x, _Group)]
+ for item in self.tokens:
+ if item.is_group():
+ yield item
+
+ def is_group(self):
+ return True
+
+
+class Statement(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Parenthesis(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Where(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class CommentMulti(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class Identifier(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+
+class TypeCast(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+ @property
+ def casted_object(self):
+ return self.tokens[0]
+
+ @property
+ def casted_type(self):
+ return self.tokens[-1]
+
+
+class Alias(_Group):
+ __slots__ = ('value', 'ttype', '_tokens')
+
+ @property
+ def aliased_object(self):
+ return self.tokens[0]
+
+ @property
+ def alias(self):
+ return self.tokens[-1]
+
+
+
+
+# - Filter
+
+class StatementFilter(TokenFilter):
+
+ def __init__(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _reset(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _change_splitlevel(self, ttype, value):
+ # PostgreSQL
+ if (ttype == T.Name.Builtin
+ and value.startswith('$') and value.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ if ttype is not T.Keyword:
+ return 0
+
+ unified = value.upper()
+
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return 0
+ return 0
+
+ if unified == 'END':
+ return -1
+
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+
+ if unified in ('IF', 'FOR') and self._is_create:
+ return 1
+
+ # Default
+ return 0
+
+ def process(self, stack, stream):
+ splitlevel = 0
+ stmt = None
+ consume_ws = False
+ stmt_tokens = []
+ for ttype, value in stream:
+ # Before appending the token
+ if (consume_ws and ttype is not T.Whitespace
+ and ttype is not T.Comment.Single):
+ consume_ws = False
+ stmt.tokens = stmt_tokens
+ yield stmt
+ self._reset()
+ stmt = None
+ splitlevel = 0
+ if stmt is None:
+ stmt = Statement()
+ stmt_tokens = []
+ splitlevel += self._change_splitlevel(ttype, value)
+ # Append the token
+ stmt_tokens.append(Token(ttype, value))
+ # After appending the token
+ if (not splitlevel and ttype is T.Punctuation
+ and value == ';'):
+ consume_ws = True
+ if stmt is not None:
+ stmt.tokens = stmt_tokens
+ yield stmt
+
+
+class GroupFilter(object):
+
+ def process(self, stream):
+ pass
+
+
+class GroupParenthesis(GroupFilter):
+ """Group parenthesis groups."""
+
+ def _finish_group(self, group):
+ start = group[0]
+ end = group[-1]
+ tokens = list(self._process(group[1:-1]))
+ return [start]+tokens+[end]
+
+ def _process(self, stream):
+ group = None
+ depth = 0
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.match(T.Punctuation, '('):
+ if depth == 0:
+ group = []
+ depth += 1
+ if group is not None:
+ group.append(token)
+ if token.match(T.Punctuation, ')'):
+ depth -= 1
+ if depth == 0:
+ yield Parenthesis(self._finish_group(group))
+ group = None
+ continue
+ if group is None:
+ yield token
+
+ def process(self, group):
+ if not isinstance(group, Parenthesis):
+ group.tokens = self._process(group.tokens)
+
+
+class GroupWhere(GroupFilter):
+
+ def _process(self, stream):
+ group = None
+ depth = 0
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.match(T.Keyword, 'WHERE'):
+ if depth == 0:
+ group = []
+ depth += 1
+ # Process conditions here? E.g. "A =|!=|in|is|... B"...
+ elif (token.ttype is T.Keyword
+ and token.value.upper() in ('ORDER', 'GROUP',
+ 'LIMIT', 'UNION')):
+ depth -= 1
+ if depth == 0:
+ yield Where(group)
+ group = None
+ if depth < 0:
+ depth = 0
+ if group is not None:
+ group.append(token)
+ else:
+ yield token
+ if group is not None:
+ yield Where(group)
+
+ def process(self, group):
+ if not isinstance(group, Where):
+ group.tokens = self._process(group.tokens)
+
+
+class GroupMultiComments(GroupFilter):
+    """Group Comment.Multiline tokens and any trailing whitespace up to the
+    first line break.
+    """
+
+ def _process(self, stream):
+ new_tokens = []
+ grp = None
+ consume_ws = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if token.ttype is T.Comment.Multiline:
+ if grp is None:
+ grp = []
+ consume_ws = True
+ grp.append(token)
+ elif consume_ws and token.ttype is not T.Whitespace:
+ yield CommentMulti(grp)
+ grp = None
+ consume_ws = False
+ yield token
+ elif consume_ws:
+ lines = token.value.splitlines(True)
+ grp.append(Token(T.Whitespace, lines[0]))
+ if lines[0].endswith('\n'):
+ yield CommentMulti(grp)
+ grp = None
+ consume_ws = False
+ if lines[1:]:
+ yield Token(T.Whitespace, ''.join(lines[1:]))
+ else:
+ yield token
+
+ def process(self, group):
+ if not isinstance(group, CommentMulti):
+ group.tokens = self._process(group.tokens)
+
+
+## class GroupIdentifier(GroupFilter):
+
+## def _process(self, stream):
+## buff = []
+## expect_dot = False
+## for token in stream:
+## if token.is_group():
+## token.tokens = self._process(token.tokens)
+## if (token.ttype is T.String.Symbol or token.ttype is T.Name
+## and not expect_dot):
+## buff.append(token)
+## expect_dot = True
+## elif expect_dot and token.match(T.Punctuation, '.'):
+## buff.append(token)
+## expect_dot = False
+## else:
+## if expect_dot == False:
+## # something's wrong, it ends with a dot...
+## while buff:
+## yield buff.pop(0)
+## expect_dot = False
+## elif buff:
+## idt = Identifier()
+## idt.tokens = buff
+## yield idt
+## buff = []
+## yield token
+## if buff and expect_dot:
+## idt = Identifier()
+## idt.tokens = buff
+## yield idt
+## buff = []
+## while buff:
+## yield buff.pop(0)
+
+## def process(self, group):
+## if not isinstance(group, Identifier):
+## group.tokens = self._process(group.tokens)
+
+
+class AddTypeCastFilter(GroupFilter):
+
+ def _process(self, stream):
+ buff = []
+ expect_colon = False
+ has_colons = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if ((isinstance(token, Parenthesis)
+ or isinstance(token, Identifier))
+ and not expect_colon):
+ buff.append(token)
+ expect_colon = True
+ elif expect_colon and token.match(T.Punctuation, ':'):
+ buff.append(token)
+ has_colons = True
+ elif (expect_colon
+ and (token.ttype in T.Name
+ or isinstance(token, Identifier))
+ ):
+ if not has_colons:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ else:
+ buff.append(token)
+ grp = TypeCast()
+ grp.tokens = buff
+ buff = []
+ yield grp
+                expect_colon = has_colons = False
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ while buff:
+ yield buff.pop(0)
+
+ def process(self, group):
+ if not isinstance(group, TypeCast):
+ group.tokens = self._process(group.tokens)
+
+
+class AddAliasFilter(GroupFilter):
+
+ def _process(self, stream):
+ buff = []
+ search_alias = False
+ lazy = False
+ for token in stream:
+ if token.is_group():
+ token.tokens = self._process(token.tokens)
+ if search_alias and (isinstance(token, Identifier)
+ or token.ttype in (T.Name,
+ T.String.Symbol)
+ or (lazy and not token.is_whitespace())):
+ buff.append(token)
+ search_alias = lazy = False
+ grp = Alias()
+ grp.tokens = buff
+ buff = []
+ yield grp
+ elif (isinstance(token, (Identifier, TypeCast))
+ or token.ttype in (T.Name, T.String.Symbol)):
+ buff.append(token)
+ search_alias = True
+ elif search_alias and (token.is_whitespace()
+ or token.match(T.Keyword, 'as')):
+ buff.append(token)
+ if token.match(T.Keyword, 'as'):
+ lazy = True
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+ search_alias = False
+ while buff:
+ yield buff.pop(0)
+
+ def process(self, group):
+ if not isinstance(group, Alias):
+ group.tokens = self._process(group.tokens)
+
+
+# GroupIdentifier is commented out above, so it must not be part of the
+# filter chain here (referencing it would raise a NameError on import).
+GROUP_FILTER = (GroupParenthesis(),
+                GroupMultiComments(),
+                GroupWhere(),
+                AddTypeCastFilter(),
+                AddAliasFilter(),
+                )
+
+def group_tokens(group):
+ def _materialize(g):
+ if type(g.tokens) is not types.TupleType:
+ g.tokens = tuple(g.tokens)
+ for sg in g.subgroups:
+ _materialize(sg)
+ for groupfilter in GROUP_FILTER:
+ groupfilter.process(group)
+# _materialize(group)
+# group.tokens = tuple(group.tokens)
+# for subgroup in group.subgroups:
+# group_tokens(subgroup)
diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py
new file mode 100644
index 0000000..146690c
--- /dev/null
+++ b/sqlparse/engine/filter.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+from sqlparse import tokens as T
+from sqlparse.engine.grouping import Statement, Token
+
+
+class TokenFilter(object):
+
+ def __init__(self, **options):
+ self.options = options
+
+ def process(self, stack, stream):
+ """Process token stream."""
+ raise NotImplementedError
+
+
+class StatementFilter(TokenFilter):
+
+ def __init__(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _reset(self):
+ self._in_declare = False
+ self._in_dbldollar = False
+ self._is_create = False
+
+ def _change_splitlevel(self, ttype, value):
+ # PostgreSQL
+ if (ttype == T.Name.Builtin
+ and value.startswith('$') and value.endswith('$')):
+ if self._in_dbldollar:
+ self._in_dbldollar = False
+ return -1
+ else:
+ self._in_dbldollar = True
+ return 1
+ elif self._in_dbldollar:
+ return 0
+
+ # ANSI
+ if ttype is not T.Keyword:
+ return 0
+
+ unified = value.upper()
+
+ if unified == 'DECLARE':
+ self._in_declare = True
+ return 1
+
+ if unified == 'BEGIN':
+ if self._in_declare:
+ return 0
+ return 0
+
+ if unified == 'END':
+            # Should this respect a preceding BEGIN?
+ # In CASE ... WHEN ... END this results in a split level -1.
+ return -1
+
+ if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+ self._is_create = True
+
+ if unified in ('IF', 'FOR') and self._is_create:
+ return 1
+
+ # Default
+ return 0
+
+ def process(self, stack, stream):
+ splitlevel = 0
+ stmt = None
+ consume_ws = False
+ stmt_tokens = []
+ for ttype, value in stream:
+ # Before appending the token
+ if (consume_ws and ttype is not T.Whitespace
+ and ttype is not T.Comment.Single):
+ consume_ws = False
+ stmt.tokens = stmt_tokens
+ yield stmt
+ self._reset()
+ stmt = None
+ splitlevel = 0
+ if stmt is None:
+ stmt = Statement()
+ stmt_tokens = []
+ splitlevel += self._change_splitlevel(ttype, value)
+ # Append the token
+ stmt_tokens.append(Token(ttype, value))
+ # After appending the token
+ if (splitlevel <= 0 and ttype is T.Punctuation
+ and value == ';'):
+ consume_ws = True
+ if stmt is not None:
+ stmt.tokens = stmt_tokens
+ yield stmt
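
An illustrative sketch of the splitter on its own. lexer.tokenize() (the
lexer module appears later in this commit) yields (tokentype, value) pairs;
the stack argument is not used by this filter:

    from sqlparse import lexer
    from sqlparse.engine.filter import StatementFilter

    stream = lexer.tokenize('select 1 from foo; select 2 from bar;')
    for stmt in StatementFilter().process(None, stream):
        print repr(stmt.to_unicode())
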
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
new file mode 100644
index 0000000..433f539
--- /dev/null
+++ b/sqlparse/engine/grouping.py
@@ -0,0 +1,537 @@
+# -*- coding: utf-8 -*-
+
+import itertools
+import re
+import types
+
+from sqlparse import tokens as T
+
+
+class Token(object):
+
+ __slots__ = ('value', 'ttype')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ self.ttype = ttype
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def __repr__(self):
+ short = self._get_repr_value()
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def __unicode__(self):
+ return self.value
+
+ def to_unicode(self):
+ return unicode(self)
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def _get_repr_value(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ short = raw[:6]+u'...'
+ else:
+ short = raw
+ return re.sub('\s+', ' ', short)
+
+ def match(self, ttype, values, regex=False):
+ if self.ttype is not ttype:
+ return False
+ if values is None:
+ return self.ttype is ttype
+ if isinstance(values, basestring):
+ values = [values]
+ if regex:
+ if self.ttype is T.Keyword:
+ values = [re.compile(v, re.IGNORECASE) for v in values]
+ else:
+ values = [re.compile(v) for v in values]
+ for pattern in values:
+ if pattern.search(self.value):
+ return True
+ return False
+ else:
+ if self.ttype is T.Keyword:
+ return self.value.upper() in [v.upper() for v in values]
+ else:
+ return self.value in values
+
+ def is_group(self):
+ return False
+
+ def is_whitespace(self):
+ return self.ttype and self.ttype in T.Whitespace
+
+
+class TokenList(Token):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def __init__(self, tokens=None):
+ if tokens is None:
+ tokens = []
+ self.tokens = tokens
+ Token.__init__(self, None, None)
+
+ def __unicode__(self):
+ return ''.join(unicode(x) for x in self.flatten())
+
+ def __str__(self):
+ return unicode(self).encode('latin-1')
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, max_depth=None, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' '*(depth*2)
+ for token in self.tokens:
+ if token.is_group():
+ pre = ' | '
+ else:
+ pre = ' | '
+ print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(),
+ token._get_repr_value())
+ if (token.is_group() and max_depth is not None
+ and depth < max_depth):
+ token._pprint_tree(max_depth, depth+1)
+
+ def flatten(self):
+ for token in self.tokens:
+ if isinstance(token, TokenList):
+ for item in token.flatten():
+ yield item
+ else:
+ yield token
+
+ def is_group(self):
+ return True
+
+ def get_sublists(self):
+ return [x for x in self.tokens if isinstance(x, TokenList)]
+
+ def token_first(self, ignore_whitespace=True):
+ for token in self.tokens:
+ if ignore_whitespace and token.is_whitespace():
+ continue
+ return token
+ return None
+
+ def token_next_by_instance(self, idx, clss):
+ if type(clss) not in (types.ListType, types.TupleType):
+ clss = (clss,)
+ if type(clss) is not types.TupleType:
+ clss = tuple(clss)
+ for token in self.tokens[idx:]:
+ if isinstance(token, clss):
+ return token
+ return None
+
+ def token_next_by_type(self, idx, ttypes):
+ if not isinstance(ttypes, (types.TupleType, types.ListType)):
+ ttypes = [ttypes]
+ for token in self.tokens[idx:]:
+ if token.ttype in ttypes:
+ return token
+ return None
+
+ def token_next_match(self, idx, ttype, value, regex=False):
+ if type(idx) != types.IntType:
+ idx = self.token_index(idx)
+ for token in self.tokens[idx:]:
+ if token.match(ttype, value, regex):
+ return token
+ return None
+
+ def token_not_matching(self, idx, funcs):
+ for token in self.tokens[idx:]:
+ passed = False
+ for func in funcs:
+ if func(token):
+ passed = True
+ break
+ if not passed:
+ return token
+ return None
+
+ def token_prev(self, idx, skip_ws=True):
+ while idx != 0:
+ idx -= 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_next(self, idx, skip_ws=True):
+ while idx < len(self.tokens)-1:
+ idx += 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_index(self, token):
+ """Return list index of token."""
+ return self.tokens.index(token)
+
+ def tokens_between(self, start, end, exclude_end=False):
+ """Return all tokens between (and including) start and end."""
+ if exclude_end:
+ offset = 0
+ else:
+ offset = 1
+ return self.tokens[self.token_index(start):self.token_index(end)+offset]
+
+ def group_tokens(self, grp_cls, tokens):
+ """Replace tokens by instance of grp_cls."""
+ idx = self.token_index(tokens[0])
+ for t in tokens:
+ self.tokens.remove(t)
+ grp = grp_cls(tokens)
+ self.tokens.insert(idx, grp)
+ return grp
+
+ def insert_before(self, where, token):
+ self.tokens.insert(self.token_index(where), token)
+
+
+class Statement(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_type(self):
+ first_token = self.token_first()
+ if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ return first_token.value.upper()
+ else:
+ return 'UNKNOWN'
+
+
+class Identifier(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def has_alias(self):
+ return self.get_alias() is not None
+
+ def get_alias(self):
+ kw = self.token_next_match(0, T.Keyword, 'AS')
+ if kw is not None:
+ alias = self.token_next(self.token_index(kw))
+ if alias is None:
+ return None
+ else:
+ next_ = self.token_next(0)
+ if next_ is None or not isinstance(next_, Identifier):
+ return None
+ alias = next_
+ if isinstance(alias, Identifier):
+ return alias.get_name()
+ else:
+ return alias.to_unicode()
+
+ def get_name(self):
+ alias = self.get_alias()
+ if alias is not None:
+ return alias
+ return self.get_real_name()
+
+ def get_real_name(self):
+ return self.token_next_by_type(0, T.Name).value
+
+ def get_typecast(self):
+ marker = self.token_next_match(0, T.Punctuation, '::')
+ if marker is None:
+ return None
+ next_ = self.token_next(self.token_index(marker), False)
+ if next_ is None:
+ return None
+ return next_.to_unicode()
+
+
+class IdentifierList(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_identifiers(self):
+ return [x for x in self.tokens if isinstance(x, Identifier)]
+
+
+class Parenthesis(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Assignment(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class If(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class For(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Comparsion(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Comment(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+class Where(TokenList):
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Case(TokenList):
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_cases(self):
+        """Returns a list of 2-tuples (condition, value).
+
+        If an ELSE clause exists, its condition entry is None.
+        """
+ ret = []
+ in_condition = in_value = False
+ for token in self.tokens:
+ if token.match(T.Keyword, 'WHEN'):
+ ret.append(([], []))
+ in_condition = True
+ in_value = False
+ elif token.match(T.Keyword, 'ELSE'):
+ ret.append((None, []))
+ in_condition = False
+ in_value = True
+ elif token.match(T.Keyword, 'THEN'):
+ in_condition = False
+ in_value = True
+ elif token.match(T.Keyword, 'END'):
+ in_condition = False
+ in_value = False
+ if in_condition:
+ ret[-1][0].append(token)
+ elif in_value:
+ ret[-1][1].append(token)
+ return ret
+
+def _group_left_right(tlist, ttype, value, cls,
+ check_right=lambda t: True,
+ include_semicolon=False):
+# [_group_left_right(sgroup, ttype, value, cls, check_right,
+# include_semicolon) for sgroup in tlist.get_sublists()
+# if not isinstance(sgroup, cls)]
+ idx = 0
+ token = tlist.token_next_match(idx, ttype, value)
+ while token:
+ right = tlist.token_next(tlist.token_index(token))
+ left = tlist.token_prev(tlist.token_index(token))
+ if (right is None or not check_right(right)
+ or left is None):
+ token = tlist.token_next_match(tlist.token_index(token)+1,
+ ttype, value)
+ else:
+ if include_semicolon:
+ right = tlist.token_next_match(tlist.token_index(right),
+ T.Punctuation, ';')
+ tokens = tlist.tokens_between(left, right)[1:]
+ if not isinstance(left, cls):
+ new = cls([left])
+ new_idx = tlist.token_index(left)
+ tlist.tokens.remove(left)
+ tlist.tokens.insert(new_idx, new)
+ left = new
+ left.tokens.extend(tokens)
+ for t in tokens:
+ tlist.tokens.remove(t)
+ token = tlist.token_next_match(tlist.token_index(left)+1,
+ ttype, value)
+
+def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
+ cls, include_semicolon=False, recurse=False):
+ def _find_matching(i, tl, stt, sva, ett, eva):
+ depth = 1
+ for t in tl.tokens[i:]:
+ if t.match(stt, sva):
+ depth += 1
+ elif t.match(ett, eva):
+ depth -= 1
+ if depth == 1:
+ return t
+ return None
+ [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
+ cls, include_semicolon) for sgroup in tlist.get_sublists()
+ if recurse]
+ if isinstance(tlist, cls):
+ idx = 1
+ else:
+ idx = 0
+ token = tlist.token_next_match(idx, start_ttype, start_value)
+ while token:
+ tidx = tlist.token_index(token)
+ end = _find_matching(tidx, tlist, start_ttype, start_value,
+ end_ttype, end_value)
+ if end is None:
+ idx = tidx+1
+ else:
+ if include_semicolon:
+ next_ = tlist.token_next(tlist.token_index(end))
+ if next_ and next_.match(T.Punctuation, ';'):
+ end = next_
+ group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+ _group_matching(group, start_ttype, start_value,
+ end_ttype, end_value, cls, include_semicolon)
+ idx = tlist.token_index(group)+1
+ token = tlist.token_next_match(idx, start_ttype, start_value)
+
+def group_if(tlist):
+ _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True)
+
+def group_for(tlist):
+ _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True)
+
+def group_as(tlist):
+ _group_left_right(tlist, T.Keyword, 'AS', Identifier)
+
+def group_assignment(tlist):
+ _group_left_right(tlist, T.Assignment, ':=', Assignment,
+ include_semicolon=True)
+
+def group_comparsion(tlist):
+ _group_left_right(tlist, T.Operator, None, Comparsion)
+
+
+def group_case(tlist):
+ _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, True)
+
+
+def group_identifier(tlist):
+ def _consume_cycle(tl, i):
+ x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'),
+ lambda y: y.ttype in (T.String.Symbol, T.Name)))
+ for t in tl.tokens[i:]:
+ if x.next()(t):
+ yield t
+ else:
+ raise StopIteration
+
+ # bottom up approach: group subgroups first
+ [group_identifier(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Identifier)]
+
+ # real processing
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+ while token:
+ identifier_tokens = [token]+list(
+ _consume_cycle(tlist,
+ tlist.token_index(token)+1))
+ group = tlist.group_tokens(Identifier, identifier_tokens)
+ idx = tlist.token_index(group)+1
+ token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name))
+
+
+def group_identifier_list(tlist):
+ [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, IdentifierList)]
+ idx = 0
+ token = tlist.token_next_by_instance(idx, Identifier)
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_not_matching(tidx+1,
+ [lambda t: isinstance(t, Identifier),
+ lambda t: t.is_whitespace(),
+ lambda t: t.match(T.Punctuation,
+ ',')
+ ])
+ if end is None:
+ idx = tidx + 1
+ else:
+ grp_tokens = tlist.tokens_between(token, end, exclude_end=True)
+ while grp_tokens and (grp_tokens[-1].is_whitespace()
+ or grp_tokens[-1].match(T.Punctuation, ',')):
+ grp_tokens.pop()
+ if len(grp_tokens) <= 1:
+ idx = tidx + 1
+ else:
+ group = tlist.group_tokens(IdentifierList, grp_tokens)
+ idx = tlist.token_index(group)
+ token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_parenthesis(tlist):
+ _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis)
+
+def group_comments(tlist):
+ [group_comments(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Comment)]
+ idx = 0
+ token = tlist.token_next_by_type(idx, T.Comment)
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_not_matching(tidx+1,
+ [lambda t: t.ttype in T.Comment,
+ lambda t: t.is_whitespace()])
+ if end is None:
+ idx = tidx + 1
+ else:
+ eidx = tlist.token_index(end)
+ grp_tokens = tlist.tokens_between(token,
+ tlist.token_prev(eidx, False))
+ group = tlist.group_tokens(Comment, grp_tokens)
+ idx = tlist.token_index(group)
+ token = tlist.token_next_by_type(idx, T.Comment)
+
+def group_where(tlist):
+ [group_where(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Where)]
+ idx = 0
+ token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+ stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION')
+ while token:
+ tidx = tlist.token_index(token)
+ end = tlist.token_next_match(tidx+1, T.Keyword, stopwords)
+ if end is None:
+ end = tlist.tokens[-1]
+ else:
+ end = tlist.tokens[tlist.token_index(end)-1]
+ group = tlist.group_tokens(Where, tlist.tokens_between(token, end))
+ idx = tlist.token_index(group)
+ token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+
+def group_aliased(tlist):
+ [group_aliased(sgroup) for sgroup in tlist.get_sublists()
+ if not isinstance(sgroup, Identifier)]
+ idx = 0
+ token = tlist.token_next_by_instance(idx, Identifier)
+ while token:
+ next_ = tlist.token_next(tlist.token_index(token))
+ if next_ is not None and isinstance(next_, Identifier):
+ grp = tlist.tokens_between(token, next_)[1:]
+ token.tokens.extend(grp)
+ for t in grp:
+ tlist.tokens.remove(t)
+ idx = tlist.token_index(token)+1
+ token = tlist.token_next_by_instance(idx, Identifier)
+
+
+def group_typecasts(tlist):
+ _group_left_right(tlist, T.Punctuation, '::', Identifier)
+
+
+def group(tlist):
+ for func in [group_parenthesis,
+ group_comments,
+ group_where,
+ group_case,
+ group_identifier,
+ group_typecasts,
+ group_as,
+ group_aliased,
+ group_assignment,
+ group_comparsion,
+ group_identifier_list,
+ group_if,
+ group_for,]:
+ func(tlist)
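
A short sketch of running the grouping pass on a single statement and
inspecting the resulting token tree (the statement text is illustrative):

    from sqlparse import lexer
    from sqlparse.engine import grouping
    from sqlparse.engine.filter import StatementFilter

    stream = lexer.tokenize('select foo.id as i from foo where foo.id = 1')
    stmt = list(StatementFilter().process(None, stream))[0]
    grouping.group(stmt)

    stmt._pprint_tree(max_depth=3)
    where = stmt.token_next_by_instance(0, grouping.Where)
    print where.to_unicode()
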
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
new file mode 100644
index 0000000..695b298
--- /dev/null
+++ b/sqlparse/filters.py
@@ -0,0 +1,432 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from sqlparse.engine import grouping
+from sqlparse import tokens as T
+
+
+class Filter(object):
+
+ def process(self, *args):
+ raise NotImplementedError
+
+
+class TokenFilter(Filter):
+
+ def process(self, stack, stream):
+ raise NotImplementedError
+
+
+# FIXME: Should be removed
+def rstrip(stream):
+ buff = []
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ # assuming there's only one \n in value
+ before, rest = token.value.split('\n', 1)
+ token.value = '\n%s' % rest
+ buff = []
+ yield token
+ elif token.is_whitespace():
+ buff.append(token)
+ elif token.is_group():
+ token.tokens = list(rstrip(token.tokens))
+ # process group and look if it starts with a nl
+ if token.tokens and token.tokens[0].is_whitespace():
+ before, rest = token.tokens[0].value.split('\n', 1)
+ token.tokens[0].value = '\n%s' % rest
+ buff = []
+ while buff:
+ yield buff.pop(0)
+ yield token
+ else:
+ while buff:
+ yield buff.pop(0)
+ yield token
+
+
+# --------------------------
+# token process
+
+class _CaseFilter(TokenFilter):
+
+ ttype = None
+
+ def __init__(self, case=None):
+ if case is None:
+ case = 'upper'
+ assert case in ['lower', 'upper', 'capitalize']
+ self.convert = getattr(unicode, case)
+
+ def process(self, stack, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype:
+ value = self.convert(value)
+ yield ttype, value
+
+
+class KeywordCaseFilter(_CaseFilter):
+ ttype = T.Keyword
+
+
+class IdentifierCaseFilter(_CaseFilter):
+ ttype = (T.Name, T.String.Symbol)
+
+
+# ----------------------
+# statement process
+
+class StripCommentsFilter(Filter):
+
+ def _process(self, tlist):
+ idx = 0
+ clss = set([x.__class__ for x in tlist.tokens])
+ while grouping.Comment in clss:
+ token = tlist.token_next_by_instance(0, grouping.Comment)
+ tidx = tlist.token_index(token)
+ prev = tlist.token_prev(tidx, False)
+ next_ = tlist.token_next(tidx, False)
+            # Replace by whitespace if prev and next exist and if they're not
+            # whitespace. This doesn't apply if prev or next is a parenthesis.
+ if (prev is not None and next_ is not None
+ and not prev.is_whitespace() and not next_.is_whitespace()
+ and not (prev.match(T.Punctuation, '(')
+ or next_.match(T.Punctuation, ')'))):
+ tlist.tokens[tidx] = grouping.Token(T.Whitespace, ' ')
+ else:
+ tlist.tokens.pop(tidx)
+ clss = set([x.__class__ for x in tlist.tokens])
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._process(stmt)
+
+
+class StripWhitespaceFilter(Filter):
+
+ def _stripws(self, tlist):
+ func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._stripws_default)
+ func(tlist)
+
+ def _stripws_default(self, tlist):
+ last_was_ws = False
+ for token in tlist.tokens:
+ if token.is_whitespace():
+ if last_was_ws:
+ token.value = ''
+ else:
+ token.value = ' '
+ last_was_ws = token.is_whitespace()
+
+ def _stripws_parenthesis(self, tlist):
+ if tlist.tokens[1].is_whitespace():
+ tlist.tokens.pop(1)
+ if tlist.tokens[-2].is_whitespace():
+ tlist.tokens.pop(-2)
+ self._stripws_default(tlist)
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._stripws(stmt)
+ if stmt.tokens[-1].is_whitespace():
+ stmt.tokens.pop(-1)
+
+
+class ReindentFilter(Filter):
+
+ def __init__(self, width=2, char=' ', line_width=None):
+ self.width = width
+ self.char = char
+ self.indent = 0
+ self.offset = 0
+ self.line_width = line_width
+ self._curr_stmt = None
+ self._last_stmt = None
+
+ def _get_offset(self, token):
+ all_ = list(self._curr_stmt.flatten())
+ idx = all_.index(token)
+ raw = ''.join(unicode(x) for x in all_[:idx+1])
+ line = raw.splitlines()[-1]
+ # Now take current offset into account and return relative offset.
+ full_offset = len(line)-(len(self.char*(self.width*self.indent)))
+ return full_offset - self.offset
+
+ def nl(self):
+ # TODO: newline character should be configurable
+ ws = '\n'+(self.char*((self.indent*self.width)+self.offset))
+ return grouping.Token(T.Whitespace, ws)
+
+ def _split_kwds(self, tlist):
+ split_words = ('FROM', 'JOIN$', 'AND', 'OR',
+ 'GROUP', 'ORDER', 'UNION', 'VALUES')
+ idx = 0
+ token = tlist.token_next_match(idx, T.Keyword, split_words,
+ regex=True)
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ offset = 1
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ offset += 1
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_match(tlist.token_index(nl)+offset,
+ T.Keyword, split_words, regex=True)
+
+ def _split_statements(self, tlist):
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ # only break if it's not the first token
+ if prev:
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_by_type(tlist.token_index(token)+1,
+ (T.Keyword.DDL, T.Keyword.DML))
+
+ def _process(self, tlist):
+ func_name = '_process_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._process_default)
+ func(tlist)
+
+ def _process_where(self, tlist):
+ token = tlist.token_next_match(0, T.Keyword, 'WHERE')
+ tlist.insert_before(token, self.nl())
+ self.indent += 1
+ self._process_default(tlist)
+ self.indent -= 1
+
+ def _process_parenthesis(self, tlist):
+ first = tlist.token_next(0)
+ indented = False
+ if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ self.indent += 1
+ tlist.tokens.insert(0, self.nl())
+ indented = True
+ num_offset = self._get_offset(tlist.token_next_match(0,
+ T.Punctuation, '('))
+ self.offset += num_offset
+ self._process_default(tlist, stmts=not indented)
+ if indented:
+ self.indent -= 1
+ self.offset -= num_offset
+
+ def _process_identifierlist(self, tlist):
+ identifiers = tlist.get_identifiers()
+ if len(identifiers) > 1:
+ first = list(identifiers[0].flatten())[0]
+ num_offset = self._get_offset(first)-len(first.value)
+ self.offset += num_offset
+ for token in identifiers[1:]:
+ tlist.insert_before(token, self.nl())
+ self.offset -= num_offset
+ self._process_default(tlist)
+
+ def _process_case(self, tlist):
+ cases = tlist.get_cases()
+ is_first = True
+ num_offset = None
+ case = tlist.tokens[0]
+ outer_offset = self._get_offset(case)-len(case.value)
+ self.offset += outer_offset
+ for cond, value in tlist.get_cases():
+ if is_first:
+ is_first = False
+ num_offset = self._get_offset(cond[0])-len(cond[0].value)
+ self.offset += num_offset
+ continue
+ if cond is None:
+ token = value[0]
+ else:
+ token = cond[0]
+ tlist.insert_before(token, self.nl())
+ # Line breaks on group level are done. Now let's add an offset of
+ # 5 (=length of "when", "then", "else") and process subgroups.
+ self.offset += 5
+ self._process_default(tlist)
+ self.offset -= 5
+ if num_offset is not None:
+ self.offset -= num_offset
+ end = tlist.token_next_match(0, T.Keyword, 'END')
+ tlist.insert_before(end, self.nl())
+ self.offset -= outer_offset
+
+ def _process_default(self, tlist, stmts=True, kwds=True):
+ if stmts:
+ self._split_statements(tlist)
+ if kwds:
+ self._split_kwds(tlist)
+ [self._process(sgroup) for sgroup in tlist.get_sublists()]
+
+ def process(self, stack, stmt):
+ if isinstance(stmt, grouping.Statement):
+ self._curr_stmt = stmt
+ self._process(stmt)
+ if isinstance(stmt, grouping.Statement):
+ if self._last_stmt is not None:
+ if self._last_stmt.to_unicode().endswith('\n'):
+ nl = '\n'
+ else:
+ nl = '\n\n'
+ stmt.tokens.insert(0,
+ grouping.Token(T.Whitespace, nl))
+ if self._last_stmt != stmt:
+ self._last_stmt = stmt
+
+
+# FIXME: Doesn't work ;)
+class RightMarginFilter(Filter):
+
+ keep_together = (
+# grouping.TypeCast, grouping.Identifier, grouping.Alias,
+ )
+
+ def __init__(self, width=79):
+ self.width = width
+ self.line = ''
+
+ def _process(self, stack, group, stream):
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ if token.value.endswith('\n'):
+ self.line = ''
+ else:
+ self.line = token.value.splitlines()[-1]
+ elif (token.is_group()
+ and not token.__class__ in self.keep_together):
+ token.tokens = self._process(stack, token, token.tokens)
+ else:
+ val = token.to_unicode()
+ if len(self.line) + len(val) > self.width:
+ match = re.search('^ +', self.line)
+ if match is not None:
+ indent = match.group()
+ else:
+ indent = ''
+ yield grouping.Token(T.Whitespace, '\n%s' % indent)
+ self.line = indent
+ self.line += val
+ yield token
+
+ def process(self, stack, group):
+ return
+ group.tokens = self._process(stack, group, group.tokens)
+
+
+# ---------------------------
+# postprocess
+
+class SerializerUnicode(Filter):
+
+ def process(self, stack, stmt):
+ raw = stmt.to_unicode()
+ add_nl = raw.endswith('\n')
+ res = '\n'.join(line.rstrip() for line in raw.splitlines())
+ if add_nl:
+ res += '\n'
+ return res
+
+
+class OutputPythonFilter(Filter):
+
+ def __init__(self, varname='sql'):
+ self.varname = varname
+ self.cnt = 0
+
+ def _process(self, stream, varname, count, has_nl):
+ if count > 1:
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield grouping.Token(T.Operator, '(')
+ yield grouping.Token(T.Text, "'")
+ cnt = 0
+ for token in stream:
+ cnt += 1
+ if token.is_whitespace() and '\n' in token.value:
+ if cnt == 1:
+ continue
+ after_lb = token.value.split('\n', 1)[1]
+ yield grouping.Token(T.Text, "'")
+ yield grouping.Token(T.Whitespace, '\n')
+ for i in range(len(varname)+4):
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, "'")
+                if after_lb:  # it's the indentation
+ yield grouping.Token(T.Whitespace, after_lb)
+ continue
+ elif token.value and "'" in token.value:
+ token.value = token.value.replace("'", "\\'")
+ yield grouping.Token(T.Text, token.value or '')
+ yield grouping.Token(T.Text, "'")
+ if has_nl:
+ yield grouping.Token(T.Operator, ')')
+
+ def process(self, stack, stmt):
+ self.cnt += 1
+ if self.cnt > 1:
+ varname = '%s%d' % (self.varname, self.cnt)
+ else:
+ varname = self.varname
+ has_nl = len(stmt.to_unicode().strip().splitlines()) > 1
+ stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl)
+ return stmt
+
+
+class OutputPHPFilter(Filter):
+
+ def __init__(self, varname='sql'):
+ self.varname = '$%s' % varname
+ self.count = 0
+
+ def _process(self, stream, varname):
+ if self.count > 1:
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, '"')
+ cnt = 0
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ cnt += 1
+ if cnt == 1:
+ continue
+ after_lb = token.value.split('\n', 1)[1]
+ yield grouping.Token(T.Text, '"')
+ yield grouping.Token(T.Operator, ';')
+ yield grouping.Token(T.Whitespace, '\n')
+ yield grouping.Token(T.Name, varname)
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Punctuation, '.')
+ yield grouping.Token(T.Operator, '=')
+ yield grouping.Token(T.Whitespace, ' ')
+ yield grouping.Token(T.Text, '"')
+ if after_lb:
+ yield grouping.Token(T.Text, after_lb)
+ continue
+ elif '"' in token.value:
+ token.value = token.value.replace('"', '\\"')
+ yield grouping.Token(T.Text, token.value)
+ yield grouping.Token(T.Text, '"')
+ yield grouping.Token(T.Punctuation, ';')
+
+ def process(self, stack, stmt):
+ self.count += 1
+ if self.count > 1:
+ varname = '%s%d' % (self.varname, self.count)
+ else:
+ varname = self.varname
+ stmt.tokens = tuple(self._process(stmt.tokens, varname))
+ return stmt
+
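
A sketch of wiring the statement-level filters above into a FilterStack by
hand; build_filter_stack() in the formatter module below does the same thing
from an options dictionary:

    from sqlparse import engine, filters

    stack = engine.FilterStack()
    stack.enable_grouping()
    stack.stmtprocess.append(filters.StripWhitespaceFilter())
    stack.stmtprocess.append(filters.ReindentFilter(char=' ', width=4))
    stack.postprocess.append(filters.SerializerUnicode())

    sql = 'select id, name from foo where id = 1 order by name'
    print ''.join(stack.run(sql))
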
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
new file mode 100644
index 0000000..9d443ca
--- /dev/null
+++ b/sqlparse/formatter.py
@@ -0,0 +1,163 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL formatter"""
+
+import logging
+
+from sqlparse import SQLParseError
+from sqlparse import filters
+
+
+def validate_options(options):
+ """Validates options."""
+ kwcase = options.get('keyword_case', None)
+ if kwcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
+
+ idcase = options.get('identifier_case', None)
+ if idcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
+
+ ofrmt = options.get('output_format', None)
+ if ofrmt not in [None, 'sql', 'python', 'php']:
+ raise SQLParseError('Unknown output format: %r' % ofrmt)
+
+ strip_comments = options.get('strip_comments', False)
+ if strip_comments not in [True, False]:
+ raise SQLParseError('Invalid value for strip_comments: %r'
+ % strip_comments)
+
+ strip_ws = options.get('strip_whitespace', False)
+ if strip_ws not in [True, False]:
+ raise SQLParseError('Invalid value for strip_whitespace: %r'
+ % strip_ws)
+
+ reindent = options.get('reindent', False)
+ if reindent not in [True, False]:
+ raise SQLParseError('Invalid value for reindent: %r'
+ % reindent)
+ elif reindent:
+ options['strip_whitespace'] = True
+ indent_tabs = options.get('indent_tabs', False)
+ if indent_tabs not in [True, False]:
+        raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
+ elif indent_tabs:
+ options['indent_char'] = '\t'
+ else:
+ options['indent_char'] = ' '
+ indent_width = options.get('indent_width', 2)
+ try:
+ indent_width = int(indent_width)
+ except (TypeError, ValueError):
+ raise SQLParseError('indent_width requires an integer')
+ if indent_width < 1:
+        raise SQLParseError('indent_width requires a positive integer')
+ options['indent_width'] = indent_width
+
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ try:
+ right_margin = int(right_margin)
+ except (TypeError, ValueError):
+ raise SQLParseError('right_margin requires an integer')
+ if right_margin < 10:
+ raise SQLParseError('right_margin requires an integer > 10')
+ options['right_margin'] = right_margin
+
+ return options
+
+
+def build_filter_stack(stack, options):
+    """Set up and return a filter stack.
+
+    Args:
+      stack: :class:`~sqlparse.engine.FilterStack` instance
+      options: Dictionary with options validated by validate_options.
+    """
+ # Token filter
+ if 'keyword_case' in options:
+ stack.preprocess.append(
+ filters.KeywordCaseFilter(options['keyword_case']))
+
+ if 'identifier_case' in options:
+ stack.preprocess.append(
+ filters.IdentifierCaseFilter(options['identifier_case']))
+
+ # After grouping
+ if options.get('strip_comments', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripCommentsFilter())
+
+ if (options.get('strip_whitespace', False)
+ or options.get('reindent', False)):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+ if options.get('reindent', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.ReindentFilter(char=options['indent_char'],
+ width=options['indent_width']))
+
+ if options.get('right_margin', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.RightMarginFilter(width=options['right_margin']))
+
+ # Serializer
+ if options.get('output_format'):
+ frmt = options['output_format']
+ if frmt.lower() == 'php':
+ fltr = filters.OutputPHPFilter()
+ elif frmt.lower() == 'python':
+ fltr = filters.OutputPythonFilter()
+ else:
+ fltr = None
+ if fltr is not None:
+ stack.postprocess.append(fltr)
+
+ return stack
+
+
+def format(statement, **options):
+    # FIXME: Legacy variant from an earlier lexer/filter based design.  It
+    # references a Lexer class that is not imported here and several filters
+    # (GroupFilter, IndentFilter, LTrimFilter, UngroupFilter) that are not
+    # provided by sqlparse.filters; sqlparse.format() uses validate_options()
+    # and build_filter_stack() above instead.
+    import filters
+ logging.info('OPTIONS %r', options)
+ lexer = Lexer()
+# lexer.add_filter('whitespace')
+ lexer.add_filter(filters.GroupFilter())
+ if options.get('reindent', False):
+ lexer.add_filter(filters.StripWhitespaceFilter())
+ lexer.add_filter(filters.IndentFilter(
+ n_indents=options.get('n_indents', 2)))
+ if options.get('ltrim', False):
+ lexer.add_filter(filters.LTrimFilter())
+ keyword_case = options.get('keyword_case', None)
+ if keyword_case is not None:
+ assert keyword_case in ('lower', 'upper', 'capitalize')
+ lexer.add_filter(filters.KeywordCaseFilter(case=keyword_case))
+ identifier_case = options.get('identifier_case', None)
+ if identifier_case is not None:
+ assert identifier_case in ('lower', 'upper', 'capitalize')
+ lexer.add_filter(filters.IdentifierCaseFilter(case=identifier_case))
+ if options.get('strip_comments', False):
+ lexer.add_filter(filters.StripCommentsFilter())
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ right_margin = int(right_margin)
+ assert right_margin > 0
+ lexer.add_filter(filters.RightMarginFilter(margin=right_margin))
+ lexer.add_filter(filters.UngroupFilter())
+ if options.get('output_format', None):
+ ofrmt = options['output_format']
+ assert ofrmt in ('sql', 'python', 'php')
+ if ofrmt == 'python':
+ lexer.add_filter(filters.OutputPythonFilter())
+ elif ofrmt == 'php':
+ lexer.add_filter(filters.OutputPHPFilter())
+ tokens = []
+ for ttype, value in lexer.get_tokens(unicode(statement)):
+ tokens.append((ttype, value))
+ return statement.__class__(tokens)
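
The validate_options()/build_filter_stack() pair above is what
sqlparse.format() relies on; a direct usage sketch with illustrative options:

    from sqlparse import engine, filters, formatter

    options = formatter.validate_options({'keyword_case': 'upper',
                                          'reindent': True,
                                          'strip_comments': True})
    stack = formatter.build_filter_stack(engine.FilterStack(), options)
    stack.postprocess.append(filters.SerializerUnicode())

    sql = 'select a, b from t /* comment */ where a = 1'
    print ''.join(stack.run(sql))
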
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
new file mode 100644
index 0000000..3f0632e
--- /dev/null
+++ b/sqlparse/keywords.py
@@ -0,0 +1,589 @@
+from sqlparse.tokens import *
+
+KEYWORDS = {
+ 'ABORT': Keyword,
+ 'ABS': Keyword,
+ 'ABSOLUTE': Keyword,
+ 'ACCESS': Keyword,
+ 'ADA': Keyword,
+ 'ADD': Keyword,
+ 'ADMIN': Keyword,
+ 'AFTER': Keyword,
+ 'AGGREGATE': Keyword,
+ 'ALIAS': Keyword,
+ 'ALL': Keyword,
+ 'ALLOCATE': Keyword,
+ 'ANALYSE': Keyword,
+ 'ANALYZE': Keyword,
+ 'AND': Keyword,
+ 'ANY': Keyword,
+ 'ARE': Keyword,
+ 'AS': Keyword,
+ 'ASC': Keyword,
+ 'ASENSITIVE': Keyword,
+ 'ASSERTION': Keyword,
+ 'ASSIGNMENT': Keyword,
+ 'ASYMMETRIC': Keyword,
+ 'AT': Keyword,
+ 'ATOMIC': Keyword,
+ 'AUTHORIZATION': Keyword,
+ 'AVG': Keyword,
+
+ 'BACKWARD': Keyword,
+ 'BEFORE': Keyword,
+ 'BEGIN': Keyword,
+ 'BETWEEN': Keyword,
+ 'BITVAR': Keyword,
+ 'BIT_LENGTH': Keyword,
+ 'BOTH': Keyword,
+ 'BREADTH': Keyword,
+ 'BY': Keyword,
+
+ 'C': Keyword,
+ 'CACHE': Keyword,
+ 'CALL': Keyword,
+ 'CALLED': Keyword,
+ 'CARDINALITY': Keyword,
+ 'CASCADE': Keyword,
+ 'CASCADED': Keyword,
+ 'CASE': Keyword,
+ 'CAST': Keyword,
+ 'CATALOG': Keyword,
+ 'CATALOG_NAME': Keyword,
+ 'CHAIN': Keyword,
+ 'CHARACTERISTICS': Keyword,
+ 'CHARACTER_LENGTH': Keyword,
+ 'CHARACTER_SET_CATALOG': Keyword,
+ 'CHARACTER_SET_NAME': Keyword,
+ 'CHARACTER_SET_SCHEMA': Keyword,
+ 'CHAR_LENGTH': Keyword,
+ 'CHECK': Keyword,
+ 'CHECKED': Keyword,
+ 'CHECKPOINT': Keyword,
+ 'CLASS': Keyword,
+ 'CLASS_ORIGIN': Keyword,
+ 'CLOB': Keyword,
+ 'CLOSE': Keyword,
+ 'CLUSTER': Keyword,
+    'COALESCE': Keyword,
+ 'COBOL': Keyword,
+ 'COLLATE': Keyword,
+ 'COLLATION': Keyword,
+ 'COLLATION_CATALOG': Keyword,
+ 'COLLATION_NAME': Keyword,
+ 'COLLATION_SCHEMA': Keyword,
+ 'COLUMN': Keyword,
+ 'COLUMN_NAME': Keyword,
+ 'COMMAND_FUNCTION': Keyword,
+ 'COMMAND_FUNCTION_CODE': Keyword,
+ 'COMMENT': Keyword,
+ 'COMMIT': Keyword,
+ 'COMMITTED': Keyword,
+ 'COMPLETION': Keyword,
+ 'CONDITION_NUMBER': Keyword,
+ 'CONNECT': Keyword,
+ 'CONNECTION': Keyword,
+ 'CONNECTION_NAME': Keyword,
+ 'CONSTRAINT': Keyword,
+ 'CONSTRAINTS': Keyword,
+ 'CONSTRAINT_CATALOG': Keyword,
+ 'CONSTRAINT_NAME': Keyword,
+ 'CONSTRAINT_SCHEMA': Keyword,
+ 'CONSTRUCTOR': Keyword,
+ 'CONTAINS': Keyword,
+ 'CONTINUE': Keyword,
+ 'CONVERSION': Keyword,
+ 'CONVERT': Keyword,
+ 'COPY': Keyword,
+    'CORRESPONDING': Keyword,
+ 'COUNT': Keyword,
+ 'CREATEDB': Keyword,
+ 'CREATEUSER': Keyword,
+ 'CROSS': Keyword,
+ 'CUBE': Keyword,
+ 'CURRENT': Keyword,
+ 'CURRENT_DATE': Keyword,
+ 'CURRENT_PATH': Keyword,
+ 'CURRENT_ROLE': Keyword,
+ 'CURRENT_TIME': Keyword,
+ 'CURRENT_TIMESTAMP': Keyword,
+ 'CURRENT_USER': Keyword,
+ 'CURSOR': Keyword,
+ 'CURSOR_NAME': Keyword,
+ 'CYCLE': Keyword,
+
+ 'DATA': Keyword,
+ 'DATABASE': Keyword,
+ 'DATETIME_INTERVAL_CODE': Keyword,
+ 'DATETIME_INTERVAL_PRECISION': Keyword,
+ 'DAY': Keyword,
+ 'DEALLOCATE': Keyword,
+ 'DECLARE': Keyword,
+ 'DEFAULT': Keyword,
+ 'DEFAULTS': Keyword,
+ 'DEFERRABLE': Keyword,
+ 'DEFERRED': Keyword,
+ 'DEFINED': Keyword,
+ 'DEFINER': Keyword,
+ 'DELIMITER': Keyword,
+ 'DELIMITERS': Keyword,
+ 'DEREF': Keyword,
+ 'DESC': Keyword,
+ 'DESCRIBE': Keyword,
+ 'DESCRIPTOR': Keyword,
+ 'DESTROY': Keyword,
+ 'DESTRUCTOR': Keyword,
+ 'DETERMINISTIC': Keyword,
+ 'DIAGNOSTICS': Keyword,
+ 'DICTIONARY': Keyword,
+ 'DISCONNECT': Keyword,
+ 'DISPATCH': Keyword,
+ 'DISTINCT': Keyword,
+ 'DO': Keyword,
+ 'DOMAIN': Keyword,
+ 'DYNAMIC': Keyword,
+ 'DYNAMIC_FUNCTION': Keyword,
+ 'DYNAMIC_FUNCTION_CODE': Keyword,
+
+ 'EACH': Keyword,
+ 'ELSE': Keyword,
+ 'ENCODING': Keyword,
+ 'ENCRYPTED': Keyword,
+ 'END': Keyword,
+ 'END-EXEC': Keyword,
+ 'EQUALS': Keyword,
+ 'ESCAPE': Keyword,
+ 'EVERY': Keyword,
+ 'EXCEPT': Keyword,
+    'EXCEPTION': Keyword,
+ 'EXCLUDING': Keyword,
+ 'EXCLUSIVE': Keyword,
+ 'EXEC': Keyword,
+ 'EXECUTE': Keyword,
+ 'EXISTING': Keyword,
+ 'EXISTS': Keyword,
+ 'EXTERNAL': Keyword,
+ 'EXTRACT': Keyword,
+
+ 'FALSE': Keyword,
+ 'FETCH': Keyword,
+ 'FINAL': Keyword,
+ 'FIRST': Keyword,
+ 'FOR': Keyword,
+ 'FORCE': Keyword,
+ 'FOREIGN': Keyword,
+ 'FORTRAN': Keyword,
+ 'FORWARD': Keyword,
+ 'FOUND': Keyword,
+ 'FREE': Keyword,
+ 'FREEZE': Keyword,
+ 'FROM': Keyword,
+ 'FULL': Keyword,
+ 'FUNCTION': Keyword,
+
+ 'G': Keyword,
+ 'GENERAL': Keyword,
+ 'GENERATED': Keyword,
+ 'GET': Keyword,
+ 'GLOBAL': Keyword,
+ 'GO': Keyword,
+ 'GOTO': Keyword,
+ 'GRANT': Keyword,
+ 'GRANTED': Keyword,
+ 'GROUP': Keyword,
+ 'GROUPING': Keyword,
+
+ 'HANDLER': Keyword,
+ 'HAVING': Keyword,
+ 'HIERARCHY': Keyword,
+ 'HOLD': Keyword,
+ 'HOST': Keyword,
+
+ 'IDENTITY': Keyword,
+ 'IF': Keyword,
+ 'IGNORE': Keyword,
+ 'ILIKE': Keyword,
+ 'IMMEDIATE': Keyword,
+ 'IMMUTABLE': Keyword,
+ 'IMPLEMENTATION': Keyword,
+ 'IMPLICIT': Keyword,
+ 'IN': Keyword,
+ 'INCLUDING': Keyword,
+ 'INCREMENT': Keyword,
+ 'INDEX': Keyword,
+ 'INDICATOR': Keyword,
+ 'INFIX': Keyword,
+ 'INHERITS': Keyword,
+ 'INITIALIZE': Keyword,
+ 'INITIALLY': Keyword,
+ 'INNER': Keyword,
+ 'INOUT': Keyword,
+ 'INPUT': Keyword,
+ 'INSENSITIVE': Keyword,
+ 'INSTANTIABLE': Keyword,
+ 'INSTEAD': Keyword,
+ 'INTERSECT': Keyword,
+ 'INTO': Keyword,
+ 'INVOKER': Keyword,
+ 'IS': Keyword,
+ 'ISNULL': Keyword,
+ 'ISOLATION': Keyword,
+ 'ITERATE': Keyword,
+
+ 'JOIN': Keyword,
+
+ 'K': Keyword,
+ 'KEY': Keyword,
+ 'KEY_MEMBER': Keyword,
+ 'KEY_TYPE': Keyword,
+
+ 'LANCOMPILER': Keyword,
+ 'LANGUAGE': Keyword,
+ 'LARGE': Keyword,
+ 'LAST': Keyword,
+ 'LATERAL': Keyword,
+ 'LEADING': Keyword,
+ 'LEFT': Keyword,
+ 'LENGTH': Keyword,
+ 'LESS': Keyword,
+ 'LEVEL': Keyword,
+ 'LIKE': Keyword,
+ 'LIMIT': Keyword,
+ 'LISTEN': Keyword,
+ 'LOAD': Keyword,
+ 'LOCAL': Keyword,
+ 'LOCALTIME': Keyword,
+ 'LOCALTIMESTAMP': Keyword,
+ 'LOCATION': Keyword,
+ 'LOCATOR': Keyword,
+ 'LOCK': Keyword,
+ 'LOWER': Keyword,
+
+ 'M': Keyword,
+ 'MAP': Keyword,
+ 'MATCH': Keyword,
+ 'MAX': Keyword,
+ 'MAXVALUE': Keyword,
+ 'MESSAGE_LENGTH': Keyword,
+ 'MESSAGE_OCTET_LENGTH': Keyword,
+ 'MESSAGE_TEXT': Keyword,
+ 'METHOD': Keyword,
+ 'MIN': Keyword,
+ 'MINUTE': Keyword,
+ 'MINVALUE': Keyword,
+ 'MOD': Keyword,
+ 'MODE': Keyword,
+ 'MODIFIES': Keyword,
+ 'MODIFY': Keyword,
+ 'MONTH': Keyword,
+ 'MORE': Keyword,
+ 'MOVE': Keyword,
+ 'MUMPS': Keyword,
+
+ 'NAMES': Keyword,
+ 'NATIONAL': Keyword,
+ 'NATURAL': Keyword,
+ 'NCHAR': Keyword,
+ 'NCLOB': Keyword,
+ 'NEW': Keyword,
+ 'NEXT': Keyword,
+ 'NO': Keyword,
+ 'NOCREATEDB': Keyword,
+ 'NOCREATEUSER': Keyword,
+ 'NONE': Keyword,
+ 'NOT': Keyword,
+ 'NOTHING': Keyword,
+ 'NOTIFY': Keyword,
+ 'NOTNULL': Keyword,
+ 'NULL': Keyword,
+ 'NULLABLE': Keyword,
+ 'NULLIF': Keyword,
+
+ 'OBJECT': Keyword,
+ 'OCTET_LENGTH': Keyword,
+ 'OF': Keyword,
+ 'OFF': Keyword,
+ 'OFFSET': Keyword,
+ 'OIDS': Keyword,
+ 'OLD': Keyword,
+ 'ON': Keyword,
+ 'ONLY': Keyword,
+ 'OPEN': Keyword,
+ 'OPERATION': Keyword,
+ 'OPERATOR': Keyword,
+ 'OPTION': Keyword,
+ 'OPTIONS': Keyword,
+ 'OR': Keyword,
+ 'ORDER': Keyword,
+ 'ORDINALITY': Keyword,
+ 'OUT': Keyword,
+ 'OUTER': Keyword,
+ 'OUTPUT': Keyword,
+ 'OVERLAPS': Keyword,
+ 'OVERLAY': Keyword,
+ 'OVERRIDING': Keyword,
+ 'OWNER': Keyword,
+
+ 'PAD': Keyword,
+ 'PARAMETER': Keyword,
+ 'PARAMETERS': Keyword,
+ 'PARAMETER_MODE': Keyword,
+ 'PARAMETER_NAME': Keyword,
+ 'PARAMETER_ORDINAL_POSITION': Keyword,
+ 'PARAMETER_SPECIFIC_CATALOG': Keyword,
+ 'PARAMETER_SPECIFIC_NAME': Keyword,
+ 'PARAMETER_SPECIFIC_SCHEMA': Keyword,
+ 'PARTIAL': Keyword,
+ 'PASCAL': Keyword,
+ 'PENDANT': Keyword,
+ 'PLACING': Keyword,
+ 'PLI': Keyword,
+ 'POSITION': Keyword,
+ 'POSTFIX': Keyword,
+ 'PRECISION': Keyword,
+ 'PREFIX': Keyword,
+ 'PREORDER': Keyword,
+ 'PREPARE': Keyword,
+ 'PRESERVE': Keyword,
+ 'PRIMARY': Keyword,
+ 'PRIOR': Keyword,
+ 'PRIVILEGES': Keyword,
+ 'PROCEDURAL': Keyword,
+ 'PROCEDURE': Keyword,
+ 'PUBLIC': Keyword,
+
+ 'RAISE': Keyword,
+ 'READ': Keyword,
+ 'READS': Keyword,
+ 'RECHECK': Keyword,
+ 'RECURSIVE': Keyword,
+ 'REF': Keyword,
+ 'REFERENCES': Keyword,
+ 'REFERENCING': Keyword,
+ 'REINDEX': Keyword,
+ 'RELATIVE': Keyword,
+ 'RENAME': Keyword,
+ 'REPEATABLE': Keyword,
+ 'REPLACE': Keyword,
+ 'RESET': Keyword,
+ 'RESTART': Keyword,
+ 'RESTRICT': Keyword,
+ 'RESULT': Keyword,
+ 'RETURN': Keyword,
+ 'RETURNED_LENGTH': Keyword,
+ 'RETURNED_OCTET_LENGTH': Keyword,
+ 'RETURNED_SQLSTATE': Keyword,
+ 'RETURNS': Keyword,
+ 'REVOKE': Keyword,
+ 'RIGHT': Keyword,
+ 'ROLE': Keyword,
+ 'ROLLBACK': Keyword,
+ 'ROLLUP': Keyword,
+ 'ROUTINE': Keyword,
+ 'ROUTINE_CATALOG': Keyword,
+ 'ROUTINE_NAME': Keyword,
+ 'ROUTINE_SCHEMA': Keyword,
+ 'ROW': Keyword,
+ 'ROWS': Keyword,
+ 'ROW_COUNT': Keyword,
+ 'RULE': Keyword,
+
+ 'SAVEPOINT': Keyword,
+ 'SCALE': Keyword,
+ 'SCHEMA': Keyword,
+ 'SCHEMA_NAME': Keyword,
+ 'SCOPE': Keyword,
+ 'SCROLL': Keyword,
+ 'SEARCH': Keyword,
+ 'SECOND': Keyword,
+ 'SECURITY': Keyword,
+ 'SELF': Keyword,
+ 'SENSITIVE': Keyword,
+ 'SERIALIZABLE': Keyword,
+ 'SERVER_NAME': Keyword,
+ 'SESSION': Keyword,
+ 'SESSION_USER': Keyword,
+ 'SETOF': Keyword,
+ 'SETS': Keyword,
+ 'SHARE': Keyword,
+ 'SHOW': Keyword,
+ 'SIMILAR': Keyword,
+ 'SIMPLE': Keyword,
+ 'SIZE': Keyword,
+ 'SOME': Keyword,
+ 'SOURCE': Keyword,
+ 'SPACE': Keyword,
+ 'SPECIFIC': Keyword,
+ 'SPECIFICTYPE': Keyword,
+ 'SPECIFIC_NAME': Keyword,
+ 'SQL': Keyword,
+ 'SQLCODE': Keyword,
+ 'SQLERROR': Keyword,
+ 'SQLEXCEPTION': Keyword,
+ 'SQLSTATE': Keyword,
+ 'SQLWARNING': Keyword,
+ 'STABLE': Keyword,
+ 'START': Keyword,
+ 'STATE': Keyword,
+ 'STATEMENT': Keyword,
+ 'STATIC': Keyword,
+ 'STATISTICS': Keyword,
+ 'STDIN': Keyword,
+ 'STDOUT': Keyword,
+ 'STORAGE': Keyword,
+ 'STRICT': Keyword,
+ 'STRUCTURE': Keyword,
+ 'STYPE': Keyword,
+ 'SUBCLASS_ORIGIN': Keyword,
+ 'SUBLIST': Keyword,
+ 'SUBSTRING': Keyword,
+ 'SUM': Keyword,
+ 'SYMMETRIC': Keyword,
+ 'SYSID': Keyword,
+ 'SYSTEM': Keyword,
+ 'SYSTEM_USER': Keyword,
+
+ 'TABLE': Keyword,
+ 'TABLE_NAME': Keyword,
+ 'TEMP': Keyword,
+ 'TEMPLATE': Keyword,
+ 'TEMPORARY': Keyword,
+ 'TERMINATE': Keyword,
+ 'THAN': Keyword,
+ 'THEN': Keyword,
+ 'TIMESTAMP': Keyword,
+ 'TIMEZONE_HOUR': Keyword,
+ 'TIMEZONE_MINUTE': Keyword,
+ 'TO': Keyword,
+ 'TOAST': Keyword,
+ 'TRAILING': Keyword,
+ 'TRANSACTION': Keyword,
+ 'TRANSACTIONS_COMMITTED': Keyword,
+ 'TRANSACTIONS_ROLLED_BACK': Keyword,
+ 'TRANSACTION_ACTIVE': Keyword,
+ 'TRANSFORM': Keyword,
+ 'TRANSFORMS': Keyword,
+ 'TRANSLATE': Keyword,
+ 'TRANSLATION': Keyword,
+ 'TREAT': Keyword,
+ 'TRIGGER': Keyword,
+ 'TRIGGER_CATALOG': Keyword,
+ 'TRIGGER_NAME': Keyword,
+ 'TRIGGER_SCHEMA': Keyword,
+ 'TRIM': Keyword,
+ 'TRUE': Keyword,
+ 'TRUNCATE': Keyword,
+ 'TRUSTED': Keyword,
+ 'TYPE': Keyword,
+
+ 'UNCOMMITTED': Keyword,
+ 'UNDER': Keyword,
+ 'UNENCRYPTED': Keyword,
+ 'UNION': Keyword,
+ 'UNIQUE': Keyword,
+ 'UNKNOWN': Keyword,
+ 'UNLISTEN': Keyword,
+ 'UNNAMED': Keyword,
+ 'UNNEST': Keyword,
+ 'UNTIL': Keyword,
+ 'UPPER': Keyword,
+ 'USAGE': Keyword,
+ 'USER': Keyword,
+ 'USER_DEFINED_TYPE_CATALOG': Keyword,
+ 'USER_DEFINED_TYPE_NAME': Keyword,
+ 'USER_DEFINED_TYPE_SCHEMA': Keyword,
+ 'USING': Keyword,
+
+ 'VACUUM': Keyword,
+ 'VALID': Keyword,
+ 'VALIDATOR': Keyword,
+ 'VALUES': Keyword,
+ 'VARIABLE': Keyword,
+ 'VERBOSE': Keyword,
+ 'VERSION': Keyword,
+ 'VIEW': Keyword,
+ 'VOLATILE': Keyword,
+
+ 'WHEN': Keyword,
+ 'WHENEVER': Keyword,
+ 'WHERE': Keyword,
+ 'WITH': Keyword,
+ 'WITHOUT': Keyword,
+ 'WORK': Keyword,
+ 'WRITE': Keyword,
+
+ 'YEAR': Keyword,
+
+ 'ZONE': Keyword,
+
+
+ 'ARRAY': Name.Builtin,
+ 'BIGINT': Name.Builtin,
+ 'BINARY': Name.Builtin,
+ 'BIT': Name.Builtin,
+ 'BLOB': Name.Builtin,
+ 'BOOLEAN': Name.Builtin,
+ 'CHAR': Name.Builtin,
+ 'CHARACTER': Name.Builtin,
+ 'DATE': Name.Builtin,
+ 'DEC': Name.Builtin,
+ 'DECIMAL': Name.Builtin,
+ 'FLOAT': Name.Builtin,
+ 'INT': Name.Builtin,
+ 'INTEGER': Name.Builtin,
+ 'INTERVAL': Name.Builtin,
+ 'NUMBER': Name.Builtin,
+ 'NUMERIC': Name.Builtin,
+ 'REAL': Name.Builtin,
+ 'SERIAL': Name.Builtin,
+ 'SMALLINT': Name.Builtin,
+ 'VARCHAR': Name.Builtin,
+ 'VARYING': Name.Builtin,
+ 'INT8': Name.Builtin,
+ 'SERIAL8': Name.Builtin,
+ 'TEXT': Name.Builtin,
+ }
+
+
+KEYWORDS_COMMON = {
+ 'SELECT': Keyword.DML,
+ 'INSERT': Keyword.DML,
+ 'DELETE': Keyword.DML,
+ 'UPDATE': Keyword.DML,
+ 'DROP': Keyword.DDL,
+ 'CREATE': Keyword.DDL,
+ 'ALTER': Keyword.DDL,
+
+ 'WHERE': Keyword,
+ 'FROM': Keyword,
+ 'INNER': Keyword,
+ 'JOIN': Keyword,
+ 'AND': Keyword,
+ 'OR': Keyword,
+ 'LIKE': Keyword,
+ 'ON': Keyword,
+ 'IN': Keyword,
+
+ 'BY': Keyword,
+ 'GROUP': Keyword,
+ 'ORDER': Keyword,
+ 'LEFT': Keyword,
+ 'OUTER': Keyword,
+
+ 'IF': Keyword,
+ 'END': Keyword,
+ 'THEN': Keyword,
+ 'LOOP': Keyword,
+ 'AS': Keyword,
+ 'ELSE': Keyword,
+ 'FOR': Keyword,
+
+ 'CASE': Keyword,
+ 'WHEN': Keyword,
+ 'MIN': Keyword,
+ 'MAX': Keyword,
+ 'DISTINCT': Keyword,
+
+ }
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
new file mode 100644
index 0000000..b635fc6
--- /dev/null
+++ b/sqlparse/lexer.py
@@ -0,0 +1,310 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL Lexer"""
+
+# This code is based on the SqlLexer in pygments.
+# http://pygments.org/
+# It's separated from the rest of pygments to increase performance
+# and to allow some customizations.
+
+import re
+
+from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
+from sqlparse.tokens import *
+from sqlparse.tokens import _TokenType
+
+
+class include(str):
+ """Indicates that the rules of another lexer state should be included."""
+
+class combined(tuple):
+ """Indicates a state combined from multiple states."""
+
+ def __new__(cls, *args):
+ return tuple.__new__(cls, args)
+
+ def __init__(self, *args):
+ # tuple.__init__ doesn't do anything
+ pass
+
+def is_keyword(value):
+ test = value.upper()
+ return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, Name)), value
+
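+# Illustrative only (not part of the original module): the lookup above
+# gives KEYWORDS_COMMON precedence over KEYWORDS and falls back to Name:
+#
+#   is_keyword('select')  -> (Keyword.DML, 'select')
+#   is_keyword('TABLE')   -> (Keyword, 'TABLE')
+#   is_keyword('my_col')  -> (Name, 'my_col')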
+
+def apply_filters(stream, filters, lexer=None):
+ """
+ Use this method to apply an iterable of filters to
+ a stream. If lexer is given it's forwarded to the
+ filter, otherwise the filter receives `None`.
+ """
+ def _apply(filter_, stream):
+ for token in filter_.filter(lexer, stream):
+ yield token
+ for filter_ in filters:
+ stream = _apply(filter_, stream)
+ return stream
+
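+# A rough usage sketch (``lexer``, ``sql`` and ``my_filter`` are assumed
+# names, not defined here); any object with a ``filter(lexer, stream)``
+# method can be chained this way:
+#
+#   stream = lexer.get_tokens(sql, unfiltered=True)
+#   stream = apply_filters(stream, [my_filter], lexer)
+#   for ttype, value in stream:
+#       ...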
+
+class LexerMeta(type):
+ """
+ Metaclass for Lexer, creates the self._tokens attribute from
+ self.tokens on the first instantiation.
+ """
+
+ def _process_state(cls, unprocessed, processed, state):
+ assert type(state) is str, "wrong state name %r" % state
+ assert state[0] != '#', "invalid state name %r" % state
+ if state in processed:
+ return processed[state]
+ tokens = processed[state] = []
+ rflags = cls.flags
+ for tdef in unprocessed[state]:
+ if isinstance(tdef, include):
+ # it's a state reference
+ assert tdef != state, "circular state reference %r" % state
+ tokens.extend(cls._process_state(unprocessed, processed, str(tdef)))
+ continue
+
+ assert type(tdef) is tuple, "wrong rule def %r" % tdef
+
+ try:
+ rex = re.compile(tdef[0], rflags).match
+ except Exception, err:
+ raise ValueError("uncompilable regex %r in state %r of %r: %s" %
+ (tdef[0], state, cls, err))
+
+ assert type(tdef[1]) is _TokenType or callable(tdef[1]), \
+ 'token type must be simple type or callable, not %r' % (tdef[1],)
+
+ if len(tdef) == 2:
+ new_state = None
+ else:
+ tdef2 = tdef[2]
+ if isinstance(tdef2, str):
+ # an existing state
+ if tdef2 == '#pop':
+ new_state = -1
+ elif tdef2 in unprocessed:
+ new_state = (tdef2,)
+ elif tdef2 == '#push':
+ new_state = tdef2
+ elif tdef2[:5] == '#pop:':
+ new_state = -int(tdef2[5:])
+ else:
+ assert False, 'unknown new state %r' % tdef2
+ elif isinstance(tdef2, combined):
+ # combine a new state from existing ones
+ new_state = '_tmp_%d' % cls._tmpname
+ cls._tmpname += 1
+ itokens = []
+ for istate in tdef2:
+ assert istate != state, 'circular state ref %r' % istate
+ itokens.extend(cls._process_state(unprocessed,
+ processed, istate))
+ processed[new_state] = itokens
+ new_state = (new_state,)
+ elif isinstance(tdef2, tuple):
+ # push more than one state
+ for tstate in tdef2:
+ assert (tstate in unprocessed or
+ tstate in ('#pop', '#push')), \
+ 'unknown new state ' + tstate
+ new_state = tdef2
+ else:
+ assert False, 'unknown new state def %r' % tdef2
+ tokens.append((rex, tdef[1], new_state))
+ return tokens
+
+ def process_tokendef(cls):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ processed = cls._all_tokens[cls.__name__] = {}
+ #tokendefs = tokendefs or cls.tokens[name]
+ for state in cls.tokens.keys():
+ cls._process_state(cls.tokens, processed, state)
+ return processed
+
+ def __call__(cls, *args, **kwds):
+ if not hasattr(cls, '_tokens'):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ if hasattr(cls, 'token_variants') and cls.token_variants:
+ # don't process yet
+ pass
+ else:
+ cls._tokens = cls.process_tokendef()
+
+ return type.__call__(cls, *args, **kwds)
+
+
+
+
+class Lexer:
+
+ __metaclass__ = LexerMeta
+
+ encoding = 'utf-8'
+ stripall = False
+ stripnl = False
+ tabsize = 0
+ flags = re.IGNORECASE
+
+ tokens = {
+ 'root': [
+ (r'--.*?(\r|\n|\r\n)', Comment.Single),
+ (r'(\r|\n|\r\n)', Newline),
+ (r'\s+', Whitespace),
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r':=', Assignment),
+ (r'::', Punctuation),
+ (r'[*]', Wildcard),
+ (r'[+/<>=~!@#%^&|`?^-]', Operator),
+ (r'[0-9]+', Number.Integer),
+ # TODO: Backslash escapes?
+ (r"'(''|[^'])*'", String.Single),
+ (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
+ (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN', Keyword),
+ (r'END( IF| LOOP)?', Keyword),
+ (r'CREATE( OR REPLACE)?', Keyword.DDL),
+ (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword),
+ (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', Name.Builtin),
+ (r'[;:()\[\],\.]', Punctuation),
+ ],
+ 'multiline-comments': [
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r'\*/', Comment.Multiline, '#pop'),
+ (r'[^/\*]+', Comment.Multiline),
+ (r'[/*]', Comment.Multiline)
+ ]
+ }
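+ # Note: inside 'multiline-comments' a further '/*' pushes the state
+ # again, so nested /* ... */ comments are consumed as Comment.Multiline
+ # until every level has been closed by '*/'.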
+
+ def __init__(self):
+ self.filters = []
+
+ def add_filter(self, filter_, **options):
+ from sqlparse.filters import Filter
+ if not isinstance(filter_, Filter):
+ filter_ = filter_(**options)
+ self.filters.append(filter_)
+
+ def get_tokens(self, text, unfiltered=False):
+ """
+ Return an iterable of (tokentype, value) pairs generated from
+ `text`. If `unfiltered` is set to `True`, the filtering mechanism
+ is bypassed even if filters are defined.
+
+ Also preprocesses the text, i.e. expands tabs and strips it if
+ wanted, and applies registered filters.
+ """
+ if not isinstance(text, unicode):
+ if self.encoding == 'guess':
+ try:
+ text = text.decode('utf-8')
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+ except UnicodeDecodeError:
+ text = text.decode('latin1')
+ elif self.encoding == 'chardet':
+ try:
+ import chardet
+ except ImportError:
+ raise ImportError('To enable chardet encoding guessing, '
+ 'please install the chardet library '
+ 'from http://chardet.feedparser.org/')
+ enc = chardet.detect(text)
+ text = text.decode(enc['encoding'])
+ else:
+ text = text.decode(self.encoding)
+ if self.stripall:
+ text = text.strip()
+ elif self.stripnl:
+ text = text.strip('\n')
+ if self.tabsize > 0:
+ text = text.expandtabs(self.tabsize)
+# if not text.endswith('\n'):
+# text += '\n'
+
+ def streamer():
+ for i, t, v in self.get_tokens_unprocessed(text):
+ yield t, v
+ stream = streamer()
+ if not unfiltered:
+ stream = apply_filters(stream, self.filters, self)
+ return stream
+
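+ # A minimal usage sketch (``my_filter`` is an assumed filter instance,
+ # not something defined in this file):
+ #
+ #   lexer = Lexer()
+ #   lexer.add_filter(my_filter)
+ #   for ttype, value in lexer.get_tokens("select 1"):
+ #       print ttype, value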
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+ ``stack`` is the initial stack (default: ``['root']``)
+ """
+ pos = 0
+ tokendefs = self._tokens
+ statestack = list(stack)
+ statetokens = tokendefs[statestack[-1]]
+ known_names = {}
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ m = rexmatch(text, pos)
+ if m:
+ # print rex.pattern
+ value = m.group()
+ if value in known_names:
+ yield pos, known_names[value], value
+ elif type(action) is _TokenType:
+ yield pos, action, value
+ elif hasattr(action, '__call__'):
+ ttype, value = action(value)
+ known_names[value] = ttype
+ yield pos, ttype, value
+ else:
+ for item in action(self, m):
+ yield item
+ pos = m.end()
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ statestack.pop()
+ elif state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ statestack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del statestack[new_state:]
+ elif new_state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ assert False, "wrong state def: %r" % new_state
+ statetokens = tokendefs[statestack[-1]]
+ break
+ else:
+ try:
+ if text[pos] == '\n':
+ # at EOL, reset state to "root"
+ pos += 1
+ statestack = ['root']
+ statetokens = tokendefs['root']
+ yield pos, Text, u'\n'
+ continue
+ yield pos, Error, text[pos]
+ pos += 1
+ except IndexError:
+ break
+
+
+def tokenize(sql):
+ """Tokenize sql.
+
+ Tokenize *sql* using the :class:`Lexer` and return a stream of
+ ``(token type, value)`` 2-tuples.
+ """
+ lexer = Lexer()
+ return lexer.get_tokens(sql)
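+
+# Roughly, for example (a sketch of the expected output, not a doctest
+# taken from the original source):
+#
+#   >>> for ttype, value in tokenize(u'select * from foo'):
+#   ...     print ttype, repr(value)
+#   Token.Keyword.DML u'select'
+#   Token.Text.Whitespace u' '
+#   Token.Wildcard u'*'
+#   ...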
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
new file mode 100644
index 0000000..2c63c41
--- /dev/null
+++ b/sqlparse/tokens.py
@@ -0,0 +1,131 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+# The Token implementation is based on pygments' token system written
+# by Georg Brandl.
+# http://pygments.org/
+
+"""Tokens"""
+
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+
+class _TokenType(tuple):
+ parent = None
+
+ def split(self):
+ buf = []
+ node = self
+ while node is not None:
+ buf.append(node)
+ node = node.parent
+ buf.reverse()
+ return buf
+
+ def __init__(self, *args):
+ # no need to call super.__init__
+ self.subtypes = set()
+
+ def __contains__(self, val):
+ return self is val or (
+ type(val) is self.__class__ and
+ val[:len(self)] == self
+ )
+
+ def __getattr__(self, val):
+ if not val or not val[0].isupper():
+ return tuple.__getattribute__(self, val)
+ new = _TokenType(self + (val,))
+ setattr(self, val, new)
+ self.subtypes.add(new)
+ new.parent = self
+ return new
+
+ def __hash__(self):
+ return hash(tuple(self))
+
+ def __repr__(self):
+ return 'Token' + (self and '.' or '') + '.'.join(self)
+
+
+Token = _TokenType()
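+
+# For illustration (behaviour implied by _TokenType.__getattr__, not code
+# from the original): attribute access creates and caches sub-types, so
+# repeated lookups return the very same object:
+#
+#   >>> Token.Keyword.DML is Token.Keyword.DML
+#   True
+#   >>> Token.Keyword.DML.parent is Token.Keyword
+#   True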
+
+# Special token types
+Text = Token.Text
+Whitespace = Text.Whitespace
+Newline = Whitespace.Newline
+Error = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
+Other = Token.Other
+
+# Common token types for source code
+Keyword = Token.Keyword
+Name = Token.Name
+Literal = Token.Literal
+String = Literal.String
+Number = Literal.Number
+Punctuation = Token.Punctuation
+Operator = Token.Operator
+Wildcard = Token.Wildcard
+Comment = Token.Comment
+Assignment = Token.Assignment
+
+# Generic types for non-source code
+Generic = Token.Generic
+
+# String and some others are not direct children of Token.
+# alias them:
+Token.Token = Token
+Token.String = String
+Token.Number = Number
+
+# SQL specific tokens
+DML = Keyword.DML
+DDL = Keyword.DDL
+Command = Keyword.Command
+
+Group = Token.Group
+Group.Parenthesis = Token.Group.Parenthesis
+Group.Comment = Token.Group.Comment
+Group.Where = Token.Group.Where
+
+
+def is_token_subtype(ttype, other):
+ """
+ Return True if ``ttype`` is a subtype of ``other``.
+
+ This function exists for backwards compatibility; use ``ttype in other`` instead.
+ """
+ return ttype in other
+
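+# For example (illustrative):
+#
+#   >>> Keyword.DML in Keyword
+#   True
+#   >>> Keyword in Keyword.DML
+#   False
+#   >>> is_token_subtype(Keyword.DML, Keyword)
+#   True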
+
+def string_to_tokentype(s):
+ """
+ Convert a string into a token type::
+
+ >>> string_to_tokentype('String.Double')
+ Token.Literal.String.Double
+ >>> string_to_tokentype('Token.Literal.Number')
+ Token.Literal.Number
+ >>> string_to_tokentype('')
+ Token
+
+ Tokens that are already tokens are returned unchanged:
+
+ >>> string_to_tokentype(String)
+ Token.Literal.String
+ """
+ if isinstance(s, _TokenType):
+ return s
+ if not s:
+ return Token
+ node = Token
+ for item in s.split('.'):
+ node = getattr(node, item)
+ return node
+