summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndi Albrecht <albrecht.andi@gmail.com>2015-02-21 08:15:09 +0100
committerAndi Albrecht <albrecht.andi@gmail.com>2015-02-21 08:15:09 +0100
commit9cec0cde3818005d70b0473f3c99241f5df68394 (patch)
tree195c683a37221e307c600980c00dcddc9a2b76da
parent71af186659923dfe8721c551d5dbf4db7c4854d9 (diff)
parent77e0789aea8918a2fbbc6f20196cd0bcdecccf52 (diff)
downloadsqlparse-9cec0cde3818005d70b0473f3c99241f5df68394.tar.gz
Merge master into v0.2.0 branch.
-rw-r--r--AUTHORS2
-rw-r--r--CHANGES9
-rw-r--r--sqlparse/engine/grouping.py7
-rw-r--r--sqlparse/filters.py6
-rw-r--r--sqlparse/lexer.py10
-rw-r--r--sqlparse/sql.py90
-rw-r--r--sqlparse/tokens.py1
-rw-r--r--tests/test_format.py3
-rw-r--r--tests/test_grouping.py51
-rw-r--r--tests/test_parse.py58
10 files changed, 189 insertions, 48 deletions
diff --git a/AUTHORS b/AUTHORS
index 09d8b5b..02266d9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -7,8 +7,10 @@ Alphabetical list of contributors:
* Alexander Beedie <ayembee@gmail.com>
* Alexey Malyshev <nostrict@gmail.com>
* Cristian Orellana <cristiano@groupon.com>
+* Darik Gamble <darik.gamble@gmail.com>
* Florian Bauer <florian.bauer@zmdi.com>
* Gavin Wahl <gwahl@fusionbox.com>
+* JacekPliszka <Jacek.Pliszka@gmail.com>
* Jesús Leganés Combarro "Piranna" <piranna@gmail.com>
* Kevin Jing Qiu <kevin.jing.qiu@gmail.com>
* Michael Schuller <chick@mschuller.net>
diff --git a/CHANGES b/CHANGES
index 7c4b52c..dc6cf0b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,13 +2,18 @@ Development Version
-------------------
Bug Fixes
-* Fix a regression for identifiers with square bracktes notation (issue153).
-* Add missing SQL types (issue154, issue155, issue156).
+* Fix a regression for identifiers with square brackets
+ notation (issue153, by darikg).
+* Add missing SQL types (issue154, issue155, issue156, by jukebox).
+* Fix parsing of multi-line comments (issue172, by JacekPliszka).
Enhancements
* Improve formatting of HAVING statements.
* Improve parsing of inline comments (issue163).
* Group comments to parent object (issue128, issue160).
+* Add double precision builtin (issue169, by darikg).
+* Add support for square bracket array indexing (issue170, by darikg).
+* Improve grouping of aliased elements (issue167, by darikg).
Release 0.1.14 (Nov 30, 2014)
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6aa9e18..e9d77e6 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -150,11 +150,14 @@ def group_identifier(tlist):
# TODO: Usage of Wildcard token is ambivalent here.
x = itertools.cycle((
lambda y: (y.match(T.Punctuation, '.')
- or y.ttype is T.Operator
- or y.ttype is T.Wildcard),
+ or y.ttype in (T.Operator,
+ T.Wildcard,
+ T.ArrayIndex,
+ T.Name)),
lambda y: (y.ttype in (T.String.Symbol,
T.Name,
T.Wildcard,
+ T.ArrayIndex,
T.Literal.String.Single,
T.Literal.Number.Integer,
T.Literal.Number.Float)
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index 697f22b..7e82f26 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -271,7 +271,11 @@ class StripWhitespaceFilter:
[self.process(stack, sgroup, depth + 1)
for sgroup in stmt.get_sublists()]
self._stripws(stmt)
- if depth == 0 and stmt.tokens[-1].is_whitespace():
+ if (
+ depth == 0
+ and stmt.tokens
+ and stmt.tokens[-1].is_whitespace()
+ ):
stmt.tokens.pop(-1)
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index ac50442..223d70c 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -172,12 +172,14 @@ class Lexer(compat.with_metaclass(LexerMeta)):
(r"'(''|\\'|[^'])*'", tokens.String.Single),
# not a real string literal in ANSI SQL:
(r'(""|".*?[^\\]")', tokens.String.Symbol),
+ (r'(?<=[\w\]])(\[[^\]]*?\])', tokens.Punctuation.ArrayIndex),
(r'(\[[^\]]+\])', tokens.Name),
((r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b'), tokens.Keyword),
(r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
(r'NOT NULL\b', tokens.Keyword),
(r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
+ (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
(r'(?<=\.)[^\W\d_]\w*', tokens.Name),
(r'[^\W\d_]\w*', is_keyword),
(r'[;:()\[\],\.]', tokens.Punctuation),
@@ -291,7 +293,13 @@ class Lexer(compat.with_metaclass(LexerMeta)):
statestack.pop()
elif state == '#push':
statestack.append(statestack[-1])
- else:
+ elif (
+ # Ugly hack - multiline-comments
+ # are not stackable
+ state != 'multiline-comments'
+ or not statestack
+ or statestack[-1] != 'multiline-comments'
+ ):
statestack.append(state)
elif isinstance(new_state, int):
# pop
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 17509eb..367204d 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -383,21 +383,17 @@ class TokenList(Token):
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
+
+ # "name AS alias"
kw = self.token_next_match(0, T.Keyword, 'AS')
if kw is not None:
- alias = self.token_next(self.token_index(kw))
- if alias is None:
- return None
- else:
- next_ = self.token_next_by_instance(0, Identifier)
- if next_ is None:
- next_ = self.token_next_by_type(0, T.String.Symbol)
- if next_ is None:
- return None
- alias = next_
- if isinstance(alias, Identifier):
- return alias.get_name()
- return self._remove_quotes(compat.text_type(alias))
+ return self._get_first_name(kw, keywords=True)
+
+ # "name alias" or "complicated column expression alias"
+ if len(self.tokens) > 2:
+ return self._get_first_name(reverse=True)
+
+ return None
def get_name(self):
"""Returns the name of this identifier.
@@ -415,18 +411,43 @@ class TokenList(Token):
"""Returns the real name (object name) of this identifier."""
# a.b
dot = self.token_next_match(0, T.Punctuation, '.')
+ if dot is not None:
+ return self._get_first_name(self.token_index(dot))
+
+ return self._get_first_name()
+
+ def get_parent_name(self):
+ """Return name of the parent object if any.
+
+    A parent object is identified by the first occurring dot.
+ """
+ dot = self.token_next_match(0, T.Punctuation, '.')
if dot is None:
- next_ = self.token_next_by_type(0, T.Name)
- if next_ is not None:
- return self._remove_quotes(next_.value)
return None
-
- next_ = self.token_next_by_type(self.token_index(dot),
- (T.Name, T.Wildcard, T.String.Symbol))
- if next_ is None: # invalid identifier, e.g. "a."
+ prev_ = self.token_prev(self.token_index(dot))
+        if prev_ is None:  # something must be very wrong here..
return None
- return self._remove_quotes(next_.value)
+ return self._remove_quotes(prev_.value)
+
+ def _get_first_name(self, idx=None, reverse=False, keywords=False):
+ """Returns the name of the first token with a name"""
+
+ if idx and not isinstance(idx, int):
+ idx = self.token_index(idx) + 1
+ tokens = self.tokens[idx:] if idx else self.tokens
+ tokens = reversed(tokens) if reverse else tokens
+ types = [T.Name, T.Wildcard, T.String.Symbol]
+
+ if keywords:
+ types.append(T.Keyword)
+
+ for tok in tokens:
+ if tok.ttype in types:
+ return self._remove_quotes(tok.value)
+ elif isinstance(tok, Identifier) or isinstance(tok, Function):
+ return tok.get_name()
+ return None
class Statement(TokenList):
"""Represents a SQL statement."""
@@ -460,19 +481,6 @@ class Identifier(TokenList):
__slots__ = ('value', 'ttype', 'tokens')
- def get_parent_name(self):
- """Return name of the parent object if any.
-
- A parent object is identified by the first occuring dot.
- """
- dot = self.token_next_match(0, T.Punctuation, '.')
- if dot is None:
- return None
- prev_ = self.token_prev(self.token_index(dot))
- if prev_ is None: # something must be verry wrong here..
- return None
- return self._remove_quotes(prev_.value)
-
def is_wildcard(self):
"""Return ``True`` if this identifier contains a wildcard."""
token = self.token_next_by_type(0, T.Wildcard)
@@ -495,6 +503,13 @@ class Identifier(TokenList):
return None
return ordering.value.upper()
+ def get_array_indices(self):
+ """Returns an iterator of index expressions as strings"""
+
+ # Use [1:-1] index to discard the square brackets
+ return (tok.value[1:-1] for tok in self.tokens
+ if tok.ttype in T.ArrayIndex)
+
class IdentifierList(TokenList):
"""A list of :class:`~sqlparse.sql.Identifier`\'s."""
@@ -622,9 +637,10 @@ class Function(TokenList):
for t in parenthesis.tokens:
if isinstance(t, IdentifierList):
return t.get_identifiers()
- elif (isinstance(t, (Identifier, Function))
- or t.ttype in T.Literal):
- return [t]
+ elif isinstance(t, Identifier) or \
+ isinstance(t, Function) or \
+ t.ttype in T.Literal:
+ return [t,]
return []
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
index 53c31ce..5ad51a6 100644
--- a/sqlparse/tokens.py
+++ b/sqlparse/tokens.py
@@ -48,6 +48,7 @@ Literal = Token.Literal
String = Literal.String
Number = Literal.Number
Punctuation = Token.Punctuation
+ArrayIndex = Punctuation.ArrayIndex
Operator = Token.Operator
Comparison = Operator.Comparison
Wildcard = Token.Wildcard
diff --git a/tests/test_format.py b/tests/test_format.py
index 31cf74a..4746358 100644
--- a/tests/test_format.py
+++ b/tests/test_format.py
@@ -61,6 +61,9 @@ class TestFormat(TestCaseBase):
sql = 'select (/* sql starts here */ select 2)'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select (select 2)')
+ sql = 'select (/* sql /* starts here */ select 2)'
+ res = sqlparse.format(sql, strip_comments=True)
+ self.ndiffAssertEqual(res, 'select (select 2)')
def test_strip_ws(self):
f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index 8b5108e..e1dc3fe 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -16,11 +16,12 @@ class TestGrouping(TestCaseBase):
s = 'select (select (x3) x2) and (y2) bar'
parsed = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, str(parsed))
- self.assertEqual(len(parsed.tokens), 9)
+ self.assertEqual(len(parsed.tokens), 7)
self.assert_(isinstance(parsed.tokens[2], sql.Parenthesis))
- self.assert_(isinstance(parsed.tokens[-3], sql.Parenthesis))
- self.assertEqual(len(parsed.tokens[2].tokens), 7)
- self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Parenthesis))
+ self.assert_(isinstance(parsed.tokens[-1], sql.Identifier))
+ self.assertEqual(len(parsed.tokens[2].tokens), 5)
+ self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Identifier))
+ self.assert_(isinstance(parsed.tokens[2].tokens[3].tokens[0], sql.Parenthesis))
self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3)
def test_comments(self):
@@ -146,7 +147,7 @@ class TestGrouping(TestCaseBase):
s = 'select x from (select y from foo where bar = 1) z'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, compat.text_type(p))
- self.assertTrue(isinstance(p.tokens[-3].tokens[-2], sql.Where))
+ self.assertTrue(isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where))
def test_typecast(self):
s = 'select foo::integer from bar'
@@ -346,3 +347,43 @@ def test_nested_begin():
assert inner.tokens[0].value == 'BEGIN'
assert inner.tokens[-1].value == 'END'
assert isinstance(inner, sql.Begin)
+
+
+def test_aliased_column_without_as():
+ p = sqlparse.parse('foo bar')[0].tokens
+ assert len(p) == 1
+ assert p[0].get_real_name() == 'foo'
+ assert p[0].get_alias() == 'bar'
+
+ p = sqlparse.parse('foo.bar baz')[0].tokens[0]
+ assert p.get_parent_name() == 'foo'
+ assert p.get_real_name() == 'bar'
+ assert p.get_alias() == 'baz'
+
+
+def test_qualified_function():
+ p = sqlparse.parse('foo()')[0].tokens[0]
+ assert p.get_parent_name() is None
+ assert p.get_real_name() == 'foo'
+
+ p = sqlparse.parse('foo.bar()')[0].tokens[0]
+ assert p.get_parent_name() == 'foo'
+ assert p.get_real_name() == 'bar'
+
+
+def test_aliased_function_without_as():
+ p = sqlparse.parse('foo() bar')[0].tokens[0]
+ assert p.get_parent_name() is None
+ assert p.get_real_name() == 'foo'
+ assert p.get_alias() == 'bar'
+
+ p = sqlparse.parse('foo.bar() baz')[0].tokens[0]
+ assert p.get_parent_name() == 'foo'
+ assert p.get_real_name() == 'bar'
+ assert p.get_alias() == 'baz'
+
+
+def test_aliased_literal_without_as():
+ p = sqlparse.parse('1 foo')[0].tokens
+ assert len(p) == 1
+ assert p[0].get_alias() == 'foo' \ No newline at end of file
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 641ca55..9c24ee0 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -167,6 +167,14 @@ def test_psql_quotation_marks(): # issue83
assert len(t) == 2
+def test_double_precision_is_builtin():
+ sql = 'DOUBLE PRECISION'
+ t = sqlparse.parse(sql)[0].tokens
+ assert (len(t) == 1
+ and t[0].ttype == sqlparse.tokens.Name.Builtin
+ and t[0].value == 'DOUBLE PRECISION')
+
+
@pytest.mark.parametrize('ph', ['?', ':1', ':foo', '%s', '%(foo)s'])
def test_placeholder(ph):
p = sqlparse.parse(ph)[0].tokens
@@ -197,3 +205,53 @@ def test_single_quotes_with_linebreaks(): # issue118
p = sqlparse.parse("'f\nf'")[0].tokens
assert len(p) == 1
assert p[0].ttype is T.String.Single
+
+
+def test_array_indexed_column():
+ # Make sure we still parse sqlite style escapes
+ p = sqlparse.parse('[col1],[col2]')[0].tokens
+ assert (len(p) == 1
+ and isinstance(p[0], sqlparse.sql.IdentifierList)
+ and [id.get_name() for id in p[0].get_identifiers()]
+ == ['[col1]', '[col2]'])
+
+ p = sqlparse.parse('[col1]+[col2]')[0]
+ types = [tok.ttype for tok in p.flatten()]
+ assert types == [T.Name, T.Operator, T.Name]
+
+ p = sqlparse.parse('col[1]')[0].tokens
+ assert (len(p) == 1
+ and tuple(p[0].get_array_indices()) == ('1',)
+ and p[0].get_name() == 'col')
+
+ p = sqlparse.parse('col[1][1:5] as mycol')[0].tokens
+ assert (len(p) == 1
+ and tuple(p[0].get_array_indices()) == ('1', '1:5')
+ and p[0].get_name() == 'mycol'
+ and p[0].get_real_name() == 'col')
+
+ p = sqlparse.parse('col[1][other_col]')[0].tokens
+ assert len(p) == 1 and tuple(p[0].get_array_indices()) == ('1', 'other_col')
+
+ sql = 'SELECT col1, my_1d_array[2] as alias1, my_2d_array[2][5] as alias2'
+ p = sqlparse.parse(sql)[0].tokens
+ assert len(p) == 3 and isinstance(p[2], sqlparse.sql.IdentifierList)
+ ids = list(p[2].get_identifiers())
+ assert (ids[0].get_name() == 'col1'
+ and tuple(ids[0].get_array_indices()) == ()
+ and ids[1].get_name() == 'alias1'
+ and ids[1].get_real_name() == 'my_1d_array'
+ and tuple(ids[1].get_array_indices()) == ('2',)
+ and ids[2].get_name() == 'alias2'
+ and ids[2].get_real_name() == 'my_2d_array'
+ and tuple(ids[2].get_array_indices()) == ('2', '5'))
+
+
+def test_typed_array_definition():
+ # array indices aren't grouped with builtins, but make sure we can extract
+    # identifier names
+ p = sqlparse.parse('x int, y int[], z int')[0]
+ names = [x.get_name() for x in p.get_sublists()]
+ assert names == ['x', 'y', 'z']
+
+