diff options
author | Andi Albrecht <albrecht.andi@gmail.com> | 2015-02-21 08:15:09 +0100 |
---|---|---|
committer | Andi Albrecht <albrecht.andi@gmail.com> | 2015-02-21 08:15:09 +0100 |
commit | 9cec0cde3818005d70b0473f3c99241f5df68394 (patch) | |
tree | 195c683a37221e307c600980c00dcddc9a2b76da | |
parent | 71af186659923dfe8721c551d5dbf4db7c4854d9 (diff) | |
parent | 77e0789aea8918a2fbbc6f20196cd0bcdecccf52 (diff) | |
download | sqlparse-9cec0cde3818005d70b0473f3c99241f5df68394.tar.gz |
Merge master into v0.2.0 branch.
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | CHANGES | 9 | ||||
-rw-r--r-- | sqlparse/engine/grouping.py | 7 | ||||
-rw-r--r-- | sqlparse/filters.py | 6 | ||||
-rw-r--r-- | sqlparse/lexer.py | 10 | ||||
-rw-r--r-- | sqlparse/sql.py | 90 | ||||
-rw-r--r-- | sqlparse/tokens.py | 1 | ||||
-rw-r--r-- | tests/test_format.py | 3 | ||||
-rw-r--r-- | tests/test_grouping.py | 51 | ||||
-rw-r--r-- | tests/test_parse.py | 58 |
10 files changed, 189 insertions, 48 deletions
@@ -7,8 +7,10 @@ Alphabetical list of contributors: * Alexander Beedie <ayembee@gmail.com> * Alexey Malyshev <nostrict@gmail.com> * Cristian Orellana <cristiano@groupon.com> +* Darik Gamble <darik.gamble@gmail.com> * Florian Bauer <florian.bauer@zmdi.com> * Gavin Wahl <gwahl@fusionbox.com> +* JacekPliszka <Jacek.Pliszka@gmail.com> * Jesús Leganés Combarro "Piranna" <piranna@gmail.com> * Kevin Jing Qiu <kevin.jing.qiu@gmail.com> * Michael Schuller <chick@mschuller.net> @@ -2,13 +2,18 @@ Development Version ------------------- Bug Fixes -* Fix a regression for identifiers with square bracktes notation (issue153). -* Add missing SQL types (issue154, issue155, issue156). +* Fix a regression for identifiers with square bracktes + notation (issue153, by darikg). +* Add missing SQL types (issue154, issue155, issue156, by jukebox). +* Fix parsing of multi-line comments (issue172, by JacekPliszka). Enhancements * Improve formatting of HAVING statements. * Improve parsing of inline comments (issue163). * Group comments to parent object (issue128, issue160). +* Add double precision builtin (issue169, by darikg). +* Add support for square bracket array indexing (issue170, by darikg). +* Improve grouping of aliased elements (issue167, by darikg). Release 0.1.14 (Nov 30, 2014) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 6aa9e18..e9d77e6 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -150,11 +150,14 @@ def group_identifier(tlist): # TODO: Usage of Wildcard token is ambivalent here. x = itertools.cycle(( lambda y: (y.match(T.Punctuation, '.') - or y.ttype is T.Operator - or y.ttype is T.Wildcard), + or y.ttype in (T.Operator, + T.Wildcard, + T.ArrayIndex, + T.Name)), lambda y: (y.ttype in (T.String.Symbol, T.Name, T.Wildcard, + T.ArrayIndex, T.Literal.String.Single, T.Literal.Number.Integer, T.Literal.Number.Float) diff --git a/sqlparse/filters.py b/sqlparse/filters.py index 697f22b..7e82f26 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -271,7 +271,11 @@ class StripWhitespaceFilter: [self.process(stack, sgroup, depth + 1) for sgroup in stmt.get_sublists()] self._stripws(stmt) - if depth == 0 and stmt.tokens[-1].is_whitespace(): + if ( + depth == 0 + and stmt.tokens + and stmt.tokens[-1].is_whitespace() + ): stmt.tokens.pop(-1) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index ac50442..223d70c 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -172,12 +172,14 @@ class Lexer(compat.with_metaclass(LexerMeta)): (r"'(''|\\'|[^'])*'", tokens.String.Single), # not a real string literal in ANSI SQL: (r'(""|".*?[^\\]")', tokens.String.Symbol), + (r'(?<=[\w\]])(\[[^\]]*?\])', tokens.Punctuation.ArrayIndex), (r'(\[[^\]]+\])', tokens.Name), ((r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?' r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b'), tokens.Keyword), (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword), (r'NOT NULL\b', tokens.Keyword), (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), (r'(?<=\.)[^\W\d_]\w*', tokens.Name), (r'[^\W\d_]\w*', is_keyword), (r'[;:()\[\],\.]', tokens.Punctuation), @@ -291,7 +293,13 @@ class Lexer(compat.with_metaclass(LexerMeta)): statestack.pop() elif state == '#push': statestack.append(statestack[-1]) - else: + elif ( + # Ugly hack - multiline-comments + # are not stackable + state != 'multiline-comments' + or not statestack + or statestack[-1] != 'multiline-comments' + ): statestack.append(state) elif isinstance(new_state, int): # pop diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 17509eb..367204d 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -383,21 +383,17 @@ class TokenList(Token): def get_alias(self): """Returns the alias for this identifier or ``None``.""" + + # "name AS alias" kw = self.token_next_match(0, T.Keyword, 'AS') if kw is not None: - alias = self.token_next(self.token_index(kw)) - if alias is None: - return None - else: - next_ = self.token_next_by_instance(0, Identifier) - if next_ is None: - next_ = self.token_next_by_type(0, T.String.Symbol) - if next_ is None: - return None - alias = next_ - if isinstance(alias, Identifier): - return alias.get_name() - return self._remove_quotes(compat.text_type(alias)) + return self._get_first_name(kw, keywords=True) + + # "name alias" or "complicated column expression alias" + if len(self.tokens) > 2: + return self._get_first_name(reverse=True) + + return None def get_name(self): """Returns the name of this identifier. @@ -415,18 +411,43 @@ class TokenList(Token): """Returns the real name (object name) of this identifier.""" # a.b dot = self.token_next_match(0, T.Punctuation, '.') + if dot is not None: + return self._get_first_name(self.token_index(dot)) + + return self._get_first_name() + + def get_parent_name(self): + """Return name of the parent object if any. + + A parent object is identified by the first occuring dot. + """ + dot = self.token_next_match(0, T.Punctuation, '.') if dot is None: - next_ = self.token_next_by_type(0, T.Name) - if next_ is not None: - return self._remove_quotes(next_.value) return None - - next_ = self.token_next_by_type(self.token_index(dot), - (T.Name, T.Wildcard, T.String.Symbol)) - if next_ is None: # invalid identifier, e.g. "a." + prev_ = self.token_prev(self.token_index(dot)) + if prev_ is None: # something must be verry wrong here.. return None - return self._remove_quotes(next_.value) + return self._remove_quotes(prev_.value) + + def _get_first_name(self, idx=None, reverse=False, keywords=False): + """Returns the name of the first token with a name""" + + if idx and not isinstance(idx, int): + idx = self.token_index(idx) + 1 + tokens = self.tokens[idx:] if idx else self.tokens + tokens = reversed(tokens) if reverse else tokens + types = [T.Name, T.Wildcard, T.String.Symbol] + + if keywords: + types.append(T.Keyword) + + for tok in tokens: + if tok.ttype in types: + return self._remove_quotes(tok.value) + elif isinstance(tok, Identifier) or isinstance(tok, Function): + return tok.get_name() + return None class Statement(TokenList): """Represents a SQL statement.""" @@ -460,19 +481,6 @@ class Identifier(TokenList): __slots__ = ('value', 'ttype', 'tokens') - def get_parent_name(self): - """Return name of the parent object if any. - - A parent object is identified by the first occuring dot. - """ - dot = self.token_next_match(0, T.Punctuation, '.') - if dot is None: - return None - prev_ = self.token_prev(self.token_index(dot)) - if prev_ is None: # something must be verry wrong here.. - return None - return self._remove_quotes(prev_.value) - def is_wildcard(self): """Return ``True`` if this identifier contains a wildcard.""" token = self.token_next_by_type(0, T.Wildcard) @@ -495,6 +503,13 @@ class Identifier(TokenList): return None return ordering.value.upper() + def get_array_indices(self): + """Returns an iterator of index expressions as strings""" + + # Use [1:-1] index to discard the square brackets + return (tok.value[1:-1] for tok in self.tokens + if tok.ttype in T.ArrayIndex) + class IdentifierList(TokenList): """A list of :class:`~sqlparse.sql.Identifier`\'s.""" @@ -622,9 +637,10 @@ class Function(TokenList): for t in parenthesis.tokens: if isinstance(t, IdentifierList): return t.get_identifiers() - elif (isinstance(t, (Identifier, Function)) - or t.ttype in T.Literal): - return [t] + elif isinstance(t, Identifier) or \ + isinstance(t, Function) or \ + t.ttype in T.Literal: + return [t,] return [] diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py index 53c31ce..5ad51a6 100644 --- a/sqlparse/tokens.py +++ b/sqlparse/tokens.py @@ -48,6 +48,7 @@ Literal = Token.Literal String = Literal.String Number = Literal.Number Punctuation = Token.Punctuation +ArrayIndex = Punctuation.ArrayIndex Operator = Token.Operator Comparison = Operator.Comparison Wildcard = Token.Wildcard diff --git a/tests/test_format.py b/tests/test_format.py index 31cf74a..4746358 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -61,6 +61,9 @@ class TestFormat(TestCaseBase): sql = 'select (/* sql starts here */ select 2)' res = sqlparse.format(sql, strip_comments=True) self.ndiffAssertEqual(res, 'select (select 2)') + sql = 'select (/* sql /* starts here */ select 2)' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select (select 2)') def test_strip_ws(self): f = lambda sql: sqlparse.format(sql, strip_whitespace=True) diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 8b5108e..e1dc3fe 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -16,11 +16,12 @@ class TestGrouping(TestCaseBase): s = 'select (select (x3) x2) and (y2) bar' parsed = sqlparse.parse(s)[0] self.ndiffAssertEqual(s, str(parsed)) - self.assertEqual(len(parsed.tokens), 9) + self.assertEqual(len(parsed.tokens), 7) self.assert_(isinstance(parsed.tokens[2], sql.Parenthesis)) - self.assert_(isinstance(parsed.tokens[-3], sql.Parenthesis)) - self.assertEqual(len(parsed.tokens[2].tokens), 7) - self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Parenthesis)) + self.assert_(isinstance(parsed.tokens[-1], sql.Identifier)) + self.assertEqual(len(parsed.tokens[2].tokens), 5) + self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Identifier)) + self.assert_(isinstance(parsed.tokens[2].tokens[3].tokens[0], sql.Parenthesis)) self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3) def test_comments(self): @@ -146,7 +147,7 @@ class TestGrouping(TestCaseBase): s = 'select x from (select y from foo where bar = 1) z' p = sqlparse.parse(s)[0] self.ndiffAssertEqual(s, compat.text_type(p)) - self.assertTrue(isinstance(p.tokens[-3].tokens[-2], sql.Where)) + self.assertTrue(isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where)) def test_typecast(self): s = 'select foo::integer from bar' @@ -346,3 +347,43 @@ def test_nested_begin(): assert inner.tokens[0].value == 'BEGIN' assert inner.tokens[-1].value == 'END' assert isinstance(inner, sql.Begin) + + +def test_aliased_column_without_as(): + p = sqlparse.parse('foo bar')[0].tokens + assert len(p) == 1 + assert p[0].get_real_name() == 'foo' + assert p[0].get_alias() == 'bar' + + p = sqlparse.parse('foo.bar baz')[0].tokens[0] + assert p.get_parent_name() == 'foo' + assert p.get_real_name() == 'bar' + assert p.get_alias() == 'baz' + + +def test_qualified_function(): + p = sqlparse.parse('foo()')[0].tokens[0] + assert p.get_parent_name() is None + assert p.get_real_name() == 'foo' + + p = sqlparse.parse('foo.bar()')[0].tokens[0] + assert p.get_parent_name() == 'foo' + assert p.get_real_name() == 'bar' + + +def test_aliased_function_without_as(): + p = sqlparse.parse('foo() bar')[0].tokens[0] + assert p.get_parent_name() is None + assert p.get_real_name() == 'foo' + assert p.get_alias() == 'bar' + + p = sqlparse.parse('foo.bar() baz')[0].tokens[0] + assert p.get_parent_name() == 'foo' + assert p.get_real_name() == 'bar' + assert p.get_alias() == 'baz' + + +def test_aliased_literal_without_as(): + p = sqlparse.parse('1 foo')[0].tokens + assert len(p) == 1 + assert p[0].get_alias() == 'foo'
\ No newline at end of file diff --git a/tests/test_parse.py b/tests/test_parse.py index 641ca55..9c24ee0 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -167,6 +167,14 @@ def test_psql_quotation_marks(): # issue83 assert len(t) == 2 +def test_double_precision_is_builtin(): + sql = 'DOUBLE PRECISION' + t = sqlparse.parse(sql)[0].tokens + assert (len(t) == 1 + and t[0].ttype == sqlparse.tokens.Name.Builtin + and t[0].value == 'DOUBLE PRECISION') + + @pytest.mark.parametrize('ph', ['?', ':1', ':foo', '%s', '%(foo)s']) def test_placeholder(ph): p = sqlparse.parse(ph)[0].tokens @@ -197,3 +205,53 @@ def test_single_quotes_with_linebreaks(): # issue118 p = sqlparse.parse("'f\nf'")[0].tokens assert len(p) == 1 assert p[0].ttype is T.String.Single + + +def test_array_indexed_column(): + # Make sure we still parse sqlite style escapes + p = sqlparse.parse('[col1],[col2]')[0].tokens + assert (len(p) == 1 + and isinstance(p[0], sqlparse.sql.IdentifierList) + and [id.get_name() for id in p[0].get_identifiers()] + == ['[col1]', '[col2]']) + + p = sqlparse.parse('[col1]+[col2]')[0] + types = [tok.ttype for tok in p.flatten()] + assert types == [T.Name, T.Operator, T.Name] + + p = sqlparse.parse('col[1]')[0].tokens + assert (len(p) == 1 + and tuple(p[0].get_array_indices()) == ('1',) + and p[0].get_name() == 'col') + + p = sqlparse.parse('col[1][1:5] as mycol')[0].tokens + assert (len(p) == 1 + and tuple(p[0].get_array_indices()) == ('1', '1:5') + and p[0].get_name() == 'mycol' + and p[0].get_real_name() == 'col') + + p = sqlparse.parse('col[1][other_col]')[0].tokens + assert len(p) == 1 and tuple(p[0].get_array_indices()) == ('1', 'other_col') + + sql = 'SELECT col1, my_1d_array[2] as alias1, my_2d_array[2][5] as alias2' + p = sqlparse.parse(sql)[0].tokens + assert len(p) == 3 and isinstance(p[2], sqlparse.sql.IdentifierList) + ids = list(p[2].get_identifiers()) + assert (ids[0].get_name() == 'col1' + and tuple(ids[0].get_array_indices()) == () + and ids[1].get_name() == 'alias1' + and ids[1].get_real_name() == 'my_1d_array' + and tuple(ids[1].get_array_indices()) == ('2',) + and ids[2].get_name() == 'alias2' + and ids[2].get_real_name() == 'my_2d_array' + and tuple(ids[2].get_array_indices()) == ('2', '5')) + + +def test_typed_array_definition(): + # array indices aren't grouped with builtins, but make sure we can extract + # indentifer names + p = sqlparse.parse('x int, y int[], z int')[0] + names = [x.get_name() for x in p.get_sublists()] + assert names == ['x', 'y', 'z'] + + |