Merge master into v0.2.0 branch.

author: Andi Albrecht <albrecht.andi@gmail.com> 2015-02-21 08:15:09 +0100
committer: Andi Albrecht <albrecht.andi@gmail.com> 2015-02-21 08:15:09 +0100
commit: 9cec0cde3818005d70b0473f3c99241f5df68394 (patch)
tree: 195c683a37221e307c600980c00dcddc9a2b76da
parent: 71af186659923dfe8721c551d5dbf4db7c4854d9 (diff)
parent: 77e0789aea8918a2fbbc6f20196cd0bcdecccf52 (diff)
download: sqlparse-9cec0cde3818005d70b0473f3c99241f5df68394.tar.gz
10 files changed, 189 insertions, 48 deletions
diff --git a/AUTHORS b/AUTHORS
index 09d8b5b..02266d9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -7,8 +7,10 @@ Alphabetical list of contributors:
 * Alexander Beedie <ayembee@gmail.com>
 * Alexey Malyshev <nostrict@gmail.com>
 * Cristian Orellana <cristiano@groupon.com>
+* Darik Gamble <darik.gamble@gmail.com>
 * Florian Bauer <florian.bauer@zmdi.com>
 * Gavin Wahl <gwahl@fusionbox.com>
+* JacekPliszka <Jacek.Pliszka@gmail.com>
 * Jesús Leganés Combarro "Piranna" <piranna@gmail.com>
 * Kevin Jing Qiu <kevin.jing.qiu@gmail.com>
 * Michael Schuller <chick@mschuller.net>
diff --git a/CHANGES b/CHANGES
index 7c4b52c..dc6cf0b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,13 +2,18 @@ Development Version
 -------------------
 
 Bug Fixes
-* Fix a regression for identifiers with square bracktes notation (issue153).
-* Add missing SQL types (issue154, issue155, issue156).
+* Fix a regression for identifiers with square brackets
+  notation (issue153, by darikg).
+* Add missing SQL types (issue154, issue155, issue156, by jukebox).
+* Fix parsing of multi-line comments (issue172, by JacekPliszka).
 
 Enhancements
 * Improve formatting of HAVING statements.
 * Improve parsing of inline comments (issue163).
 * Group comments to parent object (issue128, issue160).
+* Add double precision builtin (issue169, by darikg).
+* Add support for square bracket array indexing (issue170, by darikg).
+* Improve grouping of aliased elements (issue167, by darikg).
 
 
 Release 0.1.14 (Nov 30, 2014)
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6aa9e18..e9d77e6 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -150,11 +150,14 @@ def group_identifier(tlist):
         # TODO: Usage of Wildcard token is ambivalent here.
         x = itertools.cycle((
             lambda y: (y.match(T.Punctuation, '.')
-                       or y.ttype is T.Operator
-                       or y.ttype is T.Wildcard),
+                       or y.ttype in (T.Operator,
+                                      T.Wildcard,
+                                      T.ArrayIndex,
+                                      T.Name)),
             lambda y: (y.ttype in (T.String.Symbol,
                                    T.Name,
                                    T.Wildcard,
+                                   T.ArrayIndex,
                                    T.Literal.String.Single,
                                    T.Literal.Number.Integer,
                                    T.Literal.Number.Float)
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index 697f22b..7e82f26 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -271,7 +271,11 @@ class StripWhitespaceFilter:
         [self.process(stack, sgroup, depth + 1)
          for sgroup in stmt.get_sublists()]
         self._stripws(stmt)
-        if depth == 0 and stmt.tokens[-1].is_whitespace():
+        if (
+            depth == 0
+            and stmt.tokens
+            and stmt.tokens[-1].is_whitespace()
+        ):
             stmt.tokens.pop(-1)
 
 
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index ac50442..223d70c 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -172,12 +172,14 @@ class Lexer(compat.with_metaclass(LexerMeta)):
             (r"'(''|\\'|[^'])*'", tokens.String.Single),
             # not a real string literal in ANSI SQL:
             (r'(""|".*?[^\\]")', tokens.String.Symbol),
+            (r'(?<=[\w\]])(\[[^\]]*?\])', tokens.Punctuation.ArrayIndex),
             (r'(\[[^\]]+\])', tokens.Name),
             ((r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
               r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b'), tokens.Keyword),
             (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
             (r'NOT NULL\b', tokens.Keyword),
             (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
+            (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
             (r'(?<=\.)[^\W\d_]\w*', tokens.Name),
             (r'[^\W\d_]\w*', is_keyword),
             (r'[;:()\[\],\.]', tokens.Punctuation),
@@ -291,7 +293,13 @@ class Lexer(compat.with_metaclass(LexerMeta)):
                                     statestack.pop()
                                 elif state == '#push':
                                     statestack.append(statestack[-1])
-                                else:
+                                elif (
+                                    # Ugly hack - multiline-comments
+                                    # are not stackable
+                                    state != 'multiline-comments'
+                                    or not statestack
+                                    or statestack[-1] != 'multiline-comments'
+                                ):
                                     statestack.append(state)
                         elif isinstance(new_state, int):
                             # pop
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 17509eb..367204d 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -383,21 +383,17 @@ class TokenList(Token):
 
     def get_alias(self):
         """Returns the alias for this identifier or ``None``."""
+
+        # "name AS alias"
         kw = self.token_next_match(0, T.Keyword, 'AS')
         if kw is not None:
-            alias = self.token_next(self.token_index(kw))
-            if alias is None:
-                return None
-        else:
-            next_ = self.token_next_by_instance(0, Identifier)
-            if next_ is None:
-                next_ = self.token_next_by_type(0, T.String.Symbol)
-                if next_ is None:
-                    return None
-            alias = next_
-        if isinstance(alias, Identifier):
-            return alias.get_name()
-        return self._remove_quotes(compat.text_type(alias))
+            return self._get_first_name(kw, keywords=True)
+
+        # "name alias" or "complicated column expression alias"
+        if len(self.tokens) > 2:
+            return self._get_first_name(reverse=True)
+
+        return None
 
     def get_name(self):
         """Returns the name of this identifier.
@@ -415,18 +411,43 @@ class TokenList(Token):
         """Returns the real name (object name) of this identifier."""
         # a.b
         dot = self.token_next_match(0, T.Punctuation, '.')
+        if dot is not None:
+            return self._get_first_name(self.token_index(dot))
+
+        return self._get_first_name()
+
+    def get_parent_name(self):
+        """Return name of the parent object if any.
+
+        A parent object is identified by the first occurring dot.
+        """
+        dot = self.token_next_match(0, T.Punctuation, '.')
         if dot is None:
-            next_ = self.token_next_by_type(0, T.Name)
-            if next_ is not None:
-                return self._remove_quotes(next_.value)
             return None
-
-        next_ = self.token_next_by_type(self.token_index(dot),
-                                        (T.Name, T.Wildcard, T.String.Symbol))
-        if next_ is None:  # invalid identifier, e.g. "a."
+        prev_ = self.token_prev(self.token_index(dot))
+        if prev_ is None:  # something must be very wrong here..
             return None
-        return self._remove_quotes(next_.value)
+        return self._remove_quotes(prev_.value)
+
+    def _get_first_name(self, idx=None, reverse=False, keywords=False):
+        """Returns the name of the first token with a name"""
+
+        if idx and not isinstance(idx, int):
+            idx = self.token_index(idx) + 1
 
+        tokens = self.tokens[idx:] if idx else self.tokens
+        tokens = reversed(tokens) if reverse else tokens
+        types = [T.Name, T.Wildcard, T.String.Symbol]
+
+        if keywords:
+            types.append(T.Keyword)
+
+        for tok in tokens:
+            if tok.ttype in types:
+                return self._remove_quotes(tok.value)
+            elif isinstance(tok, Identifier) or isinstance(tok, Function):
+                return tok.get_name()
+        return None
 
 class Statement(TokenList):
     """Represents a SQL statement."""
@@ -460,19 +481,6 @@ class Identifier(TokenList):
 
     __slots__ = ('value', 'ttype', 'tokens')
 
-    def get_parent_name(self):
-        """Return name of the parent object if any.
-
-        A parent object is identified by the first occuring dot.
-        """
-        dot = self.token_next_match(0, T.Punctuation, '.')
-        if dot is None:
-            return None
-        prev_ = self.token_prev(self.token_index(dot))
-        if prev_ is None:  # something must be verry wrong here..
-            return None
-        return self._remove_quotes(prev_.value)
-
     def is_wildcard(self):
         """Return ``True`` if this identifier contains a wildcard."""
         token = self.token_next_by_type(0, T.Wildcard)
@@ -495,6 +503,13 @@ class Identifier(TokenList):
             return None
         return ordering.value.upper()
 
+    def get_array_indices(self):
+        """Returns an iterator of index expressions as strings"""
+
+        # Use [1:-1] index to discard the square brackets
+        return (tok.value[1:-1] for tok in self.tokens
+                if tok.ttype in T.ArrayIndex)
+
 
 class IdentifierList(TokenList):
     """A list of :class:`~sqlparse.sql.Identifier`\'s."""
@@ -622,9 +637,10 @@ class Function(TokenList):
         for t in parenthesis.tokens:
             if isinstance(t, IdentifierList):
                 return t.get_identifiers()
-            elif (isinstance(t, (Identifier, Function))
-                  or t.ttype in T.Literal):
-                return [t]
+            elif isinstance(t, Identifier) or \
+                isinstance(t, Function) or \
+                t.ttype in T.Literal:
+                return [t,]
         return []
 
 
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
index 53c31ce..5ad51a6 100644
--- a/sqlparse/tokens.py
+++ b/sqlparse/tokens.py
@@ -48,6 +48,7 @@ Literal = Token.Literal
 String = Literal.String
 Number = Literal.Number
 Punctuation = Token.Punctuation
+ArrayIndex = Punctuation.ArrayIndex
 Operator = Token.Operator
 Comparison = Operator.Comparison
 Wildcard = Token.Wildcard
diff --git a/tests/test_format.py b/tests/test_format.py
index 31cf74a..4746358 100644
--- a/tests/test_format.py
+++ b/tests/test_format.py
@@ -61,6 +61,9 @@ class TestFormat(TestCaseBase):
         sql = 'select (/* sql starts here */ select 2)'
         res = sqlparse.format(sql, strip_comments=True)
         self.ndiffAssertEqual(res, 'select (select 2)')
+        sql = 'select (/* sql /* starts here */ select 2)'
+        res = sqlparse.format(sql, strip_comments=True)
+        self.ndiffAssertEqual(res, 'select (select 2)')
 
     def test_strip_ws(self):
         f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
index 8b5108e..e1dc3fe 100644
--- a/tests/test_grouping.py
+++ b/tests/test_grouping.py
@@ -16,11 +16,12 @@ class TestGrouping(TestCaseBase):
         s = 'select (select (x3) x2) and (y2) bar'
         parsed = sqlparse.parse(s)[0]
         self.ndiffAssertEqual(s, str(parsed))
-        self.assertEqual(len(parsed.tokens), 9)
+        self.assertEqual(len(parsed.tokens), 7)
         self.assert_(isinstance(parsed.tokens[2], sql.Parenthesis))
-        self.assert_(isinstance(parsed.tokens[-3], sql.Parenthesis))
-        self.assertEqual(len(parsed.tokens[2].tokens), 7)
-        self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Parenthesis))
+        self.assert_(isinstance(parsed.tokens[-1], sql.Identifier))
+        self.assertEqual(len(parsed.tokens[2].tokens), 5)
+        self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Identifier))
+        self.assert_(isinstance(parsed.tokens[2].tokens[3].tokens[0], sql.Parenthesis))
         self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3)
 
     def test_comments(self):
@@ -146,7 +147,7 @@ class TestGrouping(TestCaseBase):
         s = 'select x from (select y from foo where bar = 1) z'
         p = sqlparse.parse(s)[0]
         self.ndiffAssertEqual(s, compat.text_type(p))
-        self.assertTrue(isinstance(p.tokens[-3].tokens[-2], sql.Where))
+        self.assertTrue(isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where))
 
     def test_typecast(self):
         s = 'select foo::integer from bar'
@@ -346,3 +347,43 @@ def test_nested_begin():
     assert inner.tokens[0].value == 'BEGIN'
     assert inner.tokens[-1].value == 'END'
     assert isinstance(inner, sql.Begin)
+
+
+def test_aliased_column_without_as():
+    p = sqlparse.parse('foo bar')[0].tokens
+    assert len(p) == 1
+    assert p[0].get_real_name() == 'foo'
+    assert p[0].get_alias() == 'bar'
+
+    p = sqlparse.parse('foo.bar baz')[0].tokens[0]
+    assert p.get_parent_name() == 'foo'
+    assert p.get_real_name() == 'bar'
+    assert p.get_alias() == 'baz'
+
+
+def test_qualified_function():
+    p = sqlparse.parse('foo()')[0].tokens[0]
+    assert p.get_parent_name() is None
+    assert p.get_real_name() == 'foo'
+
+    p = sqlparse.parse('foo.bar()')[0].tokens[0]
+    assert p.get_parent_name() == 'foo'
+    assert p.get_real_name() == 'bar'
+
+
+def test_aliased_function_without_as():
+    p = sqlparse.parse('foo() bar')[0].tokens[0]
+    assert p.get_parent_name() is None
+    assert p.get_real_name() == 'foo'
+    assert p.get_alias() == 'bar'
+
+    p = sqlparse.parse('foo.bar() baz')[0].tokens[0]
+    assert p.get_parent_name() == 'foo'
+    assert p.get_real_name() == 'bar'
+    assert p.get_alias() == 'baz'
+
+
+def test_aliased_literal_without_as():
+    p = sqlparse.parse('1 foo')[0].tokens
+    assert len(p) == 1
+    assert p[0].get_alias() == 'foo'
+\ No newline at end of file
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 641ca55..9c24ee0 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -167,6 +167,14 @@ def test_psql_quotation_marks():  # issue83
     assert len(t) == 2
 
 
+def test_double_precision_is_builtin():
+    sql = 'DOUBLE PRECISION'
+    t = sqlparse.parse(sql)[0].tokens
+    assert (len(t) == 1
+            and t[0].ttype == sqlparse.tokens.Name.Builtin
+            and t[0].value == 'DOUBLE PRECISION')
+
+
 @pytest.mark.parametrize('ph', ['?', ':1', ':foo', '%s', '%(foo)s'])
 def test_placeholder(ph):
     p = sqlparse.parse(ph)[0].tokens
@@ -197,3 +205,53 @@ def test_single_quotes_with_linebreaks():  # issue118
     p = sqlparse.parse("'f\nf'")[0].tokens
     assert len(p) == 1
     assert p[0].ttype is T.String.Single
+
+
+def test_array_indexed_column():
+    # Make sure we still parse sqlite style escapes
+    p = sqlparse.parse('[col1],[col2]')[0].tokens
+    assert (len(p) == 1
+            and isinstance(p[0], sqlparse.sql.IdentifierList)
+            and [id.get_name() for id in p[0].get_identifiers()]
+                    == ['[col1]', '[col2]'])
+
+    p = sqlparse.parse('[col1]+[col2]')[0]
+    types = [tok.ttype for tok in p.flatten()]
+    assert types == [T.Name, T.Operator, T.Name]
+
+    p = sqlparse.parse('col[1]')[0].tokens
+    assert (len(p) == 1
+        and tuple(p[0].get_array_indices()) == ('1',)
+        and p[0].get_name() == 'col')
+
+    p = sqlparse.parse('col[1][1:5] as mycol')[0].tokens
+    assert (len(p) == 1
+        and tuple(p[0].get_array_indices()) == ('1', '1:5')
+        and p[0].get_name() == 'mycol'
+        and p[0].get_real_name() == 'col')
+
+    p = sqlparse.parse('col[1][other_col]')[0].tokens
+    assert len(p) == 1 and tuple(p[0].get_array_indices()) == ('1', 'other_col')
+
+    sql = 'SELECT col1, my_1d_array[2] as alias1, my_2d_array[2][5] as alias2'
+    p = sqlparse.parse(sql)[0].tokens
+    assert len(p) == 3 and isinstance(p[2], sqlparse.sql.IdentifierList)
+    ids = list(p[2].get_identifiers())
+    assert (ids[0].get_name() == 'col1'
+            and tuple(ids[0].get_array_indices()) == ()
+            and ids[1].get_name() == 'alias1'
+            and ids[1].get_real_name() == 'my_1d_array'
+            and tuple(ids[1].get_array_indices()) == ('2',)
+            and ids[2].get_name() == 'alias2'
+            and ids[2].get_real_name() == 'my_2d_array'
+            and tuple(ids[2].get_array_indices()) == ('2', '5'))
+
+
+def test_typed_array_definition():
+    # array indices aren't grouped with builtins, but make sure we can extract
+    # identifier names
+    p = sqlparse.parse('x int, y int[], z int')[0]
+    names = [x.get_name() for x in p.get_sublists()]
+    assert names == ['x', 'y', 'z']
+
+
author	Andi Albrecht <albrecht.andi@gmail.com>	2015-02-21 08:15:09 +0100
committer	Andi Albrecht <albrecht.andi@gmail.com>	2015-02-21 08:15:09 +0100
commit	9cec0cde3818005d70b0473f3c99241f5df68394 (patch)
tree	195c683a37221e307c600980c00dcddc9a2b76da
parent	71af186659923dfe8721c551d5dbf4db7c4854d9 (diff)
parent	77e0789aea8918a2fbbc6f20196cd0bcdecccf52 (diff)
download	sqlparse-9cec0cde3818005d70b0473f3c99241f5df68394.tar.gz