diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2015-08-12 14:26:11 -0400 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2015-08-12 14:26:11 -0400 |
commit | 88749550f6b973efaa09b9571176dbb65c45574d (patch) | |
tree | 67980cd0c1d7764231c7c545f9595bd9e0563f99 | |
parent | 5198b1de31029cc985102cd13569086a7056c2f1 (diff) | |
download | sqlalchemy-88749550f6b973efaa09b9571176dbb65c45574d.tar.gz |
- The behavior of the :func:`.union` construct and related constructs
such as :meth:`.Query.union` now handle the case where the embedded
SELECT statements need to be parenthesized due to the fact that they
include LIMIT, OFFSET and/or ORDER BY. These queries **do not work
on SQLite**, and will fail on that backend as they did before, but
should now work on all other backends.
fixes #2528
-rw-r--r-- | doc/build/changelog/changelog_11.rst | 15 | ||||
-rw-r--r-- | doc/build/changelog/migration_11.rst | 55 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/selectable.py | 18 | ||||
-rw-r--r-- | lib/sqlalchemy/testing/requirements.py | 11 | ||||
-rw-r--r-- | lib/sqlalchemy/testing/suite/test_select.py | 124 | ||||
-rw-r--r-- | test/requirements.py | 9 | ||||
-rw-r--r-- | test/sql/test_compiler.py | 67 | ||||
-rw-r--r-- | test/sql/test_selectable.py | 20 |
8 files changed, 312 insertions, 7 deletions
diff --git a/doc/build/changelog/changelog_11.rst b/doc/build/changelog/changelog_11.rst index bb395a826..ad858a462 100644 --- a/doc/build/changelog/changelog_11.rst +++ b/doc/build/changelog/changelog_11.rst @@ -22,6 +22,21 @@ :version: 1.1.0b1 .. change:: + :tags: bug, sql + :tickets: 2528 + + The behavior of the :func:`.union` construct and related constructs + such as :meth:`.Query.union` now handle the case where the embedded + SELECT statements need to be parenthesized due to the fact that they + include LIMIT, OFFSET and/or ORDER BY. These queries **do not work + on SQLite**, and will fail on that backend as they did before, but + should now work on all other backends. + + .. seealso:: + + :ref:`change_2528` + + .. change:: :tags: bug, mssql :tickets: 3504 diff --git a/doc/build/changelog/migration_11.rst b/doc/build/changelog/migration_11.rst index f5602a8ad..6ce0d031c 100644 --- a/doc/build/changelog/migration_11.rst +++ b/doc/build/changelog/migration_11.rst @@ -71,6 +71,61 @@ New Features and Improvements - Core ==================================== +.. _change_2528: + +A UNION or similar of SELECTs with LIMIT/OFFSET/ORDER BY now parenthesizes the embedded selects +----------------------------------------------------------------------------------------------- + +An issue that, like others, was long driven by SQLite's lack of capabilities +has now been enhanced to work on all supporting backends. We refer to a query that +is a UNION of SELECT statements that themselves contain row-limiting or ordering +features which include LIMIT, OFFSET, and/or ORDER BY:: + + (SELECT x FROM table1 ORDER BY y LIMIT 1) UNION + (SELECT x FROM table2 ORDER BY y LIMIT 2) + +The above query requires parenthesis within each sub-select in order to +group the sub-results correctly. Production of the above statement in +SQLAlchemy Core looks like:: + + stmt1 = select([table1.c.x]).order_by(table1.c.y).limit(1) + stmt2 = select([table1.c.x]).order_by(table2.c.y).limit(2) + + stmt = union(stmt1, stmt2) + +Previously, the above construct would not produce parenthesization for the +inner SELECT statements, producing a query that fails on all backends. + +The above formats will **continue to fail on SQLite**. +This is not a backwards-incompatible change, because the queries fail without +the parentheses as well; with the fix, the queries at least work on all other +databases. + +In all cases, in order to produce a UNION of limited SELECT statements that +also works on SQLite, the subqueries must be a SELECT of an ALIAS:: + + stmt1 = select([table1.c.x]).order_by(table1.c.y).limit(1).alias().select() + stmt2 = select([table2.c.x]).order_by(table2.c.y).limit(2).alias().select() + + stmt = union(stmt1, stmt2) + +This workaround works on all SQLAlchemy versions. In the ORM, it looks like:: + + stmt1 = session.query(Model1).order_by(Model1.y).limit(1).subquery().select() + stmt2 = session.query(Model2).order_by(Model2.y).limit(1).subquery().select() + + stmt = session.query(Model1).from_statement(stmt1.union(stmt2)) + +The behavior here has many parallels to the "join rewriting" behavior +introduced in SQLAlchemy 0.9 in :ref:`feature_joins_09`; however in this case +we have opted not to add new rewriting behavior to accommodate this +case for SQLite. +The existing rewriting behavior is very complicated already, and the case of +UNIONs with parenthesized SELECT statements is much less common than the +"right-nested-join" use case of that feature. + +:ticket:`2528` + Key Behavioral Changes - ORM ============================ diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index bfba35de1..73341053d 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -1101,6 +1101,14 @@ class Alias(FromClause): or 'anon')) self.name = name + def self_group(self, target=None): + if isinstance(target, CompoundSelect) and \ + isinstance(self.original, Select) and \ + self.original._needs_parens_for_grouping(): + return FromGrouping(self) + + return super(Alias, self).self_group(target) + @property def description(self): if util.py3k: @@ -3208,6 +3216,13 @@ class Select(HasPrefixes, HasSuffixes, GenerativeSelect): return None return None + def _needs_parens_for_grouping(self): + return ( + self._limit_clause is not None or + self._offset_clause is not None or + bool(self._order_by_clause.clauses) + ) + def self_group(self, against=None): """return a 'grouping' construct as per the ClauseElement specification. @@ -3217,7 +3232,8 @@ class Select(HasPrefixes, HasSuffixes, GenerativeSelect): expressions and should not require explicit use. """ - if isinstance(against, CompoundSelect): + if isinstance(against, CompoundSelect) and \ + not self._needs_parens_for_grouping(): return self return FromGrouping(self) diff --git a/lib/sqlalchemy/testing/requirements.py b/lib/sqlalchemy/testing/requirements.py index e8b3a995f..8b02f3e40 100644 --- a/lib/sqlalchemy/testing/requirements.py +++ b/lib/sqlalchemy/testing/requirements.py @@ -111,6 +111,17 @@ class SuiteRequirements(Requirements): return exclusions.open() @property + def parens_in_union_contained_select(self): + """Target database must support parenthesized SELECT in UNION. + + E.g. (SELECT ...) UNION (SELECT ..) + + This is known to fail on SQLite. + + """ + return exclusions.open() + + @property def boolean_col_expressions(self): """Target database must support boolean expressions as columns""" diff --git a/lib/sqlalchemy/testing/suite/test_select.py b/lib/sqlalchemy/testing/suite/test_select.py index d4bf63b55..0bcd35fd2 100644 --- a/lib/sqlalchemy/testing/suite/test_select.py +++ b/lib/sqlalchemy/testing/suite/test_select.py @@ -2,7 +2,7 @@ from .. import fixtures, config from ..assertions import eq_ from sqlalchemy import util -from sqlalchemy import Integer, String, select, func, bindparam +from sqlalchemy import Integer, String, select, func, bindparam, union from sqlalchemy import testing from ..schema import Table, Column @@ -146,7 +146,7 @@ class LimitOffsetTest(fixtures.TablesTest): select([table]).order_by(table.c.id).limit(2).offset(1), [(2, 2, 3), (3, 3, 4)] ) - + @testing.requires.offset def test_limit_offset_nobinds(self): """test that 'literal binds' mode works - no bound params.""" @@ -190,3 +190,123 @@ class LimitOffsetTest(fixtures.TablesTest): [(2, 2, 3), (3, 3, 4)], params={"l": 2, "o": 1} ) + + +class CompoundSelectTest(fixtures.TablesTest): + __backend__ = True + + @classmethod + def define_tables(cls, metadata): + Table("some_table", metadata, + Column('id', Integer, primary_key=True), + Column('x', Integer), + Column('y', Integer)) + + @classmethod + def insert_data(cls): + config.db.execute( + cls.tables.some_table.insert(), + [ + {"id": 1, "x": 1, "y": 2}, + {"id": 2, "x": 2, "y": 3}, + {"id": 3, "x": 3, "y": 4}, + {"id": 4, "x": 4, "y": 5}, + ] + ) + + def _assert_result(self, select, result, params=()): + eq_( + config.db.execute(select, params).fetchall(), + result + ) + + def test_plain_union(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2) + s2 = select([table]).where(table.c.id == 3) + + u1 = union(s1, s2) + self._assert_result( + u1.order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) + + def test_select_from_plain_union(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2) + s2 = select([table]).where(table.c.id == 3) + + u1 = union(s1, s2).alias().select() + self._assert_result( + u1.order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) + + @testing.requires.parens_in_union_contained_select + def test_limit_offset_selectable_in_unions(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2).\ + limit(1).order_by(table.c.id) + s2 = select([table]).where(table.c.id == 3).\ + limit(1).order_by(table.c.id) + + u1 = union(s1, s2).limit(2) + self._assert_result( + u1.order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) + + @testing.requires.parens_in_union_contained_select + def test_order_by_selectable_in_unions(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2).\ + order_by(table.c.id) + s2 = select([table]).where(table.c.id == 3).\ + order_by(table.c.id) + + u1 = union(s1, s2).limit(2) + self._assert_result( + u1.order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) + + def test_distinct_selectable_in_unions(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2).\ + distinct() + s2 = select([table]).where(table.c.id == 3).\ + distinct() + + u1 = union(s1, s2).limit(2) + self._assert_result( + u1.order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) + + @testing.requires.parens_in_union_contained_select + def test_limit_offset_in_unions_from_alias(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2).\ + limit(1).order_by(table.c.id) + s2 = select([table]).where(table.c.id == 3).\ + limit(1).order_by(table.c.id) + + # this necessarily has double parens + u1 = union(s1, s2).alias() + self._assert_result( + u1.select().limit(2).order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) + + def test_limit_offset_aliased_selectable_in_unions(self): + table = self.tables.some_table + s1 = select([table]).where(table.c.id == 2).\ + limit(1).order_by(table.c.id).alias().select() + s2 = select([table]).where(table.c.id == 3).\ + limit(1).order_by(table.c.id).alias().select() + + u1 = union(s1, s2).limit(2) + self._assert_result( + u1.order_by(u1.c.id), + [(2, 2, 3), (3, 3, 4)] + ) diff --git a/test/requirements.py b/test/requirements.py index db4daca20..939af4db1 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -362,6 +362,15 @@ class DefaultRequirements(SuiteRequirements): ], 'no support for EXCEPT') @property + def parens_in_union_contained_select(self): + """Target database must support parenthesized SELECT in UNION. + + E.g. (SELECT ...) UNION (SELECT ..) + + """ + return fails_if('sqlite') + + @property def offset(self): """Target database must support some method of adding OFFSET or equivalent to a result set.""" diff --git a/test/sql/test_compiler.py b/test/sql/test_compiler.py index 06cb80ba0..7ff7d68af 100644 --- a/test/sql/test_compiler.py +++ b/test/sql/test_compiler.py @@ -1643,14 +1643,12 @@ class SelectTest(fixtures.TestBase, AssertsCompiledSQL): s = select([column('foo'), column('bar')]) - # ORDER BY's even though not supported by - # all DB's, are rendered if requested self.assert_compile( union( s.order_by("foo"), s.order_by("bar")), - "SELECT foo, bar ORDER BY foo UNION SELECT foo, bar ORDER BY bar") - # self_group() is honored + "(SELECT foo, bar ORDER BY foo) UNION " + "(SELECT foo, bar ORDER BY bar)") self.assert_compile( union(s.order_by("foo").self_group(), s.order_by("bar").limit(10).self_group()), @@ -1759,6 +1757,67 @@ class SelectTest(fixtures.TestBase, AssertsCompiledSQL): "SELECT foo, bar FROM bat)" ) + # tests for [ticket:2528] + # sqlite hates all of these. + self.assert_compile( + union( + s.limit(1), + s.offset(2) + ), + "(SELECT foo, bar FROM bat LIMIT :param_1) " + "UNION (SELECT foo, bar FROM bat LIMIT -1 OFFSET :param_2)" + ) + + self.assert_compile( + union( + s.order_by(column('bar')), + s.offset(2) + ), + "(SELECT foo, bar FROM bat ORDER BY bar) " + "UNION (SELECT foo, bar FROM bat LIMIT -1 OFFSET :param_1)" + ) + + self.assert_compile( + union( + s.limit(1).alias('a'), + s.limit(2).alias('b') + ), + "(SELECT foo, bar FROM bat LIMIT :param_1) " + "UNION (SELECT foo, bar FROM bat LIMIT :param_2)" + ) + + self.assert_compile( + union( + s.limit(1).self_group(), + s.limit(2).self_group() + ), + "(SELECT foo, bar FROM bat LIMIT :param_1) " + "UNION (SELECT foo, bar FROM bat LIMIT :param_2)" + ) + + self.assert_compile( + union(s.limit(1), s.limit(2).offset(3)).alias().select(), + "SELECT anon_1.foo, anon_1.bar FROM " + "((SELECT foo, bar FROM bat LIMIT :param_1) " + "UNION (SELECT foo, bar FROM bat LIMIT :param_2 OFFSET :param_3)) " + "AS anon_1" + ) + + # this version works for SQLite + self.assert_compile( + union( + s.limit(1).alias().select(), + s.offset(2).alias().select(), + ), + "SELECT anon_1.foo, anon_1.bar " + "FROM (SELECT foo, bar FROM bat" + " LIMIT :param_1) AS anon_1 " + "UNION SELECT anon_2.foo, anon_2.bar " + "FROM (SELECT foo, bar " + "FROM bat" + " LIMIT -1 OFFSET :param_2) AS anon_2" + ) + def test_binds(self): for ( stmt, diff --git a/test/sql/test_selectable.py b/test/sql/test_selectable.py index 3390f4a77..4a332a4d1 100644 --- a/test/sql/test_selectable.py +++ b/test/sql/test_selectable.py @@ -458,6 +458,26 @@ class SelectableTest( assert u1.corresponding_column(table2.c.col1) is u1.c._all_columns[0] assert u1.corresponding_column(table2.c.col3) is u1.c._all_columns[2] + @testing.emits_warning("Column 'col1'") + def test_union_alias_dupe_keys_grouped(self): + s1 = select([table1.c.col1, table1.c.col2, table2.c.col1]).\ + limit(1).alias() + s2 = select([table2.c.col1, table2.c.col2, table2.c.col3]).limit(1) + u1 = union(s1, s2) + + assert u1.corresponding_column( + s1.c._all_columns[0]) is u1.c._all_columns[0] + assert u1.corresponding_column(s2.c.col1) is u1.c._all_columns[0] + assert u1.corresponding_column(s1.c.col2) is u1.c.col2 + assert u1.corresponding_column(s2.c.col2) is u1.c.col2 + + assert u1.corresponding_column(s2.c.col3) is u1.c._all_columns[2] + + # this differs from the non-alias test because table2.c.col1 is + # more directly at s2.c.col1 than it is s1.c.col1. + assert u1.corresponding_column(table2.c.col1) is u1.c._all_columns[0] + assert u1.corresponding_column(table2.c.col3) is u1.c._all_columns[2] + def test_select_union(self): # like testaliasunion, but off a Select off the union. |