diff options
author | unknown <sergefp@mysql.com> | 2006-10-31 20:51:09 +0300 |
---|---|---|
committer | unknown <sergefp@mysql.com> | 2006-10-31 20:51:09 +0300 |
commit | 48df3b96a1719141749c05e4080c57366e9d0fbe (patch) | |
tree | 9720832e2f3ce2403e51f324722b5522b5bac946 | |
parent | 3cf49a3dd890bd6229acd8c89b672359e7d16452 (diff) | |
download | mariadb-git-48df3b96a1719141749c05e4080c57366e9d0fbe.tar.gz |
BUG#8804: wrong results for NULL IN (SELECT ...)
Evaluate "NULL IN (SELECT ...)" in a special way: Disable pushed-down
conditions and their "consequences":
= Do full table scans instead of unique_[index_subquery] lookups.
= Change appropriate "ref_or_null" accesses to full table scans in
subquery's joins.
Also cache value of NULL IN (SELECT ...) if the SELECT is not correlated
wrt any upper select.
mysql-test/r/subselect.result:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Updated test results
sql/item.h:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Added comments
sql/item_cmpfunc.cc:
BUG#8804: wrong results for NULL IN (SELECT ...):
Made Item_in_optimizer to:
- cache the value of "NULL IN (uncorrelated select)"
- Turn off pushed-down predicates when evaluating "NULL IN (SELECT ...)"
sql/item_cmpfunc.h:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Made Item_in_optimizer cache the value of "NULL IN (uncorrelated select)"
- Added comments
sql/item_subselect.cc:
BUG#8804: wrong results for NULL IN (SELECT ...):
- When needed, wrap the predicates we push into subquery into an
Item_func_trig_cond so we're able to turn them off when evaluating
NULL IN (SELECT ...).
- Added code to evaluate NULL IN (SELECT ...) in a special way:
= In [unique_]index_subquery, do full table scan to see if there
are any rows.
= For other subqueries, change ref[_or_null] to ALL if the
ref[_or_null] was created from pushed-down predicate.
sql/item_subselect.h:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Added Item_subselect::is_correlated
- Added comments
sql/records.cc:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Make rr_sequential() non-static
sql/sql_lex.cc:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Added st_select_lex::is_correlated and Item_subselect::is_correlated.
sql/sql_lex.h:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Added st_select_lex::is_correlated
sql/sql_select.cc:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Added KEY_FIELD::outer_ref to keep track of which ref accesses are
created from predicates that were pushed down into the subquery.
sql/sql_select.h:
BUG#8804: wrong results for NULL IN (SELECT ...):
- Added KEYUSE::outer_ref
mysql-test/r/subselect3.result:
New BitKeeper file ``mysql-test/r/subselect3.result''
mysql-test/t/subselect3.test:
New BitKeeper file ``mysql-test/t/subselect3.test''
-rw-r--r-- | mysql-test/r/subselect.result | 10 | ||||
-rw-r--r-- | mysql-test/r/subselect3.result | 153 | ||||
-rw-r--r-- | mysql-test/t/subselect3.test | 137 | ||||
-rw-r--r-- | sql/item.h | 10 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 34 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 48 | ||||
-rw-r--r-- | sql/item_subselect.cc | 406 | ||||
-rw-r--r-- | sql/item_subselect.h | 70 | ||||
-rw-r--r-- | sql/records.cc | 5 | ||||
-rw-r--r-- | sql/sql_lex.cc | 4 | ||||
-rw-r--r-- | sql/sql_lex.h | 4 | ||||
-rw-r--r-- | sql/sql_select.cc | 112 | ||||
-rw-r--r-- | sql/sql_select.h | 11 |
13 files changed, 900 insertions, 104 deletions
diff --git a/mysql-test/r/subselect.result b/mysql-test/r/subselect.result index 55d48030a07..82c70e19f9c 100644 --- a/mysql-test/r/subselect.result +++ b/mysql-test/r/subselect.result @@ -744,7 +744,7 @@ id select_type table type possible_keys key key_len ref rows Extra 3 DEPENDENT UNION NULL NULL NULL NULL NULL NULL NULL No tables used NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL Warnings: -Note 1003 select `test`.`t2`.`id` AS `id` from `test`.`t2` where <in_optimizer>(`test`.`t2`.`id`,<exists>(select 1 AS `1` having (<cache>(`test`.`t2`.`id`) = <ref_null_helper>(1)) union select 3 AS `3` having (<cache>(`test`.`t2`.`id`) = <ref_null_helper>(3)))) +Note 1003 select `test`.`t2`.`id` AS `id` from `test`.`t2` where <in_optimizer>(`test`.`t2`.`id`,<exists>(select 1 AS `1` having trigcond((<cache>(`test`.`t2`.`id`) = <ref_null_helper>(1))) union select 3 AS `3` having trigcond((<cache>(`test`.`t2`.`id`) = <ref_null_helper>(3))))) SELECT * FROM t2 WHERE id IN (SELECT 5 UNION SELECT 3); id SELECT * FROM t2 WHERE id IN (SELECT 5 UNION SELECT 2); @@ -907,7 +907,7 @@ id select_type table type possible_keys key key_len ref rows Extra 2 DEPENDENT SUBQUERY t2 ref_or_null a a 5 func 2 Using where; Using index 2 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 3 Using where Warnings: -Note 1003 select `test`.`t1`.`a` AS `a`,<in_optimizer>(`test`.`t1`.`a`,<exists>(select 1 AS `Not_used` from `test`.`t2` join `test`.`t3` where ((`test`.`t3`.`a` = `test`.`t2`.`a`) and ((<cache>(`test`.`t1`.`a`) = `test`.`t2`.`a`) or isnull(`test`.`t2`.`a`))) having <is_not_null_test>(`test`.`t2`.`a`))) AS `t1.a in (select t2.a from t2,t3 where t3.a=t2.a)` from `test`.`t1` +Note 1003 select `test`.`t1`.`a` AS `a`,<in_optimizer>(`test`.`t1`.`a`,<exists>(select 1 AS `Not_used` from `test`.`t2` join `test`.`t3` where ((`test`.`t3`.`a` = `test`.`t2`.`a`) and trigcond(((<cache>(`test`.`t1`.`a`) = `test`.`t2`.`a`) or isnull(`test`.`t2`.`a`)))) having trigcond(<is_not_null_test>(`test`.`t2`.`a`)))) AS `t1.a in (select t2.a from t2,t3 where t3.a=t2.a)` from `test`.`t1` drop table t1,t2,t3; create table t1 (a float); select 10.5 IN (SELECT * from t1 LIMIT 1); @@ -2817,19 +2817,19 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 8 2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 9 Using where Warnings: -Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'0') and ((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`))) having (<is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`)))) AS `test` from `test`.`t1` +Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'0') and trigcond((((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`))))) having trigcond((<is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1` explain extended SELECT one,two from t1 where ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = 'N'); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 8 Using where 2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 9 Using where Warnings: -Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` where <in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'N') and (<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) and (<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`)))) +Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` where <in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'N') and trigcond(((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) and (<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`)))))) explain extended SELECT one,two,ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = '0' group by one,two) as 'test' from t1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 8 2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 9 Using where; Using temporary; Using filesort Warnings: -Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where (`test`.`t2`.`flag` = _latin1'0') group by `test`.`t2`.`one`,`test`.`t2`.`two` having (((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)) and <is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`)))) AS `test` from `test`.`t1` +Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where (`test`.`t2`.`flag` = _latin1'0') group by `test`.`t2`.`one`,`test`.`t2`.`two` having trigcond((((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)) and <is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1` DROP TABLE t1,t2; CREATE TABLE t1 (a char(5), b char(5)); INSERT INTO t1 VALUES (NULL,'aaa'), ('aaa','aaa'); diff --git a/mysql-test/r/subselect3.result b/mysql-test/r/subselect3.result new file mode 100644 index 00000000000..5ab8e448b39 --- /dev/null +++ b/mysql-test/r/subselect3.result @@ -0,0 +1,153 @@ +drop table if exists t0, t1, t2, t3, t4; +create table t1 (oref int, grp int, ie int) ; +insert into t1 (oref, grp, ie) values +(1, 1, 1), +(1, 1, 1), +(1, 2, NULL), +(2, 1, 3), +(3, 1, 4), +(3, 2, NULL); +create table t2 (oref int, a int); +insert into t2 values +(1, 1), +(2, 2), +(3, 3), +(4, NULL), +(2, NULL); +select a, oref, a in (select max(ie) +from t1 where oref=t2.oref group by grp) from t2; +a oref a in (select max(ie) +from t1 where oref=t2.oref group by grp) +1 1 1 +2 2 0 +3 3 NULL +NULL 4 0 +NULL 2 NULL +explain extended +select a, oref, a in (select max(ie) +from t1 where oref=t2.oref group by grp) from t2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 +2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 6 Using where; Using temporary; Using filesort +Warnings: +Note 1276 Field or reference 't2.oref' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`oref` AS `oref`,<in_optimizer>(`test`.`t2`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = `test`.`t2`.`oref`) group by `test`.`t1`.`grp` having trigcond((<cache>(`test`.`t2`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))) AS `a in (select max(ie) +from t1 where oref=t2.oref group by grp)` from `test`.`t2` +explain extended +select a, oref from t2 +where a in (select max(ie) from t1 where oref=t2.oref group by grp); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 6 Using where; Using temporary; Using filesort +Warnings: +Note 1276 Field or reference 't2.oref' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t2`.`a` AS `a`,`test`.`t2`.`oref` AS `oref` from `test`.`t2` where <in_optimizer>(`test`.`t2`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = `test`.`t2`.`oref`) group by `test`.`t1`.`grp` having (<cache>(`test`.`t2`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`))))) +create table t3 (a int); +insert into t3 values (NULL), (NULL); +flush status; +select a in (select max(ie) from t1 where oref=4 group by grp) from t3; +a in (select max(ie) from t1 where oref=4 group by grp) +0 +0 +show status like 'Handler_read_rnd_next'; +Variable_name Value +Handler_read_rnd_next 11 +select ' ^ This must show 11' Z; +Z + ^ This must show 11 +explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t3 ALL NULL NULL NULL NULL 2 +2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 6 Using where; Using temporary; Using filesort +Warnings: +Note 1003 select <in_optimizer>(`test`.`t3`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = 4) group by `test`.`t1`.`grp` having trigcond((<cache>(`test`.`t3`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))) AS `a in (select max(ie) from t1 where oref=4 group by grp)` from `test`.`t3` +drop table t1, t2, t3; +create table t1 (a int, oref int, key(a)); +insert into t1 values +(1, 1), +(1, NULL), +(2, 3), +(2, NULL), +(3, NULL); +create table t2 (a int, oref int); +insert into t2 values (1, 1), (2,2), (NULL, 3), (NULL, 4); +select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2; +oref a Z +1 1 1 +2 2 0 +3 NULL NULL +4 NULL 0 +explain extended +select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 4 +2 DEPENDENT SUBQUERY t1 index_subquery a a 5 func 2 Using index; Using where +Warnings: +Note 1276 Field or reference 't2.oref' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t2`.`oref` AS `oref`,`test`.`t2`.`a` AS `a`,<in_optimizer>(`test`.`t2`.`a`,<exists>(<index_lookup>(<cache>(`test`.`t2`.`a`) in t1 on a checking NULL where (`test`.`t1`.`oref` = `test`.`t2`.`oref`)))) AS `Z` from `test`.`t2` +flush status; +select oref, a from t2 where a in (select a from t1 where oref=t2.oref); +oref a +1 1 +show status like '%Handler_read_rnd_next'; +Variable_name Value +Handler_read_rnd_next 5 +delete from t2; +insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0); +flush status; +select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2; +oref a Z +0 NULL 0 +0 NULL 0 +0 NULL 0 +0 NULL 0 +show status like '%Handler_read%'; +Variable_name Value +Handler_read_first 0 +Handler_read_key 0 +Handler_read_next 0 +Handler_read_prev 0 +Handler_read_rnd 0 +Handler_read_rnd_next 29 +select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z; +Z +No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1. +drop table t1, t2; +create table t1 (a int, b int, primary key (a)); +insert into t1 values (1,1), (3,1),(100,1); +create table t2 (a int, b int); +insert into t2 values (1,1),(2,1),(NULL,1),(NULL,0); +select a,b, a in (select a from t1 where t1.b = t2.b) Z from t2 ; +a b Z +1 1 1 +2 1 0 +NULL 1 NULL +NULL 0 0 +drop table t1, t2; +create table t1 (a int, b int, key(a)); +insert into t1 values +(0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9); +create table t2 like t1; +insert into t2 select * from t1; +update t2 set b=1; +create table t3 (a int, oref int); +insert into t3 values (1, 1), (NULL,1), (NULL,0); +select a, oref, +t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z +from t3; +a oref Z +1 1 1 +NULL 1 NULL +NULL 0 0 +explain extended +select a, oref, +t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z +from t3; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t3 ALL NULL NULL NULL NULL 3 +2 DEPENDENT SUBQUERY t1 ref_or_null a a 5 func 4 Using where +2 DEPENDENT SUBQUERY t2 ref a a 5 test.t1.b 1 Using where +Warnings: +Note 1276 Field or reference 't3.oref' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<in_optimizer>(`test`.`t3`.`a`,<exists>(select 1 AS `Not_used` from `test`.`t1` join `test`.`t2` where ((`test`.`t2`.`a` = `test`.`t1`.`b`) and (`test`.`t2`.`b` = `test`.`t3`.`oref`) and trigcond(((<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`) or isnull(`test`.`t1`.`a`)))) having trigcond(<is_not_null_test>(`test`.`t1`.`a`)))) AS `Z` from `test`.`t3` +drop table t1, t2, t3; diff --git a/mysql-test/t/subselect3.test b/mysql-test/t/subselect3.test new file mode 100644 index 00000000000..f7fbafdd17f --- /dev/null +++ b/mysql-test/t/subselect3.test @@ -0,0 +1,137 @@ +--disable_warnings +drop table if exists t0, t1, t2, t3, t4; +--enable_warnings + +# +# 1. Subquery with GROUP/HAVING +# +create table t1 (oref int, grp int, ie int) ; +insert into t1 (oref, grp, ie) values + (1, 1, 1), + (1, 1, 1), + (1, 2, NULL), + + (2, 1, 3), + + (3, 1, 4), + (3, 2, NULL); + +# Ok, for +# select max(ie) from t1 where oref=PARAM group by grp +# we'll have: +# 1 -> (1, NULL) matching + NULL +# 2 -> (3) non-matching +# 3 -> (3, NULL) non-matching + NULL +# 4 -> () nothing. + +create table t2 (oref int, a int); +insert into t2 values + (1, 1), + (2, 2), + (3, 3), + (4, NULL), + (2, NULL); + +# true, false, null, false, null +select a, oref, a in (select max(ie) + from t1 where oref=t2.oref group by grp) from t2; + +# This must have a trigcond +explain extended +select a, oref, a in (select max(ie) + from t1 where oref=t2.oref group by grp) from t2; + +# This must not have a trigcond: +explain extended +select a, oref from t2 +where a in (select max(ie) from t1 where oref=t2.oref group by grp); + + +# Non-correlated subquery, 2 NULL evaluations +create table t3 (a int); +insert into t3 values (NULL), (NULL); +flush status; +select a in (select max(ie) from t1 where oref=4 group by grp) from t3; +show status like 'Handler_read_rnd_next'; +select ' ^ This must show 11' Z; + +# This must show trigcond: +explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3; + +drop table t1, t2, t3; + +# +# 2. Subquery handled with 'index_subquery': +# +create table t1 (a int, oref int, key(a)); +insert into t1 values + (1, 1), + (1, NULL), + (2, 3), + (2, NULL), + (3, NULL); + +create table t2 (a int, oref int); +insert into t2 values (1, 1), (2,2), (NULL, 3), (NULL, 4); + +select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2; + +# The next explain shows "using index" but that is just incorrect display +# (there is a bug filed about this). +explain extended +select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2; + +flush status; +select oref, a from t2 where a in (select a from t1 where oref=t2.oref); +# This will only show access to t2: +show status like '%Handler_read_rnd_next'; + +# Check that repeated NULL-scans are not cached (subq. is not correlated): +delete from t2; +insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0); + +flush status; +select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2; +show status like '%Handler_read%'; +select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z; + +drop table t1, t2; + +# +# 3. Subquery handled with 'unique_index_subquery': +# +create table t1 (a int, b int, primary key (a)); +insert into t1 values (1,1), (3,1),(100,1); + +create table t2 (a int, b int); +insert into t2 values (1,1),(2,1),(NULL,1),(NULL,0); + +select a,b, a in (select a from t1 where t1.b = t2.b) Z from t2 ; + +drop table t1, t2; + +# +# 4. Subquery that is a join, with ref access +# +create table t1 (a int, b int, key(a)); +insert into t1 values + (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9); + +create table t2 like t1; +insert into t2 select * from t1; +update t2 set b=1; + +create table t3 (a int, oref int); +insert into t3 values (1, 1), (NULL,1), (NULL,0); +select a, oref, + t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z +from t3; + +# This must have trigcond in WHERE and HAVING: +explain extended +select a, oref, + t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z +from t3; + +drop table t1, t2, t3; + diff --git a/sql/item.h b/sql/item.h index 0cfb0b01fd8..566daa1aaee 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1959,6 +1959,16 @@ public: class Item_in_subselect; + +/* + An object of this class: + - Converts val_XXX() calls to ref->val_XXX_result() calls, like Item_ref. + - Sets owner->was_null=TRUE if it has returned a NULL value from any + val_XXX() function. This allows to inject an Item_ref_null_helper + object into subquery and then check if the subquery has produced a row + with NULL value. +*/ + class Item_ref_null_helper: public Item_ref { protected: diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 9a400d60ae6..540f67ba0ee 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -786,9 +786,41 @@ longlong Item_in_optimizer::val_int() { DBUG_ASSERT(fixed == 1); cache->store(args[0]); + if (cache->null_value) { - null_value= 1; + if (((Item_in_subselect*)args[1])->is_top_level_item()) + { + /* + We're evaluating "NULL IN (SELECT ...)". The result can be NULL or + FALSE, and we can return one instead of another. Just return NULL. + */ + null_value= 1; + } + else + { + if (!((Item_in_subselect*)args[1])->is_correlated && + result_for_null_param != UNKNOWN) + { + /* Use cached value from previous execution */ + null_value= result_for_null_param; + } + else + { + /* + We're evaluating "NULL IN (SELECT ...)". The result is: + FALSE if SELECT produces an empty set, or + NULL otherwise. + We disable the predicates we've pushed down into subselect, run the + subselect and see if it has produced any rows. + */ + ((Item_in_subselect*)args[1])->enable_pushed_conds= FALSE; + longlong tmp= args[1]->val_bool_result(); + result_for_null_param= null_value= + !((Item_in_subselect*)args[1])->engine->no_rows(); + ((Item_in_subselect*)args[1])->enable_pushed_conds= TRUE; + } + } return 0; } bool tmp= args[1]->val_bool_result(); diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index c8439cba303..acad1e51bc9 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -100,25 +100,44 @@ public: }; class Item_cache; +#define UNKNOWN ((my_bool)-1) + + +/* + Item_in_optimizer(left_expr, Item_in_subselect(...)) + + Item_in_optimizer is used to wrap an instance of Item_in_subselect. This + class does the following: + - Evaluate the left expression and store it in Item_cache_* object (to + avoid re-evaluating it many times during subquery execution) + - Shortcut the evaluation of "NULL IN (...)" to NULL in the cases where we + don't care if the result is NULL or FALSE. + + NOTE + It is not quite clear why the above listed functionality should be + placed into a separate class called 'Item_in_optimizer'. +*/ + class Item_in_optimizer: public Item_bool_func { protected: Item_cache *cache; bool save_cache; + /* + Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries: + UNKNOWN - "NULL in (SELECT ...)" has not yet been evaluated + FALSE - result is FALSE + TRUE - result is NULL + */ + my_bool result_for_null_param; public: Item_in_optimizer(Item *a, Item_in_subselect *b): - Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0), save_cache(0) + Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0), + save_cache(0), result_for_null_param(UNKNOWN) {} bool fix_fields(THD *, Item **); bool fix_left(THD *thd, Item **ref); bool is_null(); - /* - Item_in_optimizer item is special boolean function. On value request - (one of val, val_int or val_str methods) it evaluate left expression - of IN by storing it value in cache item (one of Item_cache* items), - then it test cache is it NULL. If left expression (cache) is NULL then - Item_in_optimizer return NULL, else it evaluate Item_in_subselect. - */ longlong val_int(); void cleanup(); const char *func_name() const { return "<in_optimizer>"; } @@ -256,9 +275,11 @@ public: class Item_maxmin_subselect; /* + trigcond<param>(arg) ::= param? arg : TRUE + The class Item_func_trig_cond is used for guarded predicates which are employed only for internal purposes. - A guarded predicates is an object consisting of an a regular or + A guarded predicate is an object consisting of an a regular or a guarded predicate P and a pointer to a boolean guard variable g. A guarded predicate P/g is evaluated to true if the value of the guard g is false, otherwise it is evaluated to the same value that @@ -276,6 +297,10 @@ class Item_maxmin_subselect; Objects of this class are built only for query execution after the execution plan has been already selected. That's why this class needs only val_int out of generic methods. + + Current uses of Item_func_trig_cond objects: + - To wrap selection conditions when executing outer joins + - To wrap condition that is pushed down into subquery */ class Item_func_trig_cond: public Item_bool_func @@ -1019,6 +1044,11 @@ public: /* Functions used by HAVING for rewriting IN subquery */ class Item_in_subselect; + +/* + This is like IS NOT NULL but it also remembers if it ever has + encountered a NULL. +*/ class Item_is_not_null_test :public Item_func_isnull { Item_in_subselect* owner; diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 7015f450aa7..489a647402e 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -37,7 +37,7 @@ inline Item * and_items(Item* cond, Item *item) Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), thd(0), substitution(0), engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0), - const_item_cache(1), engine_changed(0), changed(0) + const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE) { with_subselect= 1; reset(); @@ -192,16 +192,16 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) return res; } -bool Item_subselect::exec() +bool Item_subselect::exec(bool full_scan) { int res; - res= engine->exec(); + res= engine->exec(full_scan); if (engine_changed) { engine_changed= 0; - return exec(); + return exec(full_scan); } return (res); } @@ -441,13 +441,13 @@ bool Item_singlerow_subselect::null_inside() void Item_singlerow_subselect::bring_value() { - exec(); + exec(FALSE); } double Item_singlerow_subselect::val_real() { DBUG_ASSERT(fixed == 1); - if (!exec() && !value->null_value) + if (!exec(FALSE) && !value->null_value) { null_value= 0; return value->val_real(); @@ -462,7 +462,7 @@ double Item_singlerow_subselect::val_real() longlong Item_singlerow_subselect::val_int() { DBUG_ASSERT(fixed == 1); - if (!exec() && !value->null_value) + if (!exec(FALSE) && !value->null_value) { null_value= 0; return value->val_int(); @@ -476,7 +476,7 @@ longlong Item_singlerow_subselect::val_int() String *Item_singlerow_subselect::val_str(String *str) { - if (!exec() && !value->null_value) + if (!exec(FALSE) && !value->null_value) { null_value= 0; return value->val_str(str); @@ -491,7 +491,7 @@ String *Item_singlerow_subselect::val_str(String *str) my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value) { - if (!exec() && !value->null_value) + if (!exec(FALSE) && !value->null_value) { null_value= 0; return value->val_decimal(decimal_value); @@ -506,7 +506,7 @@ my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value) bool Item_singlerow_subselect::val_bool() { - if (!exec() && !value->null_value) + if (!exec(FALSE) && !value->null_value) { null_value= 0; return value->val_bool(); @@ -557,7 +557,8 @@ bool Item_in_subselect::test_limit(SELECT_LEX_UNIT *unit) Item_in_subselect::Item_in_subselect(Item * left_exp, st_select_lex *select_lex): - Item_exists_subselect(), optimizer(0), transformed(0), upper_item(0) + Item_exists_subselect(), optimizer(0), transformed(0), + enable_pushed_conds(TRUE), upper_item(0) { DBUG_ENTER("Item_in_subselect::Item_in_subselect"); left_expr= left_exp; @@ -602,7 +603,7 @@ void Item_exists_subselect::fix_length_and_dec() double Item_exists_subselect::val_real() { DBUG_ASSERT(fixed == 1); - if (exec()) + if (exec(FALSE)) { reset(); return 0; @@ -613,7 +614,7 @@ double Item_exists_subselect::val_real() longlong Item_exists_subselect::val_int() { DBUG_ASSERT(fixed == 1); - if (exec()) + if (exec(FALSE)) { reset(); return 0; @@ -624,7 +625,7 @@ longlong Item_exists_subselect::val_int() String *Item_exists_subselect::val_str(String *str) { DBUG_ASSERT(fixed == 1); - if (exec()) + if (exec(FALSE)) { reset(); return 0; @@ -637,7 +638,7 @@ String *Item_exists_subselect::val_str(String *str) my_decimal *Item_exists_subselect::val_decimal(my_decimal *decimal_value) { DBUG_ASSERT(fixed == 1); - if (exec()) + if (exec(FALSE)) { reset(); return 0; @@ -650,7 +651,7 @@ my_decimal *Item_exists_subselect::val_decimal(my_decimal *decimal_value) bool Item_exists_subselect::val_bool() { DBUG_ASSERT(fixed == 1); - if (exec()) + if (exec(FALSE)) { reset(); return 0; @@ -668,7 +669,7 @@ double Item_in_subselect::val_real() DBUG_ASSERT(0); DBUG_ASSERT(fixed == 1); null_value= 0; - if (exec()) + if (exec(!enable_pushed_conds)) { reset(); null_value= 1; @@ -689,7 +690,7 @@ longlong Item_in_subselect::val_int() DBUG_ASSERT(0); DBUG_ASSERT(fixed == 1); null_value= 0; - if (exec()) + if (exec(!enable_pushed_conds)) { reset(); null_value= 1; @@ -710,7 +711,7 @@ String *Item_in_subselect::val_str(String *str) DBUG_ASSERT(0); DBUG_ASSERT(fixed == 1); null_value= 0; - if (exec()) + if (exec(!enable_pushed_conds)) { reset(); null_value= 1; @@ -730,7 +731,7 @@ bool Item_in_subselect::val_bool() { DBUG_ASSERT(fixed == 1); null_value= 0; - if (exec()) + if (exec(!enable_pushed_conds)) { reset(); null_value= 1; @@ -750,7 +751,7 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value) DBUG_ASSERT(0); null_value= 0; DBUG_ASSERT(fixed == 1); - if (exec()) + if (exec(!enable_pushed_conds)) { reset(); null_value= 1; @@ -763,7 +764,51 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value) } -/* Rewrite a single-column IN/ALL/ANY subselect. */ +/* + Rewrite a single-column IN/ALL/ANY subselect + + SYNOPSIS + Item_in_subselect::single_value_transformer() + join + func + + DESCRIPTION + Rewrite a single-column subquery using rule-based approach. The subquery + + oe $cmp$ (SELECT sel FROM ... WHERE subq_where HAVING subq_having) + + First, try to convert the subquery to scalar-result subquery in one of + the forms: + + - oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect + - oe $cmp$ <max>(SELECT ...) // handled by Item_maxminsubselect + + If that fails, the subquery will be handled with class Item_in_optimizer, + Inject the predicates into subquery, i.e. convert it to: + + - If the subquery has aggregates, GROUP BY, or HAVING, convert to + + SELECT sel FROM ... HAVING subq_having AND + trigcond(oe $cmp$ ref_or_null_helper<ie>) + + the addition is wrapped into trigger only when we want to distinguish + between NULL and FALSE results. + + - Else, if we don't care if subquery result is NULL or FALSE, convert to + + SELECT 1 ... WHERE (oe $CMP$ ie) AND subq_where + + - Else convert to: + + SELECT 1 WHERE ... + WHERE subq_where AND trigcond((oe $CMP$ ie) OR ie IS NULL) + HAVING subq_having AND trigcond(<is_not_null_test>(ie)) + + RETURN + RES_OK - Transformed successfully (or done nothing?) + RES_REDUCE - The subquery was reduced to non-subquery + RES_ERROR - Error +*/ Item_subselect::trans_res Item_in_subselect::single_value_transformer(JOIN *join, @@ -896,8 +941,12 @@ Item_in_subselect::single_value_transformer(JOIN *join, select_lex->uncacheable|= UNCACHEABLE_DEPENDENT; /* Add the left part of a subselect to a WHERE or HAVING clause of - the right part, e.g. SELECT 1 IN (SELECT a FROM t1) => - SELECT Item_in_optimizer(1, SELECT a FROM t1 WHERE a=1) + the right part, e.g. + + SELECT 1 IN (SELECT a FROM t1) => + + SELECT Item_in_optimizer(1, SELECT a FROM t1 WHERE a=1) + HAVING is used only if the right part contains a SUM function, a GROUP BY or a HAVING clause. */ @@ -912,10 +961,15 @@ Item_in_subselect::single_value_transformer(JOIN *join, ref_pointer_array, (char *)"<ref>", this->full_name())); -#ifdef CORRECT_BUT_TOO_SLOW_TO_BE_USABLE - if (!abort_on_null && left_expr->maybe_null) - item= new Item_cond_or(new Item_func_isnull(left_expr), item); -#endif + if (!abort_on_null && ((Item*)select_lex->item_list.head())->maybe_null) + { + /* + We can encounter "NULL IN (SELECT ...)". Wrap the added condition + within a trigger. + */ + item= new Item_func_trig_cond(item, &enable_pushed_conds); + } + /* AND and comparison functions can't be changed during fix_fields() we can assign select_lex->having here, and pass 0 as last @@ -944,10 +998,13 @@ Item_in_subselect::single_value_transformer(JOIN *join, select_lex->item_list.push_back(new Item_int("Not_used", (longlong) 1, 21)); select_lex->ref_pointer_array[0]= select_lex->item_list.head(); + item= func->create(expr, item); if (!abort_on_null && orig_item->maybe_null) { - having= new Item_is_not_null_test(this, having); + having= + new Item_func_trig_cond(new Item_is_not_null_test(this, having), + &enable_pushed_conds); /* Item_is_not_null_test can't be changed during fix_fields() we can assign select_lex->having here, and pass 0 as last @@ -967,12 +1024,15 @@ Item_in_subselect::single_value_transformer(JOIN *join, select_lex->having_fix_field= 0; if (tmp) DBUG_RETURN(RES_ERROR); + /* + NOTE: It is important that we add this "IS NULL" here, even when + orig_item can't be NULL. This is needed so that this predicate is + only used by ref[_or_null] analyzer (and, e.g. is not used by const + propagation). + */ item= new Item_cond_or(item, new Item_func_isnull(orig_item)); -#ifdef CORRECT_BUT_TOO_SLOW_TO_BE_USABLE - if (left_expr->maybe_null) - item= new Item_cond_or(new Item_func_isnull(left_expr), item); -#endif + item= new Item_func_trig_cond(item, &enable_pushed_conds); } item->name= (char *)in_additional_cond; /* @@ -999,13 +1059,14 @@ Item_in_subselect::single_value_transformer(JOIN *join, we can assign select_lex->having here, and pass 0 as last argument (reference) to fix_fields() */ - select_lex->having= - join->having= - func->create(expr, + Item *new_having= + func->create(expr, new Item_ref_null_helper(&select_lex->context, this, select_lex->ref_pointer_array, (char *)"<no matter>", (char *)"<result>")); + new_having= new Item_func_trig_cond(new_having, &enable_pushed_conds); + select_lex->having= join->having= new_having; select_lex->having_fix_field= 1; /* @@ -1210,6 +1271,8 @@ Item_in_subselect::row_value_transformer(JOIN *join) where_item= and_items(where_item, item); } + if (where_item) + where_item= new Item_func_trig_cond(where_item, &enable_pushed_conds); /* AND can't be changed during fix_fields() we can assign select_lex->where here, and pass 0 as last @@ -1223,6 +1286,8 @@ Item_in_subselect::row_value_transformer(JOIN *join) if (having_item) { bool res; + having_item= new Item_func_trig_cond(having_item, &enable_pushed_conds); + select_lex->having= join->having= and_items(join->having, having_item); select_lex->having->top_level_item(); /* @@ -1439,6 +1504,27 @@ bool subselect_union_engine::is_executed() const } +/* + Check if last execution of the subquery engine produced any rows + + SYNOPSIS + subselect_union_engine::no_rows() + + DESCRIPTION + Check if last execution of the subquery engine produced any rows. The + return value is undefined if last execution ended in an error. + + RETURN + TRUE - Last subselect execution has produced no rows + FALSE - Otherwise +*/ + +bool subselect_union_engine::no_rows() +{ + /* Check if we got any rows when reading UNION result from temp. table: */ + return test(!unit->fake_select_lex->join->send_records); +} + void subselect_uniquesubquery_engine::cleanup() { DBUG_ENTER("subselect_uniquesubquery_engine::cleanup"); @@ -1504,6 +1590,29 @@ int subselect_uniquesubquery_engine::prepare() return 1; } + +/* + Check if last execution of the subquery engine produced any rows + + SYNOPSIS + subselect_single_select_engine::no_rows() + + DESCRIPTION + Check if last execution of the subquery engine produced any rows. The + return value is undefined if last execution ended in an error. + + RETURN + TRUE - Last subselect execution has produced no rows + FALSE - Otherwise +*/ + +bool subselect_single_select_engine::no_rows() +{ +// return test(!join->send_records); + return !item->assigned(); +} + + static Item_result set_row(List<Item> &item_list, Item *item, Item_cache **row, bool *maybe_null) { @@ -1557,7 +1666,11 @@ void subselect_uniquesubquery_engine::fix_length_and_dec(Item_cache **row) DBUG_ASSERT(0); } -int subselect_single_select_engine::exec() +int init_read_record_seq(JOIN_TAB *tab); +int join_read_always_key_or_null(JOIN_TAB *tab); +int join_read_next_same_or_null(READ_RECORD *info); + +int subselect_single_select_engine::exec(bool full_scan) { DBUG_ENTER("subselect_single_select_engine::exec"); char const *save_where= thd->where; @@ -1595,7 +1708,43 @@ int subselect_single_select_engine::exec() if (!executed) { item->reset_value_registration(); + if (full_scan) + { + /* + We should not apply optimizations based on the condition that was + pushed down into the subquery. Those optimizations are ref[_or_null] + acceses. Change them to be full table scans. + */ + for (uint i=join->const_tables ; i < join->tables ; i++) + { + JOIN_TAB *tab=join->join_tab+i; + if (tab->keyuse && tab->keyuse->outer_ref) + { + tab->read_first_record= init_read_record_seq; + tab->read_record.record= tab->table->record[0]; + tab->read_record.thd= join->thd; + tab->read_record.ref_length= tab->table->file->ref_length; + } + } + } + join->exec(); + + if (full_scan) + { + /* Enable the optimizations back */ + for (uint i=join->const_tables ; i < join->tables ; i++) + { + JOIN_TAB *tab=join->join_tab+i; + if (tab->keyuse && tab->keyuse->outer_ref) + { + tab->read_record.record= 0; + tab->read_record.ref_length= 0; + tab->read_first_record= join_read_always_key_or_null; + tab->read_record.read_record= join_read_next_same_or_null; + } + } + } executed= 1; thd->where= save_where; thd->lex->current_select= save_select; @@ -1606,29 +1755,161 @@ int subselect_single_select_engine::exec() DBUG_RETURN(0); } -int subselect_union_engine::exec() +int subselect_union_engine::exec(bool full_scan) { char const *save_where= thd->where; + /* + Ignore the full_scan parameter: the pushed down predicates are only used + for filtering, and the caller has disabled them if necessary. + */ int res= unit->exec(); thd->where= save_where; return res; } -int subselect_uniquesubquery_engine::exec() +/* + Search for at least on row satisfying select condition + + SYNOPSIS + subselect_uniquesubquery_engine::scan_table() + + DESCRIPTION + Scan the table using sequential access until we find at least one row + satisfying select condition. + + The result of this function (info about whether a row was found) is + stored in this->empty_result_set. + + RETURN + FALSE - OK + TRUE - Error +*/ + +int subselect_uniquesubquery_engine::scan_table() { - DBUG_ENTER("subselect_uniquesubquery_engine::exec"); int error; TABLE *table= tab->table; - for (store_key **copy=tab->ref.key_copy ; *copy ; copy++) + DBUG_ENTER("subselect_uniquesubquery_engine::scan_table"); + + empty_result_set= TRUE; + bool is_uncorrelated= !cond || !(cond->used_tables() & OUTER_REF_TABLE_BIT); + + if (table->file->inited) + table->file->ha_index_end(); + + table->file->ha_rnd_init(1); + table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); + table->null_row= 0; + for (;;) + { + error=table->file->rnd_next(table->record[0]); + if (error && error != HA_ERR_END_OF_FILE) + { + error= report_error(table, error); + break; + } + /* No more rows */ + if (table->status) + break; + + if (!cond || cond->val_int()) + { + empty_result_set= FALSE; + break; + } + } + + table->file->ha_rnd_end(); + DBUG_RETURN(error != 0); +} + + +/* + Copy ref key and check for null parts in it + + SYNOPSIS + subselect_uniquesubquery_engine::copy_ref_key() + + DESCRIPTION + Copy ref key and check for null parts in it. + + RETURN + FALSE - ok, index lookup key without keys copied. + TRUE - an error occured while copying the key +*/ + +bool subselect_uniquesubquery_engine::copy_ref_key() +{ + DBUG_ENTER("subselect_uniquesubquery_engine::copy_ref_key"); + + for (store_key **copy= tab->ref.key_copy ; *copy ; copy++) { - if ((tab->ref.key_err= (*copy)->copy()) & 1) + tab->ref.key_err= (*copy)->copy(); + + /* + When there is a NULL part in the key we don't need to make index + lookup for such key thus we don't need to copy whole key. + If we later should do a sequential scan return OK. Fail otherwise. + + See also the comment for the subselect_uniquesubquery_engine::exec() + function. + */ + null_keypart= (*copy)->null_key; + bool top_level= ((Item_in_subselect *) item)->is_top_level_item(); + if (null_keypart && !top_level) + break; + if ((tab->ref.key_err) & 1 || (null_keypart && top_level)) { - table->status= STATUS_NOT_FOUND; + tab->table->status= STATUS_NOT_FOUND; DBUG_RETURN(1); } } + DBUG_RETURN(0); +} + +/* + Execute subselect + + SYNOPSIS + subselect_uniquesubquery_engine::exec() + + DESCRIPTION + Find rows corresponding to the ref key using index access. + If some part of the lookup key is NULL, then we're evaluating + NULL IN (SELECT ... ) + This is a special case, we don't need to search for NULL in the table, + instead, the result value is + - NULL if select produces empty row set + - FALSE otherwise. + + In some cases (IN subselect is a top level item, i.e. abort_on_null==TRUE) + the caller doesn't distinguish between NULL and FALSE result and we just + return FALSE. + Otherwise we make a full table scan to see if there is at least one matching row. + + NOTE + + RETURN + FALSE - ok + TRUE - an error occured while scanning +*/ + +int subselect_uniquesubquery_engine::exec(bool full_scan) +{ + DBUG_ENTER("subselect_uniquesubquery_engine::exec"); + int error; + TABLE *table= tab->table; + + /* TODO: change to use of 'full_scan' here? */ + if (copy_ref_key()) + DBUG_RETURN(1); + + if (null_keypart) + DBUG_RETURN(scan_table()); + if (!table->file->inited) table->file->ha_index_init(tab->ref.key); error= table->file->index_read(table->record[0], @@ -1657,14 +1938,35 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine() } -int subselect_indexsubquery_engine::exec() +/* + Index-lookup subselect 'engine' - run the subquery + + SYNOPSIS + subselect_uniquesubquery_engine:exec() + full_scan + + DESCRIPTION + Resolve subquery using index lookup(s). + First look for specified constant, + If not found and we need to check for NULLs, do that too. + + NULL IN (SELECT ...) is a special case. + + RETURN + 0 + 1 +*/ + +int subselect_indexsubquery_engine::exec(bool full_scan) { - DBUG_ENTER("subselect_indexsubselect_engine::exec"); + DBUG_ENTER("subselect_indexsubquery_engine::exec"); int error; bool null_finding= 0; TABLE *table= tab->table; ((Item_in_subselect *) item)->value= 0; + empty_result_set= TRUE; + null_keypart= 0; if (check_null) { @@ -1673,14 +1975,12 @@ int subselect_indexsubquery_engine::exec() ((Item_in_subselect *) item)->was_null= 0; } - for (store_key **copy=tab->ref.key_copy ; *copy ; copy++) - { - if ((tab->ref.key_err= (*copy)->copy()) & 1) - { - table->status= STATUS_NOT_FOUND; - DBUG_RETURN(1); - } - } + /* Copy the ref key and check for nulls... */ + if (copy_ref_key()) + DBUG_RETURN(1); + + if (null_keypart) + DBUG_RETURN(scan_table()); if (!table->file->inited) table->file->ha_index_init(tab->ref.key); diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 45df4f3880d..9410dbc465e 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -60,6 +60,9 @@ public: /* subquery is transformed */ bool changed; + /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */ + bool is_correlated; + enum trans_res {RES_OK, RES_REDUCE, RES_ERROR}; enum subs_type {UNKNOWN_SUBS, SINGLEROW_SUBS, EXISTS_SUBS, IN_SUBS, ALL_SUBS, ANY_SUBS}; @@ -92,7 +95,7 @@ public: return null_value; } bool fix_fields(THD *thd, Item **ref); - virtual bool exec(); + virtual bool exec(bool full_scan); virtual void fix_length_and_dec(); table_map used_tables() const; table_map not_null_tables() const { return 0; } @@ -215,7 +218,20 @@ public: friend class subselect_indexsubquery_engine; }; -/* IN subselect */ + +/* + IN subselect: this represents "left_exr IN (SELECT ...)" + + This class has: + - (as a descendant of Item_subselect) a "subquery execution engine" which + allows it to evaluate subqueries. (and this class participates in + execution by having was_null variable where part of execution result + is stored. + - Transformation methods (todo: more on this). + + This class is not used directly, it is "wrapped" into Item_in_optimizer + which provides some small bits of subquery evaluation. +*/ class Item_in_subselect :public Item_exists_subselect { @@ -231,12 +247,14 @@ protected: bool abort_on_null; bool transformed; public: + /* Used to trigger on/off conditions that were pushed down to subselect */ + bool enable_pushed_conds; Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery Item_in_subselect(Item * left_expr, st_select_lex *select_lex); Item_in_subselect() :Item_exists_subselect(), optimizer(0), abort_on_null(0), transformed(0), - upper_item(0) + enable_pushed_conds(TRUE), upper_item(0) {} subs_type substype() { return IN_SUBS; } @@ -256,6 +274,7 @@ public: my_decimal *val_decimal(my_decimal *); bool val_bool(); void top_level_item() { abort_on_null=1; } + inline bool is_top_level_item() { return abort_on_null; } bool test_limit(st_select_lex_unit *unit); void print(String *str); bool fix_fields(THD *thd, Item **ref); @@ -313,7 +332,28 @@ public: THD * get_thd() { return thd; } virtual int prepare()= 0; virtual void fix_length_and_dec(Item_cache** row)= 0; - virtual int exec()= 0; + /* + Execute the engine + + SYNOPSIS + exec() + full_scan TRUE - Pushed-down predicates are disabled, the engine + must disable made based on those predicates. + FALSE - Pushed-down predicates are in effect. + DESCRIPTION + Execute the engine. The result of execution is subquery value that is + either captured by previously set up select_result-based 'sink' or + stored somewhere by the exec() method itself. + + A required side effect: if full_scan==TRUE, subselect_engine->no_rows() + should return correct result. + + RETURN + 0 - OK + 1 - Either an execution error, or the engine was be "changed", and + caller should call exec() again for the new engine. + */ + virtual int exec(bool full_scan)= 0; virtual uint cols()= 0; /* return number of columns in select */ virtual uint8 uncacheable()= 0; /* query is uncacheable */ enum Item_result type() { return res_type; } @@ -325,6 +365,8 @@ public: virtual bool change_result(Item_subselect *si, select_subselect *result)= 0; virtual bool no_tables()= 0; virtual bool is_executed() const { return FALSE; } + /* Check if subquery produced any rows during last query execution */ + virtual bool no_rows() = 0; }; @@ -342,7 +384,7 @@ public: void cleanup(); int prepare(); void fix_length_and_dec(Item_cache** row); - int exec(); + int exec(bool full_scan); uint cols(); uint8 uncacheable(); void exclude(); @@ -351,6 +393,7 @@ public: bool change_result(Item_subselect *si, select_subselect *result); bool no_tables(); bool is_executed() const { return executed; } + bool no_rows(); }; @@ -364,7 +407,7 @@ public: void cleanup(); int prepare(); void fix_length_and_dec(Item_cache** row); - int exec(); + int exec(bool full_scan); uint cols(); uint8 uncacheable(); void exclude(); @@ -373,6 +416,7 @@ public: bool change_result(Item_subselect *si, select_subselect *result); bool no_tables(); bool is_executed() const; + bool no_rows(); }; @@ -382,6 +426,12 @@ class subselect_uniquesubquery_engine: public subselect_engine protected: st_join_table *tab; Item *cond; + /* + TRUE<=> last execution produced empty set. Valid only when left + expression is NULL. + */ + bool empty_result_set; + bool null_keypart; /* TRUE <=> constructed search tuple has a NULL */ public: // constructor can assign THD because it will be called after JOIN::prepare @@ -395,7 +445,7 @@ public: void cleanup(); int prepare(); void fix_length_and_dec(Item_cache** row); - int exec(); + int exec(bool full_scan); uint cols() { return 1; } uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } void exclude(); @@ -403,11 +453,15 @@ public: void print (String *str); bool change_result(Item_subselect *si, select_subselect *result); bool no_tables(); + int scan_table(); + bool copy_ref_key(); + bool no_rows() { return empty_result_set; } }; class subselect_indexsubquery_engine: public subselect_uniquesubquery_engine { + /* FALSE for 'ref', TRUE for 'ref-or-null'. */ bool check_null; public: @@ -418,7 +472,7 @@ public: :subselect_uniquesubquery_engine(thd, tab_arg, subs, where), check_null(chk_null) {} - int exec(); + int exec(bool full_scan); void print (String *str); }; diff --git a/sql/records.cc b/sql/records.cc index b352f9f395a..3e254fa3648 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -20,7 +20,7 @@ #include "mysql_priv.h" static int rr_quick(READ_RECORD *info); -static int rr_sequential(READ_RECORD *info); +int rr_sequential(READ_RECORD *info); static int rr_from_tempfile(READ_RECORD *info); static int rr_unpack_from_tempfile(READ_RECORD *info); static int rr_unpack_from_buffer(READ_RECORD *info); @@ -184,6 +184,7 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table, } /* init_read_record */ + void end_read_record(READ_RECORD *info) { /* free cache if used */ if (info->cache) @@ -289,7 +290,7 @@ static int rr_index(READ_RECORD *info) } -static int rr_sequential(READ_RECORD *info) +int rr_sequential(READ_RECORD *info) { int tmp; while ((tmp=info->file->rnd_next(info->record))) diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index af81960f9ef..5eb6ea25b68 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1179,7 +1179,7 @@ void st_select_lex::init_select() select_limit= 0; /* denotes the default limit = HA_POS_ERROR */ offset_limit= 0; /* denotes the default offset = 0 */ with_sum_func= 0; - + is_correlated= 0; } /* @@ -1373,6 +1373,8 @@ void st_select_lex::mark_as_dependent(SELECT_LEX *last) SELECT_LEX_UNIT *munit= s->master_unit(); munit->uncacheable|= UNCACHEABLE_DEPENDENT; } + is_correlated= TRUE; + this->master_unit()->item->is_correlated= TRUE; } bool st_select_lex_node::set_braces(bool value) { return 1; } diff --git a/sql/sql_lex.h b/sql/sql_lex.h index a3173b73d6d..027b012542e 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -470,7 +470,7 @@ public: void set_thd(THD *thd_arg) { thd= thd_arg; } friend void lex_start(THD *thd, uchar *buf, uint length); - friend int subselect_union_engine::exec(); + friend int subselect_union_engine::exec(bool); List<Item> *get_unit_column_types(); }; @@ -562,6 +562,8 @@ public: query processing end even if we use temporary table */ bool subquery_in_having; + /* TRUE <=> this SELECT is correlated w.r.t. some ancestor select */ + bool is_correlated; /* This variable is required to ensure proper work of subqueries and stored procedures. Generally, one should use the states of diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 0f0642280ce..df333e7c9ab 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -158,8 +158,8 @@ static int join_read_prev_same(READ_RECORD *info); static int join_read_prev(READ_RECORD *info); static int join_ft_read_first(JOIN_TAB *tab); static int join_ft_read_next(READ_RECORD *info); -static int join_read_always_key_or_null(JOIN_TAB *tab); -static int join_read_next_same_or_null(READ_RECORD *info); +int join_read_always_key_or_null(JOIN_TAB *tab); +int join_read_next_same_or_null(READ_RECORD *info); static COND *make_cond_for_table(COND *cond,table_map table, table_map used_table); static Item* part_of_refkey(TABLE *form,Field *field); @@ -512,11 +512,12 @@ err: DBUG_RETURN(-1); /* purecov: inspected */ } + /* test if it is known for optimisation IN subquery - SYNOPSYS - JOIN::test_in_subselect + SYNOPSIS + JOIN::test_in_subselect() where - pointer for variable in which conditions should be stored if subquery is known @@ -551,6 +552,35 @@ bool JOIN::test_in_subselect(Item **where) /* + Check if the passed HAVING clause is a clause added by subquery optimizer + + SYNOPSIS + is_having_subq_predicates() + having Having clause + + RETURN + TRUE The passed HAVING clause was added by the subquery optimizer + FALSE Otherwise +*/ + +bool is_having_subq_predicates(Item *having) +{ + if (having->type() == Item::FUNC_ITEM) + { + if (((Item_func *) having)->functype() == Item_func::ISNOTNULLTEST_FUNC) + return TRUE; + if (((Item_func *) having)->functype() == Item_func::TRIG_COND_FUNC) + { + having= ((Item_func*)having)->arguments()[0]; + if (((Item_func *) having)->functype() == Item_func::ISNOTNULLTEST_FUNC) + return TRUE; + } + return TRUE; + } + return FALSE; +} + +/* global select optimisation. return 0 - success 1 - error @@ -1016,9 +1046,7 @@ JOIN::optimize() } } else if (join_tab[0].type == JT_REF_OR_NULL && join_tab[0].ref.items[0]->name == in_left_expr_name && - having->type() == Item::FUNC_ITEM && - ((Item_func *) having)->functype() == - Item_func::ISNOTNULLTEST_FUNC) + is_having_subq_predicates(having)) { join_tab[0].type= JT_INDEX_SUBQUERY; error= 0; @@ -2512,6 +2540,9 @@ typedef struct key_field_t { // Used when finding key fields when val IS NULL. */ bool null_rejecting; + + /* TRUE<=> This ref access is an outer subquery reference access */ + bool outer_ref; } KEY_FIELD; /* Values in optimize */ @@ -2810,6 +2841,7 @@ add_key_field(KEY_FIELD **key_fields,uint and_level, Item_func *cond, cond->functype() == Item_func::MULT_EQUAL_FUNC) && ((*value)->type() == Item::FIELD_ITEM) && ((Item_field*)*value)->field->maybe_null()); + (*key_fields)->outer_ref= FALSE; (*key_fields)++; } @@ -2868,7 +2900,7 @@ add_key_equal_fields(KEY_FIELD **key_fields, uint and_level, } static void -add_key_fields(KEY_FIELD **key_fields,uint *and_level, +add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, COND *cond, table_map usable_tables, SARGABLE_PARAM **sargables) { @@ -2881,28 +2913,54 @@ add_key_fields(KEY_FIELD **key_fields,uint *and_level, { Item *item; while ((item=li++)) - add_key_fields(key_fields,and_level,item,usable_tables,sargables); + add_key_fields(join, key_fields, and_level, item, usable_tables, + sargables); for (; org_key_fields != *key_fields ; org_key_fields++) org_key_fields->level= *and_level; } else { (*and_level)++; - add_key_fields(key_fields,and_level,li++,usable_tables,sargables); + add_key_fields(join, key_fields, and_level, li++, usable_tables, + sargables); Item *item; while ((item=li++)) { KEY_FIELD *start_key_fields= *key_fields; (*and_level)++; - add_key_fields(key_fields,and_level,item,usable_tables,sargables); + add_key_fields(join, key_fields, and_level, item, usable_tables, + sargables); *key_fields=merge_key_fields(org_key_fields,start_key_fields, *key_fields,++(*and_level)); } } return; } - /* If item is of type 'field op field/constant' add it to key_fields */ + /* + Subquery optimization: check if the encountered condition is one + added by condition push down into subquery. + */ + { + if (cond->type() == Item::FUNC_ITEM && + ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC) + { + cond= ((Item_func*)cond)->arguments()[0]; + if (!join->group_list && !join->order && + join->unit->item && + join->unit->item->substype() == Item_subselect::IN_SUBS && + !join->unit->first_select()->next_select()) + { + add_key_fields(join, key_fields, and_level, cond, usable_tables, + sargables); + // Indicate that this ref access candidate is for subquery lookup: + (*key_fields)[-1].outer_ref= TRUE; + } + return; + } + } + + /* If item is of type 'field op field/constant' add it to key_fields */ if (cond->type() != Item::FUNC_ITEM) return; Item_func *cond_func= (Item_func*) cond; @@ -3076,6 +3134,7 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field) keyuse.used_tables=key_field->val->used_tables(); keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; keyuse.null_rejecting= key_field->null_rejecting; + keyuse.outer_ref= key_field->outer_ref; VOID(insert_dynamic(keyuse_array,(gptr) &keyuse)); } } @@ -3198,7 +3257,7 @@ sort_keyuse(KEYUSE *a,KEYUSE *b) Here we can add 'ref' access candidates for t1 and t2, but not for t3. */ -static void add_key_fields_for_nj(TABLE_LIST *nested_join_table, +static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table, KEY_FIELD **end, uint *and_level, SARGABLE_PARAM **sargables) { @@ -3210,12 +3269,13 @@ static void add_key_fields_for_nj(TABLE_LIST *nested_join_table, while ((table= li++)) { if (table->nested_join) - add_key_fields_for_nj(table, end, and_level, sargables); + add_key_fields_for_nj(join, table, end, and_level, sargables); else if (!table->on_expr) tables |= table->table->map; } - add_key_fields(end, and_level, nested_join_table->on_expr, tables, sargables); + add_key_fields(join, end, and_level, nested_join_table->on_expr, tables, + sargables); } @@ -3290,7 +3350,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, return TRUE; if (cond) { - add_key_fields(&end,&and_level,cond,normal_tables,sargables); + add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables, + sargables); for (; field != end ; field++) { add_key_part(keyuse,field); @@ -3312,8 +3373,9 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, into account as well. */ if (*join_tab[i].on_expr_ref) - add_key_fields(&end,&and_level,*join_tab[i].on_expr_ref, - join_tab[i].table->map,sargables); + add_key_fields(join_tab->join, &end, &and_level, + *join_tab[i].on_expr_ref, + join_tab[i].table->map, sargables); } /* Process ON conditions for the nested joins */ @@ -3323,7 +3385,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, while ((table= li++)) { if (table->nested_join) - add_key_fields_for_nj(table, &end, &and_level, sargables); + add_key_fields_for_nj(join_tab->join, table, &end, &and_level, + sargables); } } @@ -10784,6 +10847,13 @@ join_init_quick_read_record(JOIN_TAB *tab) } +int rr_sequential(READ_RECORD *info); +int init_read_record_seq(JOIN_TAB *tab) +{ + tab->read_record.read_record= rr_sequential; + return tab->read_record.file->ha_rnd_init(1); +} + static int test_if_quick_select(JOIN_TAB *tab) { @@ -10912,7 +10982,7 @@ join_ft_read_next(READ_RECORD *info) Reading of key with key reference and one part that may be NULL */ -static int +int join_read_always_key_or_null(JOIN_TAB *tab) { int res; @@ -10928,7 +10998,7 @@ join_read_always_key_or_null(JOIN_TAB *tab) } -static int +int join_read_next_same_or_null(READ_RECORD *info) { int error; diff --git a/sql/sql_select.h b/sql/sql_select.h index 30b8f834ddf..629b44538d8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -36,6 +36,8 @@ typedef struct keyuse_t { satisfied if val has NULL 'value'. */ bool null_rejecting; + /* TRUE<=> This ref access is an outer subquery reference access */ + bool outer_ref; } KEYUSE; class store_key; @@ -455,10 +457,11 @@ class store_key :public Sql_alloc Field *to_field; // Store data here char *null_ptr; char err; - public: +public: + bool null_key; /* TRUE <=> the value of the key has a null part */ enum store_key_result { STORE_KEY_OK, STORE_KEY_FATAL, STORE_KEY_CONV }; store_key(THD *thd, Field *field_arg, char *ptr, char *null, uint length) - :null_ptr(null),err(0) + :null_ptr(null), err(0), null_key(0) { if (field_arg->type() == FIELD_TYPE_BLOB) { @@ -496,6 +499,7 @@ class store_key_field: public store_key enum store_key_result copy() { copy_field.do_copy(©_field); + null_key= to_field->is_null(); return err != 0 ? STORE_KEY_FATAL : STORE_KEY_OK; } const char *name() const { return field_name; } @@ -516,8 +520,8 @@ public: enum store_key_result copy() { int res= item->save_in_field(to_field, 1); + null_key= to_field->is_null() || item->null_value; return (err != 0 || res > 2 ? STORE_KEY_FATAL : (store_key_result) res); - } const char *name() const { return "func"; } }; @@ -547,6 +551,7 @@ public: err= res; } } + null_key= to_field->is_null() || item->null_value; return (err > 2 ? STORE_KEY_FATAL : (store_key_result) err); } const char *name() const { return "const"; } |