Fix bug lp:1009187, mdev-373, mysql bug#58628

Analysis: The queries in question use the [unique | index]_subquery execution methods. These methods reuse the ref keys constructed by create_ref_for_key(). The way create_ref_for_key() works is that it doesn't store in ref.key_copy[] store_key elements that represent constants. In particular it doesn't store the store_key for NULL constants. The execution of [unique | index]_subquery calls subselect_uniquesubquery_engine::copy_ref_key, which in addition to copy the left IN argument into a index lookup key, is supposed to detect if the left IN argument contains NULLs. Since the store_key for the NULL constant is not copied into the key array, the null is not detected, and execution erroneously proceeds as if it should look for a complete match. Solution: The solution (unlike MySQL) is to reuse already computed information about NULL presence. Item_in_optimizer::val_int already finds out if the left IN operand contains NULLs. The fix propagates this to the execution methods subselect_[unique | index]subquery_engine::exec so it knows if there were NULL values independent of the presence of keys. In addition the patch siplifies copy_ref_key() and the logic that hanldes the case of NULLs in the left IN operand.
author: unknown <timour@askmonty.org> 2012-09-14 11:26:01 +0300
committer: unknown <timour@askmonty.org> 2012-09-14 11:26:01 +0300
commit: b917fb63a638fd117e1e52d3bc29b57a81124fea (patch)
tree: 86d017a6bd2b296ec536fe4447efb344488715ac
parent: caea98a4173ed512522ea06f154ac37bf6b1552b (diff)
download: mariadb-git-b917fb63a638fd117e1e52d3bc29b57a81124fea.tar.gz
4 files changed, 180 insertions, 86 deletions
diff --git a/mysql-test/r/subselect4.result b/mysql-test/r/subselect4.result
index 8385ee6f4c0..952717c5c37 100644
--- a/mysql-test/r/subselect4.result
+++ b/mysql-test/r/subselect4.result
@@ -197,5 +197,86 @@ default(a)
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 DROP TABLE t;
 #
+# LP BUG#1009187, MDEV-373, MYSQL bug#58628
+# Wrong result for a query with [NOT] IN subquery predicate if
+# the left part of the predicate is explicit NULL
+#
+CREATE TABLE t1 (pk INT NOT NULL, i INT NOT NULL);
+INSERT INTO t1 VALUES (0,10), (1,20), (2,30), (3,40);
+CREATE TABLE t2a (pk INT NOT NULL, i INT NOT NULL, PRIMARY KEY(i,pk));
+INSERT INTO t2a VALUES (0,0), (1,1), (2,2), (3,3);
+CREATE TABLE t2b (pk INT, i INT);
+INSERT INTO t2b VALUES (0,0), (1,1), (2,2), (3,3);
+CREATE TABLE t2c (pk INT NOT NULL, i INT NOT NULL);
+INSERT INTO t2c VALUES (0,0), (1,1), (2,2), (3,3);
+create index it2c on t2c (i,pk);
+EXPLAIN
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	4	Using where
+2	DEPENDENT SUBQUERY	t2a	unique_subquery	PRIMARY	PRIMARY	8	const,test.t1.pk	1	Using index; Using where; Full scan on NULL key
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
+pk	i
+SELECT * FROM t1 WHERE 1+NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
+pk	i
+SELECT * FROM t1 WHERE NULL     IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) IS UNKNOWN;
+pk	i
+0	10
+1	20
+2	30
+3	40
+SELECT t1.pk, NULL NOT IN (SELECT t2a.i FROM t2a  WHERE t2a.pk = t1.pk) FROM t1;
+pk	NULL NOT IN (SELECT t2a.i FROM t2a  WHERE t2a.pk = t1.pk)
+0	NULL
+1	NULL
+2	NULL
+3	NULL
+EXPLAIN
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	4	Using where
+2	DEPENDENT SUBQUERY	t2b	ALL	NULL	NULL	NULL	NULL	4	Using where
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
+pk	i
+SELECT * FROM t1 WHERE NULL     IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) IS UNKNOWN;
+pk	i
+0	10
+1	20
+2	30
+3	40
+SELECT t1.pk, NULL NOT IN (SELECT t2b.i FROM t2b  WHERE t2b.pk = t1.pk) FROM t1;
+pk	NULL NOT IN (SELECT t2b.i FROM t2b  WHERE t2b.pk = t1.pk)
+0	NULL
+1	NULL
+2	NULL
+3	NULL
+EXPLAIN
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	4	Using where
+2	DEPENDENT SUBQUERY	t2c	index_subquery	it2c	it2c	8	const,test.t1.pk	2	Using index; Using where; Full scan on NULL key
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
+pk	i
+SELECT * FROM t1 WHERE NULL     IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) IS UNKNOWN;
+pk	i
+0	10
+1	20
+2	30
+3	40
+SELECT t1.pk, NULL NOT IN (SELECT t2c.i FROM t2c  WHERE t2c.pk = t1.pk) FROM t1;
+pk	NULL NOT IN (SELECT t2c.i FROM t2c  WHERE t2c.pk = t1.pk)
+0	NULL
+1	NULL
+2	NULL
+3	NULL
+EXPLAIN
+SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	4	Using where
+2	DEPENDENT SUBQUERY	t2a	eq_ref	PRIMARY	PRIMARY	8	const,test.t1.pk	2	Using where; Using index; Full scan on NULL key
+SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
+pk	i
+drop table t1, t2a, t2b, t2c;
+#
 # End of 5.1 tests.
 #
diff --git a/mysql-test/t/subselect4.test b/mysql-test/t/subselect4.test
index bb97f246488..3de2dfc394e 100644
--- a/mysql-test/t/subselect4.test
+++ b/mysql-test/t/subselect4.test
@@ -171,5 +171,49 @@ SELECT * FROM (SELECT default(a) FROM t GROUP BY a) d;
 DROP TABLE t;
 
 --echo #
+--echo # LP BUG#1009187, MDEV-373, MYSQL bug#58628
+--echo # Wrong result for a query with [NOT] IN subquery predicate if
+--echo # the left part of the predicate is explicit NULL
+--echo #
+
+CREATE TABLE t1 (pk INT NOT NULL, i INT NOT NULL);
+INSERT INTO t1 VALUES (0,10), (1,20), (2,30), (3,40);
+
+CREATE TABLE t2a (pk INT NOT NULL, i INT NOT NULL, PRIMARY KEY(i,pk));
+INSERT INTO t2a VALUES (0,0), (1,1), (2,2), (3,3);
+
+CREATE TABLE t2b (pk INT, i INT);
+INSERT INTO t2b VALUES (0,0), (1,1), (2,2), (3,3);
+
+CREATE TABLE t2c (pk INT NOT NULL, i INT NOT NULL);
+INSERT INTO t2c VALUES (0,0), (1,1), (2,2), (3,3);
+create index it2c on t2c (i,pk);
+
+EXPLAIN
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
+SELECT * FROM t1 WHERE 1+NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
+SELECT * FROM t1 WHERE NULL     IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) IS UNKNOWN;
+SELECT t1.pk, NULL NOT IN (SELECT t2a.i FROM t2a  WHERE t2a.pk = t1.pk) FROM t1;
+
+EXPLAIN
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
+SELECT * FROM t1 WHERE NULL     IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) IS UNKNOWN;
+SELECT t1.pk, NULL NOT IN (SELECT t2b.i FROM t2b  WHERE t2b.pk = t1.pk) FROM t1;
+
+EXPLAIN
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
+SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
+SELECT * FROM t1 WHERE NULL     IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) IS UNKNOWN;
+SELECT t1.pk, NULL NOT IN (SELECT t2c.i FROM t2c  WHERE t2c.pk = t1.pk) FROM t1;
+
+EXPLAIN
+SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
+SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
+
+drop table t1, t2a, t2b, t2c;
+
+--echo #
 --echo # End of 5.1 tests.
 --echo #
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index fc214a584ee..888c346c199 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1686,6 +1686,12 @@ bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
 }
 
 
+inline bool Item_in_subselect::left_expr_has_null()
+{
+  return (*(optimizer->get_cache()))->null_value;
+}
+
+
 Item_subselect::trans_res
 Item_allany_subselect::select_transformer(JOIN *join)
 {
@@ -2131,39 +2137,19 @@ int subselect_uniquesubquery_engine::scan_table()
     subselect_uniquesubquery_engine::copy_ref_key()
 
   DESCRIPTION
-    Copy ref key and check for null parts in it.
-    Depending on the nullability and conversion problems this function
-    recognizes and processes the following states :
-      1. Partial match on top level. This means IN has a value of FALSE
-         regardless of the data in the subquery table.
-         Detected by finding a NULL in the left IN operand of a top level
-         expression.
-         We may actually skip reading the subquery, so return TRUE to skip
-         the table scan in subselect_uniquesubquery_engine::exec and make
-         the value of the IN predicate a NULL (that is equal to FALSE on
-         top level).
-      2. No exact match when IN is nested inside another predicate.
-         Detected by finding a NULL in the left IN operand when IN is not
-         a top level predicate.
-         We cannot have an exact match. But we must proceed further with a
-         table scan to find out if it's a partial match (and IN has a value
-         of NULL) or no match (and IN has a value of FALSE).
-         So we return FALSE to continue with the scan and see if there are
-         any record that would constitute a partial match (as we cannot
-         determine that from the index).
-      3. Error converting the left IN operand to the column type of the
-         right IN operand. This counts as no match (and IN has the value of
-         FALSE). We mark the subquery table cursor as having no more rows
-         (to ensure that the processing that follows will not find a match)
-         and return FALSE, so IN is not treated as returning NULL.
+    Copy ref key and check for conversion problems.
 
+    If there is an error converting the left IN operand to the column type of
+    the right IN operand count it as no match. In this case IN has the value of
+    FALSE. We mark the subquery table cursor as having no more rows (to ensure
+    that the processing that follows will not find a match) and return FALSE,
+    so IN is not treated as returning NULL.
 
   RETURN
     FALSE - The value of the IN predicate is not known. Proceed to find the
-            value of the IN predicate using the determined values of
-            null_keypart and table->status.
-    TRUE  - IN predicate has a value of NULL. Stop the processing right there
-            and return NULL to the outer predicates.
+            value of the IN predicate using the copied key.
+    TRUE  - IN predicate has a value of FALSE. Stop the processing right there
+            and tell the outer predicate it is FALSE.
 */
 
 bool subselect_uniquesubquery_engine::copy_ref_key()
@@ -2177,30 +2163,6 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
     tab->ref.key_err= (*copy)->copy();
 
     /*
-      When there is a NULL part in the key we don't need to make index
-      lookup for such key thus we don't need to copy whole key.
-      If we later should do a sequential scan return OK. Fail otherwise.
-
-      See also the comment for the subselect_uniquesubquery_engine::exec()
-      function.
-    */
-    null_keypart= (*copy)->null_key;
-    if (null_keypart)
-    {
-      bool top_level= ((Item_in_subselect *) item)->is_top_level_item();
-      if (top_level)
-      {
-        /* Partial match on top level */
-        DBUG_RETURN(1);
-      }
-      else
-      {
-        /* No exact match when IN is nested inside another predicate */
-        break;
-      }
-    }
-
-    /*
       Check if the error is equal to STORE_KEY_FATAL. This is not expressed 
       using the store_key::store_key_result enum because ref.key_err is a 
       boolean and we want to detect both TRUE and STORE_KEY_FATAL from the 
@@ -2215,10 +2177,10 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
        IN operand. 
       */
       tab->table->status= STATUS_NOT_FOUND;
-      break;
+      DBUG_RETURN(true);
     }
   }
-  DBUG_RETURN(0);
+  DBUG_RETURN(false);
 }
 
 
@@ -2248,8 +2210,9 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
   NOTE
     
   RETURN
-    FALSE - ok
-    TRUE  - an error occured while scanning
+    0 - ok
+    1 - notify caller to call Item_subselect::reset(),
+        in most cases reset() sets the result to NULL
 */
 
 int subselect_uniquesubquery_engine::exec()
@@ -2259,23 +2222,27 @@ int subselect_uniquesubquery_engine::exec()
   TABLE *table= tab->table;
   empty_result_set= TRUE;
   table->status= 0;
+  Item_in_subselect *in_subs= (Item_in_subselect *) item;
  
-  /* TODO: change to use of 'full_scan' here? */
-  if (copy_ref_key())
-    DBUG_RETURN(1);
-  if (table->status)
+  if (in_subs->left_expr_has_null())
   {
-    /* 
-      We know that there will be no rows even if we scan. 
-      Can be set in copy_ref_key.
+    /*
+      The case when all values in left_expr are NULL is handled by
+      Item_in_optimizer::val_int().
     */
-    ((Item_in_subselect *) item)->value= 0;
+    if (in_subs->is_top_level_item())
+      DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */
+    else
+      DBUG_RETURN(scan_table());
+  }
+
+  if (copy_ref_key())
+  {
+    /* We know that there will be no rows even if we scan. */
+    in_subs->value= 0;
     DBUG_RETURN(0);
   }
 
-  if (null_keypart)
-    DBUG_RETURN(scan_table());
- 
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 0);
   error= table->file->ha_index_read_map(table->record[0],
@@ -2357,9 +2324,9 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
     cheaper. We can use index statistics to quickly check whether "ref" scan
     will be cheaper than full table scan.
 
-  RETURN
-    0
-    1
+    0 - ok
+    1 - notify caller to call Item_subselect::reset(),
+        in most cases reset() sets the result to NULL
 */
 
 int subselect_indexsubquery_engine::exec()
@@ -2368,10 +2335,10 @@ int subselect_indexsubquery_engine::exec()
   int error;
   bool null_finding= 0;
   TABLE *table= tab->table;
+  Item_in_subselect *in_subs= (Item_in_subselect *) item;
 
-  ((Item_in_subselect *) item)->value= 0;
+  in_subs->value= 0;
   empty_result_set= TRUE;
-  null_keypart= 0;
   table->status= 0;
 
   if (check_null)
@@ -2381,22 +2348,24 @@ int subselect_indexsubquery_engine::exec()
     ((Item_in_subselect *) item)->was_null= 0;
   }
 
-  /* Copy the ref key and check for nulls... */
-  if (copy_ref_key())
-    DBUG_RETURN(1);
-
-  if (table->status)
+  if (in_subs->left_expr_has_null())
   {
-    /* 
-      We know that there will be no rows even if we scan. 
-      Can be set in copy_ref_key.
+    /*
+      The case when all values in left_expr are NULL is handled by
+      Item_in_optimizer::val_int().
     */
-    ((Item_in_subselect *) item)->value= 0;
-    DBUG_RETURN(0);
+    if (in_subs->is_top_level_item())
+      DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */
+    else
+      DBUG_RETURN(scan_table());
   }
 
-  if (null_keypart)
-    DBUG_RETURN(scan_table());
+  if (copy_ref_key())
+  {
+    /* We know that there will be no rows even if we scan. */
+    in_subs->value= 0;
+    DBUG_RETURN(0);
+  }
 
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key, 1);
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 7eb45e74f40..b6b9750b910 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -334,6 +334,7 @@ public:
   bool test_limit(st_select_lex_unit *unit);
   virtual void print(String *str, enum_query_type query_type);
   bool fix_fields(THD *thd, Item **ref);
+  inline bool left_expr_has_null();
 
   friend class Item_ref_null_helper;
   friend class Item_is_not_null_test;
@@ -513,7 +514,6 @@ protected:
     expression is NULL.
   */
   bool empty_result_set;
-  bool null_keypart; /* TRUE <=> constructed search tuple has a NULL */
 public:
 
   // constructor can assign THD because it will be called after JOIN::prepare
author	unknown <timour@askmonty.org>	2012-09-14 11:26:01 +0300
committer	unknown <timour@askmonty.org>	2012-09-14 11:26:01 +0300
commit	b917fb63a638fd117e1e52d3bc29b57a81124fea (patch)
tree	86d017a6bd2b296ec536fe4447efb344488715ac
parent	caea98a4173ed512522ea06f154ac37bf6b1552b (diff)
download	mariadb-git-b917fb63a638fd117e1e52d3bc29b57a81124fea.tar.gz