summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMartin Hansson <martin.hansson@sun.com>2010-09-07 11:21:09 +0200
committerMartin Hansson <martin.hansson@sun.com>2010-09-07 11:21:09 +0200
commit4f4d03a416f2a440d946bcdbc59cfaa99dd97aa4 (patch)
treee90157e598b9a55c9a703d196421feab55ab5c04 /sql
parentd6f6db6f4cb02fb5a4c6225747af0673bd91ea94 (diff)
downloadmariadb-git-4f4d03a416f2a440d946bcdbc59cfaa99dd97aa4.tar.gz
Bug#51070: Query with a NOT IN subquery predicate returns a wrong result set
The EXISTS transformation has additional switches to catch the known corner cases that appear when transforming an IN predicate into EXISTS. Guarded conditions are used which are deactivated when a NULL value is seen in the outer expression's row. When the inner query block supplies NULL values, however, they are filtered out because no distinction is made between the guarded conditions; guarded NOT x IS NULL conditions in the HAVING clause that filter out NULL values cannot be de-activated in isolation from those that match values or from the outer expression or NULL's. The above problem is handled by making the guarded conditions remember whether they have rejected a NULL value or not, and index access methods are taking this into account as well. The bug consisted of 1) Not resetting the property for every nested loop iteration on the inner query's result. 2) Not propagating the NULL result properly from inner query to IN optimizer. 3) A hack that may or may not have been needed at some point. According to a comment it was aimed to fix #2 by returning NULL when FALSE was actually the result. This caused failures when #2 was properly fixed. The hack is now removed. The fix resolves all three points.
Diffstat (limited to 'sql')
-rw-r--r--sql/item_cmpfunc.cc79
-rw-r--r--sql/item_subselect.cc48
2 files changed, 95 insertions, 32 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index fe4616f64d7..605d12742f8 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -1751,6 +1751,76 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
}
+/**
+ The implementation of optimized \<outer expression\> [NOT] IN \<subquery\>
+ predicates. The implementation works as follows.
+
+ For the current value of the outer expression
+
+ - If it contains only NULL values, the original (before rewrite by the
+ Item_in_subselect rewrite methods) inner subquery is non-correlated and
+ was previously executed, there is no need to re-execute it, and the
+ previous return value is returned.
+
+ - If it contains NULL values, check if there is a partial match for the
+ inner query block by evaluating it. For clarity we repeat here the
+ transformation previously performed on the sub-query. The expression
+
+ <tt>
+ ( oc_1, ..., oc_n )
+ \<in predicate\>
+ ( SELECT ic_1, ..., ic_n
+ FROM \<table\>
+ WHERE \<inner where\>
+ )
+ </tt>
+
+ was transformed into
+
+ <tt>
+ ( oc_1, ..., oc_n )
+ \<in predicate\>
+ ( SELECT ic_1, ..., ic_n
+ FROM \<table\>
+ WHERE \<inner where\> AND ... ( ic_k = oc_k OR ic_k IS NULL )
+ HAVING ... NOT ic_k IS NULL
+ )
+ </tt>
+
+ The evaluation will now proceed according to special rules set up
+ elsewhere. These rules include:
+
+ - The HAVING NOT \<inner column\> IS NULL conditions added by the
+ aforementioned rewrite methods will detect whether they evaluated (and
+ rejected) a NULL value and if so, will cause the subquery to evaluate
+ to NULL.
+
+ - The added WHERE and HAVING conditions are present only for those inner
+ columns that correspond to outer column that are not NULL at the moment.
+
+ - If there is an eligible index for executing the subquery, the special
+ access method "Full scan on NULL key" is employed which ensures that
+ the inner query will detect if there are NULL values resulting from the
+ inner query. This access method will quietly resort to table scan if it
+ needs to find NULL values as well.
+
+ - Under these conditions, the sub-query need only be evaluated in order to
+ find out whether it produced any rows.
+
+ - If it did, we know that there was a partial match since there are
+ NULL values in the outer row expression.
+
+ - If it did not, the result is FALSE or UNKNOWN. If at least one of the
+ HAVING sub-predicates rejected a NULL value corresponding to an outer
+ non-NULL, and hence the inner query block returns UNKNOWN upon
+ evaluation, there was a partial match and the result is UNKNOWN.
+
+ - If it contains no NULL values, the call is forwarded to the inner query
+ block.
+
+ @see Item_in_subselect::val_bool()
+ @see Item_is_not_null_test::val_int()
+ */
longlong Item_in_optimizer::val_int()
{
bool tmp;
@@ -1804,7 +1874,7 @@ longlong Item_in_optimizer::val_int()
all_left_cols_null= false;
}
- if (!((Item_in_subselect*)args[1])->is_correlated &&
+ if (!item_subs->is_correlated &&
all_left_cols_null && result_for_null_param != UNKNOWN)
{
/*
@@ -1818,8 +1888,11 @@ longlong Item_in_optimizer::val_int()
else
{
/* The subquery has to be evaluated */
- (void) args[1]->val_bool_result();
- null_value= !item_subs->engine->no_rows();
+ (void) item_subs->val_bool_result();
+ if (item_subs->engine->no_rows())
+ null_value= item_subs->null_value;
+ else
+ null_value= TRUE;
if (all_left_cols_null)
result_for_null_param= null_value;
}
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index b93ea6f241b..d0c933df845 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -47,7 +47,7 @@ Item_subselect::Item_subselect():
item value is NULL if select_subselect not changed this value
(i.e. some rows will be found returned)
*/
- null_value= 1;
+ null_value= TRUE;
}
@@ -427,9 +427,9 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type)
void Item_singlerow_subselect::reset()
{
- null_value= 1;
+ null_value= TRUE;
if (value)
- value->null_value= 1;
+ value->null_value= TRUE;
}
@@ -574,7 +574,7 @@ double Item_singlerow_subselect::val_real()
DBUG_ASSERT(fixed == 1);
if (!exec() && !value->null_value)
{
- null_value= 0;
+ null_value= FALSE;
return value->val_real();
}
else
@@ -589,7 +589,7 @@ longlong Item_singlerow_subselect::val_int()
DBUG_ASSERT(fixed == 1);
if (!exec() && !value->null_value)
{
- null_value= 0;
+ null_value= FALSE;
return value->val_int();
}
else
@@ -603,7 +603,7 @@ String *Item_singlerow_subselect::val_str(String *str)
{
if (!exec() && !value->null_value)
{
- null_value= 0;
+ null_value= FALSE;
return value->val_str(str);
}
else
@@ -618,7 +618,7 @@ my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value)
{
if (!exec() && !value->null_value)
{
- null_value= 0;
+ null_value= FALSE;
return value->val_decimal(decimal_value);
}
else
@@ -633,7 +633,7 @@ bool Item_singlerow_subselect::val_bool()
{
if (!exec() && !value->null_value)
{
- null_value= 0;
+ null_value= FALSE;
return value->val_bool();
}
else
@@ -651,7 +651,7 @@ Item_exists_subselect::Item_exists_subselect(st_select_lex *select_lex):
bool val_bool();
init(select_lex, new select_exists_subselect(this));
max_columns= UINT_MAX;
- null_value= 0; //can't be NULL
+ null_value= FALSE; //can't be NULL
maybe_null= 0; //can't be NULL
value= 0;
DBUG_VOID_RETURN;
@@ -814,15 +814,14 @@ double Item_in_subselect::val_real()
*/
DBUG_ASSERT(0);
DBUG_ASSERT(fixed == 1);
- null_value= 0;
+ null_value= was_null= FALSE;
if (exec())
{
reset();
- null_value= 1;
return 0;
}
if (was_null && !value)
- null_value= 1;
+ null_value= TRUE;
return (double) value;
}
@@ -835,15 +834,14 @@ longlong Item_in_subselect::val_int()
*/
DBUG_ASSERT(0);
DBUG_ASSERT(fixed == 1);
- null_value= 0;
+ null_value= was_null= FALSE;
if (exec())
{
reset();
- null_value= 1;
return 0;
}
if (was_null && !value)
- null_value= 1;
+ null_value= TRUE;
return value;
}
@@ -856,16 +854,15 @@ String *Item_in_subselect::val_str(String *str)
*/
DBUG_ASSERT(0);
DBUG_ASSERT(fixed == 1);
- null_value= 0;
+ null_value= was_null= FALSE;
if (exec())
{
reset();
- null_value= 1;
return 0;
}
if (was_null && !value)
{
- null_value= 1;
+ null_value= TRUE;
return 0;
}
str->set((ulonglong)value, &my_charset_bin);
@@ -876,20 +873,14 @@ String *Item_in_subselect::val_str(String *str)
bool Item_in_subselect::val_bool()
{
DBUG_ASSERT(fixed == 1);
- null_value= 0;
+ null_value= was_null= FALSE;
if (exec())
{
reset();
- /*
- Must mark the IN predicate as NULL so as to make sure an enclosing NOT
- predicate will return FALSE. See the comments in
- subselect_uniquesubquery_engine::copy_ref_key for further details.
- */
- null_value= 1;
return 0;
}
if (was_null && !value)
- null_value= 1;
+ null_value= TRUE;
return value;
}
@@ -900,16 +891,15 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value)
method should not be used
*/
DBUG_ASSERT(0);
- null_value= 0;
+ null_value= was_null= FALSE;
DBUG_ASSERT(fixed == 1);
if (exec())
{
reset();
- null_value= 1;
return 0;
}
if (was_null && !value)
- null_value= 1;
+ null_value= TRUE;
int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value);
return decimal_value;
}