diff options
author | unknown <sergefp@mysql.com> | 2006-08-15 21:08:22 +0400 |
---|---|---|
committer | unknown <sergefp@mysql.com> | 2006-08-15 21:08:22 +0400 |
commit | 9cf4750ec3cfa97ee839bb84cc25629c5512bab2 (patch) | |
tree | c79e63e45405045be41cc62433525fd9c65b17b0 /sql/opt_range.cc | |
parent | 2f5ae7c536ae37a408f1e391ef21c743bd9b1b0e (diff) | |
download | mariadb-git-9cf4750ec3cfa97ee839bb84cc25629c5512bab2.tar.gz |
BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)
In fix for BUG#15872, a condition of type "t.key NOT IN (c1, .... cN)"
where N>1000, was incorrectly converted to
(-inf < X < c_min) OR (c_max < X)
Now this conversion is removed, we dont produce any range lists for such
conditions.
mysql-test/r/range.result:
BUG#21282: Testcase
mysql-test/t/range.test:
BUG#21282: Testcase
sql/opt_range.cc:
BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)
In fix for BUG#15872, a condition of type "t.key NOT IN (c1, .... cN)"
where N>1000, was incorrectly converted to
(-inf < X < c_min) OR (c_max < X)
Now this conversion is removed, we dont produce any range lists for such
conditions.
Diffstat (limited to 'sql/opt_range.cc')
-rw-r--r-- | sql/opt_range.cc | 107 |
1 files changed, 46 insertions, 61 deletions
diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 3b77d1b419e..026a2c5a622 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, if (func->array && func->cmp_type != ROW_RESULT) { /* - We get here for conditions in form "t.key NOT IN (c1, c2, ...)" - (where c{i} are constants). - Our goal is to produce a SEL_ARG graph that represents intervals: + We get here for conditions in form "t.key NOT IN (c1, c2, ...)", + where c{i} are constants. Our goal is to produce a SEL_TREE that + represents intervals: ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*) where $MIN is either "-inf" or NULL. - The most straightforward way to handle NOT IN would be to convert - it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range - optimizer to build SEL_ARG graph from that. However that will cause - the range optimizer to use O(N^2) memory (it's a bug, not filed), - and people do use big NOT IN lists (see BUG#15872). Also, for big - NOT IN lists constructing/using graph (*) does not make the query - faster. - - So, we will handle NOT IN manually in the following way: - * if the number of entries in the NOT IN list is less then - NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*) - manually. - * Otherwise, we will construct a smaller graph: for - "t.key NOT IN (c1,...cN)" we construct a graph representing - ($MIN < t.key) OR (cN < t.key) // here sequence of c_i is - // ordered. - - A note about partially-covering indexes: for those (e.g. for - "a CHAR(10), KEY(a(5))") the handling is correct (albeit not very - efficient): - Instead of "t.key < c1" we get "t.key <= prefix-val(c1)". - Combining the intervals in (*) together, we get: - (-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ... - i.e. actually we get intervals combined into one interval: - (-inf<=t.key<=+inf). This doesn't make much sense but it doesn't - cause any problems. + The most straightforward way to produce it is to convert NOT IN + into "(t.key != c1) AND (t.key != c2) AND ... " and let the range + analyzer to build SEL_TREE from that. The problem is that the + range analyzer will use O(N^2) memory (which is probably a bug), + and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282), + will run out of memory. + + Another problem with big lists like (*) is that a big list is + unlikely to produce a good "range" access, while considering that + range access will require expensive CPU calculations (and for + MyISAM even index accesses). In short, big NOT IN lists are rarely + worth analyzing. + + Considering the above, we'll handle NOT IN as follows: + * if the number of entries in the NOT IN list is less than + NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually. + * Otherwise, don't produce a SEL_TREE. */ +#define NOT_IN_IGNORE_THRESHOLD 1000 MEM_ROOT *tmp_root= param->mem_root; param->thd->mem_root= param->old_root; /* @@ -3656,9 +3648,9 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, Item *value_item= func->array->create_item(); param->thd->mem_root= tmp_root; - if (!value_item) + if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item) break; - + /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */ uint i=0; do @@ -3677,45 +3669,39 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, tree= NULL; break; } -#define NOT_IN_IGNORE_THRESHOLD 1000 SEL_TREE *tree2; - if (func->array->count < NOT_IN_IGNORE_THRESHOLD) + for (; i < func->array->count; i++) { - for (; i < func->array->count; i++) + if (func->array->compare_elems(i, i-1)) { - if (func->array->compare_elems(i, i-1)) + /* Get a SEL_TREE for "-inf < X < c_i" interval */ + func->array->value_to_item(i, value_item); + tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, + value_item, cmp_type); + if (!tree2) { - /* Get a SEL_TREE for "-inf < X < c_i" interval */ - func->array->value_to_item(i, value_item); - tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, - value_item, cmp_type); - if (!tree2) - { - tree= NULL; - break; - } + tree= NULL; + break; + } - /* Change all intervals to be "c_{i-1} < X < c_i" */ - for (uint idx= 0; idx < param->keys; idx++) + /* Change all intervals to be "c_{i-1} < X < c_i" */ + for (uint idx= 0; idx < param->keys; idx++) + { + SEL_ARG *new_interval, *last_val; + if (((new_interval= tree2->keys[idx])) && + ((last_val= tree->keys[idx]->last()))) { - SEL_ARG *new_interval, *last_val; - if (((new_interval= tree2->keys[idx])) && - ((last_val= tree->keys[idx]->last()))) - { - new_interval->min_value= last_val->max_value; - new_interval->min_flag= NEAR_MIN; - } + new_interval->min_value= last_val->max_value; + new_interval->min_flag= NEAR_MIN; } - /* - The following doesn't try to allocate memory so no need to - check for NULL. - */ - tree= tree_or(param, tree, tree2); } + /* + The following doesn't try to allocate memory so no need to + check for NULL. + */ + tree= tree_or(param, tree, tree2); } } - else - func->array->value_to_item(func->array->count - 1, value_item); if (tree && tree->type != SEL_TREE::IMPOSSIBLE) { @@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, } DBUG_RETURN(tree); - } /* make a select tree of all keys in condition */ |