diff options
Diffstat (limited to 'sql/opt_range.cc')
-rw-r--r-- | sql/opt_range.cc | 364 |
1 files changed, 231 insertions, 133 deletions
diff --git a/sql/opt_range.cc b/sql/opt_range.cc index a5a46ba11b6..3f133924469 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -549,6 +549,7 @@ public: uint fields_bitmap_size; MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ + MY_BITMAP tmp_covered_fields; key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */ @@ -1916,6 +1917,7 @@ static int fill_used_fields_bitmap(PARAM *param) TABLE *table= param->table; my_bitmap_map *tmp; uint pk; + param->tmp_covered_fields.bitmap= 0; param->fields_bitmap_size= table->s->column_bitmap_size; if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root, param->fields_bitmap_size)) || @@ -4494,11 +4496,15 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, /*I=set of all covering indexes */ ror_scan_mark= tree->ror_scans; - my_bitmap_map int_buf[MAX_KEY/(sizeof(my_bitmap_map)*8)+1]; - MY_BITMAP covered_fields; - if (bitmap_init(&covered_fields, int_buf, param->table->s->fields, FALSE)) + MY_BITMAP *covered_fields= ¶m->tmp_covered_fields; + if (!covered_fields->bitmap) + covered_fields->bitmap= (my_bitmap_map*)alloc_root(param->mem_root, + param->fields_bitmap_size); + if (!covered_fields->bitmap || + bitmap_init(covered_fields, covered_fields->bitmap, + param->table->s->fields, FALSE)) DBUG_RETURN(0); - bitmap_clear_all(&covered_fields); + bitmap_clear_all(covered_fields); double total_cost= 0.0f; ha_rows records=0; @@ -4518,7 +4524,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, */ for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) { - bitmap_subtract(&(*scan)->covered_fields, &covered_fields); + bitmap_subtract(&(*scan)->covered_fields, covered_fields); (*scan)->used_fields_covered= bitmap_bits_set(&(*scan)->covered_fields); (*scan)->first_uncovered_field= @@ -4540,8 +4546,8 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, if (total_cost > read_time) DBUG_RETURN(NULL); /* F=F-covered by first(I) */ - bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields); - all_covered= bitmap_is_subset(¶m->needed_fields, &covered_fields); + bitmap_union(covered_fields, &(*ror_scan_mark)->covered_fields); + all_covered= bitmap_is_subset(¶m->needed_fields, covered_fields); } while ((++ror_scan_mark < ror_scans_end) && !all_covered); if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1) @@ -4876,25 +4882,37 @@ static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, break; case Item_func::BETWEEN: - if (inv) - { - tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1], - cond_func->arguments()[2], cmp_type); - } - else + { + if (!value) { - tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC, - cond_func->arguments()[1],cmp_type); - if (tree) + if (inv) { - tree= tree_and(param, tree, get_mm_parts(param, cond_func, field, - Item_func::LE_FUNC, - cond_func->arguments()[2], - cmp_type)); + tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1], + cond_func->arguments()[2], cmp_type); + } + else + { + tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC, + cond_func->arguments()[1],cmp_type); + if (tree) + { + tree= tree_and(param, tree, get_mm_parts(param, cond_func, field, + Item_func::LE_FUNC, + cond_func->arguments()[2], + cmp_type)); + } } } + else + tree= get_mm_parts(param, cond_func, field, + (inv ? + (value == (Item*)1 ? Item_func::GT_FUNC : + Item_func::LT_FUNC): + (value == (Item*)1 ? Item_func::LE_FUNC : + Item_func::GE_FUNC)), + cond_func->arguments()[0], cmp_type); break; - + } case Item_func::IN_FUNC: { Item_func_in *func=(Item_func_in*) cond_func; @@ -4904,41 +4922,33 @@ static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, if (func->array && func->cmp_type != ROW_RESULT) { /* - We get here for conditions in form "t.key NOT IN (c1, c2, ...)" - (where c{i} are constants). - Our goal is to produce a SEL_ARG graph that represents intervals: + We get here for conditions in form "t.key NOT IN (c1, c2, ...)", + where c{i} are constants. Our goal is to produce a SEL_TREE that + represents intervals: ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*) where $MIN is either "-inf" or NULL. - The most straightforward way to handle NOT IN would be to convert - it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range - optimizer to build SEL_ARG graph from that. However that will cause - the range optimizer to use O(N^2) memory (it's a bug, not filed), - and people do use big NOT IN lists (see BUG#15872). Also, for big - NOT IN lists constructing/using graph (*) does not make the query - faster. - - So, we will handle NOT IN manually in the following way: - * if the number of entries in the NOT IN list is less then - NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*) - manually. - * Otherwise, we will construct a smaller graph: for - "t.key NOT IN (c1,...cN)" we construct a graph representing - ($MIN < t.key) OR (cN < t.key) // here sequence of c_i is - // ordered. - - A note about partially-covering indexes: for those (e.g. for - "a CHAR(10), KEY(a(5))") the handling is correct (albeit not very - efficient): - Instead of "t.key < c1" we get "t.key <= prefix-val(c1)". - Combining the intervals in (*) together, we get: - (-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ... - i.e. actually we get intervals combined into one interval: - (-inf<=t.key<=+inf). This doesn't make much sense but it doesn't - cause any problems. + The most straightforward way to produce it is to convert NOT IN + into "(t.key != c1) AND (t.key != c2) AND ... " and let the range + analyzer to build SEL_TREE from that. The problem is that the + range analyzer will use O(N^2) memory (which is probably a bug), + and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282), + will run out of memory. + + Another problem with big lists like (*) is that a big list is + unlikely to produce a good "range" access, while considering that + range access will require expensive CPU calculations (and for + MyISAM even index accesses). In short, big NOT IN lists are rarely + worth analyzing. + + Considering the above, we'll handle NOT IN as follows: + * if the number of entries in the NOT IN list is less than + NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually. + * Otherwise, don't produce a SEL_TREE. */ +#define NOT_IN_IGNORE_THRESHOLD 1000 MEM_ROOT *tmp_root= param->mem_root; param->thd->mem_root= param->old_root; /* @@ -4952,9 +4962,9 @@ static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Item *value_item= func->array->create_item(); param->thd->mem_root= tmp_root; - if (!value_item) + if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item) break; - + /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */ uint i=0; do @@ -4973,45 +4983,39 @@ static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, tree= NULL; break; } -#define NOT_IN_IGNORE_THRESHOLD 1000 SEL_TREE *tree2; - if (func->array->count < NOT_IN_IGNORE_THRESHOLD) + for (; i < func->array->count; i++) { - for (; i < func->array->count; i++) + if (func->array->compare_elems(i, i-1)) { - if (func->array->compare_elems(i, i-1)) + /* Get a SEL_TREE for "-inf < X < c_i" interval */ + func->array->value_to_item(i, value_item); + tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, + value_item, cmp_type); + if (!tree2) { - /* Get a SEL_TREE for "-inf < X < c_i" interval */ - func->array->value_to_item(i, value_item); - tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, - value_item, cmp_type); - if (!tree2) - { - tree= NULL; - break; - } + tree= NULL; + break; + } - /* Change all intervals to be "c_{i-1} < X < c_i" */ - for (uint idx= 0; idx < param->keys; idx++) + /* Change all intervals to be "c_{i-1} < X < c_i" */ + for (uint idx= 0; idx < param->keys; idx++) + { + SEL_ARG *new_interval, *last_val; + if (((new_interval= tree2->keys[idx])) && + ((last_val= tree->keys[idx]->last()))) { - SEL_ARG *new_interval, *last_val; - if (((new_interval= tree2->keys[idx])) && - ((last_val= tree->keys[idx]->last()))) - { - new_interval->min_value= last_val->max_value; - new_interval->min_flag= NEAR_MIN; - } + new_interval->min_value= last_val->max_value; + new_interval->min_flag= NEAR_MIN; } - /* - The following doesn't try to allocate memory so no need to - check for NULL. - */ - tree= tree_or(param, tree, tree2); } + /* + The following doesn't try to allocate memory so no need to + check for NULL. + */ + tree= tree_or(param, tree, tree2); } } - else - func->array->value_to_item(func->array->count - 1, value_item); if (tree && tree->type != SEL_TREE::IMPOSSIBLE) { @@ -5076,7 +5080,119 @@ static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, } DBUG_RETURN(tree); +} + +/* + Build conjunction of all SEL_TREEs for a simple predicate applying equalities + + SYNOPSIS + get_full_func_mm_tree() + param PARAM from SQL_SELECT::test_quick_select + cond_func item for the predicate + field_item field in the predicate + value constant in the predicate + (for BETWEEN it contains the number of the field argument, + for IN it's always 0) + inv TRUE <> NOT cond_func is considered + (makes sense only when cond_func is BETWEEN or IN) + + DESCRIPTION + For a simple SARGable predicate of the form (f op c), where f is a field and + c is a constant, the function builds a conjunction of all SEL_TREES that can + be obtained by the substitution of f for all different fields equal to f. + + NOTES + If the WHERE condition contains a predicate (fi op c), + then not only SELL_TREE for this predicate is built, but + the trees for the results of substitution of fi for + each fj belonging to the same multiple equality as fi + are built as well. + E.g. for WHERE t1.a=t2.a AND t2.a > 10 + a SEL_TREE for t2.a > 10 will be built for quick select from t2 + and + a SEL_TREE for t1.a > 10 will be built for quick select from t1. + + A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated + in a similar way: we build a conjuction of trees for the results + of all substitutions of fi for equal fj. + Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed + differently. It is considered as a conjuction of two SARGable + predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree + is called for each of them separately producing trees for + AND j (f1j <=c ) and AND j (f2j <= c) + After this these two trees are united in one conjunctive tree. + It's easy to see that the same tree is obtained for + AND j,k (f1j <=c AND f2k<=c) + which is equivalent to + AND j,k (c BETWEEN f1j AND f2k). + The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i) + which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the + function get_full_func_mm_tree is called for (f1i > c) and (f2i < c) + producing trees for AND j (f1j > c) and AND j (f2j < c). Then this two + trees are united in one OR-tree. The expression + (AND j (f1j > c) OR AND j (f2j < c) + is equivalent to the expression + AND j,k (f1j > c OR f2k < c) + which is just a translation of + AND j,k (c NOT BETWEEN f1j AND f2k) + + In the cases when one of the items f1, f2 is a constant c1 we do not create + a tree for it at all. It works for BETWEEN predicates but does not + work for NOT BETWEEN predicates as we have to evaluate the expression + with it. If it is TRUE then the other tree can be completely ignored. + We do not do it now and no trees are built in these cases for + NOT BETWEEN predicates. + + As to IN predicates only ones of the form (f IN (c1,...,cn)), + where f1 is a field and c1,...,cn are constant, are considered as + SARGable. We never try to narrow the index scan using predicates of + the form (c IN (c1,...,f,...,cn)). + + RETURN + Pointer to the tree representing the built conjunction of SEL_TREEs +*/ + +static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param, + Item_func *cond_func, + Item_field *field_item, Item *value, + bool inv) +{ + SEL_TREE *tree= 0; + SEL_TREE *ftree= 0; + table_map ref_tables= 0; + table_map param_comp= ~(param->prev_tables | param->read_tables | + param->current_table); + DBUG_ENTER("get_full_func_mm_tree"); + + for (uint i= 0; i < cond_func->arg_count; i++) + { + Item *arg= cond_func->arguments()[i]->real_item(); + if (arg != field_item) + ref_tables|= arg->used_tables(); + } + Field *field= field_item->field; + Item_result cmp_type= field->cmp_type(); + if (!((ref_tables | field->table->map) & param_comp)) + ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv); + Item_equal *item_equal= field_item->item_equal; + if (item_equal) + { + Item_equal_iterator it(*item_equal); + Item_field *item; + while ((item= it++)) + { + Field *f= item->field; + if (field->eq(f)) + continue; + if (!((ref_tables | f->table->map) & param_comp)) + { + tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv); + ftree= !ftree ? tree : tree_and(param, ftree, tree); + } + } + } + DBUG_RETURN(ftree); } /* make a select tree of all keys in condition */ @@ -5087,7 +5203,7 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) SEL_TREE *ftree= 0; Item_field *field_item= 0; bool inv= FALSE; - Item *value; + Item *value= 0; DBUG_ENTER("get_mm_tree"); if (cond->type() == Item::COND_ITEM) @@ -5167,10 +5283,37 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) switch (cond_func->functype()) { case Item_func::BETWEEN: - if (cond_func->arguments()[0]->real_item()->type() != Item::FIELD_ITEM) - DBUG_RETURN(0); - field_item= (Item_field*) (cond_func->arguments()[0]->real_item()); - value= NULL; + if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM) + { + field_item= (Item_field*) (cond_func->arguments()[0]->real_item()); + ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv); + } + + /* + Concerning the code below see the NOTES section in + the comments for the function get_full_func_mm_tree() + */ + for (uint i= 1 ; i < cond_func->arg_count ; i++) + { + + if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM) + { + field_item= (Item_field*) (cond_func->arguments()[i]->real_item()); + SEL_TREE *tmp= get_full_func_mm_tree(param, cond_func, + field_item, (Item*) i, inv); + if (inv) + tree= !tree ? tmp : tree_or(param, tree, tmp); + else + tree= tree_and(param, tree, tmp); + } + else if (inv) + { + tree= 0; + break; + } + } + + ftree = tree_and(param, ftree, tree); break; case Item_func::IN_FUNC: { @@ -5178,7 +5321,7 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) if (func->key_item()->real_item()->type() != Item::FIELD_ITEM) DBUG_RETURN(0); field_item= (Item_field*) (func->key_item()->real_item()); - value= NULL; + ftree= get_full_func_mm_tree(param, cond_func, field_item, NULL, inv); break; } case Item_func::MULT_EQUAL_FUNC: @@ -5217,47 +5360,9 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) } else DBUG_RETURN(0); + ftree= get_full_func_mm_tree(param, cond_func, field_item, value, inv); } - /* - If the where condition contains a predicate (ti.field op const), - then not only SELL_TREE for this predicate is built, but - the trees for the results of substitution of ti.field for - each tj.field belonging to the same multiple equality as ti.field - are built as well. - E.g. for WHERE t1.a=t2.a AND t2.a > 10 - a SEL_TREE for t2.a > 10 will be built for quick select from t2 - and - a SEL_TREE for t1.a > 10 will be built for quick select from t1. - */ - - for (uint i= 0; i < cond_func->arg_count; i++) - { - Item *arg= cond_func->arguments()[i]->real_item(); - if (arg != field_item) - ref_tables|= arg->used_tables(); - } - Field *field= field_item->field; - Item_result cmp_type= field->cmp_type(); - if (!((ref_tables | field->table->map) & param_comp)) - ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv); - Item_equal *item_equal= field_item->item_equal; - if (item_equal) - { - Item_equal_iterator it(*item_equal); - Item_field *item; - while ((item= it++)) - { - Field *f= item->field; - if (field->eq(f)) - continue; - if (!((ref_tables | f->table->map) & param_comp)) - { - tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv); - ftree= !ftree ? tree : tree_and(param, ftree, tree); - } - } - } DBUG_RETURN(ftree); } @@ -7584,16 +7689,10 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() QUICK_RANGE_SELECT* cur_quick; int result; Unique *unique; - MY_BITMAP *save_read_set, *save_write_set; handler *file= head->file; DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge"); - /* We're going to just read rowids. */ - save_read_set= head->read_set; - save_write_set= head->write_set; file->extra(HA_EXTRA_KEYREAD); - bitmap_clear_all(&head->tmp_set); - head->column_bitmaps_set(&head->tmp_set, &head->tmp_set); head->prepare_for_position(); cur_quick_it.rewind(); @@ -7658,7 +7757,6 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() doing_pk_scan= FALSE; /* index_merge currently doesn't support "using index" at all */ file->extra(HA_EXTRA_NO_KEYREAD); - head->column_bitmaps_set(save_read_set, save_write_set); /* start table scan */ init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1); DBUG_RETURN(result); |