diff options
-rw-r--r-- | mysql-test/r/group_min_max.result | 6 | ||||
-rw-r--r-- | mysql-test/t/group_min_max.test | 16 | ||||
-rw-r--r-- | sql/item.h | 15 | ||||
-rw-r--r-- | sql/item_sum.cc | 38 | ||||
-rw-r--r-- | sql/item_sum.h | 29 | ||||
-rw-r--r-- | sql/opt_range.cc | 783 | ||||
-rw-r--r-- | sql/sql_select.cc | 10 | ||||
-rw-r--r-- | sql/sql_select.h | 2 |
8 files changed, 432 insertions, 467 deletions
diff --git a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result index 05b9fecb99b..9000d35c7b9 100644 --- a/mysql-test/r/group_min_max.result +++ b/mysql-test/r/group_min_max.result @@ -1869,6 +1869,12 @@ id select_type table type possible_keys key key_len ref rows Extra explain select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 76 Using where; Using index +explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 Using where; Using index +select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +a1 a2 min(b) c +a a a a111 explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 Using where; Using index diff --git a/mysql-test/t/group_min_max.test b/mysql-test/t/group_min_max.test index 7bc2a4b7d17..34e915b8f6b 100644 --- a/mysql-test/t/group_min_max.test +++ b/mysql-test/t/group_min_max.test @@ -3,6 +3,12 @@ # The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT. # +# +# TODO: +# Add queries with: +# - C != const +# - C IS NOT NULL +# - HAVING clause --disable_warnings drop table if exists t1; @@ -175,8 +181,6 @@ explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by explain select min(a2) from t1 group by a1; explain select a2, min(c), max(c) from t1 group by a1,a2,b; --- TODO: Queries with HAVING - -- queries select a1, min(a2) from t1 group by a1; select a1, max(a2) from t1 group by a1; @@ -190,8 +194,6 @@ select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, select min(a2) from t1 group by a1; select a2, min(c), max(c) from t1 group by a1,a2,b; --- TODO: Queries with HAVING - -- -- Queries with a where clause -- @@ -300,7 +302,6 @@ select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2; select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2; select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2; --- TODO: IS NOT NULL ? -- C) Range predicates for the MIN/MAX attribute -- plans @@ -553,6 +554,11 @@ where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1; explain select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b; +-- non-group field with an equality predicate that references a keypart after the +-- MIN/MAX argument +explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; +select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; + -- disjunction for a non-group select attribute explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1; diff --git a/sql/item.h b/sql/item.h index 2164a6bf434..60edf801686 100644 --- a/sql/item.h +++ b/sql/item.h @@ -260,22 +260,7 @@ public: virtual bool remove_dependence_processor(byte * arg) { return 0; } virtual bool remove_fixed(byte * arg) { fixed= 0; return 0; } - /* - All collect_* methods are used as arguments to walk() to collect - specific types items. - TODO: - A more generic implementation would add a special class - Collect_processor_param that can store arbitrary sets of item kinds - (currently specified as enums), along with a list to store items of the - specified kinds. This would allow to collect combinations of items of - arbitrary kinds without having to add a new collect method each time. - There can be one generic collect_processor method that checks the item type - and compares it with the item types in Collect_processor_param. - */ virtual bool collect_item_field_processor(byte * arg) { return 0; } - virtual bool collect_item_sum_min_processor(byte * arg) { return 0; } - virtual bool collect_item_sum_max_processor(byte * arg) { return 0; } - virtual bool has_non_min_max_sum_processor(byte * arg) { return 0; } virtual Item *this_item() { return this; } /* For SPs mostly. */ virtual Item *this_const_item() const { return const_cast<Item*>(this); } /* For SPs mostly. */ diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 6faff2888f1..3980b9aa135 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -183,44 +183,6 @@ bool Item_sum::walk (Item_processor processor, byte *argument) } -/* - Store the pointer to this item into a list if not already there. - - SYNOPSIS - Item_sum::collect() - item_list pointer to a List<Item_sum> where item_sum objects are collected - - DESCRIPTION - The method is used by collect_item_sum_*_processor, called by - Item_sum::walk, to collect all unique Item_sum_min and Item_sum_max objects - from a tree of Items into a set of items represented as a list. - - IMPLEMENTATION - Item_cond::walk() and Item_func::walk() stop the evaluation of the - processor function for its arguments once the processor returns - true.Therefore in order to force this method being called for all item - arguments in a condition the method must return false. - - RETURN - FALSE on success (force the evaluation of collect_item_sum_*_processor - for the subsequent items.) - TRUE o/w (stop evaluation of subsequent items.) -*/ - -bool Item_sum::collect(List<Item_sum> *item_list) -{ - List_iterator<Item_sum> item_list_it(*item_list); - Item_sum *curr_item; - while ((curr_item= item_list_it++)) - { - if (curr_item == this) - return FALSE; /* Already in the set. */ - } - item_list->push_back(this); - return FALSE; -} - - String * Item_sum_num::val_str(String *str) { diff --git a/sql/item_sum.h b/sql/item_sum.h index 3c144e8f51c..2dde6f73425 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -27,8 +27,6 @@ class Item_arena; class Item_sum :public Item_result_field { -private: - bool collect(List<Item_sum> *item_list); public: enum Sumfunctype { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC, @@ -100,33 +98,6 @@ public: bool save_args(Item_arena* stmt); bool walk (Item_processor processor, byte *argument); - - /* Collect Item_sum_min objects into a list supplied by the caller. */ - bool collect_item_sum_min_processor(byte *arg) - { - if (Item_sum::MIN_FUNC == this->sum_func()) - return collect((List<Item_sum>*) arg); - else - return FALSE; - } - - /* Collect Item_sum_max objects into a list supplied by the caller. */ - bool collect_item_sum_max_processor(byte *arg) - { - if (Item_sum::MAX_FUNC == this->sum_func()) - return collect((List<Item_sum>*) arg); - else - return FALSE; - } - - /* Check if there are any aggregate functions other than MIN and MAX. */ - bool has_non_min_max_sum_processor(byte * arg) - { - Sumfunctype sum_type= this->sum_func(); - if ((sum_type != Item_sum::MIN_FUNC) && (sum_type != Item_sum::MAX_FUNC)) - return TRUE; - return FALSE; - } }; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index da0e0437ebb..ed027c62c93 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -1477,11 +1477,11 @@ public: KEY_PART_INFO *min_max_arg_part, uint group_prefix_len, uint used_key_parts, uint group_key_parts, KEY *index_info, uint index, uint key_infix_len, byte *key_infix, - SEL_TREE *tree, PARAM *param); + SEL_TREE *tree, SEL_ARG *index_tree, uint param_idx, + ha_rows quick_prefix_records, PARAM *param); QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc); - void update_cost(); }; @@ -6362,17 +6362,21 @@ static inline uint get_field_keypart(KEY *index, Field *field); static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param, uint *param_idx); static bool -get_constant_key_infix(List<Item_field> &non_group_fields, - SEL_ARG *index_range_tree, +get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree, KEY_PART_INFO *first_non_group_part, - KEY_PART_INFO *end_part, - byte *key_infix, uint *key_infix_len); + KEY_PART_INFO *min_max_arg_part, + KEY_PART_INFO *last_part, THD *thd, + byte *key_infix, uint *key_infix_len, + KEY_PART_INFO **first_non_infix_part); static bool -check_group_min_max_predicates(COND *cond, ORDER *group_list, - List<Item_field> &select_fields, - List<Item_field> &non_group_fields, - Item_field *min_max_arg_item); +check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item); +static void +cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, + uint group_key_parts, SEL_TREE *range_tree, + SEL_ARG *index_tree, ha_rows quick_prefix_records, + bool have_min, bool have_max, + double *read_cost, ha_rows *records); /* Test if this access method is applicable to a GROUP query with MIN/MAX @@ -6400,7 +6404,7 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, = SA - if Q is a DISTINCT query (based on the equivalence of DISTINCT and GROUP queries. - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in GROUP BY - and not referenced by a MIN/MAX functions. + and not referenced by MIN/MAX functions. with the following properties specified below. SA1. There is at most one attribute in SA referenced by any number of @@ -6412,9 +6416,12 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, - (const {< | <= | > | >= | =} C) - (C between const_i and const_j) - C IS NULL + - C IS NOT NULL + - C != const SA4. If Q has a GROUP BY clause, there are no other aggregate functions except MIN and MAX. For queries with DISTINCT, aggregate functions are allowed. + SA5. The select list in DISTINCT queries should not contain expressions. GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if G_i = A_j => i = j. GA2. If Q has a DISTINCT clause, then there is a permutation of SA that @@ -6444,14 +6451,16 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, in the index I (if this was already tested for GA, NGA and C). C) Overall query form: - SELECT [A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)] -- at least one - FROM T -- of {A_i} or - WHERE [RNG(A_1,...,A_p ; where p <= k)] -- MIN/MAX must - [AND EQ(B_1,...,B_m)] -- be present. + SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)]) + FROM T + WHERE [RNG(A_1,...,A_p ; where p <= k)] + [AND EQ(B_1,...,B_m)] [AND PC(C)] [AND PA(A_i1,...,A_iq)] - GROUP BY A_1,...,A_k; - or + GROUP BY A_1,...,A_k + [HAVING PH(A_1, ..., B_1,..., C)] + where EXPR(...) is an arbitrary expression over some or all SELECT fields, + or: SELECT DISTINCT A_i1,...,A_ik FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] @@ -6506,11 +6515,6 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) uint index= 0; /* The id of the chosen index. */ uint group_key_parts= 0; /* Number of index key parts in the group prefix. */ uint used_key_parts= 0; /* Number of index key parts used for access. */ - List<Item_field> query_fields;/* Set of all fields referenced in the query. */ - List<Item_field> select_fields; /* Set of the fields referenced in SELECT. */ - List<Item_field> non_group_fields;/* Set of the query fields not in GROUP BY*/ - /* and not referenced in a MIN/MAX. */ - List<Item_sum> min_max_functions; /* All MIN/MAX functions in SELECT. */ byte key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/ uint key_infix_len= 0; /* Length of key_infix. */ TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */ @@ -6532,109 +6536,44 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) DBUG_RETURN(NULL); /* Analyze the query in more detail. */ + List_iterator<Item> select_items_it(join->fields_list); - List<Item> &select_items= join->fields_list; - List_iterator<Item> select_items_it(select_items); - List_iterator<Item_field> query_fields_it(query_fields); - List_iterator<Item_field> select_fields_it(select_fields); - List_iterator<Item_sum> min_max_functions_it(min_max_functions); - - /* - Collect all fields referenced in the query. Notice that since a HAVING - clause must reference only SELECT attributes, there is no need to extract - the attributes of HAVING. - Also: - - check (SA4) if there are any aggregate functions other than MIN and MAX, - - collect all references to MIN/MAX functions. - */ - while ((item= select_items_it++)) + /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/ + if(join->make_sum_func_list(join->all_fields, join->fields_list, 1)) + DBUG_RETURN(NULL); + if (join->sum_funcs[0]) { - if (item->walk(&Item::has_non_min_max_sum_processor, NULL)) - DBUG_RETURN(NULL); - item->walk(&Item::collect_item_field_processor, (byte*) &query_fields); - item->walk(&Item::collect_item_field_processor, (byte*) &select_fields); - item->walk(&Item::collect_item_sum_min_processor, - (byte*) &min_max_functions); - item->walk(&Item::collect_item_sum_max_processor, - (byte*) &min_max_functions); - } - if (join->conds) - join->conds->walk(&Item::collect_item_field_processor, - (byte*) &query_fields); - - /* Check (SA1) and store the only MIN/MAX argument - the C attribute.*/ - Item_sum *min_max_item; - while ((min_max_item= min_max_functions_it++)) - { - if (min_max_item->sum_func() == Item_sum::MIN_FUNC) - have_min= TRUE; - else if (min_max_item->sum_func() == Item_sum::MAX_FUNC) - have_max= TRUE; - else - DBUG_RETURN(NULL); - - Item *expr= min_max_item->args[0]; /* This is the argument of MIN/MAX. */ - if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */ + Item_sum *min_max_item; + Item_sum **func_ptr= join->sum_funcs; + while ((min_max_item= *(func_ptr++))) { - if (! min_max_arg_item) - min_max_arg_item= (Item_field*) expr; - else if (! min_max_arg_item->eq(expr, 1)) + if (min_max_item->sum_func() == Item_sum::MIN_FUNC) + have_min= TRUE; + else if (min_max_item->sum_func() == Item_sum::MAX_FUNC) + have_max= TRUE; + else DBUG_RETURN(NULL); - } - else - DBUG_RETURN(NULL); - } - /* Collect the NGA fields. */ - query_fields_it.rewind(); - List_iterator<Item_field> non_group_fields_it(non_group_fields); - Item_field *curr_field; - while ((item_field= query_fields_it++)) - { - bool found_field= FALSE; - if (min_max_arg_item && item_field->eq(min_max_arg_item, 1)) - found_field= TRUE; - else if (join->group_list) - { - for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next) + Item *expr= min_max_item->args[0]; /* The argument of MIN/MAX. */ + if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */ { - if (item_field->eq(*tmp_group->item, 1)) - { - found_field= TRUE; - break; - } - } - } - else if (join->select_distinct) - { /* For DISTINCT queries select fields are treated as group fields. */ - Item_field *sel_field; - select_fields_it.rewind(); - while ((sel_field= select_fields_it++)) - { - if (item_field->eq(sel_field, 1)) - { - found_field= TRUE; - break; - } + if (! min_max_arg_item) + min_max_arg_item= (Item_field*) expr; + else if (! min_max_arg_item->eq(expr, 1)) + DBUG_RETURN(NULL); } + else + DBUG_RETURN(NULL); } - else - DBUG_ASSERT(FALSE); + } - if (!found_field) + /* Check (SA5). */ + if (join->select_distinct) + { + while ((item= select_items_it++)) { - /* Make sure non_group_fields is a set. */ - non_group_fields_it.rewind(); - while ((curr_field= non_group_fields_it++)) - { - if (curr_field->eq(item_field, 1)) - { - found_field= TRUE; - break; - } - } - if (!found_field) - non_group_fields.push_back(item_field); + if (item->type() != Item::FIELD_ITEM) + DBUG_RETURN(NULL); } } @@ -6653,17 +6592,32 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) KEY *cur_index_info= table->key_info; KEY *cur_index_info_end= cur_index_info + table->keys; KEY_PART_INFO *cur_part= NULL; - KEY_PART_INFO *end_part; - for (uint tmp_index= 0 ; cur_index_info != cur_index_info_end ; - cur_index_info++, tmp_index++) - { - /* Check (B1) - each query field participates in the current index. */ - query_fields_it.rewind(); - while ((item_field= query_fields_it++)) - { - if ( ! item_field->field->part_of_key.is_set(tmp_index) ) - goto next_index; - } + KEY_PART_INFO *end_part; /* Last part for loops. */ + /* Last index part. */ + KEY_PART_INFO *last_part= NULL; + KEY_PART_INFO *first_non_group_part= NULL; + KEY_PART_INFO *first_non_infix_part= NULL; + uint key_infix_parts= 0; + uint cur_group_key_parts= 0; + uint cur_group_prefix_len= 0; + /* Cost-related variables for the best index so far. */ + double best_read_cost= DBL_MAX; + ha_rows best_records= 0; + SEL_ARG *best_index_tree= NULL; + ha_rows best_quick_prefix_records= 0; + uint best_param_idx= 0; + double cur_read_cost= DBL_MAX; + ha_rows cur_records; + SEL_ARG *cur_index_tree= NULL; + ha_rows cur_quick_prefix_records= 0; + uint cur_param_idx; + + for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ; + cur_index_info++, cur_index++) + { + /* Check (B1) - if current index is covering. */ + if (!table->used_keys.is_set(cur_index)) + goto next_index; /* Check (GA1) for GROUP BY queries. @@ -6686,8 +6640,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) Item_field *group_field= (Item_field *) (*tmp_group->item); if (group_field->field->eq(cur_part->field)) { - group_prefix_len+= cur_part->store_length; - ++group_key_parts; + cur_group_prefix_len+= cur_part->store_length; + ++cur_group_key_parts; } else goto next_index; @@ -6703,18 +6657,18 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) */ else if (join->select_distinct) { - DBUG_ASSERT(!join->group_list); /* Only DISTINCT, no GROUP BY. */ - select_fields_it.rewind(); - while ((item_field= select_fields_it++)) + select_items_it.rewind(); + while ((item= select_items_it++)) { + item_field= (Item_field*) item; /* (SA5) already checked above. */ /* Find the order of the key part in the index. */ key_part_nr= get_field_keypart(cur_index_info, item_field->field); - if (key_part_nr < 1 || key_part_nr > select_fields.elements) + if (key_part_nr < 1 || key_part_nr > join->fields_list.elements) goto next_index; cur_part= cur_index_info->key_part + key_part_nr - 1; - group_prefix_len+= cur_part->store_length; + cur_group_prefix_len+= cur_part->store_length; } - group_key_parts= select_fields.elements; + cur_group_key_parts= join->fields_list.elements; } else DBUG_ASSERT(FALSE); @@ -6723,7 +6677,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) if (min_max_arg_item) { key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field); - if (key_part_nr <= group_key_parts) + if (key_part_nr <= cur_group_key_parts) goto next_index; min_max_arg_part= cur_index_info->key_part + key_part_nr - 1; } @@ -6732,36 +6686,98 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) Check (NGA1, NGA2) and extract a sequence of constants to be used as part of all search keys. */ - if (non_group_fields.elements > 0) + + /* + If there is MIN/MAX, each keypart between the last group part and the + MIN/MAX part must participate in one equality with constants, and all + keyparts after the MIN/MAX part must not be referenced in the query. + + If there is no MIN/MAX, the keyparts after the last group part can be + referenced only in equalities with constants, and the referenced keyparts + must form a sequence without any gaps that starts immediately after the + last group keypart. + */ + last_part= cur_index_info->key_part + cur_index_info->key_parts; + first_non_group_part= (cur_group_key_parts < cur_index_info->key_parts) ? + cur_index_info->key_part + cur_group_key_parts : + NULL; + first_non_infix_part= min_max_arg_part ? + (min_max_arg_part < last_part) ? + min_max_arg_part + 1 : + NULL : + NULL; + if (first_non_group_part && + (!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0))) { - if (!tree) - goto next_index; /* No range tree - no predicates at all. */ - uint dummy; - SEL_ARG *index_range_tree= get_index_range_tree(tmp_index, tree, param, - &dummy); - KEY_PART_INFO *first_non_group_part= cur_index_info->key_part + - group_key_parts; - KEY_PART_INFO *end_part= min_max_arg_part ? - min_max_arg_part : - cur_index_info->key_part + - cur_index_info->key_parts; - if (!get_constant_key_infix(non_group_fields, index_range_tree, - first_non_group_part, end_part, - key_infix, &key_infix_len)) + if (tree) + { + uint dummy; + SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param, + &dummy); + if (!get_constant_key_infix(cur_index_info, index_range_tree, + first_non_group_part, min_max_arg_part, + last_part, thd, key_infix, &key_infix_len, + &first_non_infix_part)) + goto next_index; + } + else if (min_max_arg_part && + (min_max_arg_part - first_non_group_part > 0)) + /* + There is a gap but no range tree, thus no predicates at all for the + non-group keyparts. + */ goto next_index; } + /* + Test (WA1) partially - that no other keypart after the last infix part is + referenced in the query. + */ + if (first_non_infix_part) + { + for (cur_part= first_non_infix_part; cur_part != last_part; cur_part++) + { + if (cur_part->field->query_id == thd->query_id) + goto next_index; + } + } + /* If we got to this point, cur_index_info passes the test. */ - index_info= cur_index_info; - index= tmp_index; - used_key_parts= group_key_parts + non_group_fields.elements; + key_infix_parts= key_infix_len ? + (first_non_infix_part - first_non_group_part) : 0; + used_key_parts= cur_group_key_parts + key_infix_parts; - /* Exit the loop because we found a usable index. */ - break; + /* Compute the cost of using this index. */ + if (tree) + { + /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */ + cur_index_tree= get_index_range_tree(cur_index, tree, param, + &cur_param_idx); + /* Check if this range tree can be used for prefix retrieval. */ + cur_quick_prefix_records= check_quick_select(param, cur_param_idx, + cur_index_tree); + } + cost_group_min_max(table, cur_index_info, used_key_parts, + cur_group_key_parts, tree, cur_index_tree, + cur_quick_prefix_records, have_min, have_max, + &cur_read_cost, &cur_records); + + if (cur_read_cost < best_read_cost) + { + index_info= cur_index_info; + index= cur_index; + best_read_cost= cur_read_cost; + best_records= cur_records; + best_index_tree= cur_index_tree; + best_quick_prefix_records= cur_quick_prefix_records; + best_param_idx= cur_param_idx; + group_key_parts= cur_group_key_parts; + group_prefix_len= cur_group_prefix_len; + } next_index: - group_key_parts= 0; - group_prefix_len= 0; + cur_group_key_parts= 0; + cur_group_prefix_len= 0; continue; } if (!index_info) /* No usable index found. */ @@ -6769,9 +6785,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) /* Check (SA3,WA1) for the where clause. */ - if (!check_group_min_max_predicates(join->conds, join->group_list, - select_fields, non_group_fields, - min_max_arg_item)) + if (!check_group_min_max_predicates(join->conds, min_max_arg_item)) DBUG_RETURN(NULL); /* The query passes all tests, so construct a new TRP object. */ @@ -6781,13 +6795,16 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) used_key_parts, group_key_parts, index_info, index, key_infix_len, (key_infix_len > 0) ? key_infix : NULL, - tree, param); + tree, best_index_tree, best_param_idx, + best_quick_prefix_records, param); if (read_plan) { if (tree && read_plan->quick_prefix_records == 0) DBUG_RETURN(NULL); - read_plan->update_cost(); + read_plan->read_cost= best_read_cost; + read_plan->records= best_records; + DBUG_PRINT("info", ("Returning group min/max plan: cost: %g, records: %lu", read_plan->read_cost, (ulong) read_plan->records)); @@ -6798,33 +6815,20 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) /* - Check that all where predicates reference only select fields and the MIN/MAX - attribute participates only in range predicates with constants. + Check that the MIN/MAX attribute participates only in range predicates + with constants. SYNOPSIS check_group_min_max_predicates() cond tree (or subtree) describing all or part of the WHERE clause being analyzed - select_fields list of fields in the SELECT clause min_max_arg_item the field referenced by the MIN/MAX function(s) DESCRIPTION The function walks recursively over the cond tree representing a WHERE - clause, and checks that: - (WA1) every predicate references only GA and NGA fields as specified - in get_best_group_min_max(). - (SA3) if a field is referenced by a MIN/MAX aggregate function, it is - referenced only by the following predicates: - {=, <, <=, >, >=, between, is null}. - The function assumes that the select list has already been checked that - each selected attribute is part of an index. As a result this test - essentially checks condition B6 from get_best_group_min_max(), namely - that every attribute referenced in the WHERE clause is part of the index I. - - IMPLEMENTATION - The function assumes that the two sets of attributes GA (group_list) and NGA - (non_group_fields) are already checked to satisfy all conditions given in - get_best_group_min_max's specification. + clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX + aggregate function, it is referenced only by the following predicates: + {=, !=, <, <=, >, >=, between, is null, is not null}. RETURN TRUE if cond passes the test @@ -6832,10 +6836,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree) */ static bool -check_group_min_max_predicates(COND *cond, ORDER *group_list, - List<Item_field> &select_fields, - List<Item_field> &non_group_fields, - Item_field *min_max_arg_item) +check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item) { DBUG_ENTER("check_group_min_max_predicates"); if (!cond) /* If no WHERE clause, then all is OK. */ @@ -6849,8 +6850,7 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, Item *and_or_arg; while ((and_or_arg= li++)) { - if(!check_group_min_max_predicates(and_or_arg, group_list, select_fields, - non_group_fields, min_max_arg_item)) + if(!check_group_min_max_predicates(and_or_arg, min_max_arg_item)) DBUG_RETURN(FALSE); } DBUG_RETURN(TRUE); @@ -6876,14 +6876,16 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, with a constant. */ Item_func::Functype pred_type= pred->functype(); - if (pred_type != Item_func::EQUAL_FUNC && - pred_type != Item_func::LT_FUNC && - pred_type != Item_func::LE_FUNC && - pred_type != Item_func::GT_FUNC && - pred_type != Item_func::GE_FUNC && - pred_type != Item_func::BETWEEN && - pred_type != Item_func::ISNULL_FUNC && - pred_type != Item_func::EQ_FUNC) + if (pred_type != Item_func::EQUAL_FUNC && + pred_type != Item_func::LT_FUNC && + pred_type != Item_func::LE_FUNC && + pred_type != Item_func::GT_FUNC && + pred_type != Item_func::GE_FUNC && + pred_type != Item_func::BETWEEN && + pred_type != Item_func::ISNULL_FUNC && + pred_type != Item_func::ISNOTNULL_FUNC && + pred_type != Item_func::EQ_FUNC && + pred_type != Item_func::NE_FUNC) DBUG_RETURN(FALSE); /* Check that pred compares min_max_arg_item with a constant. */ @@ -6893,54 +6895,10 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, if (!simple_pred(pred, args, &inv)) DBUG_RETURN(FALSE); } - else - {/* pred can be arbitrary predicate, but cur_arg must be in GA or NGA. */ - bool is_in_ga_or_nga= FALSE; - Item_field *item_field; - if (non_group_fields.elements > 0) - { - List_iterator<Item_field> non_group_fields_it(non_group_fields); - while ((item_field= non_group_fields_it++)) - { - if (cur_arg->eq(item_field, 1)) - { - is_in_ga_or_nga= TRUE; - break; - } - } - } - if (group_list && !is_in_ga_or_nga) /* This is a GROUP-BY query. */ - { - ORDER *tmp_group; - for (tmp_group= group_list; tmp_group; tmp_group= tmp_group->next) - { - if (cur_arg->eq(*tmp_group->item, 1)) - { - is_in_ga_or_nga= TRUE; - break; - } - } - } - else /* This is a DISTINCT query. */ - { - List_iterator<Item_field> select_fields_it(select_fields); - while ((item_field= select_fields_it++)) - { - if (cur_arg->eq(item_field, 1)) - { - is_in_ga_or_nga= TRUE; - break; - } - } - } - if (!is_in_ga_or_nga) - DBUG_RETURN(FALSE); - } } else if (cur_arg->type() == Item::FUNC_ITEM) { - if(!check_group_min_max_predicates(cur_arg, group_list, select_fields, - non_group_fields, min_max_arg_item)) + if(!check_group_min_max_predicates(cur_arg, min_max_arg_item)) DBUG_RETURN(FALSE); } else if (cur_arg->const_item()) @@ -6954,128 +6912,50 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list, DBUG_RETURN(TRUE); } -/* - Find the key part referenced by a field. - - SYNOPSIS - get_field_keypart() - index descriptor of an index - field field that possibly references some key part in index - - NOTES - The return value can be used to get a KEY_PART_INFO pointer by - part= index->key_part + get_field_keypart(...) - 1; - - RETURN - Positive number which is the consecutive number of the key part, or - 0 if field does not reference any index field. -*/ - -static inline uint -get_field_keypart(KEY *index, Field *field) -{ - KEY_PART_INFO *part= index->key_part; - uint key_part_num= 0; - - while (part != part + index->key_parts) - { - key_part_num++; - if (field->eq(part->field)) - return key_part_num; - part++; - } - return key_part_num; -} - - -/* - Find the SEL_ARG sub-tree that corresponds to the chosen index. - - SYNOPSIS - get_index_range_tree() - index - range_tree - param - param_idx [out] - - DESCRIPTION - -*/ - -SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param, - uint *param_idx) -{ - uint idx= 0; /* Index nr in param->key_parts */ - while (idx < param->keys) - { - if (index == param->real_keynr[idx]) - break; - idx++; - } - *param_idx= idx; - return(range_tree->keys[idx]); -} - /* Extract a sequence of constants from a conjunction of equality predicates. SYNOPSIS get_constant_key_infix() - cond [in] The WHERE clause of the current query - non_group_fields[in] Select fields that are not in GROUP-BY list - index_info [in] The index which key parts are being considered - group_prefix_len [in] Length of the key parts referenced by group - attributes - first_non_group_part [in] First index part after group attribute parts - min_max_part [in] Index keypart referenced by the MIN/MAX argument - key_infix [out] - key_infix_len [out] + index_info [in] Descriptor of the chosen index. + index_range_tree [in] Range tree for the chosen index + first_non_group_part [in] First index part after group attribute parts + min_max_arg_part [in] The keypart of the MIN/MAX argument if any + last_part [in] Last keypart of the index + thd [in] Current thread + key_infix [out] Infix of constants to be used for index lookup + key_infix_len [out] Lenghth of the infix + first_non_infix_part [out] The first keypart after the infix (if any) DESCRIPTION Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely, - for each distinct attribute NG_i from non_group_fields test if: - - there is a constant equality predicate among conds with the form - (NG_i = const_ci) or (const_ci = NG_i), and - - the NG_i attributes are a sub-sequence of the index which starts - immediately after the last group attribute, and ends just before the - MIN/MAX attribute. - Thus all the NG_i attributes must fill the 'gap' between the last group-by + for each keypart field NGF_i not in GROUP-BY, check that there is a constant + equality predicate among conds with the form (NGF_i = const_ci) or + (const_ci = NGF_i). + Thus all the NGF_i attributes must fill the 'gap' between the last group-by attribute and the MIN/MAX attribute in the index (if present). If these conditions hold, copy each constant from its corresponding predicate into key_infix, in the order its NG_i attribute appears in the index, and update key_infix_len with the total length of the key parts in key_infix. - IMPLEMENTATION - TODO - RETURN - TRUE if non_group_fields pass the test + TRUE if the index passes the test FALSE o/w */ static bool -get_constant_key_infix(List<Item_field> &non_group_fields, - SEL_ARG *index_range_tree, +get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree, KEY_PART_INFO *first_non_group_part, - KEY_PART_INFO *end_part, - byte *key_infix, uint *key_infix_len) + KEY_PART_INFO *min_max_arg_part, + KEY_PART_INFO *last_part, THD *thd, + byte *key_infix, uint *key_infix_len, + KEY_PART_INFO **first_non_infix_part) { - Item_field *non_group_field; - List_iterator<Item_field> non_group_fields_it(non_group_fields); SEL_ARG *cur_range; KEY_PART_INFO *cur_part; - - DBUG_ASSERT(non_group_fields.elements > 0); - - /* - For each keypart in the 'gap' between the last group part, and the min/max - argument (if present) or the last keypart, there must be exactly one - non-group field, therefore their count must match. Notice that - non_group_fields is a set. - */ - if (non_group_fields.elements != (uint)(end_part - first_non_group_part)) - return FALSE; + /* End part for the first loop below. */ + KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part; *key_infix_len= 0; byte *key_ptr= key_infix; @@ -7092,17 +6972,15 @@ get_constant_key_infix(List<Item_field> &non_group_fields, break; } if (!cur_range) - return FALSE; /* The current keypart has no range predicates at all. */ - - /* Check if there is a matching non-group field. */ - non_group_fields_it.rewind(); - while ((non_group_field= non_group_fields_it++)) { - if (cur_range->field->eq(non_group_field->field)) - break; + if (min_max_arg_part) + return FALSE; /* The current keypart has no range predicates at all. */ + else + { + *first_non_infix_part= cur_part; + return TRUE; + } } - if (!non_group_field) - return FALSE; /* Check that the current range tree is a single point interval. */ if (cur_range->prev || cur_range->next) @@ -7126,10 +7004,85 @@ get_constant_key_infix(List<Item_field> &non_group_fields, return FALSE; } + if (!min_max_arg_part && (cur_part == last_part)) + *first_non_infix_part= last_part; + return TRUE; } +/* + Find the key part referenced by a field. + + SYNOPSIS + get_field_keypart() + index descriptor of an index + field field that possibly references some key part in index + + NOTES + The return value can be used to get a KEY_PART_INFO pointer by + part= index->key_part + get_field_keypart(...) - 1; + + RETURN + Positive number which is the consecutive number of the key part, or + 0 if field does not reference any index field. +*/ + +static inline uint +get_field_keypart(KEY *index, Field *field) +{ + KEY_PART_INFO *part= index->key_part; + uint key_part_num= 0; + + while (part != part + index->key_parts) + { + key_part_num++; + if (field->eq(part->field)) + return key_part_num; + part++; + } + return key_part_num; +} + + +/* + Find the SEL_ARG sub-tree that corresponds to the chosen index. + + SYNOPSIS + get_index_range_tree() + index [in] The ID of the index being looked for + range_tree[in] Tree of ranges being searched + param [in] PARAM from SQL_SELECT::test_quick_select + param_idx [out] Index in the array PARAM::key that corresponds to 'index' + + DESCRIPTION + + A SEL_TREE contains range trees for all usable indexes. This procedure + finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are + ordered in the same way as the members of PARAM::key, thus we first find + the corresponding index in the array PARAM::key. This index is returned + through the variable param_idx, to be used later as argument of + check_quick_select(). + + RETURN + Pointer to the SEL_ARG subtree that corresponds to index. +*/ + +SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param, + uint *param_idx) +{ + uint idx= 0; /* Index nr in param->key_parts */ + while (idx < param->keys) + { + if (index == param->real_keynr[idx]) + break; + idx++; + } + *param_idx= idx; + return(range_tree->keys[idx]); +} + + TRP_GROUP_MIN_MAX::TRP_GROUP_MIN_MAX(JOIN *join, TABLE* table, bool have_min, bool have_max, KEY_PART_INFO *min_max_arg_part, @@ -7137,31 +7090,37 @@ TRP_GROUP_MIN_MAX::TRP_GROUP_MIN_MAX(JOIN *join, TABLE* table, uint group_key_parts, KEY *index_info, uint index, uint key_infix_len, byte *key_infix, SEL_TREE *tree, - PARAM *param) + SEL_ARG *index_tree, uint param_idx, + ha_rows quick_prefix_records, PARAM *param) : join(join), table(table), have_min(have_min), have_max(have_max), min_max_arg_part(min_max_arg_part), group_prefix_len(group_prefix_len), used_key_parts(used_key_parts), group_key_parts(group_key_parts), index_info(index_info), index(index), key_infix_len(key_infix_len), - range_tree(tree) + range_tree(tree), index_tree(index_tree), param_idx(param_idx), + quick_prefix_records(quick_prefix_records) { if (key_infix_len) memcpy(this->key_infix, key_infix, MAX_KEY_LENGTH); - if (range_tree) - { - /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */ - index_tree= get_index_range_tree(index, range_tree, param, - ¶m_idx); - /* Check if this range tree can be used for prefix retrieval. */ - quick_prefix_records= check_quick_select(param, param_idx, index_tree); - } } /* - Compute the cost of this trp. + Compute the cost of a quick_group_min_max_select for a particular index. SYNOPSIS - TRP_GROUP_MIN_MAX::update_cost() + cost_group_min_max() + table [in] The table being accessed + index_info [in] The index used to access the table + used_key_parts [in] Number of key parts used to access the index + group_key_parts [in] Number of index key parts in the group prefix + range_tree [in] Tree of ranges for all indexes + index_tree [in] The range tree for the current index + quick_prefix_records [in] Number of records retrieved by the internally used + quick range select if any + have_min [in] True if there is a MIN function + have_max [in] True if there is a MAX function + read_cost [out] The cost to retrieve rows via this quick select + records [out] The number of rows retrieved DESCRIPTION This method computes the access cost of a TRP_GROUP_MIN_MAX instance and the @@ -7205,7 +7164,11 @@ TRP_GROUP_MIN_MAX::TRP_GROUP_MIN_MAX(JOIN *join, TABLE* table, None */ -void TRP_GROUP_MIN_MAX::update_cost() +void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, + uint group_key_parts, SEL_TREE *range_tree, + SEL_ARG *index_tree, ha_rows quick_prefix_records, + bool have_min, bool have_max, + double *read_cost, ha_rows *records) { uint table_records; uint num_groups; @@ -7219,7 +7182,7 @@ void TRP_GROUP_MIN_MAX::update_cost() double io_cost; double cpu_cost= 0; /* TODO: CPU cost of index_read calls? */ - DBUG_ENTER("TRP_GROUP_MIN_MAX::update_cost"); + DBUG_ENTER("TRP_GROUP_MIN_MAX::cost"); table_records= table->file->records; keys_per_block= (table->file->block_size / 2 / (index_info->key_length + table->file->ref_length) @@ -7270,8 +7233,8 @@ void TRP_GROUP_MIN_MAX::update_cost() */ cpu_cost= (double) num_groups / TIME_FOR_COMPARE; - read_cost= io_cost /*+ cpu_cost*/; - records= num_groups; + *read_cost= io_cost /*+ cpu_cost*/; + *records= num_groups; DBUG_PRINT("info", ("records=%u, keys/block=%u, keys/group=%u, records=%u, blocks=%u", @@ -7434,6 +7397,12 @@ QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT( SYNOPSIS QUICK_GROUP_MIN_MAX_SELECT::init() + DESCRIPTION + The method performs initialization that cannot be done in the constructor + such as memory allocations that may fail. It allocates memory for the + group prefix and inifix buffers, and for the lists of MIN/MAX item to be + updated during execution. + RETURN 0 OK other Error code @@ -7472,23 +7441,46 @@ int QUICK_GROUP_MIN_MAX_SELECT::init() if(my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16)) return 1; - if (!(min_functions= new List<Item_sum>)) - return 1; - if (!(max_functions= new List<Item_sum>)) - return 1; + if (have_min) + { + if(!(min_functions= new List<Item_sum>)) + return 1; + } + else + min_functions= NULL; + if (have_max) + { + if(!(max_functions= new List<Item_sum>)) + return 1; + } + else + max_functions= NULL; - List_iterator<Item> select_items_it(join->fields_list); - Item *item; - while ((item= select_items_it++)) + Item_sum *min_max_item; + Item_sum **func_ptr= join->sum_funcs; + while ((min_max_item= *(func_ptr++))) { - item->walk(&Item::collect_item_sum_min_processor, (byte*) min_functions); - item->walk(&Item::collect_item_sum_max_processor, (byte*) max_functions); + if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC)) + min_functions->push_back(min_max_item); + else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC)) + max_functions->push_back(min_max_item); } - if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions))) - return 1; - if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions))) - return 1; + if (have_min) + { + if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions))) + return 1; + } + else + min_functions_it= NULL; + + if (have_max) + { + if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions))) + return 1; + } + else + max_functions_it= NULL; } return 0; @@ -7619,6 +7611,10 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat() SYNOPSIS QUICK_GROUP_MIN_MAX_SELECT::reset() + DESCRIPTION + Initialize the index chosen for access and find and store the prefix + of the last group. The method is expensive since it performs disk access. + RETURN 0 OK other Error code @@ -8211,6 +8207,21 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_max_result() } +/* + Append comma-separated list of keys this quick select uses to key_names; + append comma-separated list of corresponding used lengths to used_lengths. + + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths() + key_names [out] Names of used indexes + used_lengths [out] Corresponding lengths of the index names + + DESCRIPTION + This method is used by select_describe to extract the names of the + indexes used by a quick select. + +*/ + void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, String *used_lengths) { @@ -8222,6 +8233,26 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, } +/* + Print quick select information to DBUG_FILE. + + SYNOPSIS + QUICK_GROUP_MIN_MAX_SELECT::dbug_dump() + indent Indentation offset + verbose If TRUE show more detailed output. + + DESCRIPTION + Print the contents of this quick select to DBUG_FILE. The method also + calls dbug_dump() for the used quick select if any. + + IMPLEMENTATION + Caller is responsible for locking DBUG_FILE before this call and unlocking + it afterwards. + + RETURN + None +*/ + void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose) { fprintf(DBUG_FILE, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 44afe5feacb..3f6af75b8e9 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1327,7 +1327,7 @@ JOIN::exec() } } if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list, - 1) || + 1, TRUE) || (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) { @@ -1415,7 +1415,7 @@ JOIN::exec() set_items_ref_array(items3); if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list, - 1) || thd->is_fatal_error) + 1, TRUE) || thd->is_fatal_error) DBUG_VOID_RETURN; } if (curr_join->group_list || curr_join->order) @@ -10765,6 +10765,7 @@ bool JOIN::alloc_func_list() field_list All items send_fields Items in select list before_group_by Set to 1 if this is called before GROUP BY handling + recompute Set to TRUE if sum_funcs must be recomputed NOTES Calls ::setup() for all item_sum objects in field_list @@ -10775,13 +10776,16 @@ bool JOIN::alloc_func_list() */ bool JOIN::make_sum_func_list(List<Item> &field_list, List<Item> &send_fields, - bool before_group_by) + bool before_group_by, bool recompute) { List_iterator_fast<Item> it(field_list); Item_sum **func; Item *item; DBUG_ENTER("make_sum_func_list"); + if (*sum_funcs && !recompute) + DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */ + func= sum_funcs; while ((item=it++)) { diff --git a/sql/sql_select.h b/sql/sql_select.h index 03c778637fd..b1af15017cc 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -303,7 +303,7 @@ class JOIN :public Sql_alloc void restore_tmp(); bool alloc_func_list(); bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields, - bool before_group_by); + bool before_group_by, bool recompute= FALSE); inline void set_items_ref_array(Item **ptr) { |