diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/filesort.cc | 150 | ||||
-rw-r--r-- | sql/ha_partition.cc | 43 | ||||
-rw-r--r-- | sql/mysql_priv.h | 45 | ||||
-rw-r--r-- | sql/mysqld.cc | 5 | ||||
-rw-r--r-- | sql/opt_range.cc | 2994 | ||||
-rw-r--r-- | sql/opt_range.h | 131 | ||||
-rw-r--r-- | sql/sql_acl.cc | 17 | ||||
-rw-r--r-- | sql/sql_class.h | 28 | ||||
-rw-r--r-- | sql/sql_list.h | 5 | ||||
-rw-r--r-- | sql/sql_select.cc | 14 | ||||
-rw-r--r-- | sql/sql_sort.h | 6 | ||||
-rw-r--r-- | sql/table.h | 2 | ||||
-rw-r--r-- | sql/uniques.cc | 92 | ||||
-rw-r--r-- | sql/unireg.h | 2 |
14 files changed, 2901 insertions, 633 deletions
diff --git a/sql/filesort.cc b/sql/filesort.cc index 1ee3972c4fd..8f03ee26691 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -50,10 +50,6 @@ static int write_keys(SORTPARAM *param,uchar * *sort_keys, uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile); static void make_sortkey(SORTPARAM *param,uchar *to, uchar *ref_pos); static void register_used_fields(SORTPARAM *param); -static int merge_index(SORTPARAM *param,uchar *sort_buffer, - BUFFPEK *buffpek, - uint maxbuffer,IO_CACHE *tempfile, - IO_CACHE *outfile); static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count, FILESORT_INFO *table_sort); static uint suffix_length(ulong string_length); @@ -145,8 +141,6 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, /* filesort cannot handle zero-length records. */ DBUG_ASSERT(param.sort_length); param.ref_length= table->file->ref_length; - param.addon_field= 0; - param.addon_length= 0; if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) && !table->fulltext_searched && !sort_positions) { @@ -1221,8 +1215,11 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, QUEUE queue; qsort2_cmp cmp; void *first_cmp_arg; - volatile THD::killed_state *killed= ¤t_thd->killed; + element_count dupl_count; + uchar *src; THD::killed_state not_killable; + uchar *unique_buff= param->unique_buff; + volatile THD::killed_state *killed= ¤t_thd->killed; DBUG_ENTER("merge_buffers"); status_var_increment(current_thd->status_var.filesort_merge_passes); @@ -1237,7 +1234,13 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, rec_length= param->rec_length; res_length= param->res_length; sort_length= param->sort_length; - offset= rec_length-res_length; + uint dupl_count_ofs= rec_length-sizeof(element_count); + uint min_dupl_count= param->min_dupl_count; + bool check_dupl_count= flag && min_dupl_count; + offset= (rec_length- + (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length); + uint wr_len= flag ? res_length : rec_length; + uint wr_offset= flag ? offset : 0; maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1)); to_start_filepos= my_b_tell(to_file); strpos= sort_buffer; @@ -1246,7 +1249,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, /* The following will fire if there is not enough space in sort_buffer */ DBUG_ASSERT(maxcount!=0); - if (param->unique_buff) + if (unique_buff) { cmp= param->compare; first_cmp_arg= (void *) ¶m->cmp_context; @@ -1271,28 +1274,29 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, queue_insert(&queue, (uchar*) buffpek); } - if (param->unique_buff) + if (unique_buff) { /* Called by Unique::get() - Copy the first argument to param->unique_buff for unique removal. + Copy the first argument to unique_buff for unique removal. Store it also in 'to_file'. - - This is safe as we know that there is always more than one element - in each block to merge (This is guaranteed by the Unique:: algorithm */ buffpek= (BUFFPEK*) queue_top(&queue); - memcpy(param->unique_buff, buffpek->key, rec_length); - if (my_b_write(to_file, (uchar*) buffpek->key, rec_length)) - { - error=1; goto err; /* purecov: inspected */ - } + memcpy(unique_buff, buffpek->key, rec_length); + if (min_dupl_count) + memcpy(&dupl_count, unique_buff+dupl_count_ofs, + sizeof(dupl_count)); buffpek->key+= rec_length; - buffpek->mem_count--; - if (!--max_rows) + if (! --buffpek->mem_count) { - error= 0; /* purecov: inspected */ - goto end; /* purecov: inspected */ + if (!(error= (int) read_to_buffer(from_file, buffpek, + rec_length))) + { + VOID(queue_remove(&queue,0)); + reuse_freed_buff(&queue, buffpek, rec_length); + } + else if (error == -1) + goto err; /* purecov: inspected */ } queue_replace_top(&queue); // Top element has been used } @@ -1308,27 +1312,50 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, for (;;) { buffpek= (BUFFPEK*) queue_top(&queue); + src= buffpek->key; if (cmp) // Remove duplicates { - if (!(*cmp)(first_cmp_arg, &(param->unique_buff), + if (!(*cmp)(first_cmp_arg, &unique_buff, (uchar**) &buffpek->key)) - goto skip_duplicate; - memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length); - } - if (flag == 0) - { - if (my_b_write(to_file,(uchar*) buffpek->key, rec_length)) - { - error=1; goto err; /* purecov: inspected */ + { + if (min_dupl_count) + { + element_count cnt; + memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt)); + dupl_count+= cnt; + } + goto skip_duplicate; + } + if (min_dupl_count) + { + memcpy(unique_buff+dupl_count_ofs, &dupl_count, + sizeof(dupl_count)); } + src= unique_buff; } - else + + /* + Do not write into the output file if this is the final merge called + for a Unique object used for intersection and dupl_count is less + than min_dupl_count. + If the Unique object is used to intersect N sets of unique elements + then for any element: + dupl_count >= N <=> the element is occurred in each of these N sets. + */ + if (!check_dupl_count || dupl_count >= min_dupl_count) { - if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length)) + if (my_b_write(to_file, src+wr_offset, wr_len)) { error=1; goto err; /* purecov: inspected */ } } + if (cmp) + { + memcpy(unique_buff, (uchar*) buffpek->key, rec_length); + if (min_dupl_count) + memcpy(&dupl_count, unique_buff+dupl_count_ofs, + sizeof(dupl_count)); + } if (!--max_rows) { error= 0; /* purecov: inspected */ @@ -1339,7 +1366,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, buffpek->key+= rec_length; if (! --buffpek->mem_count) { - if (!(error= (int) read_to_buffer(from_file,buffpek, + if (!(error= (int) read_to_buffer(from_file, buffpek, rec_length))) { VOID(queue_remove_top(&queue)); @@ -1362,11 +1389,35 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, */ if (cmp) { - if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key)) + if (!(*cmp)(first_cmp_arg, &unique_buff, (uchar**) &buffpek->key)) { - buffpek->key+= rec_length; // Remove duplicate + if (min_dupl_count) + { + element_count cnt; + memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt)); + dupl_count+= cnt; + } + buffpek->key+= rec_length; --buffpek->mem_count; } + + if (min_dupl_count) + memcpy(unique_buff+dupl_count_ofs, &dupl_count, + sizeof(dupl_count)); + + if (!check_dupl_count || dupl_count >= min_dupl_count) + { + src= unique_buff; + if (my_b_write(to_file, src+wr_offset, wr_len)) + { + error=1; goto err; /* purecov: inspected */ + } + if (!--max_rows) + { + error= 0; + goto end; + } + } } do @@ -1379,7 +1430,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, max_rows-= buffpek->mem_count; if (flag == 0) { - if (my_b_write(to_file,(uchar*) buffpek->key, + if (my_b_write(to_file, (uchar*) buffpek->key, (rec_length*buffpek->mem_count))) { error= 1; goto err; /* purecov: inspected */ @@ -1388,19 +1439,25 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, else { register uchar *end; - strpos= buffpek->key+offset; - for (end= strpos+buffpek->mem_count*rec_length ; - strpos != end ; - strpos+= rec_length) - { - if (my_b_write(to_file, strpos, res_length)) + src= buffpek->key+offset; + for (end= src+buffpek->mem_count*rec_length ; + src != end ; + src+= rec_length) + { + if (check_dupl_count) + { + memcpy((uchar *) &dupl_count, src+dupl_count_ofs, sizeof(dupl_count)); + if (dupl_count < min_dupl_count) + continue; + } + if (my_b_write(to_file, src, wr_len)) { error=1; goto err; } } } } - while ((error=(int) read_to_buffer(from_file,buffpek, rec_length)) + while ((error=(int) read_to_buffer(from_file, buffpek, rec_length)) != -1 && error != 0); end: @@ -1414,7 +1471,7 @@ err: /* Do a merge to output-file (save only positions) */ -static int merge_index(SORTPARAM *param, uchar *sort_buffer, +int merge_index(SORTPARAM *param, uchar *sort_buffer, BUFFPEK *buffpek, uint maxbuffer, IO_CACHE *tempfile, IO_CACHE *outfile) { @@ -1720,3 +1777,4 @@ void change_double_for_sort(double nr,uchar *to) } } } + diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 0dbeb4718e2..1ef78e30335 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -2434,7 +2434,7 @@ bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root) for (i= 0; i < m_tot_parts; i++) m_engine_array[i]= ha_lock_engine(NULL, engine_array[i]); - my_afree((gptr) engine_array); + my_afree(engine_array); if (!m_file && create_handlers(mem_root)) { @@ -2444,7 +2444,7 @@ bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root) DBUG_RETURN(FALSE); err3: - my_afree((gptr) engine_array); + my_afree(engine_array); err2: my_free(file_buffer, MYF(0)); err1: @@ -4643,19 +4643,6 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf) break; case partition_index_first: DBUG_PRINT("info", ("index_first on partition %d", i)); - /* - MyISAM engine can fail if we call index_first() when indexes disabled - that happens if the table is empty. - Here we use file->stats.records instead of file->records() because - file->records() is supposed to return an EXACT count, and it can be - possibly slow. We don't need an exact number, an approximate one- from - the last ::info() call - is sufficient. - */ - if (file->stats.records == 0) - { - error= HA_ERR_END_OF_FILE; - break; - } error= file->ha_index_first(buf); break; case partition_index_first_unordered: @@ -4749,36 +4736,10 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) m_start_key.flag); break; case partition_index_first: - /* - MyISAM engine can fail if we call index_first() when indexes disabled - that happens if the table is empty. - Here we use file->stats.records instead of file->records() because - file->records() is supposed to return an EXACT count, and it can be - possibly slow. We don't need an exact number, an approximate one- from - the last ::info() call - is sufficient. - */ - if (file->stats.records == 0) - { - error= HA_ERR_END_OF_FILE; - break; - } error= file->ha_index_first(rec_buf_ptr); reverse_order= FALSE; break; case partition_index_last: - /* - MyISAM engine can fail if we call index_last() when indexes disabled - that happens if the table is empty. - Here we use file->stats.records instead of file->records() because - file->records() is supposed to return an EXACT count, and it can be - possibly slow. We don't need an exact number, an approximate one- from - the last ::info() call - is sufficient. - */ - if (file->stats.records == 0) - { - error= HA_ERR_END_OF_FILE; - break; - } error= file->ha_index_last(rec_buf_ptr); reverse_order= TRUE; break; diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index ca5ccc699cd..1863bfa5cc7 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -351,7 +351,10 @@ protected: Number of comparisons of table rowids equivalent to reading one row from a table. */ -#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2) +#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*100) + +/* cost1 is better that cost2 only if cost1 + COST_EPS < cost2 */ +#define COST_EPS 0.001 /* For sequential disk seeks the cost formula is: @@ -561,27 +564,27 @@ protected: #define OPTIMIZER_SWITCH_INDEX_MERGE_UNION 2 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8 -#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16 - -#define OPTIMIZER_SWITCH_FIRSTMATCH 32 -#define OPTIMIZER_SWITCH_LOOSE_SCAN 64 -#define OPTIMIZER_SWITCH_MATERIALIZATION 128 -#define OPTIMIZER_SWITCH_SEMIJOIN 256 -#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512 -#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024 -#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11) -#define OPTIMIZER_SWITCH_MRR_SORT_KEYS (1<<12) -#define OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE (1<<13) -#define OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE (1<<14) -#define OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL (1<<15) -#define OPTIMIZER_SWITCH_JOIN_CACHE_HASHED (1<<16) -#define OPTIMIZER_SWITCH_JOIN_CACHE_BKA (1<<17) - +#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT 16 +#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 32 + +#define OPTIMIZER_SWITCH_FIRSTMATCH 64 +#define OPTIMIZER_SWITCH_LOOSE_SCAN 128 +#define OPTIMIZER_SWITCH_MATERIALIZATION 256 +#define OPTIMIZER_SWITCH_SEMIJOIN 512 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11) +#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12) +#define OPTIMIZER_SWITCH_MRR_SORT_KEYS (1<<13) +#define OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE (1<<14) +#define OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE (1<<15) +#define OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL (1<<16) +#define OPTIMIZER_SWITCH_JOIN_CACHE_HASHED (1<<17) +#define OPTIMIZER_SWITCH_JOIN_CACHE_BKA (1<<18) #ifdef DBUG_OFF -# define OPTIMIZER_SWITCH_LAST (1<<18) -#else -# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<18) # define OPTIMIZER_SWITCH_LAST (1<<19) +#else +# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<19) +# define OPTIMIZER_SWITCH_LAST (1<<20) #endif #ifdef DBUG_OFF @@ -2330,6 +2333,8 @@ ha_rows filesort(THD *thd, TABLE *form,struct st_sort_field *sortorder, ha_rows max_rows, bool sort_positions, ha_rows *examined_rows); void filesort_free_buffers(TABLE *table, bool full); +double get_merge_many_buffs_cost(uint *buffer, uint last_n_elems, + int elem_size); void change_double_for_sort(double nr,uchar *to); double my_double_round(double value, longlong dec, bool dec_unsigned, bool truncate); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index d7a457c6921..f79e7f63d5d 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -339,7 +339,7 @@ TYPELIB sql_mode_typelib= { array_elements(sql_mode_names)-1,"", static const char *optimizer_switch_names[]= { "index_merge","index_merge_union","index_merge_sort_union", - "index_merge_intersection", + "index_merge_intersection","index_merge_sort_intersection", "index_condition_pushdown", "firstmatch","loosescan","materialization", "semijoin", "partial_match_rowid_merge", @@ -364,6 +364,7 @@ static const unsigned int optimizer_switch_names_len[]= sizeof("index_merge_union") - 1, sizeof("index_merge_sort_union") - 1, sizeof("index_merge_intersection") - 1, + sizeof("index_merge_sort_intersection") - 1, sizeof("index_condition_pushdown") - 1, sizeof("firstmatch") - 1, sizeof("loosescan") - 1, @@ -469,6 +470,7 @@ static const char *sql_mode_str= "OFF"; static const char *optimizer_switch_str="index_merge=on,index_merge_union=on," "index_merge_sort_union=on," "index_merge_intersection=on," + "index_merge_sort_intersection=off" "index_condition_pushdown=on," "firstmatch=on," "loosescan=on," @@ -7404,6 +7406,7 @@ thread is in the relay logs.", {"optimizer_switch", OPT_OPTIMIZER_SWITCH, "optimizer_switch=option=val[,option=val...], where option={index_merge, " "index_merge_union, index_merge_sort_union, index_merge_intersection, " + "index_merge_sort_intersection, " "index_condition_pushdown, firstmatch, loosescan, materialization, " "semijoin, partial_match_rowid_merge, partial_match_table_scan, " "subquery_cache, outer_join_with_cache, semijoin_with_cache, " diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 9d89911eec3..04842374ae1 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -304,6 +304,11 @@ public: uint8 part; // Which key part uint8 maybe_null; /* + The ordinal number the least significant component encountered in + the ranges of the SEL_ARG tree (the first component has number 1) + */ + uint16 max_part_no; + /* Number of children of this element in the RB-tree, plus 1 for this element itself. */ @@ -540,6 +545,11 @@ public: pos->increment_use_count(count); } } + void incr_refs() + { + increment_use_count(1); + use_count++; + } void free_tree() { for (SEL_ARG *pos=first(); pos ; pos=pos->next) @@ -601,7 +611,100 @@ public: class SEL_IMERGE; +#define CLONE_KEY1_MAYBE 1 +#define CLONE_KEY2_MAYBE 2 +#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) + +/* + While objects of the class SEL_ARG represent ranges for indexes or + index infixes (including ranges for index prefixes and index suffixes), + objects of the class SEL_TREE represent AND/OR formulas of such ranges. + Currently an AND/OR formula represented by a SEL_TREE object can have + at most three levels: + + <SEL_TREE formula> ::= + [ <SEL_RANGE_TREE formula> AND ] + [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ] + + <SEL_RANGE_TREE formula> ::= + <SEL_ARG formula> [ AND <SEL_ARG_formula> ... ] + + <SEL_IMERGE formula> ::= + <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ] + + As we can see from the above definitions: + - SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas + - SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas + - SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula + and SEL_IMERGE formulas. + It's required above that a SEL_TREE formula has at least one conjunct. + + Usually we will consider normalized SEL_RANGE_TREE formulas where we use + TRUE as conjunct members for those indexes whose SEL_ARG trees are empty. + + We will call an SEL_TREE object simply 'tree'. + The part of a tree that represents SEL_RANGE_TREE formula is called + 'range part' of the tree while the remaining part is called 'imerge part'. + If a tree contains only a range part then we call such a tree 'range tree'. + Components of a range tree that represent SEL_ARG formulas are called ranges. + If a tree does not contain any range part we call such a tree 'imerge tree'. + Components of the imerge part of a tree that represent SEL_IMERGE formula + are called imerges. + + Usually we'll designate: + SEL_TREE formulas by T_1,...,T_k + SEL_ARG formulas by R_1,...,R_k + SEL_RANGE_TREE formulas by RT_1,...,RT_k + SEL_IMERGE formulas by M_1,...,M_k + Accordingly we'll use: + t_1,...,t_k - to designate trees representing T_1,...,T_k + r_1,...,r_k - to designate ranges representing R_1,...,R_k + rt_1,...,r_tk - to designate range trees representing RT_1,...,RT_k + m_1,...,m_k - to designate imerges representing M_1,...,M_k + + SEL_TREE objects are usually built from WHERE conditions or + ON expressions. + A SEL_TREE object always represents an inference of the condition it is + built from. Therefore, if a row satisfies a SEL_TREE formula it also + satisfies the condition it is built from. + + The following transformations of tree t representing SEL_TREE formula T + yield a new tree t1 thar represents an inference of T: T=>T1. + (1) remove any of SEL_ARG tree from the range part of t + (2) remove any imerge from the tree t + (3) remove any of SEL_ARG tree from any range tree contained + in any imerge of tree + + Since the basic blocks of any SEL_TREE objects are ranges, SEL_TREE + objects in many cases can be effectively used to filter out a big part + of table rows that do not satisfy WHERE/IN conditions utilizing + only single or multiple range index scans. + + A single range index scan is constructed for a range tree that contains + only one SEL_ARG object for an index or an index prefix. + An index intersection scan can be constructed for a range tree + that contains several SEL_ARG objects. Currently index intersection + scans are constructed only for single-point ranges. + An index merge scan is constructed for a imerge tree that contains only + one imerge. If range trees of this imerge contain only single-point merges + than a union of index intersections can be built. + + Usually the tree built by the range optimizer for a query table contains + more than one range in the range part, and additionally may contain some + imerges in the imerge part. The range optimizer evaluates all of them one + by one and chooses the range or the imerge that provides the cheapest + single or multiple range index scan of the table. According to rules + (1)-(3) this scan always filter out only those rows that do not satisfy + the query conditions. + + For any condition the SEL_TREE object for it is built in a bottom up + manner starting from the range trees for the predicates. The tree_and + function builds a tree for any conjunction of formulas from the trees + for its conjuncts. The tree_or function builds a tree for any disjunction + of formulas from the trees for its disjuncts. +*/ + class SEL_TREE :public Sql_alloc { public: @@ -617,7 +720,7 @@ public: keys_map.clear_all(); bzero((char*) keys,sizeof(keys)); } - SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param); + SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param); /* Note: there may exist SEL_TREE objects with sel_tree->type=KEY and keys[i]=0 for all i. (SergeyP: it is not clear whether there is any @@ -637,9 +740,15 @@ public: key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ uint n_ror_scans; /* number of set bits in ror_scans_map */ + struct st_index_scan_info **index_scans; /* list of index scans */ + struct st_index_scan_info **index_scans_end; /* last index scan */ + struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ /* Note that #records for each key scan is stored in table->quick_rows */ + + bool without_ranges() { return keys_map.is_clear_all(); } + bool without_imerges() { return merges.is_empty(); } }; class RANGE_OPT_PARAM @@ -693,12 +802,13 @@ public: /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */ uint alloced_sel_args; bool force_default_mrr; + KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ }; class PARAM : public RANGE_OPT_PARAM { public: - KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ + ha_rows quick_rows[MAX_KEY]; longlong baseflag; uint max_key_part, range_count; @@ -725,9 +835,11 @@ class TABLE_READ_PLAN; class TRP_RANGE; class TRP_ROR_INTERSECT; class TRP_ROR_UNION; - class TRP_ROR_INDEX_MERGE; + class TRP_INDEX_INTERSECT; + class TRP_INDEX_MERGE; class TRP_GROUP_MIN_MAX; +struct st_index_scan_info; struct st_ror_scan_info; static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, @@ -752,6 +864,9 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, bool update_tbl_stats, double read_time); static +TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree, + double read_time); +static TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, double read_time, bool *are_all_covering); @@ -762,7 +877,12 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, static TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, double read_time); -static TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree); +static +TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge, + TRP_INDEX_MERGE *imerge_trp, + double read_time); +static +TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree); #ifndef DBUG_OFF static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, @@ -773,11 +893,15 @@ static void print_ror_scans_arr(TABLE *table, const char *msg, static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif -static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); -static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_and(RANGE_OPT_PARAM *param, + SEL_TREE *tree1, SEL_TREE *tree2); +static SEL_TREE *tree_or(RANGE_OPT_PARAM *param, + SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); -static SEL_ARG *key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2); -static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, +static SEL_ARG *key_or(RANGE_OPT_PARAM *param, + SEL_ARG *key1, SEL_ARG *key2); +static SEL_ARG *key_and(RANGE_OPT_PARAM *param, + SEL_ARG *key1, SEL_ARG *key2, uint clone_flag); static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1); bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, @@ -788,11 +912,27 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b); static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE); static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length); -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param); static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); #include "opt_range_mrr.cc" +static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, + key_map *common_keys); +static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, + SEL_TREE *tree); + +static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param, + SEL_TREE *tree1, SEL_TREE *tree2, + key_map *common_keys); +static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param, + SEL_TREE *tree1, SEL_TREE *tree2, + key_map common_keys); +static int and_range_trees(RANGE_OPT_PARAM *param, + SEL_TREE *tree1, SEL_TREE *tree2, + SEL_TREE *result); +static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree); + + /* SEL_IMERGE is a list of possible ways to do index merge, i.e. it is a condition in the following form: @@ -822,23 +962,39 @@ public: trees_next(trees), trees_end(trees + PREALLOCED_TREES) {} - SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param); + SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param); int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree); - int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree); - int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge); + bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree); + int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, + SEL_IMERGE *new_imerge); + int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, + uint n_init_trees, + SEL_TREE *new_tree, + bool is_first_check_pass, + bool *is_last_check_pass); + int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, + uint n_init_trees, + SEL_IMERGE* imerge, + bool is_first_check_pass, + bool *is_last_check_pass); }; /* - Add SEL_TREE to this index_merge without any checks, + Add a range tree to the range trees of this imerge - NOTES - This function implements the following: - (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs + SYNOPSIS + or_sel_tree() + param Context info for the operation + tree SEL_TREE to add to this imerge + + DESCRIPTION + The function just adds the range tree 'tree' to the range trees + of this imerge. RETURN - 0 - OK - -1 - Out of memory. + 0 if the operation is success + -1 if the function runs out memory */ int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) @@ -863,96 +1019,303 @@ int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) /* - Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree, - combining new_tree with one of the trees in this SEL_IMERGE if they both - have SEL_ARGs for the same key. + Check if any of the range trees of this imerge intersects with a given tree SYNOPSIS - or_sel_tree_with_checks() - param PARAM from SQL_SELECT::test_quick_select - new_tree SEL_TREE with type KEY or KEY_SMALLER. + have_common_keys() + param Context info for the function + tree SEL_TREE intersection with the imerge range trees is checked for - NOTES - This does the following: - (t_1||...||t_k)||new_tree = - either - = (t_1||...||t_k||new_tree) - or - = (t_1||....||(t_j|| new_tree)||...||t_k), - - where t_i, y are SEL_TREEs. - new_tree is combined with the first t_j it has a SEL_ARG on common - key with. As a consequence of this, choice of keys to do index_merge - read may depend on the order of conditions in WHERE part of the query. + DESCRIPTION + The function checks whether there is any range tree rt_i in this imerge + such that there are some indexes for which ranges are defined in both + rt_i and the range part of the SEL_TREE tree. + To check this the function calls the function sel_trees_have_common_keys. + + RETURN + TRUE if there are such range trees in this imerge + FALSE otherwise +*/ +bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree) +{ + for (SEL_TREE** or_tree= trees, **bound= trees_next; + or_tree != bound; or_tree++) + { + key_map common_keys; + if (sel_trees_have_common_keys(*or_tree, tree, &common_keys)) + return TRUE; + } + return FALSE; +} + + +/* + Perform AND operation for this imerge and the range part of a tree + + SYNOPSIS + and_sel_tree() + param Context info for the operation + tree SEL_TREE for the second operand of the operation + new_imerge OUT imerge for the result of the operation + + DESCRIPTION + This function performs AND operation for this imerge m and the + range part of the SEL_TREE tree rt. In other words the function + pushes rt into this imerge. The resulting imerge is returned in + the parameter new_imerge. + If this imerge m represent the formula + RT_1 OR ... OR RT_k + then the resulting imerge of the function represents the formula + (RT_1 AND RT) OR ... OR (RT_k AND RT) + The function calls the function and_range_trees to construct the + range tree representing (RT_i AND RT). + + NOTE + The function may return an empty imerge without any range trees. + This happens when each call of and_range_trees returns an + impossible range tree (SEL_TREE::IMPOSSIBLE). + Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6). + RETURN - 0 OK - 1 One of the trees was combined with new_tree to SEL_TREE::ALWAYS, - and (*this) should be discarded. - -1 An error occurred. + 0 if the operation is a success + -1 otherwise: there is not enough memory to perform the operation */ -int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree) +int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, + SEL_IMERGE *new_imerge) { - for (SEL_TREE** tree = trees; - tree != trees_next; - tree++) + for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++) { - if (sel_trees_can_be_ored(*tree, new_tree, param)) + SEL_TREE *res_or_tree= 0; + if (!(res_or_tree= new SEL_TREE())) + return (-1); + if (!and_range_trees(param, *or_tree, tree, res_or_tree)) { - *tree = tree_or(param, *tree, new_tree); - if (!*tree) - return 1; - if (((*tree)->type == SEL_TREE::MAYBE) || - ((*tree)->type == SEL_TREE::ALWAYS)) + if (new_imerge->or_sel_tree(param, res_or_tree)) + return (-1); + } + } + return 0; +} + + +/* + Perform OR operation on this imerge and the range part of a tree + + SYNOPSIS + or_sel_tree_with_checks() + param Context info for the operation + n_trees Number of trees in this imerge to check for oring + tree SEL_TREE whose range part is to be ored + is_first_check_pass <=> the first call of the function for this imerge + is_last_check_pass OUT <=> no more calls of the function for this imerge + + DESCRIPTION + The function performs OR operation on this imerge m and the range part + of the SEL_TREE tree rt. It always replaces this imerge with the result + of the operation. + + The operation can be performed in two different modes: with + is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming + this imerge differently. + + Given this imerge represents the formula + RT_1 OR ... OR RT_k: + + 1. In the first mode, when is_first_check_pass==TRUE : + 1.1. If rt must be ored(see the function sel_trees_must_be_ored) with + some rt_j (there may be only one such range tree in the imerge) + then the function produces an imerge representing the formula + RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k, + where the tree for (RT_j OR RT) is built by oring the pairs + of SEL_ARG trees for the corresponding indexes + 1.2. Otherwise the function produces the imerge representing the formula: + RT_1 OR ... OR RT_k OR RT. + + 2. In the second mode, when is_first_check_pass==FALSE : + 2.1. For each rt_j in the imerge that can be ored (see the function + sel_trees_can_be_ored), but not must be ored, with rt the function + replaces rt_j for a range tree such that for each index for which + ranges are defined in both in rt_j and rt the tree contains the + result of oring of these ranges. + 2.2. In other cases the function does not produce any imerge. + + When is_first_check==TRUE the function returns FALSE in the parameter + is_last_check_pass if there is no rt_j such that rt_j can be ored with rt, + but, at the same time, it's not true that rt_j must be ored with rt. + When is_first_check==FALSE the function always returns FALSE in the + parameter is_last_check_pass. + + RETURN + 1 The result of oring of rt_j and rt that must be ored returns the + the range tree with type==SEL_TREE::ALWAYS + (in this case the imerge m should be discarded) + -1 The function runs out of memory + 0 in all other cases +*/ + +int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, + uint n_trees, + SEL_TREE *tree, + bool is_first_check_pass, + bool *is_last_check_pass) +{ + bool was_ored= FALSE; + *is_last_check_pass= TRUE; + SEL_TREE** or_tree = trees; + for (uint i= 0; i < n_trees; i++, or_tree++) + { + SEL_TREE *result= 0; + key_map result_keys; + key_map ored_keys; + if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys)) + { + bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree, + ored_keys); + if (must_be_ored || !is_first_check_pass) + { + result_keys.clear_all(); + result= *or_tree; + for (uint key_no= 0; key_no < param->keys; key_no++) + { + if (!ored_keys.is_set(key_no)) + { + result->keys[key_no]= 0; + continue; + } + SEL_ARG *key1= (*or_tree)->keys[key_no]; + SEL_ARG *key2= tree->keys[key_no]; + key2->incr_refs(); + if ((result->keys[key_no]= key_or(param, key1, key2))) + { + + result_keys.set_bit(key_no); +#ifdef EXTRA_DEBUG + if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) + { + key1= result->keys[key_no]; + (key1)->test_use_count(key1); + } +#endif + } + } + } + else if(is_first_check_pass) + *is_last_check_pass= FALSE; + } + + if (result) + { + if (result_keys.is_clear_all()) + result->type= SEL_TREE::ALWAYS; + *is_last_check_pass= TRUE; + if ((result->type == SEL_TREE::MAYBE) || + (result->type == SEL_TREE::ALWAYS)) return 1; /* SEL_TREE::IMPOSSIBLE is impossible here */ - return 0; + result->keys_map= result_keys; + *or_tree= result; + if (is_first_check_pass) + return 0; + was_ored= TRUE; } } + if (was_ored) + return 0; - /* New tree cannot be combined with any of existing trees. */ - return or_sel_tree(param, new_tree); + if (!*is_last_check_pass && + !(tree= new SEL_TREE(tree, FALSE, param))) + return (-1); + return or_sel_tree(param, tree); } /* - Perform OR operation on this index_merge and supplied index_merge list. + Perform OR operation on this imerge and and another imerge + + SYNOPSIS + or_sel_imerge_with_checks() + param Context info for the operation + n_trees Number of trees in this imerge to check for oring + imerge The second operand of the operation + is_first_check_pass <=> the first call of the function for this imerge + is_last_check_pass OUT <=> no more calls of the function for this imerge + DESCRIPTION + For each range tree rt from 'imerge' the function calls the method + SEL_IMERGE::or_sel_tree_with_checks that performs OR operation on this + SEL_IMERGE object m and the tree rt. The mode of the operation is + specified by the parameter is_first_check_pass. Each call of + SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m. + The function returns FALSE in the prameter is_last_check_pass if + at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks + returns FALSE as the value of its last parameter. + RETURN - 0 - OK - 1 - One of conditions in result is always TRUE and this SEL_IMERGE - should be discarded. - -1 - An error occurred + 1 One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1. + (in this case the imerge m should be discarded) + -1 The function runs out of memory + 0 in all other cases */ -int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge) -{ - for (SEL_TREE** tree= imerge->trees; - tree != imerge->trees_next; - tree++) - { - if (or_sel_tree_with_checks(param, *tree)) - return 1; +int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, + uint n_trees, + SEL_IMERGE* imerge, + bool is_first_check_pass, + bool *is_last_check_pass) +{ + *is_last_check_pass= TRUE; + SEL_TREE** tree= imerge->trees; + SEL_TREE** tree_end= imerge->trees_next; + for ( ; tree < tree_end; tree++) + { + uint rc; + bool is_last= TRUE; + rc= or_sel_tree_with_checks(param, n_trees, *tree, + is_first_check_pass, &is_last); + if (!is_last) + *is_last_check_pass= FALSE; + if (rc) + return rc; } return 0; } -SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc() +/* + Copy constructor for SEL_TREE objects + + SYNOPSIS + SEL_TREE + arg The source tree for the constructor + without_merges <=> only the range part of the tree arg is copied + param Context info for the operation + + DESCRIPTION + The constructor creates a full copy of the SEL_TREE arg if + the prameter without_merges==FALSE. Otherwise a tree is created + that contains the copy only of the range part of the tree arg. +*/ + +SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges, + RANGE_OPT_PARAM *param): Sql_alloc() { keys_map= arg->keys_map; type= arg->type; - for (int idx= 0; idx < MAX_KEY; idx++) + for (uint idx= 0; idx < param->keys; idx++) { if ((keys[idx]= arg->keys[idx])) - keys[idx]->increment_use_count(1); + keys[idx]->incr_refs(); } + if (without_merges) + return; + List_iterator<SEL_IMERGE> it(arg->merges); for (SEL_IMERGE *el= it++; el; el= it++) { - SEL_IMERGE *merge= new SEL_IMERGE(el, param); + SEL_IMERGE *merge= new SEL_IMERGE(el, 0, param); if (!merge || merge->trees == merge->trees_next) { merges.empty(); @@ -963,7 +1326,23 @@ SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param): Sql_alloc() } -SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc() +/* + Copy constructor for SEL_IMERGE objects + + SYNOPSIS + SEL_IMERGE + arg The source imerge for the constructor + cnt How many trees from arg are to be copied + param Context info for the operation + + DESCRIPTION + The cnt==0 then the constructor creates a full copy of the + imerge arg. Otherwise only the first cnt trees of the imerge + are copied. +*/ + +SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt, + RANGE_OPT_PARAM *param) : Sql_alloc() { uint elements= (arg->trees_end - arg->trees); if (elements > PREALLOCED_TREES) @@ -975,13 +1354,13 @@ SEL_IMERGE::SEL_IMERGE (SEL_IMERGE *arg, RANGE_OPT_PARAM *param) : Sql_alloc() else trees= &trees_prealloced[0]; - trees_next= trees; + trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees); trees_end= trees + elements; - for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_end; + for (SEL_TREE **tree = trees, **arg_tree= arg->trees; tree < trees_next; tree++, arg_tree++) { - if (!(*tree= new SEL_TREE(*arg_tree, param))) + if (!(*tree= new SEL_TREE(*arg_tree, FALSE, param))) goto mem_err; } @@ -995,7 +1374,19 @@ mem_err: /* - Perform AND operation on two index_merge lists and store result in *im1. + Perform AND operation on two imerge lists + + SYNOPSIS + imerge_list_and_list() + param Context info for the operation + im1 The first imerge list for the operation + im2 The second imerge list for the operation + + DESCRIPTION + The function just appends the imerge list im2 to the imerge list im1 + + RETURN VALUE + none */ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) @@ -1005,73 +1396,242 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) /* - Perform OR operation on 2 index_merge lists, storing result in first list. - - NOTES - The following conversion is implemented: - (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) => - => (a_1||b_1). - - i.e. all conjuncts except the first one are currently dropped. - This is done to avoid producing N*K ways to do index_merge. - - If (a_1||b_1) produce a condition that is always TRUE, NULL is returned - and index_merge is discarded (while it is actually possible to try - harder). - - As a consequence of this, choice of keys to do index_merge read may depend - on the order of conditions in WHERE part of the query. + Perform OR operation on two imerge lists + SYNOPSIS + imerge_list_or_list() + param Context info for the operation + im1 The first imerge list for the operation + im2 The second imerge list for the operation + + DESCRIPTION + Assuming that the first imerge list represents the formula + F1= M1_1 AND ... AND M1_k1 + while the second imerge list represents the formula + F2= M2_1 AND ... AND M2_k2, + where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1]) + and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]), + the function builds a list of imerges for some formula that can be + inferred from the formula (F1 OR F2). + + More exactly the function builds imerges for the formula (M1_1 OR M2_1). + Note that + (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) = + AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) => + M1_1 OR M2_1. + So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2). + + To build imerges for the formula (M1_1 OR M2_1) the function invokes, + possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks + for the imerge m1_1. + At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks + performs OR operation on the imerge m1_1 and the range tree rt2_1_1 by + calling SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==TRUE. + The resulting imerge of the operation is ored with the next range tree of + the imerge m2_1. This oring continues until the last range tree from + m2_1 has been ored. + At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks + performs the same sequence of OR operations, but now calling + SEL_IMERGE::or_sel_tree_with_checks with is_first_pass_check==FALSE. + + The imerges that the operation produces replace those in the list im1 + RETURN - 0 OK, result is stored in *im1 - other Error, both passed lists are unusable + 0 if the operation is a success + -1 if the function has run out of memory */ int imerge_list_or_list(RANGE_OPT_PARAM *param, List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) { + + uint rc; + bool is_last_check_pass= FALSE; + SEL_IMERGE *imerge= im1->head(); + uint elems= imerge->trees_next-imerge->trees; im1->empty(); im1->push_back(imerge); - return imerge->or_sel_imerge_with_checks(param, im2->head()); + rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(), + TRUE, &is_last_check_pass); + if (rc) + { + if (rc == 1) + { + im1->empty(); + rc= 0; + } + return rc; + } + + if (!is_last_check_pass) + { + SEL_IMERGE* new_imerge= new SEL_IMERGE(imerge, elems, param); + if (new_imerge) + { + is_last_check_pass= TRUE; + rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(), + FALSE, &is_last_check_pass); + if (!rc) + im1->push_back(new_imerge); + } + } + return rc; } /* - Perform OR operation on index_merge list and key tree. + Perform OR operation for each imerge from a list and the range part of a tree + + SYNOPSIS + imerge_list_or_tree() + param Context info for the operation + merges The list of imerges to be ored with the range part of tree + tree SEL_TREE whose range part is to be ored with the imerges + + DESCRIPTION + For each imerge mi from the list 'merges' the function performes OR + operation with mi and the range part of 'tree' rt, producing one or + two imerges. + Given the merge mi represent the formula RTi_1 OR ... OR RTi_k, + the function forms the merges by the following rules: + + 1. If rt cannot be ored with any of the trees rti the function just + produces an imerge that represents the formula + RTi_1 OR ... RTi_k OR RT. + 2. If there exist a tree rtj that must be ored with rt the function + produces an imerge the represents the formula + RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k, + where the range tree for (RTi_j OR RT) is constructed by oring the + SEL_ARG trees that must be ored. + 3. For each rti_j that can be ored with rt the function produces + the new tree rti_j' and substitutes rti_j for this new range tree. + + In any case the function removes mi from the list and then adds all + produced imerges. + + To build imerges by rules 1-3 the function calls the method + SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first + call it passes TRUE for the third parameter of the function. + At this first call imerges by rules 1-2 are built. If the call + returns FALSE as the return value of its fourth parameter then the + function are called for the second time. At this call the imerge + of rule 3 is produced. + + If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then + then it means that the produced tree contains an always true + range tree and the whole imerge can be discarded. + RETURN - 0 OK, result is stored in *im1. - other Error + 1 if no imerges are produced + 0 otherwise */ +static int imerge_list_or_tree(RANGE_OPT_PARAM *param, - List<SEL_IMERGE> *im1, + List<SEL_IMERGE> *merges, SEL_TREE *tree) { + SEL_IMERGE *imerge; - List_iterator<SEL_IMERGE> it(*im1); - bool tree_used= FALSE; + List<SEL_IMERGE> additional_merges; + List_iterator<SEL_IMERGE> it(*merges); + while ((imerge= it++)) { - SEL_TREE *or_tree; - if (tree_used) + bool is_last_check_pass; + int rc= 0; + int rc1= 0; + SEL_TREE *or_tree= new SEL_TREE (tree, FALSE, param); + if (or_tree) { - or_tree= new SEL_TREE (tree, param); - if (!or_tree || - (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())) - return FALSE; + uint elems= imerge->trees_next-imerge->trees; + rc= imerge->or_sel_tree_with_checks(param, elems, or_tree, + TRUE, &is_last_check_pass); + if (!is_last_check_pass) + { + SEL_IMERGE *new_imerge= new SEL_IMERGE(imerge, elems, param); + if (new_imerge) + { + rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree, + FALSE, &is_last_check_pass); + if (!rc1) + additional_merges.push_back(new_imerge); + } + } } - else - or_tree= tree; - - if (imerge->or_sel_tree_with_checks(param, or_tree)) + if (rc || rc1 || !or_tree) it.remove(); - tree_used= TRUE; } - return im1->is_empty(); + + merges->concat(&additional_merges); + return merges->is_empty(); +} + + +/* + Perform pushdown operation of the range part of a tree into given imerges + + SYNOPSIS + imerge_list_and_tree() + param Context info for the operation + merges IN/OUT List of imerges to push the range part of 'tree' into + tree SEL_TREE whose range part is to be pushed into imerges + + DESCRIPTION + For each imerge from the list merges the function pushes the range part + rt of 'tree' into the imerge. + More exactly if the imerge mi from the list represents the formula + RTi_1 OR ... OR RTi_k + the function bulds a new imerge that represents the formula + (RTi_1 AND RT) OR ... OR (RTi_k AND RT) + and adds this imerge to the list merges. + To perform this pushdown operation the function calls the method + SEL_IMERGE::and_sel_tree. + For any imerge mi the new imerge is not created if for each pair of + trees rti_j and rt the intersection of the indexes with defined ranges + is empty. + If the result of the pushdown operation for the imerge mi returns an + imerge with no trees then then not only nothing is added to the list + merges but mi itself is removed from the list. + + RETURN + 1 if no imerges are left in the list merges + 0 otherwise +*/ + +static +int imerge_list_and_tree(RANGE_OPT_PARAM *param, + List<SEL_IMERGE> *merges, + SEL_TREE *tree) +{ + SEL_IMERGE *imerge; + SEL_IMERGE *new_imerge= NULL; + List<SEL_IMERGE> new_merges; + List_iterator<SEL_IMERGE> it(*merges); + + while ((imerge= it++)) + { + if (!new_imerge) + new_imerge= new SEL_IMERGE(); + if (imerge->have_common_keys(param, tree) && + new_imerge && !imerge->and_sel_tree(param, tree, new_imerge)) + { + if (new_imerge->trees == new_imerge->trees_next) + it.remove(); + else + { + new_merges.push_back(new_imerge); + new_imerge= NULL; + } + } + } + imerge_list_and_list(&new_merges, merges); + *merges= new_merges; + return merges->is_empty(); } @@ -1257,11 +1817,11 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() } -QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, - TABLE *table) +QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param, + TABLE *table) :unique(NULL), pk_quick_select(NULL), thd(thd_param) { - DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT"); + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT"); index= MAX_KEY; head= table; bzero(&read_record, sizeof(read_record)); @@ -1269,38 +1829,37 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, DBUG_VOID_RETURN; } -int QUICK_INDEX_MERGE_SELECT::init() +int QUICK_INDEX_SORT_SELECT::init() { - DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init"); + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init"); DBUG_RETURN(0); } -int QUICK_INDEX_MERGE_SELECT::reset() +int QUICK_INDEX_SORT_SELECT::reset() { - DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset"); + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset"); DBUG_RETURN(read_keys_and_merge()); } bool -QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) +QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) { - /* - Save quick_select that does scan on clustered primary key as it will be - processed separately. - */ + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back"); if (head->file->primary_key_is_clustered() && quick_sel_range->index == head->s->primary_key) + { + /* A quick_select over a clustered primary key is handled specifically */ pk_quick_select= quick_sel_range; - else - return quick_selects.push_back(quick_sel_range); - return 0; + DBUG_RETURN(0); + } + DBUG_RETURN(quick_selects.push_back(quick_sel_range)); } -QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() +QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT() { List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); QUICK_RANGE_SELECT* quick; - DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT"); + DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT"); delete unique; quick_it.rewind(); while ((quick= quick_it++)) @@ -1314,7 +1873,6 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() DBUG_VOID_RETURN; } - QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, TABLE *table, bool retrieve_full_rows, @@ -1726,6 +2284,7 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc() min_value=arg.min_value; max_value=arg.max_value; next_key_part=arg.next_key_part; + max_part_no= arg.max_part_no; use_count=1; elements=1; } @@ -1743,9 +2302,10 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg, :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()), elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg), max_value((uchar*) max_value_arg), next(0),prev(0), - next_key_part(0),color(BLACK),type(KEY_RANGE) + next_key_part(0), color(BLACK), type(KEY_RANGE) { left=right= &null_element; + max_part_no= 1; } SEL_ARG::SEL_ARG(Field *field_,uint8 part_, @@ -1756,6 +2316,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_, field(field_), min_value(min_value_), max_value(max_value_), next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE) { + max_part_no= part+1; left=right= &null_element; } @@ -1799,6 +2360,7 @@ SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, increment_use_count(1); tmp->color= color; tmp->elements= this->elements; + tmp->max_part_no= max_part_no; return tmp; } @@ -2104,6 +2666,26 @@ public: /* + Plan for QUICK_INDEX_INTERSECT_SELECT scan. + QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows + is ignored by make_quick. +*/ + +class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN +{ +public: + TRP_INDEX_INTERSECT() {} /* Remove gcc warning */ + virtual ~TRP_INDEX_INTERSECT() {} /* Remove gcc warning */ + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */ + TRP_RANGE **range_scans_end; /* end of the array */ + /* keys whose scans are to be filtered by cpk conditions */ + key_map filtered_scans; +}; + + +/* Plan for QUICK_INDEX_MERGE_SELECT scan. QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows is ignored by make_quick. @@ -2170,6 +2752,38 @@ public: }; +typedef struct st_index_scan_info +{ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + uint range_count; + ha_rows records; /* estimate of # records this scan will return */ + + /* Set of intervals over key fields that will be used for row retrieval. */ + SEL_ARG *sel_arg; + + KEY *key_info; + uint used_key_parts; + + /* Estimate of # records filtered out by intersection with cpk */ + ha_rows filtered_out; + /* Bitmap of fields used in index intersection */ + MY_BITMAP used_fields; + + /* Fields used in the query and covered by ROR scan. */ + MY_BITMAP covered_fields; + uint used_fields_covered; /* # of set bits in covered_fields */ + int key_rec_length; /* length of key record (including rowid) */ + + /* + Cost of reading all index records with values in sel_arg intervals set + (assuming there is no need to access full table records) + */ + double index_read_cost; + uint first_uncovered_field; /* first unused bit in covered_fields */ + uint key_components; /* # of parts in the key */ +} INDEX_SCAN_INFO; + /* Fill param->needed_fields with bitmap of fields used in the query. SYNOPSIS @@ -2447,72 +3061,90 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, It is possible to use a range-based quick select (but it might be slower than 'all' table scan). */ - if (tree->merges.is_empty()) - { - TRP_RANGE *range_trp; - TRP_ROR_INTERSECT *rori_trp; - bool can_build_covering= FALSE; + TRP_RANGE *range_trp; + TRP_ROR_INTERSECT *rori_trp; + TRP_INDEX_INTERSECT *intersect_trp; + bool can_build_covering= FALSE; + + remove_nonrange_trees(¶m, tree); - /* Get best 'range' plan and prepare data for making other plans */ - if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE, - best_read_time))) - { - best_trp= range_trp; - best_read_time= best_trp->read_cost; - } + /* Get best 'range' plan and prepare data for making other plans */ + if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE, + best_read_time))) + { + best_trp= range_trp; + best_read_time= best_trp->read_cost; + } + /* + Simultaneous key scans and row deletes on several handler + objects are not allowed so don't use ROR-intersection for + table deletes. + */ + if ((thd->lex->sql_command != SQLCOM_DELETE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE)) + { /* - Simultaneous key scans and row deletes on several handler - objects are not allowed so don't use ROR-intersection for - table deletes. + Get best non-covering ROR-intersection plan and prepare data for + building covering ROR-intersection. */ - if ((thd->lex->sql_command != SQLCOM_DELETE) && - optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE)) + if ((rori_trp= get_best_ror_intersect(¶m, tree, best_read_time, + &can_build_covering))) { + best_trp= rori_trp; + best_read_time= best_trp->read_cost; /* - Get best non-covering ROR-intersection plan and prepare data for - building covering ROR-intersection. + Try constructing covering ROR-intersect only if it looks possible + and worth doing. */ - if ((rori_trp= get_best_ror_intersect(¶m, tree, best_read_time, - &can_build_covering))) - { + if (!rori_trp->is_covering && can_build_covering && + (rori_trp= get_best_covering_ror_intersect(¶m, tree, + best_read_time))) best_trp= rori_trp; - best_read_time= best_trp->read_cost; - /* - Try constructing covering ROR-intersect only if it looks possible - and worth doing. - */ - if (!rori_trp->is_covering && can_build_covering && - (rori_trp= get_best_covering_ror_intersect(¶m, tree, - best_read_time))) - best_trp= rori_trp; - } } } - else + /* + Do not look for an index intersection plan if there is a covering + index. The scan by this covering index will be always cheaper than + any index intersection. + */ + if (param.table->covering_keys.is_clear_all() && + optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT)) { - if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE)) + if ((intersect_trp= get_best_index_intersect(¶m, tree, + best_read_time))) { - /* Try creating index_merge/ROR-union scan. */ - SEL_IMERGE *imerge; - TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; - LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ - DBUG_PRINT("info",("No range reads possible," - " trying to construct index_merge")); - List_iterator_fast<SEL_IMERGE> it(tree->merges); - while ((imerge= it++)) + best_trp= intersect_trp; + best_read_time= best_trp->read_cost; + } + } + + if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE)) + { + /* Try creating index_merge/ROR-union scan. */ + SEL_IMERGE *imerge; + TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; + LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ + DBUG_PRINT("info",("No range reads possible," + " trying to construct index_merge")); + List_iterator_fast<SEL_IMERGE> it(tree->merges); + while ((imerge= it++)) + { + new_conj_trp= get_best_disjunct_quick(¶m, imerge, best_read_time); + if (new_conj_trp) + set_if_smaller(param.table->quick_condition_rows, + new_conj_trp->records); + if (new_conj_trp && + (!best_conj_trp || + new_conj_trp->read_cost < best_conj_trp->read_cost)) { - new_conj_trp= get_best_disjunct_quick(¶m, imerge, best_read_time); - if (new_conj_trp) - set_if_smaller(param.table->quick_condition_rows, - new_conj_trp->records); - if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < - best_conj_trp->read_cost)) - best_conj_trp= new_conj_trp; + best_conj_trp= new_conj_trp; + best_read_time= best_conj_trp->read_cost; } - if (best_conj_trp) - best_trp= best_conj_trp; } + if (best_conj_trp) + best_trp= best_conj_trp; } } @@ -3776,7 +4408,6 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, { SEL_TREE **ptree; TRP_INDEX_MERGE *imerge_trp= NULL; - uint n_child_scans= imerge->trees_next - imerge->trees; TRP_RANGE **range_scans; TRP_RANGE **cur_child; TRP_RANGE **cpk_scan= NULL; @@ -3796,6 +4427,24 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, DBUG_ENTER("get_best_disjunct_quick"); DBUG_PRINT("info", ("Full table scan cost: %g", read_time)); + /* + In every tree of imerge remove SEL_ARG trees that do not make ranges. + If after this removal some SEL_ARG tree becomes empty discard imerge. + */ + for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++) + { + if (remove_nonrange_trees(param, *ptree)) + { + imerge->trees_next= imerge->trees; + break; + } + } + + uint n_child_scans= imerge->trees_next - imerge->trees; + + if (!n_child_scans) + DBUG_RETURN(NULL); + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, sizeof(TRP_RANGE*)* n_child_scans))) @@ -3900,7 +4549,9 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, imerge_cost += Unique::get_use_cost(param->imerge_cost_buff, (uint)non_cpk_scan_records, param->table->file->ref_length, - param->thd->variables.sortbuff_size); + param->thd->variables.sortbuff_size, + TIME_FOR_COMPARE_ROWID, + FALSE, NULL); DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", imerge_cost, read_time)); if (imerge_cost < read_time) @@ -3915,6 +4566,13 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, imerge_trp->range_scans_end= range_scans + n_child_scans; read_time= imerge_cost; } + if (imerge_trp) + { + TABLE_READ_PLAN *trp= merge_same_index_scans(param, imerge, imerge_trp, + read_time); + if (trp != imerge_trp) + DBUG_RETURN(trp); + } } build_ror_index_merge: @@ -3930,6 +4588,7 @@ build_ror_index_merge: sizeof(TABLE_READ_PLAN*)* n_child_scans))) DBUG_RETURN(imerge_trp); + skip_to_ror_scan: roru_index_costs= 0.0; roru_total_records= 0; @@ -4013,30 +4672,990 @@ skip_to_ror_scan: DBUG_RETURN(roru); } } - DBUG_RETURN(imerge_trp); + DBUG_RETURN(imerge_trp); } -typedef struct st_ror_scan_info + +/* + Merge index scans for the same indexes in an index merge plan + + SYNOPSIS + merge_same_index_scans() + param Context info for the operation + imerge IN/OUT SEL_IMERGE from which imerge_trp has been extracted + imerge_trp The index merge plan where index scans for the same + indexes are to be merges + read_time The upper bound for the cost of the plan to be evaluated + + DESRIPTION + For the given index merge plan imerge_trp extracted from the SEL_MERGE + imerge the function looks for range scans with the same indexes and merges + them into SEL_ARG trees. Then for each such SEL_ARG tree r_i the function + creates a range tree rt_i that contains only r_i. All rt_i are joined + into one index merge that replaces the original index merge imerge. + The function calls get_best_disjunct_quick for the new index merge to + get a new index merge plan that contains index scans only for different + indexes. + If there are no index scans for the same index in the original index + merge plan the function does not change the original imerge and returns + imerge_trp as its result. + + RETURN + The original or or improved index merge plan +*/ + +static +TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge, + TRP_INDEX_MERGE *imerge_trp, + double read_time) { - uint idx; /* # of used key in param->keys */ - uint keynr; /* # of used key in table */ - ha_rows records; /* estimate of # records this scan will return */ + uint16 first_scan_tree_idx[MAX_KEY]; + SEL_TREE **tree; + TRP_RANGE **cur_child; + uint removed_cnt= 0; - /* Set of intervals over key fields that will be used for row retrieval. */ - SEL_ARG *sel_arg; + DBUG_ENTER("merge_same_index_scans"); - /* Fields used in the query and covered by this ROR scan. */ - MY_BITMAP covered_fields; - uint used_fields_covered; /* # of set bits in covered_fields */ - int key_rec_length; /* length of key record (including rowid) */ + bzero(first_scan_tree_idx, sizeof(first_scan_tree_idx[0])*param->keys); - /* - Cost of reading all index records with values in sel_arg intervals set - (assuming there is no need to access full table records) - */ - double index_read_cost; - uint first_uncovered_field; /* first unused bit in covered_fields */ - uint key_components; /* # of parts in the key */ + for (tree= imerge->trees, cur_child= imerge_trp->range_scans; + tree != imerge->trees_next; + tree++, cur_child++) + { + DBUG_ASSERT(tree); + uint key_idx= (*cur_child)->key_idx; + uint16 *tree_idx_ptr= &first_scan_tree_idx[key_idx]; + if (!*tree_idx_ptr) + *tree_idx_ptr= (uint16) (tree-imerge->trees+1); + else + { + SEL_TREE **changed_tree= imerge->trees+(*tree_idx_ptr-1); + SEL_ARG *key= (*changed_tree)->keys[key_idx]; + bzero((*changed_tree)->keys, + sizeof((*changed_tree)->keys[0])*param->keys); + (*changed_tree)->keys_map.clear_all(); + if (((*changed_tree)->keys[key_idx]= + key_or(param, key, (*tree)->keys[key_idx]))) + (*changed_tree)->keys_map.set_bit(key_idx); + *tree= NULL; + removed_cnt++; + } + } + if (!removed_cnt) + DBUG_RETURN(imerge_trp); + + TABLE_READ_PLAN *trp= NULL; + SEL_TREE **new_trees_next= imerge->trees; + for (tree= new_trees_next; tree != imerge->trees_next; tree++) + { + if (!*tree) + continue; + if (tree > new_trees_next) + *new_trees_next= *tree; + new_trees_next++; + } + imerge->trees_next= new_trees_next; + + DBUG_ASSERT(imerge->trees_next>imerge->trees); + + if (imerge->trees_next-imerge->trees > 1) + trp= get_best_disjunct_quick(param, imerge, read_time); + else + { + /* + This alternative theoretically can be reached when the cost + of the index merge for such a formula as + (key1 BETWEEN c1_1 AND c1_2) AND key2 > c2 OR + (key1 BETWEEN c1_3 AND c1_4) AND key3 > c3 + is estimated as being cheaper than the cost of index scan for + the formula + (key1 BETWEEN c1_1 AND c1_2) OR (key1 BETWEEN c1_3 AND c1_4) + + In the current code this may happen for two reasons: + 1. for a single index range scan data records are accessed in + a random order + 2. the functions that estimate the cost of a range scan and an + index merge retrievals are not well calibrated + */ + trp= get_key_scans_params(param, *imerge->trees, FALSE, TRUE, + read_time); + } + + DBUG_RETURN(trp); +} + + +/* + This structure contains the info common for all steps of a partial + index intersection plan. Morever it contains also the info common + for index intersect plans. This info is filled in by the function + prepare_search_best just before searching for the best index + intersection plan. +*/ + +typedef struct st_common_index_intersect_info +{ + PARAM *param; /* context info for range optimizations */ + uint key_size; /* size of a ROWID element stored in Unique object */ + uint compare_factor; /* 1/compare - cost to compare two ROWIDs */ + ulonglong max_memory_size; /* maximum space allowed for Unique objects */ + ha_rows table_cardinality; /* estimate of the number of records in table */ + double cutoff_cost; /* discard index intersects with greater costs */ + INDEX_SCAN_INFO *cpk_scan; /* clustered primary key used in intersection */ + + bool in_memory; /* unique object for intersection is completely in memory */ + + INDEX_SCAN_INFO **search_scans; /* scans possibly included in intersect */ + uint n_search_scans; /* number of elements in search_scans */ + + bool best_uses_cpk; /* current best intersect uses clustered primary key */ + double best_cost; /* cost of the current best index intersection */ + /* estimate of the number of records in the current best intersection */ + ha_rows best_records; + uint best_length; /* number of indexes in the current best intersection */ + INDEX_SCAN_INFO **best_intersect; /* the current best index intersection */ + /* scans from the best intersect to be filtrered by cpk conditions */ + key_map filtered_scans; + + uint *buff_elems; /* buffer to calculate cost of index intersection */ + +} COMMON_INDEX_INTERSECT_INFO; + + +/* + This structure contains the info specific for one step of an index + intersection plan. The structure is filled in by the function + check_index_intersect_extension. +*/ + +typedef struct st_partial_index_intersect_info +{ + COMMON_INDEX_INTERSECT_INFO *common_info; /* shared by index intersects */ + uint length; /* number of index scans in the partial intersection */ + ha_rows records; /* estimate of the number of records in intersection */ + double cost; /* cost of the partial index intersection */ + + /* estimate of total number of records of all scans of the partial index + intersect sent to the Unique object used for the intersection */ + ha_rows records_sent_to_unique; + + /* total cost of the scans of indexes from the partial index intersection */ + double index_read_cost; + + bool use_cpk_filter; /* cpk filter is to be used for this scan */ + bool in_memory; /* uses unique object in memory */ + double in_memory_cost; /* cost of using unique object in memory */ + + key_map filtered_scans; /* scans to be filtered by cpk conditions */ + + MY_BITMAP *intersect_fields; /* bitmap of fields used in intersection */ +} PARTIAL_INDEX_INTERSECT_INFO; + + +/* Check whether two indexes have the same first n components */ + +static +bool same_index_prefix(KEY *key1, KEY *key2, uint used_parts) +{ + KEY_PART_INFO *part1= key1->key_part; + KEY_PART_INFO *part2= key2->key_part; + for(uint i= 0; i < used_parts; i++, part1++, part2++) + { + if (part1->fieldnr != part2->fieldnr) + return FALSE; + } + return TRUE; +} + + +/* Create a bitmap for all fields of a table */ + +static +bool create_fields_bitmap(PARAM *param, MY_BITMAP *fields_bitmap) +{ + my_bitmap_map *bitmap_buf; + + if (!(bitmap_buf= (my_bitmap_map *) alloc_root(param->mem_root, + param->fields_bitmap_size))) + return TRUE; + if (bitmap_init(fields_bitmap, bitmap_buf, param->table->s->fields, FALSE)) + return TRUE; + + return FALSE; +} + +/* Compare two indexes scans for sort before search for the best intersection */ + +static +int cmp_intersect_index_scan(INDEX_SCAN_INFO **a, INDEX_SCAN_INFO **b) +{ + return (*a)->records < (*b)->records ? + -1 : (*a)->records == (*b)->records ? 0 : 1; +} + + +static inline +void set_field_bitmap_for_index_prefix(MY_BITMAP *field_bitmap, + KEY_PART_INFO *key_part, + uint used_key_parts) +{ + bitmap_clear_all(field_bitmap); + for (KEY_PART_INFO *key_part_end= key_part+used_key_parts; + key_part < key_part_end; key_part++) + { + bitmap_set_bit(field_bitmap, key_part->fieldnr-1); + } +} + + +/* + Round up table cardinality read from statistics provided by engine. + This function should go away when mysql test will allow to handle + more or less easily in the test suites deviations of InnoDB + statistical data. +*/ + +static inline +ha_rows get_table_cardinality_for_index_intersect(TABLE *table) +{ + if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) + return table->file->stats.records; + else + { + ha_rows d; + double q; + for (q= table->file->stats.records, d= 1 ; q >= 10; q/= 10, d*= 10 ) ; + return (ha_rows) (round(q) * d); + } +} + + +static +ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, + INDEX_SCAN_INFO *ext_index_scan); + +/* + Prepare to search for the best index intersection + + SYNOPSIS + prepare_search_best_index_intersect() + param common info about index ranges + tree tree of ranges for indexes than can be intersected + common OUT info needed for search to be filled by the function + init OUT info for an initial pseudo step of the intersection plans + cutoff_cost cut off cost of the interesting index intersection + + DESCRIPTION + The function initializes all fields of the structure 'common' to be used + when searching for the best intersection plan. It also allocates + memory to store the most cheap index intersection. + + NOTES + When selecting candidates for index intersection we always take only + one representative out of any set of indexes that share the same range + conditions. These indexes always have the same prefixes and the + components of this prefixes are exactly those used in these range + conditions. + Range conditions over clustered primary key (cpk) is always used only + as the condition that filters out some rowids retrieved by the scans + for secondary indexes. The cpk index will be handled in special way by + the function that search for the best index intersection. + + RETURN + FALSE in the case of success + TRUE otherwise +*/ + +static +bool prepare_search_best_index_intersect(PARAM *param, + SEL_TREE *tree, + COMMON_INDEX_INTERSECT_INFO *common, + PARTIAL_INDEX_INTERSECT_INFO *init, + double cutoff_cost) +{ + uint i; + uint n_search_scans; + double cost; + INDEX_SCAN_INFO **index_scan; + INDEX_SCAN_INFO **scan_ptr; + INDEX_SCAN_INFO *cpk_scan= NULL; + TABLE *table= param->table; + uint n_index_scans= tree->index_scans_end - tree->index_scans; + + if (!n_index_scans) + return 1; + + bzero(init, sizeof(*init)); + init->common_info= common; + init->cost= cutoff_cost; + + common->param= param; + common->key_size= table->file->ref_length; + common->compare_factor= TIME_FOR_COMPARE_ROWID; + common->max_memory_size= param->thd->variables.sortbuff_size; + common->cutoff_cost= cutoff_cost; + common->cpk_scan= NULL; + common->table_cardinality= + get_table_cardinality_for_index_intersect(table); + + if (n_index_scans <= 1) + return TRUE; + + if (table->file->primary_key_is_clustered()) + { + INDEX_SCAN_INFO **index_scan_end; + index_scan= tree->index_scans; + index_scan_end= index_scan+n_index_scans; + for ( ; index_scan < index_scan_end; index_scan++) + { + if ((*index_scan)->keynr == table->s->primary_key) + { + common->cpk_scan= cpk_scan= *index_scan; + break; + } + } + } + + i= n_index_scans - test(cpk_scan != NULL) + 1; + + if (!(common->search_scans = + (INDEX_SCAN_INFO **) alloc_root (param->mem_root, + sizeof(INDEX_SCAN_INFO *) * i))) + return TRUE; + bzero(common->search_scans, sizeof(INDEX_SCAN_INFO *) * i); + + INDEX_SCAN_INFO **selected_index_scans= common->search_scans; + + for (i=0, index_scan= tree->index_scans; i < n_index_scans; i++, index_scan++) + { + uint used_key_parts= (*index_scan)->used_key_parts; + KEY *key_info= (*index_scan)->key_info; + + if (*index_scan == cpk_scan) + continue; + if (cpk_scan && cpk_scan->used_key_parts >= used_key_parts && + same_index_prefix(cpk_scan->key_info, key_info, used_key_parts)) + continue; + + cost= table->file->keyread_time((*index_scan)->keynr, + (*index_scan)->range_count, + (*index_scan)->records); + if (cost >= cutoff_cost) + continue; + + for (scan_ptr= selected_index_scans; *scan_ptr ; scan_ptr++) + { + /* + When we have range conditions for two different indexes with the same + beginning it does not make sense to consider both of them for index + intersection if the range conditions are covered by common initial + components of the indexes. Actually in this case the indexes are + guaranteed to have the same range conditions. + */ + if ((*scan_ptr)->used_key_parts == used_key_parts && + same_index_prefix((*scan_ptr)->key_info, key_info, used_key_parts)) + break; + } + if (!*scan_ptr || cost < (*scan_ptr)->index_read_cost) + { + *scan_ptr= *index_scan; + (*scan_ptr)->index_read_cost= cost; + } + } + + ha_rows records_in_scans= 0; + + for (scan_ptr=selected_index_scans, i= 0; *scan_ptr; scan_ptr++, i++) + { + if (create_fields_bitmap(param, &(*scan_ptr)->used_fields)) + return TRUE; + records_in_scans+= (*scan_ptr)->records; + } + n_search_scans= i; + + if (cpk_scan && create_fields_bitmap(param, &cpk_scan->used_fields)) + return TRUE; + + if (!(common->n_search_scans= n_search_scans)) + return TRUE; + + common->best_uses_cpk= FALSE; + common->best_cost= cutoff_cost + COST_EPS; + common->best_length= 0; + + if (!(common->best_intersect= + (INDEX_SCAN_INFO **) alloc_root (param->mem_root, + sizeof(INDEX_SCAN_INFO *) * + (i + test(cpk_scan != NULL))))) + return TRUE; + + size_t calc_cost_buff_size= + Unique::get_cost_calc_buff_size(records_in_scans, + common->key_size, + common->max_memory_size); + if (!(common->buff_elems= (uint *) alloc_root(param->mem_root, + calc_cost_buff_size))) + return TRUE; + + my_qsort(selected_index_scans, n_search_scans, sizeof(INDEX_SCAN_INFO *), + (qsort_cmp) cmp_intersect_index_scan); + + if (cpk_scan) + { + PARTIAL_INDEX_INTERSECT_INFO curr; + set_field_bitmap_for_index_prefix(&cpk_scan->used_fields, + cpk_scan->key_info->key_part, + cpk_scan->used_key_parts); + curr.common_info= common; + curr.intersect_fields= &cpk_scan->used_fields; + curr.records= cpk_scan->records; + curr.length= 1; + for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++) + { + ha_rows scan_records= (*scan_ptr)->records; + ha_rows records= records_in_index_intersect_extension(&curr, *scan_ptr); + (*scan_ptr)->filtered_out= records >= scan_records ? + 0 : scan_records-records; + } + } + else + { + for (scan_ptr=selected_index_scans; *scan_ptr; scan_ptr++) + (*scan_ptr)->filtered_out= 0; + } + + return FALSE; +} + + +/* + On Estimation of the Number of Records in an Index Intersection + =============================================================== + + Consider query Q over table t. Let C be the WHERE condition of this query, + and, idx1(a1_1,...,a1_k1) and idx2(a2_1,...,a2_k2) be some indexes defined + on table t. + Let rt1 and rt2 be the range trees extracted by the range optimizer from C + for idx1 and idx2 respectively. + Let #t be the estimate of the number of records in table t provided for the + optimizer. + Let #r1 and #r2 be the estimates of the number of records in the range trees + rt1 and rt2, respectively, obtained by the range optimizer. + + We need to get an estimate for the number of records in the index + intersection of rt1 and rt2. In other words, we need to estimate the + cardinality of the set of records that are in both trees. Let's designate + this number by #r. + + If we do not make any assumptions then we can only state that + #r<=min(#r1,#r2). + With this estimate we can't say that the index intersection scan will be + cheaper than the cheapest index scan. + + Let Rt1 and Rt2 be AND/OR conditions representing rt and rt2 respectively. + The probability that a record belongs to rt1 is sel(Rt1)=#r1/#t. + The probability that a record belongs to rt2 is sel(Rt2)=#r2/#t. + + If we assume that the values in columns of idx1 and idx2 are independent + then #r/#t=sel(Rt1&Rt2)=sel(Rt1)*sel(Rt2)=(#r1/#t)*(#r2/#t). + So in this case we have: #r=#r1*#r2/#t. + + The above assumption of independence of the columns in idx1 and idx2 means + that: + - all columns are different + - values from one column do not correlate with values from any other column. + + We can't help with the case when column correlate with each other. + Yet, if they are assumed to be uncorrelated the value of #r theoretically can + be evaluated . Unfortunately this evaluation, in general, is rather complex. + + Let's consider two indexes idx1:(dept, manager), idx2:(dept, building) + over table 'employee' and two range conditions over these indexes: + Rt1: dept=10 AND manager LIKE 'S%' + Rt2: dept=10 AND building LIKE 'L%'. + We can state that: + sel(Rt1&Rt2)=sel(dept=10)*sel(manager LIKE 'S%')*sel(building LIKE 'L%') + =sel(Rt1)*sel(Rt2)/sel(dept=10). + sel(Rt1/2_0:dept=10) can be estimated if we know the cardinality #r1_0 of + the range for sub-index idx1_0 (dept) of the index idx1 or the cardinality + #rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2. + The current code does not make an estimate either for #rt1_0, or for #rt2_0, + but it can be adjusted to provide those numbers. + Alternatively, min(rec_per_key) for (dept) could be used to get an upper + bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided + now. + + Let's consider two other indexes idx1:(dept, last_name), + idx2:(first_name, last_name) and two range conditions over these indexes: + Rt1: dept=5 AND last_name='Sm%' + Rt2: first_name='Robert' AND last_name='Sm%'. + + sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm5')*sel(first_name='Robert') + =sel(Rt2)*sel(dept=5) + Here max(rec_per_key) for (dept) could be used to get an upper bound for + the value of sel(Rt1&Rt2). + + When the intersected indexes have different major columns, but some + minor column are common the picture may be more complicated. + + Let's consider the following range conditions for the same indexes as in + the previous example: + Rt1: (Rt11: dept=5 AND last_name='So%') + OR + (Rt12: dept=7 AND last_name='Saw%') + Rt2: (Rt21: first_name='Robert' AND last_name='Saw%') + OR + (Rt22: first_name='Bob' AND last_name='So%') + Here we have: + sel(Rt1&Rt2)= sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5) + + sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) + Now consider the range condition: + Rt1_0: (dept=5 OR dept=7) + For this condition we can state that: + sel(Rt1_0&Rt2)=(sel(dept=5)+sel(dept=7))*(sel(Rt21)+sel(Rt22))= + sel(dept=5)*sel(Rt21)+sel(dept=7)*sel(Rt21)+ + sel(dept=5)*sel(Rt22)+sel(dept=7)*sel(Rt22)= + sel(dept=5)*sel(Rt21)+sel(Rt21)*sel(dept=7)+ + sel(Rt22)*sel(dept=5)+sel(dept=7)*sel(Rt22) > + sel(Rt11)*sel(Rt21)+sel(Rt22)*sel(dept=5)+ + sel(Rt21)*sel(dept=7)+sel(Rt12)*sel(Rt22) > + sel(Rt1 & Rt2) + + We've just demonstrated for an example what is intuitively almost obvious + in general. We can remove the ending parts fromrange trees getting less + selective range conditions for sub-indexes. + So if not a most major component with the number k of an index idx is + encountered in the index with which we intersect we can use the sub-index + idx_k-1 that includes the components of idx up to the i-th component and + the range tree for idx_k-1 to make an upper bound estimate for the number + of records in the index intersection. + The range tree for idx_k-1 we use here is the subtree of the original range + tree for idx that contains only parts from the first k-1 components. + + As it was mentioned above the range optimizer currently does not provide + an estimate for the number of records in the ranges for sub-indexes. + However, some reasonable upper bound estimate can be obtained. + + Let's consider the following range tree: + Rt: (first_name='Robert' AND last_name='Saw%') + OR + (first_name='Bob' AND last_name='So%') + Let #r be the number of records in Rt. Let f_1 be the fan-out of column + last_name: + f_1 = rec_per_key[first_name]/rec_per_key[last_name]. + The the number of records in the range tree: + Rt_0: (first_name='Robert' OR first_name='Bob') + for the sub-index (first_name) is not greater than max(#r*f_1, #t). + Strictly speaking, we can state only that it's not greater than + max(#r*max_f_1, #t), where + max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name]. + Yet, if #r/#t is big enough (and this is the case of an index intersection, + because using this index range with a single index scan is cheaper than + the cost of the intersection when #r/#t is small) then almost safely we + can use here f_1 instead of max_f_1. + + The above considerations can be used in future development. Now, they are + used partly in the function that provides a rough upper bound estimate for + the number of records in an index intersection that follow below. +*/ + +/* + Estimate the number of records selected by an extension a partial intersection + + SYNOPSIS + records_in_index_intersect_extension() + curr partial intersection plan to be extended + ext_index_scan the evaluated extension of this partial plan + + DESCRIPTION + The function provides an estimate for the number of records in the + intersection of the partial index intersection curr with the index + ext_index_scan. If all intersected indexes does not have common columns + then the function returns an exact estimate (assuming there are no + correlations between values in the columns). If the intersected indexes + have common columns the function returns an upper bound for the number + of records in the intersection provided that the intersection of curr + with ext_index_scan can is expected to have less records than the expected + number of records in the partial intersection curr. In this case the + function also assigns the bitmap of the columns in the extended + intersection to ext_index_scan->used_fields. + If the function cannot expect that the number of records in the extended + intersection is less that the expected number of records #r in curr then + the function returns a number bigger than #r. + + NOTES + See the comment before the desription of the function that explains the + reasoning used by this function. + + RETURN + The expected number of rows in the extended index intersection +*/ + +static +ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, + INDEX_SCAN_INFO *ext_index_scan) +{ + KEY *key_info= ext_index_scan->key_info; + KEY_PART_INFO* key_part= key_info->key_part; + uint used_key_parts= ext_index_scan->used_key_parts; + MY_BITMAP *used_fields= &ext_index_scan->used_fields; + + if (!curr->length) + { + /* + If this the first index in the intersection just mark the + fields in the used_fields bitmap and return the expected + number of records in the range scan for the index provided + by the range optimizer. + */ + set_field_bitmap_for_index_prefix(used_fields, key_part, used_key_parts); + return ext_index_scan->records; + } + + uint i; + bool better_selectivity= FALSE; + ha_rows records= curr->records; + MY_BITMAP *curr_intersect_fields= curr->intersect_fields; + for (i= 0; i < used_key_parts; i++, key_part++) + { + if (bitmap_is_set(curr_intersect_fields, key_part->fieldnr-1)) + break; + } + if (i) + { + ha_rows table_cardinality= curr->common_info->table_cardinality; + ha_rows ext_records= ext_index_scan->records; + if (i < used_key_parts) + { + ulong *rec_per_key= key_info->rec_per_key+i-1; + ulong f1= rec_per_key[0] ? rec_per_key[0] : 1; + ulong f2= rec_per_key[1] ? rec_per_key[1] : 1; + ext_records= (ha_rows) ((double) ext_records / f2 * f1); + } + if (ext_records < table_cardinality) + { + better_selectivity= TRUE; + records= (ha_rows) ((double) records / table_cardinality * + ext_records); + bitmap_copy(used_fields, curr_intersect_fields); + key_part= key_info->key_part; + for (uint j= 0; j < used_key_parts; j++, key_part++) + bitmap_set_bit(used_fields, key_part->fieldnr-1); + } + } + return !better_selectivity ? records+1 : + !records ? 1 : records; +} + + +/* + Estimate the cost a binary search within disjoint cpk range intervals + + Number of comparisons to check whether a cpk value satisfies + the cpk range condition = log2(cpk_scan->range_count). +*/ + +static inline +double get_cpk_filter_cost(ha_rows filtered_records, + INDEX_SCAN_INFO *cpk_scan, + double compare_factor) +{ + return log((double) (cpk_scan->range_count+1)) / (compare_factor * M_LN2) * + filtered_records; +} + + +/* + Check whether a patial index intersection plan can be extended + + SYNOPSIS + check_index_intersect_extension() + curr partial intersection plan to be extended + ext_index_scan a possible extension of this plan to be checked + next OUT the structure to be filled for the extended plan + + DESCRIPTION + The function checks whether it makes sense to extend the index + intersection plan adding the index ext_index_scan, and, if this + the case, the function fills in the structure for the extended plan. + + RETURN + TRUE if it makes sense to extend the given plan + FALSE otherwise +*/ + +static +bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, + INDEX_SCAN_INFO *ext_index_scan, + PARTIAL_INDEX_INTERSECT_INFO *next) +{ + ha_rows records; + ha_rows records_sent_to_unique; + double cost; + ha_rows ext_index_scan_records= ext_index_scan->records; + ha_rows records_filtered_out_by_cpk= ext_index_scan->filtered_out; + COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info; + double cutoff_cost= common_info->cutoff_cost; + uint idx= curr->length; + next->index_read_cost= curr->index_read_cost+ext_index_scan->index_read_cost; + if (next->index_read_cost > cutoff_cost) + return FALSE; + + if ((next->in_memory= curr->in_memory)) + next->in_memory_cost= curr->in_memory_cost; + + next->intersect_fields= &ext_index_scan->used_fields; + next->filtered_scans= curr->filtered_scans; + + records_sent_to_unique= curr->records_sent_to_unique; + + next->use_cpk_filter= FALSE; + + /* Calculate the cost of using a Unique object for index intersection */ + if (idx && next->in_memory) + { + /* + All rowids received from the first scan are expected in one unique tree + */ + ha_rows elems_in_tree= common_info->search_scans[0]->records- + common_info->search_scans[0]->filtered_out ; + next->in_memory_cost+= Unique::get_search_cost(elems_in_tree, + common_info->compare_factor)* + ext_index_scan_records; + cost= next->in_memory_cost; + } + else + { + uint *buff_elems= common_info->buff_elems; + uint key_size= common_info->key_size; + uint compare_factor= common_info->compare_factor; + ulonglong max_memory_size= common_info->max_memory_size; + + records_sent_to_unique+= ext_index_scan_records; + cost= Unique::get_use_cost(buff_elems, records_sent_to_unique, key_size, + max_memory_size, compare_factor, TRUE, + &next->in_memory); + if (records_filtered_out_by_cpk) + { + /* Check whether using cpk filter for this scan is beneficial */ + + double cost2; + bool in_memory2; + ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk; + cost2= Unique::get_use_cost(buff_elems, records2, key_size, + max_memory_size, compare_factor, TRUE, + &in_memory2); + cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan, + compare_factor); + if (cost > cost2 + COST_EPS) + { + cost= cost2; + next->in_memory= in_memory2; + next->use_cpk_filter= TRUE; + records_sent_to_unique= records2; + } + + } + if (next->in_memory) + next->in_memory_cost= cost; + } + + if (next->use_cpk_filter) + { + next->filtered_scans.set_bit(ext_index_scan->keynr); + bitmap_union(&ext_index_scan->used_fields, + &common_info->cpk_scan->used_fields); + } + next->records_sent_to_unique= records_sent_to_unique; + + records= records_in_index_intersect_extension(curr, ext_index_scan); + if (idx && records > curr->records) + return FALSE; + if (next->use_cpk_filter && curr->filtered_scans.is_clear_all()) + records-= records_filtered_out_by_cpk; + next->records= records; + + cost+= next->index_read_cost; + if (cost >= cutoff_cost) + return FALSE; + + cost+= get_sweep_read_cost(common_info->param, records); + + next->cost= cost; + next->length= curr->length+1; + + return TRUE; +} + + +/* + Search for the cheapest extensions of range scans used to access a table + + SYNOPSIS + find_index_intersect_best_extension() + curr partial intersection to evaluate all possible extension for + + DESCRIPTION + The function tries to extend the partial plan curr in all possible ways + to look for a cheapest index intersection whose cost less than the + cut off value set in curr->common_info.cutoff_cost. +*/ + +static +void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr) +{ + PARTIAL_INDEX_INTERSECT_INFO next; + COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info; + INDEX_SCAN_INFO **index_scans= common_info->search_scans; + uint idx= curr->length; + INDEX_SCAN_INFO **rem_first_index_scan_ptr= &index_scans[idx]; + double cost= curr->cost; + + if (cost + COST_EPS < common_info->best_cost) + { + common_info->best_cost= cost; + common_info->best_length= curr->length; + common_info->best_records= curr->records; + common_info->filtered_scans= curr->filtered_scans; + /* common_info->best_uses_cpk <=> at least one scan uses a cpk filter */ + common_info->best_uses_cpk= !curr->filtered_scans.is_clear_all(); + uint sz= sizeof(INDEX_SCAN_INFO *) * curr->length; + memcpy(common_info->best_intersect, common_info->search_scans, sz); + common_info->cutoff_cost= cost; + } + + if (!(*rem_first_index_scan_ptr)) + return; + + next.common_info= common_info; + + INDEX_SCAN_INFO *rem_first_index_scan= *rem_first_index_scan_ptr; + for (INDEX_SCAN_INFO **index_scan_ptr= rem_first_index_scan_ptr; + *index_scan_ptr; index_scan_ptr++) + { + *rem_first_index_scan_ptr= *index_scan_ptr; + *index_scan_ptr= rem_first_index_scan; + if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next)) + find_index_intersect_best_extension(&next); + *index_scan_ptr= *rem_first_index_scan_ptr; + *rem_first_index_scan_ptr= rem_first_index_scan; + } +} + + +/* + Get the plan of the best intersection of range scans used to access a table + + SYNOPSIS + get_best_index_intersect() + param common info about index ranges + tree tree of ranges for indexes than can be intersected + read_time cut off value for the evaluated plans + + DESCRIPTION + The function looks for the cheapest index intersection of the range + scans to access a table. The info about the ranges for all indexes + is provided by the range optimizer and is passed through the + parameters param and tree. Any plan whose cost is greater than read_time + is rejected. + After the best index intersection is found the function constructs + the structure that manages the execution by the chosen plan. + + RETURN + Pointer to the generated execution structure if a success, + 0 - otherwise. +*/ + +static +TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree, + double read_time) +{ + uint i; + uint count; + TRP_RANGE **cur_range; + TRP_RANGE **range_scans; + INDEX_SCAN_INFO *index_scan; + COMMON_INDEX_INTERSECT_INFO common; + PARTIAL_INDEX_INTERSECT_INFO init; + TRP_INDEX_INTERSECT *intersect_trp= NULL; + TABLE *table= param->table; + + + DBUG_ENTER("get_best_index_intersect"); + + if (prepare_search_best_index_intersect(param, tree, &common, &init, + read_time)) + DBUG_RETURN(NULL); + + find_index_intersect_best_extension(&init); + + if (common.best_length <= 1 && !common.best_uses_cpk) + DBUG_RETURN(NULL); + + if (common.best_uses_cpk) + { + memmove((char *) (common.best_intersect+1), (char *) common.best_intersect, + sizeof(INDEX_SCAN_INFO *) * common.best_length); + common.best_intersect[0]= common.cpk_scan; + common.best_length++; + } + + count= common.best_length; + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE *)* + count))) + DBUG_RETURN(NULL); + + for (i= 0, cur_range= range_scans; i < count; i++) + { + index_scan= common.best_intersect[i]; + if ((*cur_range= new (param->mem_root) TRP_RANGE(index_scan->sel_arg, + index_scan->idx, 0))) + { + TRP_RANGE *trp= *cur_range; + trp->read_cost= index_scan->index_read_cost; + trp->records= index_scan->records; + trp->is_ror= FALSE; + trp->mrr_buf_size= 0; + table->intersect_keys.set_bit(index_scan->keynr); + cur_range++; + } + } + + count= tree->index_scans_end - tree->index_scans; + for (i= 0; i < count; i++) + { + index_scan= tree->index_scans[i]; + if (!table->intersect_keys.is_set(index_scan->keynr)) + { + for (uint j= 0; j < common.best_length; j++) + { + INDEX_SCAN_INFO *scan= common.best_intersect[j]; + if (same_index_prefix(index_scan->key_info, scan->key_info, + scan->used_key_parts)) + { + table->intersect_keys.set_bit(index_scan->keynr); + break; + } + } + } + } + + if ((intersect_trp= new (param->mem_root)TRP_INDEX_INTERSECT)) + { + intersect_trp->read_cost= common.best_cost; + intersect_trp->records= common.best_records; + intersect_trp->range_scans= range_scans; + intersect_trp->range_scans_end= cur_range; + intersect_trp->filtered_scans= common.filtered_scans; + } + DBUG_RETURN(intersect_trp); +} + + +typedef struct st_ror_scan_info : INDEX_SCAN_INFO +{ } ROR_SCAN_INFO; @@ -4072,7 +5691,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror_scan->key_rec_length= (param->table->key_info[keynr].key_length + param->table->file->ref_length); ror_scan->sel_arg= sel_arg; - ror_scan->records= param->table->quick_rows[keynr]; + ror_scan->records= param->quick_rows[keynr]; if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root, param->fields_bitmap_size))) @@ -4378,7 +5997,7 @@ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info, } if (!prev_covered) { - double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) / + double tmp= rows2double(info->param->quick_rows[scan->keynr]) / rows2double(prev_records); DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); selectivity_mult *= tmp; @@ -4457,7 +6076,7 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info, } else { - info->index_records += info->param->table->quick_rows[ror_scan->keynr]; + info->index_records += info->param->quick_rows[ror_scan->keynr]; info->index_scan_costs += ror_scan->index_read_cost; bitmap_union(&info->covered_fields, &ror_scan->covered_fields); if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, @@ -4708,7 +6327,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, /* Get best covering ROR-intersection. SYNOPSIS - get_best_covering_ror_intersect() + get_best_ntersectcovering_ror_intersect() param Parameter from test_quick_select function. tree SEL_TREE with sets of intervals for different keys. read_time Don't return table read plans with cost > read_time. @@ -4896,6 +6515,14 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, "tree scans");); tree->ror_scans_map.clear_all(); tree->n_ror_scans= 0; + tree->index_scans= 0; + if (!tree->keys_map.is_clear_all()) + { + tree->index_scans= + (INDEX_SCAN_INFO **) alloc_root(param->mem_root, + sizeof(INDEX_SCAN_INFO *) * param->keys); + } + tree->index_scans_end= tree->index_scans; for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++) { if (*key) @@ -4904,18 +6531,32 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, COST_VECT cost; double found_read_time; uint mrr_flags, buf_size; + INDEX_SCAN_INFO *index_scan; uint keynr= param->real_keynr[idx]; if ((*key)->type == SEL_ARG::MAYBE_KEY || (*key)->maybe_flag) param->needed_reg->set_bit(keynr); - bool read_index_only= index_read_must_be_used || - param->table->covering_keys.is_set(keynr); + bool read_index_only= index_read_must_be_used ? TRUE : + (bool) param->table->covering_keys.is_set(keynr); found_records= check_quick_select(param, idx, read_index_only, *key, update_tbl_stats, &mrr_flags, &buf_size, &cost); + if (found_records != HA_POS_ERROR && tree->index_scans && + (index_scan= (INDEX_SCAN_INFO *)alloc_root(param->mem_root, + sizeof(INDEX_SCAN_INFO)))) + { + index_scan->idx= idx; + index_scan->keynr= keynr; + index_scan->key_info= ¶m->table->key_info[keynr]; + index_scan->used_key_parts= param->max_key_part+1; + index_scan->range_count= param->range_count; + index_scan->records= found_records; + index_scan->sel_arg= *key; + *tree->index_scans_end++= index_scan; + } if ((found_records != HA_POS_ERROR) && param->is_ror_scan) { tree->n_ror_scans++; @@ -4985,6 +6626,36 @@ QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, return quick_imerge; } + +QUICK_SELECT_I *TRP_INDEX_INTERSECT::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_INTERSECT_SELECT *quick_intersect; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_intersect= new QUICK_INDEX_INTERSECT_SELECT(param->thd, param->table))) + return NULL; + + quick_intersect->records= records; + quick_intersect->read_time= read_cost; + quick_intersect->filtered_scans= filtered_scans; + for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; + range_scan++) + { + if (!(quick= (QUICK_RANGE_SELECT*) + ((*range_scan)->make_quick(param, FALSE, &quick_intersect->alloc)))|| + quick_intersect->push_quick_back(quick)) + { + delete quick; + delete quick_intersect; + return NULL; + } + } + return quick_intersect; +} + + QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, bool retrieve_full_rows, MEM_ROOT *parent_alloc) @@ -6117,13 +7788,138 @@ sel_add(SEL_ARG *key1,SEL_ARG *key2) return root; } -#define CLONE_KEY1_MAYBE 1 -#define CLONE_KEY2_MAYBE 2 -#define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) +/* + Build a range tree for the conjunction of the range parts of two trees -static SEL_TREE * -tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) + SYNOPSIS + and_range_trees() + param Context info for the operation + tree1 SEL_TREE for the first conjunct + tree2 SEL_TREE for the second conjunct + result SEL_TREE for the result + + DESCRIPTION + This function takes range parts of two trees tree1 and tree2 and builds + a range tree for the conjunction of the formulas that these two range parts + represent. + More exactly: + if the range part of tree1 represents the normalized formula + R1_1 AND ... AND R1_k, + and the range part of tree2 represents the normalized formula + R2_1 AND ... AND R2_k, + then the range part of the result represents the formula: + RT = R_1 AND ... AND R_k, where R_i=(R1_i AND R2_i) for each i from [1..k] + + The function assumes that tree1 is never equal to tree2. At the same + time the tree result can be the same as tree1 (but never as tree2). + If result==tree1 then rt replaces the range part of tree1 leaving + imerges as they are. + if result!=tree1 than it is assumed that the SEL_ARG trees in tree1 and + tree2 should be preserved. Otherwise they can be destroyed. + + RETURN + 1 if the type the result tree is SEL_TREE::IMPOSSIBLE + 0 otherwise +*/ + +static +int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2, + SEL_TREE *result) +{ + DBUG_ENTER("and_ranges"); + key_map result_keys; + result_keys.clear_all(); + key_map anded_keys= tree1->keys_map; + anded_keys.merge(tree2->keys_map); + int key_no; + key_map::Iterator it(anded_keys); + while ((key_no= it++) != key_map::Iterator::BITMAP_END) + { + uint flag=0; + SEL_ARG *key1= tree1->keys[key_no]; + SEL_ARG *key2= tree2->keys[key_no]; + if (key1 && !key1->simple_key()) + flag|= CLONE_KEY1_MAYBE; + if (key2 && !key2->simple_key()) + flag|=CLONE_KEY2_MAYBE; + if (result != tree1) + { + if (key1) + key1->incr_refs(); + if (key2) + key2->incr_refs(); + } + SEL_ARG *key; + if ((result->keys[key_no]= key =key_and(param, key1, key2, flag))) + { + if (key && key->type == SEL_ARG::IMPOSSIBLE) + { + result->type= SEL_TREE::IMPOSSIBLE; + DBUG_RETURN(1); + } + result_keys.set_bit(key_no); +#ifdef EXTRA_DEBUG + if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) + key->test_use_count(key); +#endif + } + } + result->keys_map= result_keys; + DBUG_RETURN(0); +} + + +/* + Build a SEL_TREE for a conjunction out of such trees for the conjuncts + + SYNOPSIS + tree_and() + param Context info for the operation + tree1 SEL_TREE for the first conjunct + tree2 SEL_TREE for the second conjunct + + DESCRIPTION + This function builds a tree for the formula (A AND B) out of the trees + tree1 and tree2 that has been built for the formulas A and B respectively. + + In a general case + tree1 represents the formula RT1 AND MT1, + where RT1 = R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1; + tree2 represents the formula RT2 AND MT2 + where RT2 = R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2. + + The result tree will represent the formula of the the following structure: + RT AND MT1 AND MT2 AND RT1MT2 AND RT2MT1, such that + rt is a tree obtained by range intersection of trees tree1 and tree2, + RT1MT2 = RT1M2_1 AND ... AND RT1M2_l2, + RT2MT1 = RT2M1_1 AND ... AND RT2M1_l1, + where rt1m2_i (i=1,...,l2) is the result of the pushdown operation + of range tree rt1 into imerge m2_i, while rt2m1_j (j=1,...,l1) is the + result of the pushdown operation of range tree rt2 into imerge m1_j. + + RT1MT2/RT2MT is empty if MT2/MT1 is empty. + + The range intersection of two range trees is produced by the function + and_range_trees. The pushdown of a range tree to a imerge is performed + by the function imerge_list_and_tree. This function may produce imerges + containing only one range tree. Such trees are intersected with rt and + the result of intersection is returned as the range part of the result + tree, while the corresponding imerges are removed altogether from its + imerge part. + + NOTE. + The pushdown operation of range trees into imerges is needed to be able + to construct valid imerges for the condition like this: + key1_p1=c1 AND (key1_p2 BETWEEN c21 AND c22 OR key2 < c2) + + RETURN + The result tree, if a success + 0 - otherwise. +*/ + +static +SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2) { DBUG_ENTER("tree_and"); if (!tree1) @@ -6145,87 +7941,216 @@ tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) tree1->type=SEL_TREE::KEY_SMALLER; DBUG_RETURN(tree1); } - key_map result_keys; - result_keys.clear_all(); - - /* Join the trees key per key */ - SEL_ARG **key1,**key2,**end; - for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ; - key1 != end ; key1++,key2++) + + if (!tree1->merges.is_empty()) + imerge_list_and_tree(param, &tree1->merges, tree2); + if (!tree2->merges.is_empty()) + imerge_list_and_tree(param, &tree2->merges, tree1); + if (and_range_trees(param, tree1, tree2, tree1)) + DBUG_RETURN(tree1); + imerge_list_and_list(&tree1->merges, &tree2->merges); + eliminate_single_tree_imerges(param, tree1); + DBUG_RETURN(tree1); +} + + +/* + Eliminate single tree imerges in a SEL_TREE objects + + SYNOPSIS + eliminate_single_tree_imerges() + param Context info for the function + tree SEL_TREE where single tree imerges are to be eliminated + + DESCRIPTION + For each imerge in 'tree' that contains only one disjunct tree, i.e. + for any imerge of the form m=rt, the function performs and operation + the range part of tree, replaces rt the with the result of anding and + removes imerge m from the the merge part of 'tree'. + + RETURN VALUE + none +*/ + +static +void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param, SEL_TREE *tree) +{ + SEL_IMERGE *imerge; + List<SEL_IMERGE> merges= tree->merges; + List_iterator<SEL_IMERGE> it(merges); + tree->merges.empty(); + while ((imerge= it++)) { - uint flag=0; - if (*key1 || *key2) - { - if (*key1 && !(*key1)->simple_key()) - flag|=CLONE_KEY1_MAYBE; - if (*key2 && !(*key2)->simple_key()) - flag|=CLONE_KEY2_MAYBE; - *key1=key_and(param, *key1, *key2, flag); - if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE) - { - tree1->type= SEL_TREE::IMPOSSIBLE; - DBUG_RETURN(tree1); - } - result_keys.set_bit(key1 - tree1->keys); -#ifdef EXTRA_DEBUG - if (*key1 && param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) - (*key1)->test_use_count(*key1); -#endif + if (imerge->trees+1 == imerge->trees_next) + { + tree= tree_and(param, tree, *imerge->trees); + it.remove(); } } - tree1->keys_map= result_keys; - /* dispose index_merge if there is a "range" option */ - if (!result_keys.is_clear_all()) - { - tree1->merges.empty(); - DBUG_RETURN(tree1); - } + tree->merges= merges; +} - /* ok, both trees are index_merge trees */ - imerge_list_and_list(&tree1->merges, &tree2->merges); - DBUG_RETURN(tree1); + +/* + For two trees check that there are indexes with ranges in both of them + + SYNOPSIS + sel_trees_have_common_keys() + tree1 SEL_TREE for the first tree + tree2 SEL_TREE for the second tree + common_keys OUT bitmap of all indexes with ranges in both trees + + DESCRIPTION + For two trees tree1 and tree1 the function checks if there are indexes + in their range parts such that SEL_ARG trees are defined for them in the + range parts of both trees. The function returns the bitmap of such + indexes in the parameter common_keys. + + RETURN + TRUE if there are such indexes (common_keys is nor empty) + FALSE otherwise +*/ + +static +bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, + key_map *common_keys) +{ + *common_keys= tree1->keys_map; + common_keys->intersect(tree2->keys_map); + return !common_keys->is_clear_all(); } /* - Check if two SEL_TREES can be combined into one (i.e. a single key range - read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without - using index_merge. + Check whether range parts of two trees can be ored for some indexes + + SYNOPSIS + sel_trees_can_be_ored() + param Context info for the function + tree1 SEL_TREE for the first tree + tree2 SEL_TREE for the second tree + common_keys IN/OUT IN: bitmap of all indexes with SEL_ARG in both trees + OUT: bitmap of all indexes that can be ored + + DESCRIPTION + For two trees tree1 and tree2 and the bitmap common_keys containing + bits for indexes that have SEL_ARG trees in range parts of both trees + the function checks if there are indexes for which SEL_ARG trees can + be ored. Two SEL_ARG trees for the same index can be ored if the most + major components of the index used in these trees coincide. If the + SEL_ARG trees for an index cannot be ored the function clears the bit + for this index in the bitmap common_keys. + + The function does not verify that indexes marked in common_keys really + have SEL_ARG trees in both tree1 and tree2. It assumes that this is true. + + NOTE + The function sel_trees_can_be_ored is usually used in pair with the + function sel_trees_have_common_keys. + + RETURN + TRUE if there are indexes for which SEL_ARG trees can be ored + FALSE otherwise */ -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, - RANGE_OPT_PARAM* param) +static +bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param, + SEL_TREE *tree1, SEL_TREE *tree2, + key_map *common_keys) { - key_map common_keys= tree1->keys_map; DBUG_ENTER("sel_trees_can_be_ored"); - common_keys.intersect(tree2->keys_map); + if (!sel_trees_have_common_keys(tree1, tree2, common_keys)) + DBUG_RETURN(FALSE); + int key_no; + key_map::Iterator it(*common_keys); + while ((key_no= it++) != key_map::Iterator::BITMAP_END) + { + DBUG_ASSERT(tree1->keys[key_no] && tree2->keys[key_no]); + /* Trees have a common key, check if they refer to the same key part */ + if (tree1->keys[key_no]->part != tree2->keys[key_no]->part) + common_keys->clear_bit(key_no); + } + DBUG_RETURN(!common_keys->is_clear_all()); +} - if (common_keys.is_clear_all()) +/* + Check whether range parts of two trees must be ored for some indexes + + SYNOPSIS + sel_trees_must_be_ored() + param Context info for the function + tree1 SEL_TREE for the first tree + tree2 SEL_TREE for the second tree + ordable_keys bitmap of SEL_ARG trees that can be ored + + DESCRIPTION + For two trees tree1 and tree2 the function checks whether they must be + ored. The function assumes that the bitmap ordable_keys contains bits for + those corresponding pairs of SEL_ARG trees from tree1 and tree2 that can + be ored. + We believe that tree1 and tree2 must be ored if any pair of SEL_ARG trees + r1 and r2, such that r1 is from tree1 and r2 is from tree2 and both + of them are marked in ordable_keys, can be merged. + + NOTE + The function sel_trees_must_be_ored as a rule is used in pair with the + function sel_trees_can_be_ored. + + RETURN + TRUE if there are indexes for which SEL_ARG trees must be ored + FALSE otherwise +*/ + +static +bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param, + SEL_TREE *tree1, SEL_TREE *tree2, + key_map oredable_keys) +{ + key_map tmp; + DBUG_ENTER("sel_trees_must_be_ored"); + + tmp= tree1->keys_map; + tmp.merge(tree2->keys_map); + tmp.subtract(oredable_keys); + if (!tmp.is_clear_all()) DBUG_RETURN(FALSE); - /* trees have a common key, check if they refer to same key part */ - SEL_ARG **key1,**key2; - for (uint key_no=0; key_no < param->keys; key_no++) + int idx1, idx2; + key_map::Iterator it1(oredable_keys); + while ((idx1= it1++) != key_map::Iterator::BITMAP_END) { - if (common_keys.is_set(key_no)) + KEY_PART *key1_init= param->key[idx1]+tree1->keys[idx1]->part; + KEY_PART *key1_end= param->key[idx1]+tree1->keys[idx1]->max_part_no; + key_map::Iterator it2(oredable_keys); + while ((idx2= it2++) != key_map::Iterator::BITMAP_END) { - key1= tree1->keys + key_no; - key2= tree2->keys + key_no; - if ((*key1)->part == (*key2)->part) - { - DBUG_RETURN(TRUE); + if (idx2 <= idx1) + continue; + + KEY_PART *key2_init= param->key[idx2]+tree2->keys[idx2]->part; + KEY_PART *key2_end= param->key[idx2]+tree2->keys[idx2]->max_part_no; + KEY_PART *part1, *part2; + for (part1= key1_init, part2= key2_init; + part1 < key1_end && part2 < key2_end; + part1++, part2++) + { + if (!part1->field->eq(part2->field)) + DBUG_RETURN(FALSE); } } } - DBUG_RETURN(FALSE); -} + + DBUG_RETURN(TRUE); +} /* - Remove the trees that are not suitable for record retrieval. + Remove the trees that are not suitable for record retrieval + SYNOPSIS - param Range analysis parameter - tree Tree to be processed, tree->type is KEY or KEY_SMALLER + remove_nonrange_trees() + param Context info for the function + tree Tree to be processed, tree->type is KEY or KEY_SMALLER DESCRIPTION This function walks through tree->keys[] and removes the SEL_ARG* trees @@ -6236,41 +8161,36 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, A SEL_ARG* tree cannot be used to construct quick select if it has tree->part != 0. (e.g. it could represent "keypart2 < const"). - - WHY THIS FUNCTION IS NEEDED Normally we allow construction of SEL_TREE objects that have SEL_ARG - trees that do not allow quick range select construction. For example for - " keypart1=1 AND keypart2=2 " the execution will proceed as follows: + trees that do not allow quick range select construction. + For example: + for " keypart1=1 AND keypart2=2 " the execution will proceed as follows: tree1= SEL_TREE { SEL_ARG{keypart1=1} } tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select from this call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG tree. - - There is an exception though: when we construct index_merge SEL_TREE, - any SEL_ARG* tree that cannot be used to construct quick range select can - be removed, because current range analysis code doesn't provide any way - that tree could be later combined with another tree. - Consider an example: we should not construct - st1 = SEL_TREE { - merges = SEL_IMERGE { - SEL_TREE(t.key1part1 = 1), - SEL_TREE(t.key2part2 = 2) -- (*) - } - }; - because - - (*) cannot be used to construct quick range select, - - There is no execution path that would cause (*) to be converted to - a tree that could be used. - - The latter is easy to verify: first, notice that the only way to convert - (*) into a usable tree is to call tree_and(something, (*)). - - Second look at what tree_and/tree_or function would do when passed a - SEL_TREE that has the structure like st1 tree has, and conlcude that - tree_and(something, (*)) will not be called. + Another example: + tree3= SEL_TREE { SEL_ARG{key1part1 = 1} } + tree4= SEL_TREE { SEL_ARG{key2part2 = 2} } -- can't make quick range select + from this + call tree_or(tree3, tree4) -- creates a SEL_MERGE ot of which no index + merge can be constructed, but it is potentially useful, as anding it with + tree5= SEL_TREE { SEL_ARG{key2part1 = 3} } creates an index merge that + represents the formula + key1part1=1 AND key2part1=3 OR key2part1=3 AND key2part2=2 + for which an index merge can be built. + + Any final SEL_TREE may contain SEL_ARG trees for which no quick select + can be built. Such SEL_ARG trees should be removed from the range part + before different range scans are evaluated. Such SEL_ARG trees also should + be removed from all range trees of each index merge before different + possible index merge plans are evaluated. If after this removal one + of the range trees in the index merge becomes empty the whole index merge + must be discarded. + RETURN 0 Ok, some suitable trees left 1 No tree->keys[] left. @@ -6296,6 +8216,74 @@ static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) } +/* + Build a SEL_TREE for a disjunction out of such trees for the disjuncts + + SYNOPSIS + tree_or() + param Context info for the operation + tree1 SEL_TREE for the first disjunct + tree2 SEL_TREE for the second disjunct + + DESCRIPTION + This function builds a tree for the formula (A OR B) out of the trees + tree1 and tree2 that has been built for the formulas A and B respectively. + + In a general case + tree1 represents the formula RT1 AND MT1, + where RT1=R1_1 AND ... AND R1_k1, MT1=M1_1 AND ... AND M1_l1; + tree2 represents the formula RT2 AND MT2 + where RT2=R2_1 AND ... AND R2_k2, MT2=M2_1 and ... and M2_l2. + + The function constructs the result tree according the formula + (RT1 OR RT2) AND (MT1 OR RT1) AND (MT2 OR RT2) AND (MT1 OR MT2) + that is equivalent to the formula (RT1 AND MT1) OR (RT2 AND MT2). + + To limit the number of produced imerges the function considers + a weaker formula than the original one: + (RT1 AND M1_1) OR (RT2 AND M2_1) + that is equivalent to: + (RT1 OR RT2) (1) + AND + (M1_1 OR M2_1) (2) + AND + (M1_1 OR RT2) (3) + AND + (M2_1 OR RT1) (4) + + For the first conjunct (1) the function builds a tree with a range part + and, possibly, one imerge. For the other conjuncts (2-4)the function + produces sets of imerges. All constructed imerges are included into the + result tree. + + For the formula (1) the function produces the tree representing a formula + of the structure RT [AND M], such that: + - the range tree rt contains the result of oring SEL_ARG trees from rt1 + and rt2 + - the imerge m consists of two range trees rt1 and rt2. + The imerge m is added if it's not true that rt1 and rt2 must be ored + If rt1 and rt2 can't be ored rt is empty and only m is produced for (1). + + To produce imerges for the formula (2) the function calls the function + imerge_list_or_list passing it the merge parts of tree1 and tree2 as + parameters. + + To produce imerges for the formula (3) the function calls the function + imerge_list_or_tree passing it the imerge m1_1 and the range tree rt2 as + parameters. Similarly, to produce imerges for the formula (4) the function + calls the function imerge_list_or_tree passing it the imerge m2_1 and the + range tree rt1. + + If rt1 is empty then the trees for (1) and (4) are empty. + If rt2 is empty then the trees for (1) and (3) are empty. + If mt1 is empty then the trees for (2) and (3) are empty. + If mt2 is empty then the trees for (2) and (4) are empty. + + RETURN + The result tree for the operation if a success + 0 - otherwise +*/ + static SEL_TREE * tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { @@ -6311,74 +8299,100 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) if (tree2->type == SEL_TREE::MAYBE) DBUG_RETURN(tree2); - SEL_TREE *result= 0; - key_map result_keys; - result_keys.clear_all(); - if (sel_trees_can_be_ored(tree1, tree2, param)) + SEL_TREE *result= NULL; + key_map result_keys; + key_map ored_keys; + SEL_TREE *rtree[2]= {NULL,NULL}; + SEL_IMERGE *imerge[2]= {NULL, NULL}; + bool no_ranges1= tree1->without_ranges(); + bool no_ranges2= tree2->without_ranges(); + bool no_merges1= tree1->without_imerges(); + bool no_merges2= tree2->without_imerges(); + if (!no_ranges1 && !no_merges2) { - /* Join the trees key per key */ - SEL_ARG **key1,**key2,**end; - for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ; - key1 != end ; key1++,key2++) - { - *key1=key_or(param, *key1, *key2); - if (*key1) - { - result=tree1; // Added to tree1 - result_keys.set_bit(key1 - tree1->keys); -#ifdef EXTRA_DEBUG - if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS) - (*key1)->test_use_count(*key1); -#endif - } - } - if (result) - result->keys_map= result_keys; + rtree[0]= new SEL_TREE(tree1, TRUE, param); + imerge[1]= new SEL_IMERGE(tree2->merges.head(), 0, param); } - else + if (!no_ranges2 && !no_merges1) + { + rtree[1]= new SEL_TREE(tree2, TRUE, param); + imerge[0]= new SEL_IMERGE(tree1->merges.head(), 0, param); + } + bool no_imerge_from_ranges= FALSE; + if (!(result= new SEL_TREE())) + DBUG_RETURN(result); + + /* Build the range part of the tree for the formula (1) */ + if (sel_trees_can_be_ored(param, tree1, tree2, &ored_keys)) { - /* ok, two trees have KEY type but cannot be used without index merge */ - if (tree1->merges.is_empty() && tree2->merges.is_empty()) + bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys); + no_imerge_from_ranges= must_be_ored; + key_map::Iterator it(ored_keys); + int key_no; + while ((key_no= it++) != key_map::Iterator::BITMAP_END) { - if (param->remove_jump_scans) + SEL_ARG *key1= tree1->keys[key_no]; + SEL_ARG *key2= tree2->keys[key_no]; + if (!must_be_ored) { - bool no_trees= remove_nonrange_trees(param, tree1); - no_trees= no_trees || remove_nonrange_trees(param, tree2); - if (no_trees) - DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); + key1->incr_refs(); + key2->incr_refs(); } - SEL_IMERGE *merge; - /* both trees are "range" trees, produce new index merge structure */ - if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) || - (result->merges.push_back(merge)) || - (merge->or_sel_tree(param, tree1)) || - (merge->or_sel_tree(param, tree2))) - result= NULL; - else - result->type= tree1->type; + if ((result->keys[key_no]= key_or(param, key1, key2))) + result->keys_map.set_bit(key_no); } - else if (!tree1->merges.is_empty() && !tree2->merges.is_empty()) - { - if (imerge_list_or_list(param, &tree1->merges, &tree2->merges)) - result= new SEL_TREE(SEL_TREE::ALWAYS); - else - result= tree1; - } - else - { - /* one tree is index merge tree and another is range tree */ - if (tree1->merges.is_empty()) - swap_variables(SEL_TREE*, tree1, tree2); + result->type= tree1->type; + } - if (param->remove_jump_scans && remove_nonrange_trees(param, tree2)) - DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); - /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */ - if (imerge_list_or_tree(param, &tree1->merges, tree2)) - result= new SEL_TREE(SEL_TREE::ALWAYS); - else - result= tree1; - } + if (no_imerge_from_ranges && no_merges1 && no_merges2) + { + if (result->keys_map.is_clear_all()) + result->type= SEL_TREE::ALWAYS; + DBUG_RETURN(result); + } + + SEL_IMERGE *imerge_from_ranges; + if (!(imerge_from_ranges= new SEL_IMERGE())) + result= NULL; + else if (!no_ranges1 && !no_ranges2 && !no_imerge_from_ranges) + { + /* Build the imerge part of the tree for the formula (1) */ + SEL_TREE *rt1= tree1; + SEL_TREE *rt2= tree2; + if (!no_merges1) + rt1= new SEL_TREE(tree1, TRUE, param); + if (!no_merges2) + rt2= new SEL_TREE(tree2, TRUE, param); + if (!rt1 || !rt2 || + result->merges.push_back(imerge_from_ranges) || + imerge_from_ranges->or_sel_tree(param, rt1) || + imerge_from_ranges->or_sel_tree(param, rt2)) + result= NULL; + } + if (!result) + DBUG_RETURN(result); + + result->type= tree1->type; + + if (!no_merges1 && !no_merges2 && + !imerge_list_or_list(param, &tree1->merges, &tree2->merges)) + { + /* Build the imerges for the formula (2) */ + imerge_list_and_list(&result->merges, &tree1->merges); + } + + /* Build the imerges for the formulas (3) and (4) */ + for (uint i=0; i < 2; i++) + { + List<SEL_IMERGE> merges; + SEL_TREE *rt= rtree[i]; + SEL_IMERGE *im= imerge[1-i]; + + if (rt && im && !merges.push_back(im) && + !imerge_list_or_tree(param, &merges, rt)) + imerge_list_and_list(&result->merges, &merges); } + DBUG_RETURN(result); } @@ -6424,6 +8438,7 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, if (!key1) return &null_element; // Impossible ranges key1->use_count++; + key1->max_part_no= max(key2->max_part_no, key2->part+1); return key1; } @@ -6516,6 +8531,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) key1->use_count--; key2->use_count--; SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0; + uint max_part_no= max(key1->max_part_no, key2->max_part_no); while (e1 && e2) { @@ -6553,6 +8569,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) key2->free_tree(); if (!new_tree) return &null_element; // Impossible range + new_tree->max_part_no= max_part_no; return new_tree; } @@ -6681,7 +8698,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) { swap_variables(SEL_ARG *,key1,key2); } - if (key1->use_count > 0 || !(key1=key1->clone_tree(param))) + if (key1->use_count > 0 && !(key1=key1->clone_tree(param))) return 0; // OOM } @@ -6689,6 +8706,8 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) bool key2_shared=key2->use_count != 0; key1->maybe_flag|=key2->maybe_flag; + uint max_part_no= max(key1->max_part_no, key2->max_part_no); + for (key2=key2->first(); key2; ) { SEL_ARG *tmp=key1->find_range(key2); // Find key1.min <= key2.min @@ -6922,6 +8941,7 @@ end: key2=next; } key1->use_count++; + key1->max_part_no= max_part_no; return key1; } @@ -7397,11 +9417,7 @@ static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key) void SEL_ARG::test_use_count(SEL_ARG *root) { uint e_count=0; - if (this == root && use_count != 1) - { - sql_print_information("Use_count: Wrong count %lu for root",use_count); - return; - } + if (this->type != SEL_ARG::KEY_RANGE) return; for (SEL_ARG *pos=first(); pos ; pos=pos->next) @@ -7505,14 +9521,15 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, bufsize, mrr_flags, cost); if (rows != HA_POS_ERROR) { - param->table->quick_rows[keynr]=rows; + param->quick_rows[keynr]= rows; if (update_tbl_stats) { param->table->quick_keys.set_bit(keynr); - param->table->quick_key_parts[keynr]=param->max_key_part+1; + param->table->quick_key_parts[keynr]= param->max_key_part+1; param->table->quick_n_ranges[keynr]= param->range_count; param->table->quick_condition_rows= min(param->table->quick_condition_rows, rows); + param->table->quick_rows[keynr]= rows; } } /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */ @@ -7857,7 +9874,7 @@ bool QUICK_SELECT_I::is_keys_used(const MY_BITMAP *fields) return is_key_used(head, index, fields); } -bool QUICK_INDEX_MERGE_SELECT::is_keys_used(const MY_BITMAP *fields) +bool QUICK_INDEX_SORT_SELECT::is_keys_used(const MY_BITMAP *fields) { QUICK_RANGE_SELECT *quick; List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); @@ -8045,13 +10062,23 @@ err: other error */ -int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() +int read_keys_and_merge_scans(THD *thd, + TABLE *head, + List<QUICK_RANGE_SELECT> quick_selects, + QUICK_RANGE_SELECT *pk_quick_select, + READ_RECORD *read_record, + bool intersection, + key_map *filtered_scans, + Unique **unique_ptr) { List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects); QUICK_RANGE_SELECT* cur_quick; int result; + Unique *unique= *unique_ptr; handler *file= head->file; - DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge"); + bool with_cpk_filter= pk_quick_select != NULL; + + DBUG_ENTER("read_keys_and_merge"); /* We're going to just read rowids. */ if (!head->key_read) @@ -8062,6 +10089,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() cur_quick_it.rewind(); cur_quick= cur_quick_it++; + bool first_quick= TRUE; DBUG_ASSERT(cur_quick != 0); /* @@ -8079,9 +10107,11 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() unique= new Unique(refpos_order_cmp, (void *)file, file->ref_length, - thd->variables.sortbuff_size); + thd->variables.sortbuff_size, + intersection ? quick_selects.elements : 0); if (!unique) goto err; + *unique_ptr= unique; } else unique->reset(); @@ -8093,6 +10123,14 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() { while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE) { + if (intersection) + with_cpk_filter= filtered_scans->is_set(cur_quick->index); + if (first_quick) + { + first_quick= FALSE; + if (intersection && unique->is_in_memory()) + unique->close_for_expansion(); + } cur_quick->range_end(); cur_quick= cur_quick_it++; if (!cur_quick) @@ -8117,8 +10155,8 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() if (thd->killed) goto err; - /* skip row if it will be retrieved by clustered PK scan */ - if (pk_quick_select && pk_quick_select->row_in_ranges()) + if (with_cpk_filter && + pk_quick_select->row_in_ranges() != intersection ) continue; cur_quick->file->position(cur_quick->record); @@ -8132,14 +10170,13 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() sequence. */ result= unique->get(head); - doing_pk_scan= FALSE; /* - index_merge currently doesn't support "using index" at all + index merge currently doesn't support "using index" at all */ head->disable_keyread(); - if (init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE)) + if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE)) result= 1; - DBUG_RETURN(result); + DBUG_RETURN(result); err: head->disable_keyread(); @@ -8147,6 +10184,17 @@ err: } +int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() + +{ + int result; + DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge"); + result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select, + &read_record, FALSE, NULL, &unique); + doing_pk_scan= FALSE; + DBUG_RETURN(result); +} + /* Get next row for index_merge. NOTES @@ -8183,6 +10231,32 @@ int QUICK_INDEX_MERGE_SELECT::get_next() DBUG_RETURN(result); } +int QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge() + +{ + int result; + DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::read_keys_and_merge"); + result= read_keys_and_merge_scans(thd, head, quick_selects, pk_quick_select, + &read_record, TRUE, &filtered_scans, + &unique); + DBUG_RETURN(result); +} + +int QUICK_INDEX_INTERSECT_SELECT::get_next() +{ + int result; + DBUG_ENTER("QUICK_INDEX_INTERSECT_SELECT::get_next"); + + if ((result= read_record.read_record(&read_record)) == -1) + { + result= HA_ERR_END_OF_FILE; + end_read_record(&read_record); + free_io_cache(head); + } + + DBUG_RETURN(result); +} + /* Retrieve next record. @@ -8826,30 +10900,53 @@ bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg) } -void QUICK_RANGE_SELECT::add_info_string(String *str) +void QUICK_SELECT_I::add_key_name(String *str, bool *first) { KEY *key_info= head->key_info + index; + + if (*first) + *first= FALSE; + else + str->append(','); str->append(key_info->name); } + + +void QUICK_RANGE_SELECT::add_info_string(String *str) +{ + bool first= TRUE; + + add_key_name(str, &first); +} void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str) { QUICK_RANGE_SELECT *quick; bool first= TRUE; List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + str->append(STRING_WITH_LEN("sort_union(")); while ((quick= it++)) { - if (!first) - str->append(','); - else - first= FALSE; - quick->add_info_string(str); + quick->add_key_name(str, &first); } if (pk_quick_select) + pk_quick_select->add_key_name(str, &first); + str->append(')'); +} + +void QUICK_INDEX_INTERSECT_SELECT::add_info_string(String *str) +{ + QUICK_RANGE_SELECT *quick; + bool first= TRUE; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + + str->append(STRING_WITH_LEN("sort_intersect(")); + if (pk_quick_select) + pk_quick_select->add_key_name(str, &first); + while ((quick= it++)) { - str->append(','); - pk_quick_select->add_info_string(str); + quick->add_key_name(str, &first); } str->append(')'); } @@ -8859,130 +10956,125 @@ void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str) bool first= TRUE; QUICK_SELECT_WITH_RECORD *qr; List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects); + str->append(STRING_WITH_LEN("intersect(")); while ((qr= it++)) { - KEY *key_info= head->key_info + qr->quick->index; - if (!first) - str->append(','); - else - first= FALSE; - str->append(key_info->name); + qr->quick->add_key_name(str, &first); } if (cpk_quick) - { - KEY *key_info= head->key_info + cpk_quick->index; - str->append(','); - str->append(key_info->name); - } + cpk_quick->add_key_name(str, &first); str->append(')'); } + void QUICK_ROR_UNION_SELECT::add_info_string(String *str) { - bool first= TRUE; QUICK_SELECT_I *quick; + bool first= TRUE; List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + str->append(STRING_WITH_LEN("union(")); while ((quick= it++)) { - if (!first) - str->append(','); - else + if (first) first= FALSE; + else + str->append(','); quick->add_info_string(str); } str->append(')'); } -void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, - String *used_lengths) +void QUICK_SELECT_I::add_key_and_length(String *key_names, + String *used_lengths, + bool *first) { char buf[64]; uint length; KEY *key_info= head->key_info + index; + + if (*first) + *first= FALSE; + else + { + key_names->append(','); + used_lengths->append(','); + } key_names->append(key_info->name); length= longlong10_to_str(max_used_key_length, buf, 10) - buf; used_lengths->append(buf, length); } + +void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= TRUE; + + add_key_and_length(key_names, used_lengths, &first); +} + void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names, String *used_lengths) { - char buf[64]; - uint length; - bool first= TRUE; QUICK_RANGE_SELECT *quick; + bool first= TRUE; List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) { - if (first) - first= FALSE; - else - { - key_names->append(','); - used_lengths->append(','); - } - - KEY *key_info= head->key_info + quick->index; - key_names->append(key_info->name); - length= longlong10_to_str(quick->max_used_key_length, buf, 10) - buf; - used_lengths->append(buf, length); + quick->add_key_and_length(key_names, used_lengths, &first); } + if (pk_quick_select) + pk_quick_select->add_key_and_length(key_names, used_lengths, &first); +} + + +void QUICK_INDEX_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, + String *used_lengths) +{ + QUICK_RANGE_SELECT *quick; + bool first= TRUE; + + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + + if (pk_quick_select) + pk_quick_select->add_key_and_length(key_names, used_lengths, &first); + + while ((quick= it++)) { - KEY *key_info= head->key_info + pk_quick_select->index; - key_names->append(','); - key_names->append(key_info->name); - length= (longlong10_to_str(pk_quick_select->max_used_key_length, buf, 10) - - buf); - used_lengths->append(','); - used_lengths->append(buf, length); + quick->add_key_and_length(key_names, used_lengths, &first); } } void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names, String *used_lengths) { - char buf[64]; - uint length; - bool first= TRUE; QUICK_SELECT_WITH_RECORD *qr; + bool first= TRUE; + List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects); + while ((qr= it++)) { - KEY *key_info= head->key_info + qr->quick->index; - if (first) - first= FALSE; - else - { - key_names->append(','); - used_lengths->append(','); - } - key_names->append(key_info->name); - length= longlong10_to_str(qr->quick->max_used_key_length, buf, 10) - buf; - used_lengths->append(buf, length); + qr->quick->add_key_and_length(key_names, used_lengths, &first); } - if (cpk_quick) - { - KEY *key_info= head->key_info + cpk_quick->index; - key_names->append(','); - key_names->append(key_info->name); - length= longlong10_to_str(cpk_quick->max_used_key_length, buf, 10) - buf; - used_lengths->append(','); - used_lengths->append(buf, length); - } + cpk_quick->add_key_and_length(key_names, used_lengths, &first); } void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names, String *used_lengths) { - bool first= TRUE; QUICK_SELECT_I *quick; + bool first= TRUE; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) { if (first) @@ -10915,6 +13007,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() /* Compare the found key with max_key. */ int cmp_res= key_cmp(index_info->key_part, max_key, real_prefix_len + min_max_arg_len); + my_afree(max_key); /* The key is outside of the range if: the interval is open and the key is equal to the maximum boundry @@ -11040,6 +13133,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range() /* Compare the found key with min_key. */ int cmp_res= key_cmp(index_info->key_part, min_key, real_prefix_len + min_max_arg_len); + my_afree(min_key); /* The key is outside of the range if: the interval is open and the key is equal to the minimum boundry @@ -11140,11 +13234,9 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_max_result() void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, String *used_lengths) { - char buf[64]; - uint length; - key_names->append(index_info->name); - length= longlong10_to_str(max_used_key_length, buf, 10) - buf; - used_lengths->append(buf, length); + bool first= TRUE; + + add_key_and_length(key_names, used_lengths, &first); } @@ -11312,7 +13404,7 @@ void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose) /* purecov: end */ } -void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose) +void QUICK_INDEX_SORT_SELECT::dbug_dump(int indent, bool verbose) { List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); QUICK_RANGE_SELECT *quick; diff --git a/sql/opt_range.h b/sql/opt_range.h index fe8d3eae6c7..1a0a2ee0029 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -283,12 +283,13 @@ public: virtual void need_sorted_output() = 0; enum { QS_TYPE_RANGE = 0, - QS_TYPE_INDEX_MERGE = 1, - QS_TYPE_RANGE_DESC = 2, - QS_TYPE_FULLTEXT = 3, - QS_TYPE_ROR_INTERSECT = 4, - QS_TYPE_ROR_UNION = 5, - QS_TYPE_GROUP_MIN_MAX = 6 + QS_TYPE_INDEX_INTERSECT = 1, + QS_TYPE_INDEX_MERGE = 2, + QS_TYPE_RANGE_DESC = 3, + QS_TYPE_FULLTEXT = 4, + QS_TYPE_ROR_INTERSECT = 5, + QS_TYPE_ROR_UNION = 6, + QS_TYPE_GROUP_MIN_MAX = 7 }; /* Get type of this quick select - one of the QS_TYPE_* values */ @@ -314,6 +315,10 @@ public: Save ROWID of last retrieved row in file->ref. This used in ROR-merging. */ virtual void save_last_pos(){}; + + void add_key_and_length(String *key_names, + String *used_lengths, + bool *first); /* Append comma-separated list of keys this quick select uses to key_names; @@ -323,13 +328,16 @@ public: virtual void add_keys_and_lengths(String *key_names, String *used_lengths)=0; + void add_key_name(String *str, bool *first); + /* Append text representation of quick select structure (what and how is merged) to str. The result is added to "Extra" field in EXPLAIN output. This function is implemented only by quick selects that merge other quick selects output and/or can produce output suitable for merging. */ - virtual void add_info_string(String *str) {}; + virtual void add_info_string(String *str) {} + /* Return 1 if any index used by this quick select uses field which is marked in passed bitmap. @@ -458,12 +466,23 @@ private: uint mrr_buf_size, MEM_ROOT *alloc); friend class QUICK_SELECT_DESC; + friend class QUICK_INDEX_SORT_SELECT; friend class QUICK_INDEX_MERGE_SELECT; friend class QUICK_ROR_INTERSECT_SELECT; + friend class QUICK_INDEX_INTERSECT_SELECT; friend class QUICK_GROUP_MIN_MAX_SELECT; friend bool quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range); friend range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags); + friend + int read_keys_and_merge_scans(THD *thd, TABLE *head, + List<QUICK_RANGE_SELECT> quick_selects, + QUICK_RANGE_SELECT *pk_quick_select, + READ_RECORD *read_record, + bool intersection, + key_map *filtered_scans, + Unique **unique_ptr); + }; @@ -481,40 +500,44 @@ public: /* - QUICK_INDEX_MERGE_SELECT - index_merge access method quick select. + QUICK_INDEX_SORT_SELECT is the base class for the common functionality of: + - QUICK_INDEX_MERGE_SELECT, access based on multi-index merge/union + - QUICK_INDEX_INTERSECT_SELECT, access based on multi-index intersection + - QUICK_INDEX_MERGE_SELECT uses + QUICK_INDEX_SORT_SELECT uses * QUICK_RANGE_SELECTs to get rows - * Unique class to remove duplicate rows + * Unique class + - to remove duplicate rows for QUICK_INDEX_MERGE_SELECT + - to intersect rows for QUICK_INDEX_INTERSECT_SELECT INDEX MERGE OPTIMIZER - Current implementation doesn't detect all cases where index_merge could + Current implementation doesn't detect all cases where index merge could be used, in particular: - * index_merge will never be used if range scan is possible (even if - range scan is more expensive) - * index_merge+'using index' is not supported (this the consequence of + * index merge+'using index' is not supported (this the consequence of the above restriction) * If WHERE part contains complex nested AND and OR conditions, some ways - to retrieve rows using index_merge will not be considered. The choice + to retrieve rows using index merge will not be considered. The choice of read plan may depend on the order of conjuncts/disjuncts in WHERE part of the query, see comments near imerge_list_or_list and SEL_IMERGE::or_sel_tree_with_checks functions for details. - * There is no "index_merge_ref" method (but index_merge on non-first + * There is no "index_merge_ref" method (but index merge on non-first table in join is possible with 'range checked for each record'). - See comments around SEL_IMERGE class and test_quick_select for more - details. ROW RETRIEVAL ALGORITHM - index_merge uses Unique class for duplicates removal. index_merge takes - advantage of Clustered Primary Key (CPK) if the table has one. - The index_merge algorithm consists of two phases: + index merge/intersection uses Unique class for duplicates removal. + index merge/intersection takes advantage of Clustered Primary Key (CPK) + if the table has one. + The index merge/intersection algorithm consists of two phases: + + Phase 1 + (implemented by a QUICK_INDEX_MERGE_SELECT::read_keys_and_merge call): - Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique): prepare() { activate 'index only'; @@ -528,33 +551,32 @@ public: deactivate 'index only'; } - Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next - calls): + Phase 2 + (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls): fetch() { - retrieve all rows from row pointers stored in Unique; + retrieve all rows from row pointers stored in Unique + (merging/intersecting them); free Unique; - retrieve all rows for CPK scan; + if (! intersection) + retrieve all rows for CPK scan; } */ -class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I +class QUICK_INDEX_SORT_SELECT : public QUICK_SELECT_I { +protected: Unique *unique; public: - QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table); - ~QUICK_INDEX_MERGE_SELECT(); + QUICK_INDEX_SORT_SELECT(THD *thd, TABLE *table); + ~QUICK_INDEX_SORT_SELECT(); int init(); void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } int reset(void); - int get_next(); bool reverse_sorted() { return false; } bool unique_key_range() { return false; } - int get_type() { return QS_TYPE_INDEX_MERGE; } - void add_keys_and_lengths(String *key_names, String *used_lengths); - void add_info_string(String *str); bool is_keys_used(const MY_BITMAP *fields); #ifndef DBUG_OFF void dbug_dump(int indent, bool verbose); @@ -562,24 +584,57 @@ public: bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); - /* range quick selects this index_merge read consists of */ + /* range quick selects this index merge/intersect consists of */ List<QUICK_RANGE_SELECT> quick_selects; /* quick select that uses clustered primary key (NULL if none) */ QUICK_RANGE_SELECT* pk_quick_select; - /* true if this select is currently doing a clustered PK scan */ - bool doing_pk_scan; - MEM_ROOT alloc; THD *thd; - int read_keys_and_merge(); + virtual int read_keys_and_merge()= 0; /* used to get rows collected in Unique */ READ_RECORD read_record; }; + +class QUICK_INDEX_MERGE_SELECT : public QUICK_INDEX_SORT_SELECT +{ +private: + /* true if this select is currently doing a clustered PK scan */ + bool doing_pk_scan; +protected: + int read_keys_and_merge(); + +public: + QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table) + :QUICK_INDEX_SORT_SELECT(thd, table) {} + + int get_next(); + int get_type() { return QS_TYPE_INDEX_MERGE; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); +}; + +class QUICK_INDEX_INTERSECT_SELECT : public QUICK_INDEX_SORT_SELECT +{ +protected: + int read_keys_and_merge(); + +public: + QUICK_INDEX_INTERSECT_SELECT(THD *thd, TABLE *table) + :QUICK_INDEX_SORT_SELECT(thd, table) {} + + key_map filtered_scans; + int get_next(); + int get_type() { return QS_TYPE_INDEX_INTERSECT; } + void add_keys_and_lengths(String *key_names, String *used_lengths); + void add_info_string(String *str); +}; + + /* Rowid-Ordered Retrieval (ROR) index intersection quick select. This quick select produces intersection of row sequences returned diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 87e27d68b98..9b1d0df4251 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -6980,16 +6980,16 @@ struct MPVIO_EXT : public MYSQL_PLUGIN_VIO a helper function to report an access denied error in all the proper places */ -static void login_failed_error(THD *thd, bool passwd_used) +static void login_failed_error(THD *thd) { my_error(ER_ACCESS_DENIED_ERROR, MYF(0), thd->main_security_ctx.user, thd->main_security_ctx.host_or_ip, - passwd_used ? ER(ER_YES) : ER(ER_NO)); + thd->password ? ER(ER_YES) : ER(ER_NO)); general_log_print(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_ERROR), thd->main_security_ctx.user, thd->main_security_ctx.host_or_ip, - passwd_used ? ER(ER_YES) : ER(ER_NO)); + thd->password ? ER(ER_YES) : ER(ER_NO)); status_var_increment(thd->status_var.access_denied_errors); /* Log access denied messages to the error log when log-warnings = 2 @@ -7001,7 +7001,7 @@ static void login_failed_error(THD *thd, bool passwd_used) sql_print_warning(ER(ER_ACCESS_DENIED_ERROR), thd->main_security_ctx.user, thd->main_security_ctx.host_or_ip, - passwd_used ? ER(ER_YES) : ER(ER_NO)); + thd->password ? ER(ER_YES) : ER(ER_NO)); } } @@ -7266,7 +7266,7 @@ static bool find_mpvio_user(MPVIO_EXT *mpvio, Security_context *sctx) if (!mpvio->acl_user) { - login_failed_error(mpvio->thd, 0); + login_failed_error(mpvio->thd); return 1; } @@ -7583,8 +7583,11 @@ static ulong parse_client_handshake_packet(MPVIO_EXT *mpvio, return packet_error; } + thd->password= passwd_len > 0; if (find_mpvio_user(mpvio, sctx)) + { return packet_error; + } if (thd->client_capabilities & CLIENT_PLUGIN_AUTH) { @@ -8072,7 +8075,7 @@ bool acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_le DBUG_ASSERT(mpvio.status == MPVIO_EXT::FAILURE); if (!thd->is_error()) - login_failed_error(thd, thd->password); + login_failed_error(thd); DBUG_RETURN(1); } @@ -8092,7 +8095,7 @@ bool acl_authenticate(THD *thd, uint connect_errors, uint com_change_user_pkt_le */ if (acl_check_ssl(thd, acl_user)) { - login_failed_error(thd, thd->password); + login_failed_error(thd); DBUG_RETURN(1); } diff --git a/sql/sql_class.h b/sql/sql_class.h index 7f3b0cbfb03..4b49fbcd3ff 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3269,6 +3269,7 @@ class user_var_entry DTCollation collation; }; + /* Unique -- class for unique (removing of duplicates). Puts all values to the TREE. If the tree becomes too big, @@ -3285,27 +3286,43 @@ class Unique :public Sql_alloc IO_CACHE file; TREE tree; uchar *record_pointers; + ulong filtered_out_elems; bool flush(); uint size; + uint full_size; + uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ public: ulong elements; Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, - uint size_arg, ulonglong max_in_memory_size_arg); + uint size_arg, ulonglong max_in_memory_size_arg, + uint min_dupl_count_arg= 0); ~Unique(); ulong elements_in_tree() { return tree.elements_in_tree; } inline bool unique_add(void *ptr) { DBUG_ENTER("unique_add"); DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); - if (tree.elements_in_tree > max_elements && flush()) + if (!(tree.flag & TREE_ONLY_DUPS) && + tree.elements_in_tree >= max_elements && flush()) DBUG_RETURN(1); DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); } + bool is_in_memory() { return (my_b_tell(&file) == 0); } + void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; } + bool get(TABLE *table); + + /* Cost of searching for an element in the tree */ + inline static double get_search_cost(uint tree_elems, uint compare_factor) + { + return log((double) tree_elems) / (compare_factor * M_LN2); + } + static double get_use_cost(uint *buffer, uint nkeys, uint key_size, - ulonglong max_in_memory_size); + ulonglong max_in_memory_size, uint compare_factor, + bool intersect_fl, bool *in_memory); inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size, ulonglong max_in_memory_size) { @@ -3322,6 +3339,11 @@ public: friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); + + friend int unique_write_to_file_with_count(uchar* key, element_count count, + Unique *unique); + friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, + Unique *unique); }; diff --git a/sql/sql_list.h b/sql/sql_list.h index dc840cefc66..76b3145f24d 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -235,6 +235,11 @@ public: { if (!list->is_empty()) { + if (is_empty()) + { + *this= *list; + return; + } *last= list->first; last= list->last; elements+= list->elements; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 16fbb309d89..c0360828f3c 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2800,6 +2800,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, goto error; } table->quick_keys.clear_all(); + table->intersect_keys.clear_all(); table->reginfo.join_tab=s; table->reginfo.not_exists_optimize=0; bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); @@ -6907,8 +6908,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) used_tables|=current_map; if (tab->type == JT_REF && tab->quick && - (uint) tab->ref.key == tab->quick->index && - tab->ref.key_length < tab->quick->max_used_key_length) + (((uint) tab->ref.key == tab->quick->index && + tab->ref.key_length < tab->quick->max_used_key_length) || + tab->table->intersect_keys.is_set(tab->ref.key))) { /* Range uses longer key; Use this instead of ref on key */ tab->type=JT_ALL; @@ -11627,6 +11629,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, table->quick_keys.init(); table->covering_keys.init(); table->merge_keys.init(); + table->intersect_keys.init(); table->keys_in_use_for_query.init(); table->s= share; @@ -15648,7 +15651,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, by clustered PK values. */ - if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) goto use_filesort; @@ -16074,6 +16078,7 @@ check_reverse_order: bool error= FALSE; int quick_type= select->quick->get_type(); if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) @@ -16252,6 +16257,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, select->cleanup(); // filesort did select tab->select= 0; table->quick_keys.clear_all(); // as far as we cleanup select->quick + table->intersect_keys.clear_all(); table->sort.io_cache= tablesort_result_cache; } tab->set_select_cond(NULL, __LINE__); @@ -18782,6 +18788,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, quick_type= tab->select->quick->get_type(); if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) tab->type = JT_INDEX_MERGE; else @@ -19018,6 +19025,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) { extra.append(STRING_WITH_LEN("; Using ")); diff --git a/sql/sql_sort.h b/sql/sql_sort.h index f54b085eeda..fc4117e2767 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -57,6 +57,7 @@ typedef struct st_sort_param { uint addon_length; /* Length of added packed fields */ uint res_length; /* Length of records in final sorted file/buffer */ uint keys; /* Max keys / buffer */ + element_count min_dupl_count; ha_rows max_rows,examined_rows; TABLE *sort_form; /* For quicker make_sortkey */ SORT_FIELD *local_sortorder; @@ -80,4 +81,9 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file, IO_CACHE *to_file, uchar *sort_buffer, BUFFPEK *lastbuff,BUFFPEK *Fb, BUFFPEK *Tb,int flag); +int merge_index(SORTPARAM *param, uchar *sort_buffer, + BUFFPEK *buffpek, uint maxbuffer, + IO_CACHE *tempfile, IO_CACHE *outfile); + void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length); + diff --git a/sql/table.h b/sql/table.h index 7688d289f81..80519e486ac 100644 --- a/sql/table.h +++ b/sql/table.h @@ -693,7 +693,7 @@ struct st_table { needed by the query without reading the row. */ key_map covering_keys; - key_map quick_keys, merge_keys; + key_map quick_keys, merge_keys,intersect_keys; /* A set of keys that can be used in the query that references this table. diff --git a/sql/uniques.cc b/sql/uniques.cc index 561b1068097..e309caf9849 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -33,7 +33,6 @@ #include "mysql_priv.h" #include "sql_sort.h" - int unique_write_to_file(uchar* key, element_count count, Unique *unique) { /* @@ -45,6 +44,12 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique) return my_b_write(&unique->file, key, unique->size) ? 1 : 0; } +int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique) +{ + return my_b_write(&unique->file, key, unique->size) || + my_b_write(&unique->file, &count, sizeof(element_count)) ? 1 : 0; +} + int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) { memcpy(unique->record_pointers, key, unique->size); @@ -52,10 +57,28 @@ int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) return 0; } +int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique) +{ + if (count >= unique->min_dupl_count) + { + memcpy(unique->record_pointers, key, unique->size); + unique->record_pointers+=unique->size; + } + else + unique->filtered_out_elems++; + return 0; +} + + Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, - uint size_arg, ulonglong max_in_memory_size_arg) + uint size_arg, ulonglong max_in_memory_size_arg, + uint min_dupl_count_arg) :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0) { + min_dupl_count= min_dupl_count_arg; + full_size= size; + if (min_dupl_count_arg) + full_size+= sizeof(element_count); my_b_clear(&file); init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0, NULL, comp_func_fixed_arg); @@ -123,7 +146,8 @@ inline double log2_n_fact(double x) */ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, - uint *first, uint *last) + uint *first, uint *last, + uint compare_factor) { uint total_buf_elems= 0; for (uint *pbuf= first; pbuf <= last; pbuf++) @@ -134,7 +158,7 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, /* Using log2(n)=log(n)/log(2) formula */ return 2*((double)total_buf_elems*elem_size) / IO_SIZE + - total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2); + total_buf_elems*log((double) n_buffers) / (compare_factor * M_LN2); } @@ -167,7 +191,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, static double get_merge_many_buffs_cost(uint *buffer, uint maxbuffer, uint max_n_elems, - uint last_n_elems, int elem_size) + uint last_n_elems, int elem_size, + uint compare_factor) { register int i; double total_cost= 0.0; @@ -194,19 +219,22 @@ static double get_merge_many_buffs_cost(uint *buffer, { total_cost+=get_merge_buffers_cost(buff_elems, elem_size, buff_elems + i, - buff_elems + i + MERGEBUFF-1); + buff_elems + i + MERGEBUFF-1, + compare_factor); lastbuff++; } total_cost+=get_merge_buffers_cost(buff_elems, elem_size, buff_elems + i, - buff_elems + maxbuffer); + buff_elems + maxbuffer, + compare_factor); maxbuffer= lastbuff; } } /* Simulate final merge_buff call. */ total_cost += get_merge_buffers_cost(buff_elems, elem_size, - buff_elems, buff_elems + maxbuffer); + buff_elems, buff_elems + maxbuffer, + compare_factor); return total_cost; } @@ -221,7 +249,11 @@ static double get_merge_many_buffs_cost(uint *buffer, to get # bytes needed. nkeys #of elements in Unique key_size size of each elements in bytes - max_in_memory_size amount of memory Unique will be allowed to use + max_in_memory_size amount of memory Unique will be allowed to use + compare_factor used to calculate cost of one comparison + write_fl if the result must be saved written to disk + in_memory_elems OUT estimate of the number of elements in memory + if disk is not used RETURN Cost in disk seeks. @@ -259,7 +291,9 @@ static double get_merge_many_buffs_cost(uint *buffer, */ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, - ulonglong max_in_memory_size) + ulonglong max_in_memory_size, + uint compare_factor, + bool intersect_fl, bool *in_memory) { ulong max_elements_in_tree; ulong last_tree_elems; @@ -276,12 +310,15 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, result= 2*log2_n_fact(last_tree_elems + 1.0); if (n_full_trees) result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0); - result /= TIME_FOR_COMPARE_ROWID; + result /= compare_factor; DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys, n_full_trees, n_full_trees?max_elements_in_tree:0, last_tree_elems)); + if (in_memory) + *in_memory= !n_full_trees; + if (!n_full_trees) return result; @@ -295,12 +332,12 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE); /* Cost of merge */ + if (intersect_fl) + key_size+= sizeof(element_count); double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees, max_elements_in_tree, - last_tree_elems, key_size); - if (merge_cost < 0.0) - return merge_cost; - + last_tree_elems, key_size, + compare_factor); result += merge_cost; /* Add cost of reading the resulting sequence, assuming there were no @@ -327,7 +364,10 @@ bool Unique::flush() file_ptr.count=tree.elements_in_tree; file_ptr.file_pos=my_b_tell(&file); - if (tree_walk(&tree, (tree_walk_action) unique_write_to_file, + tree_walk_action action= min_dupl_count ? + (tree_walk_action) unique_write_to_file_with_count : + (tree_walk_action) unique_write_to_file; + if (tree_walk(&tree, action, (void*) this, left_root_right) || insert_dynamic(&file_ptrs, (uchar*) &file_ptr)) return 1; @@ -357,6 +397,7 @@ Unique::reset() reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1); } elements= 0; + tree.flag= 0; } /* @@ -576,15 +617,19 @@ bool Unique::get(TABLE *table) { SORTPARAM sort_param; table->sort.found_records=elements+tree.elements_in_tree; - if (my_b_tell(&file) == 0) { /* Whole tree is in memory; Don't use disk if you don't need to */ if ((record_pointers=table->sort.record_pointers= (uchar*) my_malloc(size * tree.elements_in_tree, MYF(0)))) { - (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs, + tree_walk_action action= min_dupl_count ? + (tree_walk_action) unique_intersect_write_to_ptrs : + (tree_walk_action) unique_write_to_ptrs; + filtered_out_elems= 0; + (void) tree_walk(&tree, action, this, left_root_right); + table->sort.found_records-= filtered_out_elems; return 0; } } @@ -614,7 +659,9 @@ bool Unique::get(TABLE *table) sort_param.max_rows= elements; sort_param.sort_form=table; sort_param.rec_length= sort_param.sort_length= sort_param.ref_length= - size; + full_size; + sort_param.min_dupl_count= min_dupl_count; + sort_param.res_length= 0; sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length); sort_param.not_killable=1; @@ -635,8 +682,9 @@ bool Unique::get(TABLE *table) if (flush_io_cache(&file) || reinit_io_cache(&file,READ_CACHE,0L,0,0)) goto err; - if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr, - file_ptr, file_ptr+maxbuffer,0)) + sort_param.res_length= sort_param.rec_length- + (min_dupl_count ? sizeof(min_dupl_count) : 0); + if (merge_index(&sort_param, sort_buffer, file_ptr, maxbuffer, &file, outfile)) goto err; error=0; err: @@ -651,3 +699,5 @@ err: outfile->end_of_file=save_pos; return error; } + + diff --git a/sql/unireg.h b/sql/unireg.h index 57a9038d5f7..ccdbb650485 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -89,7 +89,7 @@ #define MAX_SELECT_NESTING (sizeof(nesting_map)*8-1) #define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD) -#define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD) +#define MIN_SORT_MEMORY (1024-MALLOC_OVERHEAD) /* Memory allocated when parsing a statement / saving a statement */ #define MEM_ROOT_BLOCK_SIZE 8192 |