diff options
author | sergefp@pylon.mylan <> | 2006-09-29 18:35:03 +0400 |
---|---|---|
committer | sergefp@pylon.mylan <> | 2006-09-29 18:35:03 +0400 |
commit | 16506e906b1059debf184c570d3f7124357199b0 (patch) | |
tree | f7d9c5044452625090f105217b0d51c8508f9598 /sql/sql_select.cc | |
parent | 8da0a672afacbdeb1291620d908ebc39b3205e3d (diff) | |
parent | e37d6ca7cd45dfb36e7be43a60cfe5574dce06bf (diff) | |
download | mariadb-git-16506e906b1059debf184c570d3f7124357199b0.tar.gz |
Merge spetrunia@bk-internal.mysql.com:/home/bk/mysql-5.1-opt
into mysql.com:/home/psergey/mysql-5.1-bug14940-r10a
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r-- | sql/sql_select.cc | 110 |
1 files changed, 97 insertions, 13 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 5e44aaf063e..a7fee637a5f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -70,7 +70,7 @@ static int join_tab_cmp_straight(const void* ptr1, const void* ptr2); static void find_best(JOIN *join,table_map rest_tables,uint index, double record_count,double read_time); static uint cache_record_length(JOIN *join,uint index); -static double prev_record_reads(JOIN *join,table_map found_ref); +static double prev_record_reads(JOIN *join, uint idx, table_map found_ref); static bool get_best_combination(JOIN *join); static store_key *get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, @@ -3426,6 +3426,7 @@ set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) join->positions[idx].table= table; join->positions[idx].key=key; join->positions[idx].records_read=1.0; /* This is a const table */ + join->positions[idx].ref_depend_map= 0; /* Move the const table as down as possible in best_ref */ JOIN_TAB **pos=join->best_ref+idx+1; @@ -3483,6 +3484,7 @@ best_access_path(JOIN *join, double best= DBL_MAX; double best_time= DBL_MAX; double records= DBL_MAX; + table_map best_ref_depends_map; double tmp; ha_rows rec; @@ -3511,13 +3513,20 @@ best_access_path(JOIN *join, /* Calculate how many key segments of the current key we can use */ start_key= keyuse; - do - { /* for each keypart */ + + do /* For each keypart */ + { uint keypart= keyuse->keypart; table_map best_part_found_ref= 0; double best_prev_record_reads= DBL_MAX; - do + + do /* For each way to access the keypart */ { + + /* + if 1. expression doesn't refer to forward tables + 2. we won't get two ref-or-null's + */ if (!(remaining_tables & keyuse->used_tables) && !(ref_or_null_part && (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))) @@ -3525,8 +3534,9 @@ best_access_path(JOIN *join, found_part|= keyuse->keypart_map; if (!(keyuse->used_tables & ~join->const_table_map)) const_part|= keyuse->keypart_map; - double tmp= prev_record_reads(join, (found_ref | - keyuse->used_tables)); + + double tmp= prev_record_reads(join, idx, (found_ref | + keyuse->used_tables)); if (tmp < best_prev_record_reads) { best_part_found_ref= keyuse->used_tables & ~join->const_table_map; @@ -3565,7 +3575,7 @@ best_access_path(JOIN *join, Really, there should be records=0.0 (yes!) but 1.0 would be probably safer */ - tmp= prev_record_reads(join, found_ref); + tmp= prev_record_reads(join, idx, found_ref); records= 1.0; } else @@ -3580,7 +3590,7 @@ best_access_path(JOIN *join, max_key_part= (uint) ~0; if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) { - tmp = prev_record_reads(join, found_ref); + tmp = prev_record_reads(join, idx, found_ref); records=1.0; } else @@ -3833,6 +3843,7 @@ best_access_path(JOIN *join, best_records= records; best_key= start_key; best_max_key_part= max_key_part; + best_ref_depends_map= found_ref; } } records= best_records; @@ -3961,6 +3972,8 @@ best_access_path(JOIN *join, best= tmp; records= rows2double(rnd_records); best_key= 0; + /* range/index_merge/ALL/index access method are "independent", so: */ + best_ref_depends_map= 0; } } @@ -3969,6 +3982,7 @@ best_access_path(JOIN *join, join->positions[idx].read_time= best; join->positions[idx].key= best_key; join->positions[idx].table= s; + join->positions[idx].ref_depend_map= best_ref_depends_map; if (!best_key && idx == join->const_tables && @@ -4736,17 +4750,85 @@ cache_record_length(JOIN *join,uint idx) } +/* + Get the number of different row combinations for subset of partial join + + SYNOPSIS + prev_record_reads() + join The join structure + idx Number of tables in the partial join order (i.e. the + partial join order is in join->positions[0..idx-1]) + found_ref Bitmap of tables for which we need to find # of distinct + row combinations. + + DESCRIPTION + Given a partial join order (in join->positions[0..idx-1]) and a subset of + tables within that join order (specified in found_ref), find out how many + distinct row combinations of subset tables will be in the result of the + partial join order. + + This is used as follows: Suppose we have a table accessed with a ref-based + method. The ref access depends on current rows of tables in found_ref. + We want to count # of different ref accesses. We assume two ref accesses + will be different if at least one of access parameters is different. + Example: consider a query + + SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field + + and a join order: + t1, ref access on t1.key=c1 + t2, ref access on t2.key=c2 + t3, ref access on t3.key=t1.field + + For t1: n_ref_scans = 1, n_distinct_ref_scans = 1 + For t2: n_ref_scans = records_read(t1), n_distinct_ref_scans=1 + For t3: n_ref_scans = records_read(t1)*records_read(t2) + n_distinct_ref_scans = #records_read(t1) + + The reason for having this function (at least the latest version of it) + is that we need to account for buffering in join execution. + + An edge-case example: if we have a non-first table in join accessed via + ref(const) or ref(param) where there is a small number of different + values of param, then the access will likely hit the disk cache and will + not require any disk seeks. + + The proper solution would be to assume an LRU disk cache of some size, + calculate probability of cache hits, etc. For now we just count + identical ref accesses as one. + + RETURN + Expected number of row combinations +*/ + static double -prev_record_reads(JOIN *join,table_map found_ref) +prev_record_reads(JOIN *join, uint idx, table_map found_ref) { double found=1.0; - found_ref&= ~OUTER_REF_TABLE_BIT; - for (POSITION *pos=join->positions ; found_ref ; pos++) + POSITION *pos_end= join->positions - 1; + for (POSITION *pos= join->positions + idx - 1; pos != pos_end; pos--) { if (pos->table->table->map & found_ref) { - found_ref&= ~pos->table->table->map; - found*=pos->records_read; + found_ref|= pos->ref_depend_map; + /* + For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table + with no matching row we will get position[t2].records_read==0. + Actually the size of output is one null-complemented row, therefore + we will use value of 1 whenever we get records_read==0. + + Note + - the above case can't occur if inner part of outer join has more + than one table: table with no matches will not be marked as const. + + - Ideally we should add 1 to records_read for every possible null- + complemented row. We're not doing it because: 1. it will require + non-trivial code and add overhead. 2. The value of records_read + is an inprecise estimate and adding 1 (or, in the worst case, + #max_nested_outer_joins=64-1) will not make it any more precise. + */ + if (pos->records_read) + found*= pos->records_read; } } return found; @@ -10496,6 +10578,7 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos) tab->info="const row not found"; /* Mark for EXPLAIN that the row was not found */ pos->records_read=0.0; + pos->ref_depend_map= 0; if (!table->maybe_null || error > 0) DBUG_RETURN(error); } @@ -10521,6 +10604,7 @@ join_read_const_table(JOIN_TAB *tab, POSITION *pos) tab->info="unique row not found"; /* Mark for EXPLAIN that the row was not found */ pos->records_read=0.0; + pos->ref_depend_map= 0; if (!table->maybe_null || error > 0) DBUG_RETURN(error); } |