diff options
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r-- | sql/sql_select.cc | 1109 |
1 files changed, 638 insertions, 471 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 037d7ea5df8..e85faa5cfa0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -103,7 +103,7 @@ static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item); static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after); static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); static void update_depend_map(JOIN *join); -static void update_depend_map(JOIN *join, ORDER *order); +static void update_depend_map_for_order(JOIN *join, ORDER *order); static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond, bool change_list, bool *simple_order); static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables, @@ -239,8 +239,6 @@ static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg, double *record_count_arg); static uint make_join_orderinfo(JOIN *join); -static int -join_read_record_no_init(JOIN_TAB *tab); Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, bool *inherited_fl); @@ -1035,15 +1033,26 @@ JOIN::optimize() /* Permorm the the optimization on fields evaluation mentioned above for all on expressions. - */ - for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++) + */ + { - if (*tab->on_expr_ref) + List_iterator<JOIN_TAB_RANGE> it(join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + uint first_tab_offs= const_tables; + while ((jt_range= it++)) { - *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref, - tab->cond_equal, - map2table); - (*tab->on_expr_ref)->update_used_tables(); + for (JOIN_TAB *tab= jt_range->start + first_tab_offs; + tab < jt_range->end; tab++) + { + if (*tab->on_expr_ref) + { + *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref, + tab->cond_equal, + map2table); + (*tab->on_expr_ref)->update_used_tables(); + } + } + first_tab_offs= 0; } } @@ -1318,8 +1327,11 @@ JOIN::optimize() */ if (need_tmp || select_distinct || group_list || order) { - for (uint i = const_tables; i < tables; i++) - join_tab[i].table->prepare_for_position(); + for (uint i= 0; i < tables; i++) + { + if (!(table[i]->map & const_table_map)) + table[i]->prepare_for_position(); + } } DBUG_EXECUTE("info",TEST_join(this);); @@ -2256,7 +2268,7 @@ JOIN::exec() WHERE clause for any tables after the sorted one. */ JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1]; - JOIN_TAB *end_table= &curr_join->join_tab[curr_join->tables]; + JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_jtrange_tables]; for (; curr_table < end_table ; curr_table++) { /* @@ -2624,7 +2636,7 @@ bool JOIN::setup_subquery_materialization() { Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate; if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && - in_subs->setup_engine()) + in_subs->setup_engine(FALSE)) return TRUE; } } @@ -2710,9 +2722,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, DBUG_ENTER("make_join_statistics"); table_count=join->tables; - stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count); + + stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count)); stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES); - table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2)); + table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*((table_count)*2)); if (!stat || !stat_ref || !table_vector) DBUG_RETURN(1); // Eom /* purecov: inspected */ @@ -2721,7 +2734,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, stat_end=stat+table_count; found_const_table_map= all_table_map=0; const_count=0; - + for (s= stat, i= 0; tables; s++, tables= tables->next_leaf, i++) @@ -2744,24 +2757,27 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, table->reginfo.join_tab=s; table->reginfo.not_exists_optimize=0; bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); - all_table_map|= table->map; + all_table_map|= s->table->map; s->join=join; s->info=0; // For describe + s->bush_root_tab= NULL; s->dependent= tables->dep_tables; s->key_dependent= 0; if (tables->schema_table) table->file->stats.records= 2; table->quick_condition_rows= table->file->stats.records; - + s->on_expr_ref= &tables->on_expr; if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ #ifdef WITH_PARTITION_STORAGE_ENGINE - if ((!table->file->stats.records || table->no_partitions_used) && !embedding) + if (!table->is_filled_at_execution() && + (!table->file->stats.records || table->no_partitions_used) && !embedding) #else - if (!table->file->stats.records && !embedding) + if (!table->is_filled_at_execution() && + !table->file->stats.records && !embedding) #endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. @@ -2795,7 +2811,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, #else const bool no_partitions_used= FALSE; #endif - if ((table->s->system || table->file->stats.records <= 1 || + if (!table->is_filled_at_execution() && + (table->s->system || table->file->stats.records <= 1 || no_partitions_used) && !s->dependent && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && @@ -2805,6 +2822,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, no_rows_const_tables |= table->map; } } + stat_vector[i]=0; join->outer_join=outer_join; @@ -2907,6 +2925,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { table=s->table; + if (table->is_filled_at_execution()) + continue; + /* If equi-join condition by a key is null rejecting and after a substitution of a const table the key value happens to be null @@ -3058,15 +3079,30 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, for (s=stat ; s < stat_end ; s++) { + s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { /* Only one matching row */ - s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0; + s->found_records= s->records= 1; + s->read_time=1.0; + s->worst_seeks=1.0; continue; } /* Approximate found rows and time to read them */ - s->found_records=s->records=s->table->file->stats.records; - s->read_time=(ha_rows) s->table->file->scan_time(); + + if (s->table->is_filled_at_execution()) + { + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records; + table->quick_condition_rows=s->records; + } + else + { + s->found_records= s->records= s->table->file->stats.records; + s->read_time= s->table->file->scan_time(); + } + /* Set a max range of how many seeks we can expect when using keys @@ -3089,10 +3125,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, Don't do range analysis if we're on the inner side of an outer join (2). Do range analysis if we're on the inner side of a semi-join (3). */ - if (!s->const_keys.is_clear_all() && // (1) - (!s->table->pos_in_table_list->embedding || // (2) - (s->table->pos_in_table_list->embedding && // (3) - s->table->pos_in_table_list->embedding->sj_on_expr))) // (3) + if (!s->const_keys.is_clear_all() && // (1) + (!s->table->pos_in_table_list->embedding || // (2) + (s->table->pos_in_table_list->embedding && // (3) + s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3) + !s->table->is_filled_at_execution()) { ha_rows records; SQL_SELECT *select; @@ -3130,7 +3167,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, if (records != HA_POS_ERROR) { s->found_records=records; - s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0); + s->read_time= s->quick ? s->quick->read_time : 0.0; } delete select; } @@ -3141,7 +3178,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, join->join_tab=stat; join->map2table=stat_ref; - join->table= join->all_tables=table_vector; + join->table= table_vector; join->const_tables=const_count; join->found_const_table_map=found_const_table_map; @@ -4303,7 +4340,7 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array) for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ; if (map == 1) // Only one table { - TABLE *tmp_table=join->all_tables[tablenr]; + TABLE *tmp_table=join->table[tablenr]; keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100); } } @@ -4808,8 +4845,9 @@ best_access_path(JOIN *join, else tmp= best_time; // Do nothing } - loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); + tmp += s->startup_cost; + loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE) { @@ -4851,13 +4889,19 @@ best_access_path(JOIN *join, Since we have a 'ref' access path, and FORCE INDEX instructs us to choose it over ALL/index, there is no need to consider a full table scan. + (5) Non-flattenable semi-joins: don't consider doing a scan of temporary + table if we had an option to make lookups into it. In real-world cases, + lookups are cheaper than full scans, but when the table is small, they + can be [considered to be] more expensive, which causes lookups not to + be used for cases with small datasets, which is annoying. */ if ((records >= s->found_records || best > s->read_time) && // (1) !(s->quick && best_key && s->quick->index == best_key->key && // (2) best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) - !(s->table->force_index && best_key && !s->quick)) // (4) + !(s->table->force_index && best_key && !s->quick) && // (4) + !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join ha_rows rnd_records= s->found_records; /* @@ -4904,8 +4948,7 @@ best_access_path(JOIN *join, } else { - /* Estimate cost of reading table. */ - tmp= s->table->file->scan_time(); + tmp= s->read_time; if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache { /* @@ -4934,6 +4977,7 @@ best_access_path(JOIN *join, } } + tmp += s->startup_cost; /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus @@ -4956,7 +5000,7 @@ best_access_path(JOIN *join, join->outer_join))); } } - + /* Update the cost information for the current partial plan */ pos->records_read= records; pos->read_time= best; @@ -5901,8 +5945,8 @@ void calc_used_field_length(THD *thd, JOIN_TAB *join_tab) rec_length+=(uint) max(4,blob_length); } /* - psergey-todo: why we don't count here rowid that we might need to store - when using DuplicateElimination? + TODO: why we don't count here rowid that we might need to store when + using DuplicateElimination? */ join_tab->used_fields=fields; join_tab->used_fieldlength=rec_length; @@ -6017,6 +6061,102 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) } +JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables) +{ + JOIN_TAB *first= join->join_tab; + if (after_const_tables) + first+= join->const_tables; + if (first < join->join_tab + join->top_jtrange_tables) + return first; + return NULL; +} + + +/* + A helper function to loop over all join's join_tab in sequential fashion + + DESCRIPTION + Depending on include_bush_roots parameter, JOIN_TABS that represent + SJM-scan/lookups are produced or omitted. + + SJM-Bush children are returned right after (or in place of) their container + join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems + to.) +*/ + +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) +{ + if (include_bush_roots && tab->bush_children) + return tab->bush_children->start; + + DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab); + if (tab->last_leaf_in_bush) + tab= tab->bush_root_tab; + + if (tab->bush_root_tab) + return ++tab; + + if (++tab == join->join_tab + join->top_jtrange_tables) + return NULL; + + if (!include_bush_roots && tab->bush_children) + tab= tab->bush_children->start; + + return tab; +} + + +/* + A helper function to iterate over all join tables in bush-children-first order + + DESCRIPTION + + For example, for this join plan + + ot1 ot2 sjm ot3 + | +--------+ + | | + it1 it2 it3 + + + the function will return + + ot1-ot2-it1-it2-it3-sjm-ot3 ... + +*/ + +JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) +{ + bool start= FALSE; + if (tab == NULL) + { + /* This means we're starting the enumeration */ + if (join->const_tables == join->top_jtrange_tables) + return NULL; + + tab= join->join_tab + join->const_tables; + start= TRUE; + } + + if (tab->last_leaf_in_bush) + return tab->bush_root_tab; + + /* Move to next tab in the array we're traversing*/ + if (!start) + tab++; + + if (tab == join->join_tab_ranges.head()->end) + return NULL; /* End */ + + if (tab->bush_children) + return tab->bush_children->start; + + return tab; +} + + +static Item * const null_ptr= NULL; + /* Set up join struct according to the picked join order in @@ -6031,7 +6171,12 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) fix_semijoin_strategies_for_picked_join_order) - create join->join_tab array and put there the JOIN_TABs in the join order - create data structures describing ref access methods. - + + NOTE + In this function we switch from pre-join-optimization JOIN_TABs to + post-join-optimization JOIN_TABs. This is achieved by copying the entire + JOIN_TAB objects. + RETURN FALSE OK TRUE Out of memory @@ -6040,7 +6185,7 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) static bool get_best_combination(JOIN *join) { - uint i,tablenr; + uint tablenr; table_map used_tables; JOIN_TAB *join_tab,*j; KEYUSE *keyuse; @@ -6058,11 +6203,74 @@ get_best_combination(JOIN *join) used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read fix_semijoin_strategies_for_picked_join_order(join); - + + JOIN_TAB_RANGE *root_range= new JOIN_TAB_RANGE; + root_range->start= join->join_tab; + /* root_range->end will be set later */ + join->join_tab_ranges.empty(); + join->join_tab_ranges.push_back(root_range); + + JOIN_TAB *sjm_nest_end= NULL; + JOIN_TAB *sjm_saved_tab; /* protected by sjm_nest_end */ + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) { TABLE *form; + POSITION *cur_pos= &join->best_positions[tablenr]; + if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || + cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + /* + Ok, we've entered an SJ-Materialization semi-join (note that this can't + be done recursively, semi-joins are not allowed to be nested). + */ + /* + 1. Put into main join order a JOIN_TAB that represents a lookup or scan + in the temptable. + // TODO: record this join_tab to be processed by + // setup_semijoin_elimination? + */ + bzero(j, sizeof(JOIN_TAB)); + j->join= join; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->ref.key_parts=0; + j->loosescan_match_tab= NULL; //non-nulls will be set later + j->use_join_cache= FALSE; + j->on_expr_ref= (Item**) &null_ptr; + j->cache= NULL; + j->keys= key_map(1); // The unique index is always in 'possible keys' in EXPLAIN + + + /* + 2. Proceed with processing SJM nest's join tabs, putting them into the + sub-order + */ + SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; + j->records= j->records_read= sjm->is_sj_scan? sjm->rows : 1; + JOIN_TAB *jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB) * sjm->tables); + JOIN_TAB_RANGE *jt_range= new JOIN_TAB_RANGE; + jt_range->start= jt; + jt_range->end= jt + sjm->tables; + //sjm->jt_range= jt_range; + join->join_tab_ranges.push_back(jt_range); + j->bush_children= jt_range; + j->bush_root_tab= NULL; //note: a lot of code depends on bush nodes not containing one another + j->quick= NULL; + sjm_nest_end= jt + sjm->tables; + sjm_saved_tab= j; + root_range->end= j+1; + + j= jt; + //goto loop_end_not_table; + } + *j= *join->best_positions[tablenr].table; + + if (sjm_nest_end) + j->bush_root_tab= sjm_saved_tab; + else + root_range->end= j+1; form=join->table[tablenr]=j->table; used_tables|= form->map; form->reginfo.join_tab=j; @@ -6070,14 +6278,14 @@ get_best_combination(JOIN *join) form->reginfo.not_exists_optimize=0; // Only with LEFT JOIN DBUG_PRINT("info",("type: %d", j->type)); if (j->type == JT_CONST) - continue; // Handled in make_join_stat.. + goto loop_end; // Handled in make_join_stat.. j->loosescan_match_tab= NULL; //non-nulls will be set later j->ref.key = -1; j->ref.key_parts=0; if (j->type == JT_SYSTEM) - continue; + goto loop_end; if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key) || (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { @@ -6088,10 +6296,24 @@ get_best_combination(JOIN *join) } else if (create_ref_for_key(join, j, keyuse, used_tables)) DBUG_RETURN(TRUE); // Something went wrong + loop_end: + j->records_read= join->best_positions[tablenr].records_read; + join->map2table[j->table->tablenr]= j; + + // If we've reached the end of sjm nest, switch back to main sequence + if (j + 1 == sjm_nest_end) + { + j->last_leaf_in_bush= TRUE; + j= sjm_saved_tab; + sjm_nest_end= NULL; + } } - for (i=0 ; i < table_count ; i++) - join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i; + join->top_jtrange_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + + //for (i=0 ; i < table_count ; i++) + // join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i; update_depend_map(join); DBUG_RETURN(0); } @@ -6341,6 +6563,8 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) join_tab= parent->join_tab_reexec; table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table; + top_jtrange_tables= 1; + tables= 1; const_tables= 0; const_table_map= 0; @@ -6386,6 +6610,9 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) join_tab->loosescan_match_tab= NULL; join_tab->emb_sj_nest= NULL; join_tab->pre_idx_push_select_cond= NULL; + join_tab->bush_root_tab= NULL; + join_tab->bush_children= NULL; + join_tab->last_leaf_in_bush= FALSE; bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record)); temp_table->status=0; temp_table->null_row=0; @@ -6410,6 +6637,7 @@ inline void add_cond_and_fix(Item **e1, Item *e2) } + /** Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions we've inferred from ref/eq_ref access performed. @@ -6464,9 +6692,11 @@ inline void add_cond_and_fix(Item **e1, Item *e2) static void add_not_null_conds(JOIN *join) { DBUG_ENTER("add_not_null_conds"); - for (uint i=join->const_tables ; i < join->tables ; i++) + + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) { - JOIN_TAB *tab=join->join_tab+i; if ((tab->type == JT_REF || tab->type == JT_EQ_REF || tab->type == JT_REF_OR_NULL) && !tab->table->maybe_null) @@ -6590,58 +6820,72 @@ static void make_outerjoin_info(JOIN *join) { DBUG_ENTER("make_outerjoin_info"); - for (uint i=join->const_tables ; i < join->tables ; i++) - { - JOIN_TAB *tab=join->join_tab+i; - TABLE *table=tab->table; - TABLE_LIST *tbl= table->pos_in_table_list; - TABLE_LIST *embedding= tbl->embedding; + bool top= TRUE; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; - if (tbl->outer_join) + while ((jt_range= it++)) + { + for (JOIN_TAB *tab=jt_range->start + (top ? join->const_tables : 0); + tab != jt_range->end; tab++) { + TABLE *table=tab->table; /* - Table tab is the only one inner table for outer join. - (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a - is in the query above.) + psergey: The following is probably incorrect, fix it when we get + semi+outer joins processing to work: */ - tab->last_inner= tab->first_inner= tab; - tab->on_expr_ref= &tbl->on_expr; - tab->cond_equal= tbl->cond_equal; - if (embedding) - tab->first_upper= embedding->nested_join->first_nested; - } - for ( ; embedding ; embedding= embedding->embedding) - { - /* Ignore sj-nests: */ - if (!embedding->on_expr) + if (!table) continue; - NESTED_JOIN *nested_join= embedding->nested_join; - if (!nested_join->counter) + TABLE_LIST *tbl= table->pos_in_table_list; + TABLE_LIST *embedding= tbl->embedding; + + if (tbl->outer_join) { /* - Table tab is the first inner table for nested_join. - Save reference to it in the nested join structure. - */ - nested_join->first_nested= tab; - tab->on_expr_ref= &embedding->on_expr; + Table tab is the only one inner table for outer join. + (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a + is in the query above.) + */ + tab->last_inner= tab->first_inner= tab; + tab->on_expr_ref= &tbl->on_expr; tab->cond_equal= tbl->cond_equal; - if (embedding->embedding) - tab->first_upper= embedding->embedding->nested_join->first_nested; - } - if (!tab->first_inner) - tab->first_inner= nested_join->first_nested; - if (tab->table->reginfo.not_exists_optimize) - tab->first_inner->table->reginfo.not_exists_optimize= 1; - if (++nested_join->counter < nested_join->n_tables) - break; - /* Table tab is the last inner table for nested join. */ - nested_join->first_nested->last_inner= tab; - if (tab->first_inner->table->reginfo.not_exists_optimize) + if (embedding) + tab->first_upper= embedding->nested_join->first_nested; + } + for ( ; embedding ; embedding= embedding->embedding) { - for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++) - join_tab->table->reginfo.not_exists_optimize= 1; - } + /* Ignore sj-nests: */ + if (!embedding->on_expr) + continue; + NESTED_JOIN *nested_join= embedding->nested_join; + if (!nested_join->counter) + { + /* + Table tab is the first inner table for nested_join. + Save reference to it in the nested join structure. + */ + nested_join->first_nested= tab; + tab->on_expr_ref= &embedding->on_expr; + tab->cond_equal= tbl->cond_equal; + if (embedding->embedding) + tab->first_upper= embedding->embedding->nested_join->first_nested; + } + if (!tab->first_inner) + tab->first_inner= nested_join->first_nested; + if (tab->table->reginfo.not_exists_optimize) + tab->first_inner->table->reginfo.not_exists_optimize= 1; + if (++nested_join->counter < nested_join->n_tables) + break; + /* Table tab is the last inner table for nested join. */ + nested_join->first_nested->last_inner= tab; + if (tab->first_inner->table->reginfo.not_exists_optimize) + { + for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++) + join_tab->table->reginfo.not_exists_optimize= 1; + } + } } + top= FALSE; } DBUG_VOID_RETURN; } @@ -6684,8 +6928,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->const_table_map, (table_map) 0, TRUE); DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY);); - for (JOIN_TAB *tab= join->join_tab+join->const_tables; - tab < join->join_tab+join->tables ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, FALSE)) { if (*tab->on_expr_ref) { @@ -6724,15 +6969,21 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); JOIN_TAB *tab; table_map current_map; - for (uint i=join->const_tables ; i < join->tables ; i++) + uint i= join->const_tables; + for (tab= next_depth_first_tab(join, NULL); tab; + tab= next_depth_first_tab(join, tab), i++) { - tab= join->join_tab+i; /* first_inner is the X in queries like: SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X */ - JOIN_TAB *first_inner_tab= tab->first_inner; - current_map= tab->table->map; + JOIN_TAB *first_inner_tab= tab->first_inner; + + if (tab->table) + current_map= tab->table->map; + else + current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; + bool use_quick_range=0; COND *tmp; @@ -6779,8 +7030,20 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } tmp= NULL; + if (cond) - tmp= make_cond_for_table(cond, used_tables, current_map, FALSE); + { + if (tab->bush_children) + { + // Reached the materialization tab + tmp= make_cond_after_sjm(cond, cond, save_used_tables, used_tables); + used_tables= save_used_tables | used_tables; + save_used_tables= 0; + } + else + tmp= make_cond_for_table(cond, used_tables, current_map, FALSE); + } + if (cond && !tmp && tab->quick) { // Outer join if (tab->type != JT_ALL) @@ -6808,9 +7071,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL || tab->type == JT_EQ_REF || first_inner_tab) { - DBUG_EXECUTE("where", - print_where(tmp,tab->table->alias.c_ptr(), - QT_ORDINARY);); + DBUG_EXECUTE("where",print_where(tmp, + tab->table? tab->table->alias.c_ptr() :"sjm-nest", + QT_ORDINARY);); SQL_SELECT *sel= tab->select= ((SQL_SELECT*) thd->memdup((uchar*) select, sizeof(*select))); @@ -6834,16 +7097,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab->set_select_cond(tmp, __LINE__); /* Push condition to storage engine if this is enabled and the condition is not guarded */ - tab->table->file->pushed_cond= NULL; - if (thd->variables.engine_condition_pushdown && !first_inner_tab) + if (tab->table) { - COND *push_cond= - make_cond_for_table(tmp, current_map, current_map, FALSE); - if (push_cond) + tab->table->file->pushed_cond= NULL; + if (thd->variables.engine_condition_pushdown && !first_inner_tab) { - /* Push condition to handler */ - if (!tab->table->file->cond_push(push_cond)) - tab->table->file->pushed_cond= push_cond; + COND *push_cond= + make_cond_for_table(tmp, current_map, current_map, FALSE); + if (push_cond) + { + /* Push condition to handler */ + if (!tab->table->file->cond_push(push_cond)) + tab->table->file->pushed_cond= push_cond; + } } } } @@ -6875,7 +7141,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } tab->quick=0; } - uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1; + uint ref_key= sel->head? (uint) sel->head->reginfo.join_tab->ref.key+1 : 0; if (i == join->const_tables && ref_key) { if (!tab->const_keys.is_clear_all() && @@ -6894,12 +7160,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) the index if we are using limit and this is the first table */ - if ((cond && - (!tab->keys.is_subset(tab->const_keys) && i > 0)) || - (!tab->const_keys.is_clear_all() && i == join->const_tables && - join->unit->select_limit_cnt < - join->best_positions[i].records_read && - !(join->select_options & OPTION_FOUND_ROWS))) + if (!tab->table->is_filled_at_execution() && + ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || + (!tab->const_keys.is_clear_all() && i == join->const_tables && + join->unit->select_limit_cnt < + join->best_positions[i].records_read && + !(join->select_options & OPTION_FOUND_ROWS)))) { /* Join with outer join condition */ COND *orig_cond=sel->cond; @@ -6982,7 +7248,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } /* - Push down conditions from all on expressions. + Push down conditions from all ON expressions. Each of these conditions are guarded by a variable that turns if off just before null complemented row for outer joins is formed. Thus, the condition from an @@ -6991,8 +7257,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ /* First push down constant conditions from on expressions */ - for (JOIN_TAB *join_tab= join->join_tab+join->const_tables; - join_tab < join->join_tab+join->tables ; join_tab++) + for (JOIN_TAB *join_tab= first_linear_tab(join, TRUE); + join_tab; + join_tab= next_linear_tab(join, join_tab, FALSE)) { if (*join_tab->on_expr_ref) { @@ -7017,10 +7284,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } } - /* Push down non-constant conditions from on expressions */ + /* Push down non-constant conditions from ON expressions */ JOIN_TAB *last_tab= tab; while (first_inner_tab && first_inner_tab->last_inner == last_tab) - { + { /* Table tab is the last inner table of an outer join. An on expression is always attached to it. @@ -7029,8 +7296,18 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) table_map used_tables2= (join->const_table_map | OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); - for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) { + if (!tab->table) + { + /* + psergey-todo: this is probably incorrect, fix this when we get + correct processing for outer joins + semi joins + */ + continue; + } current_map= tab->table->map; used_tables2|= current_map; COND *tmp_cond= make_cond_for_table(on_expr, used_tables2, @@ -7071,37 +7348,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) cond_tab->select_cond->update_used_tables(); if (cond_tab->select) cond_tab->select->cond= cond_tab->select_cond; - } + } + if (tab == last_tab) + break; } first_inner_tab= first_inner_tab->first_upper; } - - if (save_used_tables && !(used_tables & - ~(tab->emb_sj_nest->sj_inner_tables | - join->const_table_map | PSEUDO_TABLE_BITS))) - { - /* - We have reached the end of semi join nest. That is, the join order - looks like this: - - outer_tbl1 SJ-Materialize(inner_tbl1 ... inner_tblN) outer_tbl ... - ^ - \-we're here - At this point, we need to produce two conditions - - A condition that can be checked when we have all of the sj-inner - tables (inner_tbl1 ... inner_tblN). This will be used while doing - materialization. - - A condition that can be checked when we have all of the tables - in the prefix (both inner and outer). - */ - tab->emb_sj_nest->sj_mat_info->join_cond= - cond ? - make_cond_after_sjm(cond, cond, save_used_tables, used_tables): - NULL; - used_tables= save_used_tables | used_tables; - save_used_tables= 0; - } - } } DBUG_RETURN(0); @@ -7290,7 +7542,7 @@ void revise_cache_usage(JOIN_TAB *join_tab) SYNOPSIS end_sj_materialize() join The join - join_tab Last join table + join_tab Points to right after the last join_tab in materialization bush end_of_records FALSE <=> This call is made to pass another record combination TRUE <=> EOF (no action) @@ -7308,7 +7560,7 @@ void revise_cache_usage(JOIN_TAB *join_tab) NESTED_LOOP_ERROR */ -static enum_nested_loop_state +enum_nested_loop_state end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) { int error; @@ -7448,15 +7700,17 @@ uint check_join_cache_usage(JOIN_TAB *tab, uint i= tab - join->join_tab; *icp_other_tables_ok= TRUE; - if (cache_level == 0 || i == join->const_tables) + /* + Don't use join cache if @@join_cache_level==0 or this table is the first + one join suborder (either at top level or inside a bush) + */ + if (cache_level == 0 || tab == join->join_tab + join->const_tables || + (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)) return 0; if (options & SELECT_NO_JOIN_CACHE) goto no_join_cache; - /* - psergey-todo: why the below when execution code seems to handle the - "range checked for each record" case? - */ + if (tab->use_quick == 2) goto no_join_cache; /* @@ -7474,8 +7728,8 @@ uint check_join_cache_usage(JOIN_TAB *tab, Don't use join buffering if we're dictated not to by no_jbuf_after (this ...) */ - if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || - sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) + if ((!tab->bush_root_tab? !(i <= no_jbuf_after) : FALSE) || + tab->loosescan_match_tab || tab->bush_children) goto no_join_cache; for (JOIN_TAB *first_inner= tab->first_inner; first_inner; @@ -7582,8 +7836,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) uint i; bool statistics= test(!(join->select_options & SELECT_DESCRIBE)); bool sorted= 1; - uint first_sjm_table= MAX_TABLES; - uint last_sjm_table= MAX_TABLES; + //uint first_sjm_table= MAX_TABLES; + //uint last_sjm_table= MAX_TABLES; DBUG_ENTER("make_join_readinfo"); @@ -7591,17 +7845,34 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) setup_semijoin_dups_elimination(join, options, no_jbuf_after)) DBUG_RETURN(TRUE); /* purecov: inspected */ - for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++) + i= 0; + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE), i++) { - JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; bool icp_other_tables_ok; + tab->sorted= sorted; + sorted= 0; // only first must be sorted + + if (tab->bush_children) + { + if (setup_sj_materialization(tab)) + return TRUE; + table= tab->table; + } + tab->read_record.table= table; tab->read_record.file=table->file; tab->read_record.unlock_row= rr_unlock_row; - tab->next_select=sub_select; /* normal select */ - tab->sorted= sorted; - sorted= 0; // only first must be sorted + + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + } + if (tab->loosescan_match_tab) { if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab-> @@ -7609,19 +7880,6 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) return TRUE; /* purecov: inspected */ tab->sorted= TRUE; } - if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) - { - /* This is a start of semi-join nest */ - first_sjm_table= i; - last_sjm_table= i + join->best_positions[i].n_sj_tables; - if (i == join->const_tables) - join->first_select= sub_select_sjm; - else - tab[-1].next_select= sub_select_sjm; - - if (setup_sj_materialization(tab)) - return TRUE; - } table->status=STATUS_NO_RECORD; pick_table_access_method (tab); @@ -7709,7 +7967,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) } else { - tab->read_first_record= join_init_read_record; + if (!tab->bush_children) + tab->read_first_record= join_init_read_record; if (i == join->const_tables) { if (tab->select && tab->select->quick) @@ -7784,13 +8043,16 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) abort(); /* purecov: deadcode */ } } - join->join_tab[join->tables-1].next_select=0; /* Set by do_select */ + uint n_top_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ -/* + /* If a join buffer is used to join a table the ordering by an index for the first non-constant table cannot be employed anymore. */ - for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++) + for (i=join->const_tables ; i < n_top_tables ; i++) { JOIN_TAB *tab=join->join_tab+i; if (tab->use_join_cache) @@ -7849,6 +8111,9 @@ bool error_if_full_join(JOIN *join) /** cleanup JOIN_TAB. + + DESCRIPTION + This is invoked when we've finished all join executions. */ void JOIN_TAB::cleanup() @@ -7867,6 +8132,12 @@ void JOIN_TAB::cleanup() { table->disable_keyread(); table->file->ha_index_or_rnd_end(); + + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + table->pos_in_table_list->jtbm_subselect->cleanup(); + } /* We need to reset this for next select (Tested in part_of_refkey) @@ -8061,7 +8332,7 @@ void JOIN::cleanup(bool full) if (table) { - JOIN_TAB *tab,*end; + JOIN_TAB *tab; /* Only a sorted table may be cached. This sorted table is always the first non const table in join->table @@ -8074,13 +8345,15 @@ void JOIN::cleanup(bool full) if (full) { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE)) tab->cleanup(); + //psergey4: how is the above supposed to work when + //top_jtrange_tables==FALSE? It will crash right away! table= 0; } else { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE)) { if (tab->table) tab->table->file->ha_index_or_rnd_end(); @@ -8218,24 +8491,29 @@ only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables) static void update_depend_map(JOIN *join) { - JOIN_TAB *join_tab=join->join_tab, *end=join_tab+join->tables; - - for (; join_tab != end ; join_tab++) - { - TABLE_REF *ref= &join_tab->ref; - table_map depend_map=0; - Item **item=ref->items; - uint i; - for (i=0 ; i < ref->key_parts ; i++,item++) - depend_map|=(*item)->used_tables(); - ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT; - depend_map&= ~OUTER_REF_TABLE_BIT; - for (JOIN_TAB **tab=join->map2table; - depend_map ; - tab++,depend_map>>=1 ) - { - if (depend_map & 1) - ref->depend_map|=(*tab)->ref.depend_map; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + + while ((jt_range= it++)) + { + for (JOIN_TAB *join_tab=jt_range->start; join_tab != jt_range->end; + join_tab++) + { + TABLE_REF *ref= &join_tab->ref; + table_map depend_map=0; + Item **item=ref->items; + uint i; + for (i=0 ; i < ref->key_parts ; i++,item++) + depend_map|=(*item)->used_tables(); + ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT; + depend_map&= ~OUTER_REF_TABLE_BIT; + for (JOIN_TAB **tab=join->map2table; + depend_map ; + tab++,depend_map>>=1 ) + { + if (depend_map & 1) + ref->depend_map|=(*tab)->ref.depend_map; + } } } } @@ -8243,7 +8521,7 @@ static void update_depend_map(JOIN *join) /** Update the dependency map for the sort order. */ -static void update_depend_map(JOIN *join, ORDER *order) +static void update_depend_map_for_order(JOIN *join, ORDER *order) { for (; order ; order=order->next) { @@ -8294,17 +8572,25 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, return change_list ? 0 : first_order; // No need to sort ORDER *order,**prev_ptr; - table_map first_table= join->join_tab[join->const_tables].table->map; + table_map first_table; table_map not_const_tables= ~join->const_table_map; table_map ref; + bool first_is_base_table= FALSE; DBUG_ENTER("remove_const"); + if (join->join_tab[join->const_tables].table) + { + first_table= join->join_tab[join->const_tables].table->map; + first_is_base_table= TRUE; + } + + prev_ptr= &first_order; *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1; /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */ - update_depend_map(join, first_order); + update_depend_map_for_order(join, first_order); for (order=first_order; order ; order=order->next) { table_map order_tables=order->item[0]->used_tables(); @@ -8342,7 +8628,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); continue; } - if ((ref=order_tables & (not_const_tables ^ first_table))) + if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table))) { if (!(order_tables & first_table) && only_eq_ref_tables(join,first_order, ref)) @@ -8418,8 +8704,11 @@ static void clear_tables(JOIN *join) must clear only the non-const tables, as const tables are not re-calculated. */ - for (uint i=join->const_tables ; i < join->tables ; i++) - mark_as_null_row(join->table[i]); // All fields are NULL + for (uint i= 0 ; i < join->tables ; i++) + { + if (!(join->table[i]->map & join->const_table_map)) + mark_as_null_row(join->table[i]); // All fields are NULL + } } /***************************************************************************** @@ -9240,7 +9529,25 @@ static int compare_fields_by_table_order(Item_field *field1, if (outer_ref) return cmp; JOIN_TAB **idx= (JOIN_TAB **) table_join_idx; - cmp= idx[field2->field->table->tablenr]-idx[field1->field->table->tablenr]; + + JOIN_TAB *tab1= idx[field1->field->table->tablenr]; + JOIN_TAB *tab2= idx[field2->field->table->tablenr]; + + /* + if one of the table is inside a merged SJM nest and another one isn't, + compare SJM bush roots of the tables. + */ + if (tab1->bush_root_tab != tab2->bush_root_tab) + { + if (tab1->bush_root_tab) + tab1= tab1->bush_root_tab; + + if (tab2->bush_root_tab) + tab2= tab2->bush_root_tab; + } + + cmp= tab2 - tab1; + return cmp < 0 ? -1 : (cmp ? 1 : 0); } @@ -9328,7 +9635,8 @@ Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, /* Pick the "head" item: the constant one or the first in the join order - that's not inside some SJM nest. + that's not inside some SJM nest. psergey2: out-of-date comment. It is ok + inside SJM, too. */ if (item_const) head= item_const; @@ -10415,6 +10723,9 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, for (i= first_tab; i <= last_tab; i++) reopt_remaining_tables |= join->positions[i].table->table->map; + table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; + join->cur_sj_inner_tables= 0; + for (i= first_tab; i <= last_tab; i++) { JOIN_TAB *rs= join->positions[i].table; @@ -10440,6 +10751,7 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, if (!rs->emb_sj_nest) *outer_rec_count *= pos.records_read; } + join->cur_sj_inner_tables= save_cur_sj_inner_tables; *reopt_cost= cost; } @@ -11445,7 +11757,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, for distinct, as we want the distinct index to be usable in this case too. */ - item->marker == 4 || param->bit_fields_as_long, // psergey-feb17 + item->marker == 4 || param->bit_fields_as_long, force_copy_fields, param->convert_blob_length); @@ -11719,6 +12031,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, share->keys=1; share->uniques= test(using_unique_constraint); table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -11734,6 +12048,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; + if (cur_group == group) + field->key_start.set_bit(0); key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); @@ -11791,7 +12107,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, indexes on blobs with arbitrary length. Such indexes cannot be used for lookups. */ - //// psergey-merge: using_unique_constraint=1; share->uniques= 1; } null_pack_length-=hidden_null_pack_length; @@ -11804,6 +12119,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; @@ -11842,6 +12159,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, { key_part_info->null_bit=0; key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + key_part_info->null_bit= (*reg_field)->null_bit; + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); + key_part_info->offset= (*reg_field)->offset(table->record[0]); key_part_info->length= (uint16) (*reg_field)->pack_length(); /* TODO: @@ -12655,8 +12979,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) Next_select_func end_select= setup_end_select_func(join); if (join->tables) { - join->join_tab[join->tables-1].next_select= end_select; - + join->join_tab[join->top_jtrange_tables - 1].next_select= end_select; join_tab=join->join_tab+join->const_tables; } join->send_records=0; @@ -12694,9 +13017,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) else { DBUG_ASSERT(join->tables); - error= join->first_select(join,join_tab,0); + error= sub_select(join,join_tab,0); if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) - error= join->first_select(join,join_tab,1); + error= sub_select(join,join_tab,1); if (error == NESTED_LOOP_QUERY_LIMIT) error= NESTED_LOOP_OK; /* select_limit used */ } @@ -12763,134 +13086,7 @@ int rr_sequential_and_unpack(READ_RECORD *info) /* - Semi-join materialization join function - - SYNOPSIS - sub_select_sjm() - join The join - join_tab The first table in the materialization nest - end_of_records FALSE <=> This call is made to pass another record - combination - TRUE <=> EOF - - DESCRIPTION - This is a join execution function that does materialization of a join - suborder before joining it to the rest of the join. - - The table pointed by join_tab is the first of the materialized tables. - This function first creates the materialized table and then switches to - joining the materialized table with the rest of the join. - - The materialized table can be accessed in two ways: - - index lookups - - full table scan - - RETURN - One of enum_nested_loop_state values -*/ - -enum_nested_loop_state -sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) -{ - int res; - enum_nested_loop_state rc; - - DBUG_ENTER("sub_select_sjm"); - - if (!join_tab->emb_sj_nest) - { - /* - We're handling GROUP BY/ORDER BY, this is the first table, and we've - actually executed the join already and now we're just reading the - result of the join from the temporary table. - Bypass to regular join handling. - Yes, it would be nicer if sub_select_sjm wasn't called at all in this - case but there's no easy way to arrange this. - */ - rc= sub_select(join, join_tab, end_of_records); - DBUG_RETURN(rc); - } - - SJ_MATERIALIZATION_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info; - if (end_of_records) - { - rc= (*join_tab[sjm->tables - 1].next_select)(join, - join_tab + sjm->tables, - end_of_records); - DBUG_RETURN(rc); - } - if (!sjm->materialized) - { - /* - Do the materialization. First, put end_sj_materialize after the last - inner table so we can catch record combinations of sj-inner tables. - */ - Next_select_func next_func= join_tab[sjm->tables - 1].next_select; - join_tab[sjm->tables - 1].next_select= end_sj_materialize; - - /* - Now run the join for the inner tables. The first call is to run the - join, the second one is to signal EOF (this is essential for some - join strategies, e.g. it will make join buffering flush the records) - */ - if ((rc= sub_select(join, join_tab, FALSE)) < 0 || - (rc= sub_select(join, join_tab, TRUE/*EOF*/)) < 0) - { - join_tab[sjm->tables - 1].next_select= next_func; - DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ - } - join_tab[sjm->tables - 1].next_select= next_func; - - /* - Ok, materialization finished. Initialize the access to the temptable - */ - sjm->materialized= TRUE; - join_tab->read_record.read_record= join_no_more_records; - if (sjm->is_sj_scan) - { - /* Initialize full scan */ - JOIN_TAB *last_tab= join_tab + (sjm->tables - 1); - init_read_record(&last_tab->read_record, join->thd, - sjm->table, NULL, TRUE, TRUE, FALSE); - - DBUG_ASSERT(last_tab->read_record.read_record == rr_sequential); - last_tab->read_first_record= join_read_record_no_init; - last_tab->read_record.copy_field= sjm->copy_field; - last_tab->read_record.copy_field_end= sjm->copy_field + - sjm->sjm_table_cols.elements; - last_tab->read_record.read_record= rr_sequential_and_unpack; - } - } - - if (sjm->is_sj_scan) - { - /* Do full scan of the materialized table */ - JOIN_TAB *last_tab= join_tab + (sjm->tables - 1); - - Item *save_cond= last_tab->select_cond; - last_tab->set_select_cond(sjm->join_cond, __LINE__); - - rc= sub_select(join, last_tab, end_of_records); - last_tab->set_select_cond(save_cond, __LINE__); - DBUG_RETURN(rc); - } - else - { - /* Do index lookup in the materialized table */ - if ((res= join_read_key2(join_tab->join->thd, join_tab, - sjm->table, sjm->tab_ref)) == 1) - DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ - if (res || !sjm->in_equality->val_int()) - DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); - } - rc= (*join_tab[sjm->tables - 1].next_select)(join, - join_tab + sjm->tables, - end_of_records); - DBUG_RETURN(rc); -} - -/* Fill the join buffer with partial records, retrieve all full matches for them SYNOPSIS @@ -13140,6 +13336,9 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) } join->thd->row_count= 0; + if ((rc= join_tab_execution_startup(join_tab)) < 0) + DBUG_RETURN(rc); + if (join_tab->loosescan_match_tab) join_tab->loosescan_match_tab->found_match= FALSE; @@ -13953,13 +14152,24 @@ int join_init_read_record(JOIN_TAB *tab) return (*tab->read_record.read_record)(&tab->read_record); } -static int +int join_read_record_no_init(JOIN_TAB *tab) { + Copy_field *save_copy, *save_copy_end; + + save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select,1,1, FALSE); + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + tab->read_record.read_record= rr_sequential_and_unpack; + return (*tab->read_record.read_record)(&tab->read_record); } - static int join_read_first(JOIN_TAB *tab) { @@ -14604,8 +14814,25 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), *****************************************************************************/ /** - @return - 1 if right_item is used removable reference key on left_item + Check if a given equality is guaranteed to be true by use of ref access + + SYNOPSIS + test_if_ref() + root_cond + left_item + right_item + + DESCRIPTION + Check if the given "left_item = right_item" equality is guaranteed to be + true by use of [eq_]ref access method. + + We need root_cond as we can't remove ON expressions even if employed ref + access guarantees that they are true. This is because TODO + + RETURN + TRUE if right_item is used removable reference key on left_item + FALSE Otherwise + */ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) @@ -14663,7 +14890,8 @@ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) SYNOPSIS make_cond_for_table() cond Condition to analyze - tables Tables for which "current field values" are available + tables Tables for which "current field values" are available (this + includes used_table) used_table Table that we're extracting the condition for (may also include PSEUDO_TABLE_BITS exclude_expensive_cond Do not push expensive conditions @@ -14697,7 +14925,8 @@ make_cond_for_table(Item *cond, table_map tables, table_map used_table, return make_cond_for_table_from_pred(cond, cond, tables, used_table, exclude_expensive_cond); } - + + static Item * make_cond_for_table_from_pred(Item *root_cond, Item *cond, table_map tables, table_map used_table, @@ -14718,6 +14947,7 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, */ !((used_table & 1) && cond->is_expensive())) return (COND*) 0; // Already checked + if (cond->type() == Item::COND_ITEM) { if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -14792,6 +15022,7 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, */ (!used_table && exclude_expensive_cond && cond->is_expensive())) return (COND*) 0; // Can't check this yet + if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK) return cond; // Not boolean op @@ -14818,14 +15049,29 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, } +/* + The difference of this from make_cond_for_table() is that we're in the + following state: + 1. conditions referring to 'tables' have been checked + 2. conditions referring to sjm_tables have been checked, too + 3. We need condition that couldn't be checked in #1 or #2 but + can be checked when we get both (tables | sjm_tables). + +*/ static COND * make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables) { + /* + We assume that conditions that refer to only join prefix tables or + sjm_tables have already been checked. + */ if ((!(cond->used_tables() & ~tables) || !(cond->used_tables() & ~sjm_tables))) return (COND*) 0; // Already checked + + /* AND/OR recursive descent */ if (cond->type() == Item::COND_ITEM) { if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -15392,8 +15638,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, &usable_keys)) < MAX_KEY) { /* Found key that can be used to retrieve data in sorted order */ - //psergey-mrr:if (tab->pre_idx_push_select_cond) - // tab->select_cond= tab->select->cond= tab->pre_idx_push_select_cond; if (tab->ref.key >= 0) { /* @@ -18368,16 +18612,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, { table_map used_tables=0; - uchar sjm_nests[MAX_TABLES]; - uint sjm_nests_cur=0; - uint sjm_nests_end= 0; - uint end_table= join->tables; bool printing_materialize_nest= FALSE; uint select_id= join->select_lex->select_number; - for (uint i=0 ; i < end_table ; i++) + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + while ((jt_range= it++)) + { + if (jt_range != join->join_tab_ranges.head()) + { + select_id= jt_range->start->emb_sj_nest->sj_subq_pred->get_identifier(); + printing_materialize_nest= TRUE; + } + + for (JOIN_TAB *tab= jt_range->start + 0; tab < jt_range->end; tab++) { - JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; TABLE_LIST *table_list= tab->table->pos_in_table_list; char buff[512]; @@ -18410,84 +18659,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, join->select_lex->type; item_list.push_back(new Item_string(stype, strlen(stype), cs)); - /* - Special processing for SJ-Materialization nests: print the fake table - and delay printing of the SJM nest contents until later. - */ - uint sj_strategy= join->best_positions[i].sj_strategy; - if (sj_is_materialize_strategy(sj_strategy) && - !printing_materialize_nest) - { - /* table */ - int len= my_snprintf(table_name_buffer, - sizeof(table_name_buffer)-1, - "subselect%d", - tab->emb_sj_nest->sj_subq_pred->get_identifier()); - item_list.push_back(new Item_string(table_name_buffer, len, cs)); - /* partitions */ - if (join->thd->lex->describe & DESCRIBE_PARTITIONS) - item_list.push_back(item_null); - /* type */ - uint type= (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? JT_ALL : JT_EQ_REF; - item_list.push_back(new Item_string(join_type_str[type], - strlen(join_type_str[type]), - cs)); - /* possible_keys */ - item_list.push_back(new Item_string("unique_key", - strlen("unique_key"), cs)); - if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - { - item_list.push_back(item_null); /* key */ - item_list.push_back(item_null); /* key_len */ - item_list.push_back(item_null); /* ref */ - } - else - { - /* key */ - item_list.push_back(new Item_string("unique_key", strlen("unique_key"), cs)); - /* key_len */ - uint klen= tab->emb_sj_nest->sj_mat_info->table->key_info[0].key_length; - uint buflen= longlong10_to_str(klen, keylen_str_buf, 10) - keylen_str_buf; - item_list.push_back(new Item_string(keylen_str_buf, buflen, cs)); - /* ref */ - item_list.push_back(new Item_string("func", strlen("func"), cs)); - } - /* rows */ - ha_rows rows= (ha_rows) ((sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? - tab->emb_sj_nest->sj_mat_info->rows : 1.0); - item_list.push_back(new Item_int((longlong)rows, - MY_INT64_NUM_DECIMAL_DIGITS)); - /* filtered */ - if (join->thd->lex->describe & DESCRIBE_EXTENDED) - item_list.push_back(new Item_float(1.0, 2)); - - /* Extra */ - if (need_tmp_table) - { - need_tmp_table=0; - extra.append(STRING_WITH_LEN("; Using temporary")); - } - if (need_order) - { - need_order=0; - extra.append(STRING_WITH_LEN("; Using filesort")); - } - /* Skip initial "; "*/ - const char *str= extra.ptr(); - uint32 extra_len= extra.length(); - if (extra_len) - { - str += 2; - extra_len -= 2; - } - item_list.push_back(new Item_string(str, extra_len, cs)); - - /* Register the nest for further processing: */ - sjm_nests[sjm_nests_end++]= i; - i += join->best_positions[i].n_sj_tables-1; - goto loop_end; - } - if (tab->type == JT_ALL && tab->select && tab->select->quick) { quick_type= tab->select->quick->get_type(); @@ -18508,6 +18679,16 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, table->derived_select_number); item_list.push_back(new Item_string(table_name_buffer, len, cs)); } + else if (tab->bush_children) + { + JOIN_TAB *ctab= tab->bush_children->start; + /* table */ + int len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<subquery%d>", + ctab->emb_sj_nest->sj_subq_pred->get_identifier()); + item_list.push_back(new Item_string(table_name_buffer, len, cs)); + } else { TABLE_LIST *real_table= table->pos_in_table_list; @@ -18602,7 +18783,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else { - if (table_list->schema_table && + if (table_list && /* SJM bushes don't have table_list */ + table_list->schema_table && table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) { const char *tmp_buff; @@ -18633,7 +18815,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } /* Add "rows" field to item_list. */ - if (table_list->schema_table) + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table) { /* in_rows */ if (join->thd->lex->describe & DESCRIBE_EXTENDED) @@ -18652,12 +18835,13 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, examined_rows= tab->limit; else { - tab->table->file->info(HA_STATUS_VARIABLE); - examined_rows= tab->table->file->stats.records; + //tab->table->file->info(HA_STATUS_VARIABLE); + //examined_rows= tab->table->file->stats.records; + examined_rows= (ha_rows)tab->records_read; } } else - examined_rows=(ha_rows)join->best_positions[i].records_read; + examined_rows=(ha_rows)tab->records_read; item_list.push_back(new Item_int((longlong) (ulonglong) examined_rows, MY_INT64_NUM_DECIMAL_DIGITS)); @@ -18678,7 +18862,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, */ float f= 0.0; if (examined_rows) - f= (float) (100.0 * join->best_positions[i].records_read / + f= (float) (100.0 * tab->records_read/*join->best_positions[i].records_read*/ / examined_rows); item_list.push_back(new Item_float(f, 2)); } @@ -18763,7 +18947,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, extra.append(STRING_WITH_LEN("; Using where")); } } - if (table_list->schema_table && + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table && table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) { if (!table_list->table_open_method) @@ -18842,25 +19027,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } } - /* - if (sj_is_materialize_strategy(sj_strategy)) - { - if (join->best_positions[i].n_sj_tables == 1) - extra.append(STRING_WITH_LEN("; Materialize")); - else - { - last_sjm_table= i + join->best_positions[i].n_sj_tables - 1; - extra.append(STRING_WITH_LEN("; Start materialize")); - } - if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - extra.append(STRING_WITH_LEN("; Scan")); - } - else if (last_sjm_table == i) - { - extra.append(STRING_WITH_LEN("; End materialize")); - } - */ - for (uint part= 0; part < tab->ref.key_parts; part++) { if (tab->ref.cond_guards[part]) @@ -18870,7 +19036,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } } - if (i > 0 && tab[-1].next_select == sub_select_cache) + if ((tab != jt_range->start) && tab[-1].next_select == sub_select_cache) extra.append(STRING_WITH_LEN("; Using join buffer")); /* Skip initial "; "*/ @@ -18883,20 +19049,13 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } item_list.push_back(new Item_string(str, len, cs)); } - loop_end: - if (i+1 == end_table && sjm_nests_cur != sjm_nests_end) - { - printing_materialize_nest= TRUE; - i= sjm_nests[sjm_nests_cur++] - 1; - end_table= (i+1) + join->best_positions[i+1].n_sj_tables; - select_id= join->join_tab[i+1].emb_sj_nest->sj_subq_pred->get_identifier(); - } // For next iteration used_tables|=table->map; if (result->send_data(item_list)) join->error= 1; } + } } for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit(); unit; @@ -19144,6 +19303,14 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); str->append(')'); } + else if (jtbm_subselect) + { + str->append(STRING_WITH_LEN(" <materialize> (")); + subselect_hash_sj_engine *hash_engine; + hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine; + hash_engine->materialize_engine->print(str, query_type); + str->append(')'); + } else { const char *cmp_name; // Name to compare with alias |