summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorSergey Petrunya <psergey@askmonty.org>2010-05-23 23:13:18 +0400
committerSergey Petrunya <psergey@askmonty.org>2010-05-23 23:13:18 +0400
commit3f595889d35c81540eb14ef3c53105cb6c4db833 (patch)
treeb3c1f3047009d3d94a41c169093b843883ee4470 /sql
parented8aa9868e074e6489f3b71f585606b6eecc97c6 (diff)
downloadmariadb-git-3f595889d35c81540eb14ef3c53105cb6c4db833.tar.gz
Subqueries: Inside-out execution for non-semijoin materialized subqueries that are AND-parts of the WHERE
- Code cleanup - Query plan change is due to s/ha_rows JOIN_TAB::read_time/double JOIN_TAB::read_time/
Diffstat (limited to 'sql')
-rw-r--r--sql/item_subselect.cc4
-rw-r--r--sql/item_subselect.h4
-rw-r--r--sql/opt_range.cc3
-rw-r--r--sql/opt_subselect.cc71
-rw-r--r--sql/opt_subselect.h6
-rw-r--r--sql/sql_select.cc70
-rw-r--r--sql/sql_select.h5
-rw-r--r--sql/table.cc6
-rw-r--r--sql/table.h7
9 files changed, 115 insertions, 61 deletions
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 1e782120867..508c2af057d 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -190,10 +190,6 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref)
changed= 1;
inside_first_fix_fields= FALSE;
-
- // all transformation is done (used by prepared statements)
- changed= 1;
-
/*
Substitute the current item with an Item_in_optimizer that was
created by Item_in_subselect::select_in_like_transformer and
diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index 3c3d6cdbf00..3e8ec9b174d 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -393,12 +393,12 @@ public:
join->sj_subselects.
jtbm-todo: option 1: let sj_subselects list pairs.
*/
- bool convert_to_semi_join;
+ bool is_flattenable_semijoin;
/*
Cost to populate the temporary table (set on if-needed basis).
*/
- double startup_cost;
+ //double startup_cost;
bool *get_cond_guard(int i)
{
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index c6aea650f5d..7c1bbc82ea4 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2285,7 +2285,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
quick=0;
needed_reg.clear_all();
quick_keys.clear_all();
- if (keys_to_use.is_clear_all() || head->pos_in_table_list->jtbm_subselect)
+ DBUG_ASSERT(!head->is_filled_at_execution());
+ if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
DBUG_RETURN(0);
records= head->file->stats.records;
if (!records)
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index d80fa48a169..51bd17e015c 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -172,7 +172,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
(void)subquery_types_allow_materialization(in_subs);
in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
- in_subs->convert_to_semi_join= TRUE; //JTBM
+ in_subs->is_flattenable_semijoin= TRUE;
/* Register the subquery for further processing in flatten_subqueries() */
select_lex->
@@ -240,7 +240,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1)
{
in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
- in_subs->convert_to_semi_join= FALSE;
+ in_subs->is_flattenable_semijoin= FALSE;
select_lex->outer_select()->
join->sj_subselects.append(thd->mem_root, in_subs);
}
@@ -358,7 +358,19 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
}
-static bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item)
+/*
+ Finalize IN->EXISTS conversion in case we couldn't use materialization.
+
+ DESCRIPTION Invoke the IN->EXISTS converter
+ Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE.
+
+ RETURN
+ FALSE - Ok
+ TRUE - Fatal error
+*/
+
+static
+bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item)
{
DBUG_ENTER("make_in_exists_conversion");
JOIN *child_join= item->unit->first_select()->join;
@@ -381,20 +393,15 @@ static bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *i
Item *substitute= item->substitution;
bool do_fix_fields= !item->substitution->fixed;
- Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)?
- &join->conds : &(item->emb_on_expr_nest->on_expr);
-
- Item *replace_me= item;
/*
- JTBM: the subquery was already mapped with Item_in_optimizer, so we
- should search for that, not for original Item_in_subselect.
- TODO: what about delaying that rewrite until here?
+ The Item_subselect has already been wrapped with Item_in_optimizer, so we
+ should search for item->optimizer, not 'item'.
*/
- if (!item->convert_to_semi_join)
- { //psergey-jtbm-fix: this branch is always taken??
- replace_me= item->optimizer;
- }
+ Item *replace_me= item->optimizer;
+ DBUG_ASSERT(replace_me==substitute);
+ Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)?
+ &join->conds : &(item->emb_on_expr_nest->on_expr);
if (replace_where_subcondition(join, tree, replace_me, substitute,
do_fix_fields))
DBUG_RETURN(TRUE);
@@ -528,7 +535,7 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
in_subq++)
{
bool remove_item= TRUE;
- if ((*in_subq)->convert_to_semi_join)
+ if ((*in_subq)->is_flattenable_semijoin)
{
if (convert_subq_to_sj(join, *in_subq))
DBUG_RETURN(TRUE);
@@ -548,7 +555,7 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
should search for that, not for original Item_in_subselect.
TODO: what about delaying that rewrite until here?
*/
- if (!(*in_subq)->convert_to_semi_join)
+ if (!(*in_subq)->is_flattenable_semijoin)
{
replace_me= (*in_subq)->optimizer;
}
@@ -593,7 +600,7 @@ skip_conversion:
should search for that, not for original Item_in_subselect.
TODO: what about delaying that rewrite until here?
*/
- if (!(*in_subq)->convert_to_semi_join)
+ if (!(*in_subq)->is_flattenable_semijoin)
{
replace_me= (*in_subq)->optimizer;
}
@@ -622,9 +629,33 @@ skip_conversion:
}
-void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
- ha_rows *scan_time)
+/*
+ Get #output_rows and scan_time estimates for a "delayed" table.
+
+ SYNOPSIS
+ get_delayed_table_estimates()
+ table IN Table to get estimates for
+ out_rows OUT E(#rows in the table)
+ scan_time OUT E(scan_time).
+ startup_cost OUT cost to populate the table.
+
+ DESCRIPTION
+ Get #output_rows and scan_time estimates for a "delayed" table. By
+ "delayed" here we mean that the table is filled at the start of query
+ execution. This means that the optimizer can't use table statistics to
+ get #rows estimate for it, it has to call this function instead.
+
+ This function is expected to make different actions depending on the nature
+ of the table. At the moment there is only one kind of delayed tables,
+ non-flattenable semi-joins.
+*/
+
+void get_delayed_table_estimates(TABLE *table,
+ ha_rows *out_rows,
+ double *scan_time,
+ double *startup_cost)
{
+ Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
item->optimize();
DBUG_ASSERT(item->engine->engine_type() ==
@@ -644,7 +675,7 @@ void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
read_time += join->best_positions[i].read_time;
}
*out_rows= rows;
- item->startup_cost= read_time;
+ *startup_cost= read_time;
/* Calculate cost of scanning the temptable */
double data_size= rows * hash_sj_engine->tmp_table->s->reclength;
/* Do like in handler::read_time */
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index 75e4a0df202..2f6c9d87367 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -367,8 +367,10 @@ int clear_sj_tmp_tables(JOIN *join);
int rewrite_to_index_subquery_engine(JOIN *join);
-void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
- ha_rows *scan_time);
+void get_delayed_table_estimates(TABLE *table,
+ ha_rows *out_rows,
+ double *scan_time,
+ double *startup_cost);
bool do_jtbm_materialization_if_needed(JOIN_TAB *tab);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 597c9ed5372..a81b08511b8 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -2581,10 +2581,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
{
/* s is the only inner table of an outer join */
#ifdef WITH_PARTITION_STORAGE_ENGINE
- if (!table->pos_in_table_list->jtbm_subselect &&
+ if (!table->is_filled_at_execution() &&
(!table->file->stats.records || table->no_partitions_used) && !embedding)
#else
- if (!table->pos_in_table_list->jtbm_subselect &&
+ if (!table->is_filled_at_execution() &&
!table->file->stats.records && !embedding)
#endif
{ // Empty table
@@ -2619,7 +2619,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
#else
const bool no_partitions_used= FALSE;
#endif
- if (!table->pos_in_table_list->jtbm_subselect &&
+ if (!table->is_filled_at_execution() &&
(table->s->system || table->file->stats.records <= 1 ||
no_partitions_used) &&
!s->dependent &&
@@ -2719,7 +2719,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
{
table=s->table;
- if (table->pos_in_table_list->jtbm_subselect)
+ if (table->is_filled_at_execution())
continue;
/*
@@ -2874,6 +2874,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
for (s=stat ; s < stat_end ; s++)
{
+ s->startup_cost= 0;
if (s->type == JT_SYSTEM || s->type == JT_CONST)
{
/* Only one matching row */
@@ -2882,18 +2883,17 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
}
/* Approximate found rows and time to read them */
- if (s->table->pos_in_table_list->jtbm_subselect)
+ if (s->table->is_filled_at_execution())
{
- get_temptable_params(s->table->pos_in_table_list->jtbm_subselect,
- &s->records,
- &s->read_time);
- s->found_records= s->records;
- table->quick_condition_rows=s->records;
+ get_delayed_table_estimates(s->table, &s->records, &s->read_time,
+ &s->startup_cost);
+ s->found_records= s->records;
+ table->quick_condition_rows=s->records;
}
else
{
- s->found_records=s->records=s->table->file->stats.records;
- s->read_time=(ha_rows) s->table->file->scan_time();
+ s->found_records= s->records= s->table->file->stats.records;
+ s->read_time= s->table->file->scan_time();
}
@@ -2922,7 +2922,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
(!s->table->pos_in_table_list->embedding || // (2)
(s->table->pos_in_table_list->embedding && // (3)
s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3)
- !s->table->pos_in_table_list->jtbm_subselect)
+ !s->table->is_filled_at_execution())
{
ha_rows records;
SQL_SELECT *select;
@@ -2960,7 +2960,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
if (records != HA_POS_ERROR)
{
s->found_records=records;
- s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
+ s->read_time= s->quick ? s->quick->read_time : 0.0;
}
delete select;
}
@@ -4288,7 +4288,6 @@ best_access_path(JOIN *join,
ha_rows rec;
bool best_uses_jbuf= FALSE;
Item_in_subselect* jtbm_subselect= s->table->pos_in_table_list->jtbm_subselect;
- bool jtbm_ref_used= FALSE;
Loose_scan_opt loose_scan_opt;
DBUG_ENTER("best_access_path");
@@ -4641,8 +4640,8 @@ best_access_path(JOIN *join,
else
tmp= best_time; // Do nothing
}
- if (jtbm_subselect)
- tmp += jtbm_subselect->startup_cost;
+
+ tmp += s->startup_cost;
loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
} /* not ft_key */
if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
@@ -4653,8 +4652,6 @@ best_access_path(JOIN *join,
best_key= start_key;
best_max_key_part= max_key_part;
best_ref_depends_map= found_ref;
- if (jtbm_subselect)
- jtbm_ref_used= TRUE;
}
} /* for each key */
records= best_records;
@@ -4687,6 +4684,11 @@ best_access_path(JOIN *join,
Since we have a 'ref' access path, and FORCE INDEX instructs us to
choose it over ALL/index, there is no need to consider a full table
scan.
+ (5) Non-flattenable semi-joins: don't consider doing a scan of temporary
+ table if we had an option to make lookups into it. In real-world cases,
+ lookups are cheaper than full scans, but when the table is small, they
+ can be [considered to be] more expensive, which causes lookups not to
+ be used for cases with small datasets, which is annoying.
*/
if ((records >= s->found_records || best > s->read_time) && // (1)
!(s->quick && best_key && s->quick->index == best_key->key && // (2)
@@ -4694,7 +4696,7 @@ best_access_path(JOIN *join,
!((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
!(s->table->force_index && best_key && !s->quick) && // (4)
- !jtbm_ref_used)
+ !(best_key && jtbm_subselect)) // (5)
{ // Check full join
ha_rows rnd_records= s->found_records;
/*
@@ -4741,12 +4743,19 @@ best_access_path(JOIN *join,
}
else
{
+#if 0
/* Estimate cost of reading table. */
- if (jtbm_subselect)
+ if (jtbm_subselect) //psergey-jtbm-todo: why the difference?
tmp= s->read_time;
else
tmp= s->table->file->scan_time();
-
+ //psergey-debug:
+ if (!jtbm_subselect && fabs(s->read_time - s->table->file->scan_time()) > 1.0)
+ {
+ fprintf(stderr, "Q:%s\n", thd->query());
+ }
+#endif
+ tmp= s->read_time;
if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache
{
/*
@@ -4775,8 +4784,7 @@ best_access_path(JOIN *join,
}
}
- if (jtbm_subselect)
- tmp += jtbm_subselect->startup_cost;
+ tmp += s->startup_cost;
/*
We estimate the cost of evaluating WHERE clause for found records
as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
@@ -4799,7 +4807,7 @@ best_access_path(JOIN *join,
join->outer_join)));
}
}
-
+
/* Update the cost information for the current partial plan */
pos->records_read= records;
pos->read_time= best;
@@ -6722,12 +6730,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
the index if we are using limit and this is the first table
*/
- if ((cond &&
- (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
- (!tab->const_keys.is_clear_all() && i == join->const_tables &&
- join->unit->select_limit_cnt <
- join->best_positions[i].records_read &&
- !(join->select_options & OPTION_FOUND_ROWS)))
+ if (!tab->table->is_filled_at_execution() &&
+ ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
+ (!tab->const_keys.is_clear_all() && i == join->const_tables &&
+ join->unit->select_limit_cnt <
+ join->best_positions[i].records_read &&
+ !(join->select_options & OPTION_FOUND_ROWS))))
{
/* Join with outer join condition */
COND *orig_cond=sel->cond;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 733ea310ad3..ab62f5f7d46 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -210,8 +210,11 @@ typedef struct st_join_table {
method (but not 'index' for some reason), i.e. this matches method which
E(#records) is in found_records.
*/
- ha_rows read_time;
+ double read_time;
+ /* Startup cost for execution */
+ double startup_cost;
+
table_map dependent,key_dependent;
uint use_quick,index;
uint status; ///< Save status for cache
diff --git a/sql/table.cc b/sql/table.cc
index 7c63138acc7..27aabad1cd5 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -5114,6 +5114,12 @@ bool st_table::is_children_attached(void)
(parent && parent->children_attached));
}
+
+bool st_table::is_filled_at_execution()
+{
+ return test(pos_in_table_list->jtbm_subselect);
+}
+
/*
Cleanup this table for re-execution.
diff --git a/sql/table.h b/sql/table.h
index b564d762547..c9fc12708f6 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -914,6 +914,13 @@ struct st_table {
inline bool needs_reopen_or_name_lock()
{ return s->version != refresh_version; }
bool is_children_attached(void);
+
+ /*
+ If TRUE, the table is filled at execution phase (and so, the optimizer
+ should not do things like range analysis or constant table detection on
+ it).
+ */
+ bool is_filled_at_execution();
};
enum enum_schema_table_state