diff options
author | Sergei Golubchik <sergii@pisem.net> | 2012-03-28 20:25:31 +0200 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2012-03-28 20:25:31 +0200 |
commit | 867296c3edc4502093f7b706e7ac4c1670aa9515 (patch) | |
tree | afcc8157b0b71a28edbcca6b862e6ca854fc743c /sql | |
parent | 0d5adca0de0a51b1f0bd49045fc4062eac7d1d25 (diff) | |
parent | 6131d708e889cd4f93490c22bfee00d0728edfd2 (diff) | |
download | mariadb-git-867296c3edc4502093f7b706e7ac4c1670aa9515.tar.gz |
5.3 merge
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item_cmpfunc.cc | 4 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 11 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 263 | ||||
-rw-r--r-- | sql/sql_select.cc | 18 |
4 files changed, 292 insertions, 4 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index f5dc74c389c..d3251bc1fb3 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5463,7 +5463,7 @@ Item *Item_bool_rowready_func2::negated_item() */ Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item) - : Item_bool_func(), eval_item(0), cond_false(0) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) { const_item_cache= 0; with_const= with_const_item; @@ -5486,7 +5486,7 @@ Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item) */ Item_equal::Item_equal(Item_equal *item_equal) - : Item_bool_func(), eval_item(0), cond_false(0) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) { const_item_cache= 0; List_iterator_fast<Item> li(item_equal->equal_items); diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 95d797c62cf..38dbc2901c6 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1693,9 +1693,16 @@ class Item_equal: public Item_bool_func as datetimes. The comparator is used only if compare_as_dates=TRUE */ Arg_comparator cmp; + + /* + For Item_equal objects inside an OR clause: one of the fields that were + used in the original equality. + */ + Item_field *context_field; public: inline Item_equal() - : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0) + : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0), + context_field(NULL) { const_item_cache=0 ;} Item_equal(Item *f1, Item *f2, bool with_const_item); Item_equal(Item_equal *item_equal); @@ -1722,6 +1729,8 @@ public: Item *transform(Item_transformer transformer, uchar *arg); virtual void print(String *str, enum_query_type query_type); CHARSET_INFO *compare_collation(); + + void set_context_field(Item_field *ctx_field) { context_field= ctx_field; } friend class Item_equal_fields_iterator; friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, Item_equal *item_equal); diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index fe9b8da3a24..202e97dc190 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -188,6 +188,252 @@ */ +/* +EqualityPropagationAndSjmNests +****************************** + +Equalities are used for: +P1. Equality propagation +P2. Equality substitution [for a certain join order] + +The equality propagation is not affected by SJM nests. In fact, it is done +before we determine the execution plan, i.e. before we even know we will use +SJM-nests for execution. + +The equality substitution is affected. + +Substitution without SJMs +========================= +When one doesn't have SJM nests, tables have a strict join order: + + ---------------------------------> + t1 -- t2 -- t3 -- t4 --- t5 + + + ? ^ + \ + --(part-of-WHERE) + + +parts WHERE/ON and ref. expressions are attached at some point along the axis. +Expression is allowed to refer to a table column if the table is to the left of +the attachment point. For any given expression, we have a goal: + + "Move leftmost allowed attachment point as much as possible to the left" + +Substitution with SJMs - task setting +===================================== + +When SJM nests are present, there is no global strict table ordering anymore: + + + ---------------------------------> + + ot1 -- ot2 --- sjm -- ot4 --- ot5 + | + | Main execution + - - - - - - - - - - - - - - - - - - - - - - - - + | Materialization + it1 -- it2 --/ + + +Besides that, we must take into account that + - values for outer table columns, otN.col, are inaccessible at + materialization step (SJM-RULE) + - values for inner table columns, itN.col, are inaccessible at Main execution + step, except for SJ-Materialization-Scan and columns that are in the + subquery's select list. (SJM-RULE) + +Substitution with SJMs - solution +================================= + +First, we introduce global strict table ordering like this: + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +Now, let's see how to meet (SJM-RULE). + +SJ-Materialization is only applicable for uncorrelated subqueries. From this, it +follows that any multiple equality will either +1. include only columns of outer tables, or +2. include only columns of inner tables, or +3. include columns of inner and outer tables, joined together through one + of IN-equalities. + +Cases #1 and #2 can be handled in the same way as with regular inner joins. + +Case #3 requires special handling, so that we don't construct violations of +(SJM-RULE). Let's consider possible ways to build violations. + +Equality propagation starts with the clause in this form + + top_query_where AND subquery_where AND in_equalities + +First, it builds multi-equalities. It can also build a mixed multi-equality + + multiple-equal(ot1.col, ot2.col, ... it1.col, itN.col) + +Multi-equalities are pushed down the OR-clauses in top_query_where and in +subquery_where, so it's possible that clauses like this one are built: + + subquery_cond OR (multiple-equal(it1.col, ot1.col,...) AND ...) + ^^^^^^^^^^^^^ \ + | this must be evaluated + \- can only be evaluated at the main phase. + at the materialization phase + +Finally, equality substitution is started. It does two operations: + + +1. Field reference substitution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(In the code, this is Item_field::replace_equal_field) + +This is a process of replacing each reference to "tblX.col" +with the first element of the multi-equality. (REF-SUBST-ORIG) + +This behaviour can cause problems with Semi-join nests. Suppose, we have a +condition: + + func(it1.col, it2.col) + +and a multi-equality(ot1.col, it1.col). Then, reference to "it1.col" will be +replaced with "ot1.col", constructing a condition + + func(ot1.col, it2.col) + +which will be a violation of (SJM-RULE). + +In order to avoid this, (REF-SUBST-ORIG) is amended as follows: + +- references to tables "itX.col" that are inner wrt some SJM nest, are + replaced with references to the first inner table from the same SJM nest. + +- references to top-level tables "otX.col" are replaced with references to + the first element of the multi-equality, no matter if that first element is + a column of a top-level table or of table from some SJM nest. + (REF-SUBST-SJM) + + The case where the first element is a table from an SJM nest $SJM is ok, + because it can be proven that $SJM uses SJ-Materialization-Scan, and + "unpacks" correct column values to the first element during the main + execution phase. + +2. Item_equal elimination +~~~~~~~~~~~~~~~~~~~~~~~~~ +(In the code: eliminate_item_equal) This is a process of taking + + multiple-equal(a,b,c,d,e) + +and replacing it with an equivalent expression which is an AND of pair-wise +equalities: + + a=b AND a=c AND ... + +The equalities are picked such that for any given join prefix (t1,t2...) the +subset of equalities that can be evaluated gives the most restrictive +filtering. + +Without SJM nests, it is sufficient to compare every multi-equality member +with the first one: + + elem1=elem2 AND elem1=elem3 AND elem1=elem4 ... + +When SJM nests are present, we should take care not to construct equalities +that violate the (SJM-RULE). This is achieved by generating separate sets of +equalites for top-level tables and for inner tables. That is, for the join +order + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +we will generate + ot1.col=ot2.col + ot1.col=ot3.col + ot1.col=ot5.col + it2.col=it1.col + + +2.1 The problem with Item_equals and ORs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As has been mentioned above, multiple equalities are pushed down into OR +clauses, possibly building clauses like this: + + func(it.col2) OR multiple-equal(it1.col1, it1.col2, ot1.col) (1) + +where the first part of the clause has references to inner tables, while the +second has references to the top-level tables, which is a violation of +(SJM-RULE). + +AND-clauses of this kind do not create problems, because make_cond_for_table() +will take them apart. OR-clauses will not be split. It is possible to +split-out the part that's dependent on the inner table: + + func(it.col2) OR it1.col1=it1.col2 + +but this is a less-restrictive condition than condition (1). Current execution +scheme will still try to generate the "remainder" condition: + + func(it.col2) OR it1.col1=ot1.col + +which is a violation of (SJM-RULE). + +QQ: "ot1.col=it1.col" is checked at the upper level. Why was it not removed +here? +AA: because has a proper subset of conditions that are found on this level. + consider a join order of ot, sjm(it) + and a condition + ot.col=it.col AND ( ot.col=it.col='foo' OR it.col2='bar') + + we will produce: + table ot: nothing + table it: ot.col=it.col AND (ot.col='foo' OR it.col2='bar') + ^^^^ ^^^^^^^^^^^^^^^^ + | \ the problem is that + | this part condition didnt + | receive a substitution + | + +--- it was correct to subst, 'ot' is + the left-most. + + +Does it make sense to push "inner=outer" down into ORs? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Yes. Consider the query: + + select * from ot + where ot.col in (select it.col from it where (it.col='foo' OR it.col='bar')) + +here, it may be useful to infer that + + (ot.col='foo' OR ot.col='bar') (CASE-FOR-SUBST) + +and attach that condition to the table 'ot'. + +Possible solutions for Item_equals and ORs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Solution #1 +~~~~~~~~~~~ +Let make_cond_for_table() chop analyze the OR clauses it has produced and +discard them if they violate (SJM-RULE). This solution would allow to handle +cases like (CASE-FOR-SUBST) at the expense of making semantics of +make_cond_for_table() complicated. + +Solution #2 +~~~~~~~~~~~ +Before the equality propagation phase, none of the OR clauses violate the +(SJM-RULE). This way, if we remember which tables the original equality +referred to, we can only generate equalities that refer to the outer (or inner) +tables. Note that this will disallow handling of cases like (CASE-FOR-SUBST). + +Currently, solution #2 is implemented. + +*/ + static bool subquery_types_allow_materialization(Item_in_subselect *in_subs); @@ -2673,6 +2919,8 @@ bool Firstmatch_picker::check_qep(JOIN *join, } } } + else + invalidate_firstmatch_prefix(); return FALSE; } @@ -3100,7 +3348,22 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) record_count, join->best_positions + idx, &loose_scan_pos); if (idx==first) + { join->best_positions[idx]= loose_scan_pos; + /* + If LooseScan is based on ref access (including the "degenerate" + one with 0 key parts), we should use full index scan. + + Unfortunately, lots of code assumes that if tab->type==JT_ALL && + tab->quick!=NULL, then quick select should be used. The only + simple way to fix this is to remove the quick select: + */ + if (join->best_positions[idx].key) + { + delete join->best_positions[idx].table->quick; + join->best_positions[idx].table->quick= NULL; + } + } } rem_tables &= ~join->best_positions[idx].table->table->map; record_count *= join->best_positions[idx].records_read; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index c7af4a37e9a..d905516b075 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -10859,6 +10859,9 @@ finish: acceptable, as this happens rarely. The implementation without copying would be much more complicated. + For description of how equality propagation works with SJM nests, grep + for EqualityPropagationAndSjmNests. + @param left_item left term of the quality to be checked @param right_item right term of the equality to be checked @param item equality item if the equality originates from a condition @@ -10932,12 +10935,14 @@ static bool check_simple_equality(Item *left_item, Item *right_item, { /* left_item_equal of an upper level contains left_item */ left_item_equal= new Item_equal(left_item_equal); + left_item_equal->set_context_field(((Item_field*) left_item)); cond_equal->current_level.push_back(left_item_equal); } if (right_copyfl) { /* right_item_equal of an upper level contains right_item */ right_item_equal= new Item_equal(right_item_equal); + right_item_equal->set_context_field(((Item_field*) right_item)); cond_equal->current_level.push_back(right_item_equal); } @@ -10967,6 +10972,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item, Item_equal *item_equal= new Item_equal(orig_left_item, orig_right_item, FALSE); + item_equal->set_context_field((Item_field*)left_item); cond_equal->current_level.push_back(item_equal); } } @@ -11023,6 +11029,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item, { item_equal= new Item_equal(item_equal); cond_equal->current_level.push_back(item_equal); + item_equal->set_context_field(field_item); } if (item_equal) { @@ -11036,6 +11043,7 @@ static bool check_simple_equality(Item *left_item, Item *right_item, else { item_equal= new Item_equal(const_item, orig_field_item, TRUE); + item_equal->set_context_field(field_item); cond_equal->current_level.push_back(item_equal); } return TRUE; @@ -11672,6 +11680,8 @@ static TABLE_LIST* embedding_sjm(Item *item) Item_equal::get_first() also takes similar measures for dealing with equality substitution in presense of SJM nests. + Grep for EqualityPropagationAndSjmNests for a more verbose description. + @return - The condition with generated simple equalities or a pointer to the simple generated equality, if success. @@ -11735,9 +11745,13 @@ Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, on upper AND-levels. */ if (upper) - { + { + TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field); if (item_const && upper->get_const()) + { + /* Upper item also has "field_item=const". Don't produce equality here */ item= 0; + } else { Item_equal_fields_iterator li(*item_equal); @@ -11748,6 +11762,8 @@ Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, break; } } + if (embedding_sjm(field_item) != native_sjm) + item= NULL; /* Don't produce equality */ } bool produce_equality= test(item == field_item); |