diff options
author | Igor Babaev <igor@askmonty.org> | 2019-06-30 13:16:12 -0700 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2019-07-11 13:39:21 -0700 |
commit | 8540fa83bb34df6a3d489a4e85c77692f36e3e26 (patch) | |
tree | 7a56a3d6652d25e7af8a3dafc15cbc7aaee09fab /sql | |
parent | 8997f20f12309d2660988f254415a343d6328835 (diff) | |
download | mariadb-git-8540fa83bb34df6a3d489a4e85c77692f36e3e26.tar.gz |
MDEV-19421 Basic 3-way join queries are not parsed.
The parser returned a syntax error message for the queries with join
expressions like this t1 JOIN t2 [LEFT | RIGHT] JOIN t3 ON ... ON ... when
the second operand of the outer JOIN operation with ON clause was another
join expression with ON clause. In this expression the JOIN operator is
right-associative, i.e. expression has to be parsed as the expression
t1 JOIN (t2 [LEFT | RIGHT] JOIN t3 ON ... ) ON ...
Such join expressions are hard to parse because the outer JOIN is
left-associative if there is no ON clause for the first outer JOIN operator.
The patch implements the solution when the JOIN operator is always parsed
as right-associative and builds first the right-associative tree. If it
happens that there is no corresponding ON clause for this operator the
tree is converted to left-associative.
The idea of the solution was taken from the patch by Martin Hansson
"WL#8083: Fixed the join_table rule" from MySQL-8.0 code line.
As the grammar rules related to join expressions in MySQL-8.0 and
MariaDB-5.5+ are quite different MariaDB solution could not borrow
any code from the MySQL-8.0 solution.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/sql_lex.h | 6 | ||||
-rw-r--r-- | sql/sql_parse.cc | 235 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 49 | ||||
-rw-r--r-- | sql/table.h | 24 |
4 files changed, 288 insertions, 26 deletions
diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 615b583897b..8f629750198 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -957,6 +957,8 @@ public: TABLE_LIST *end_nested_join(THD *thd); TABLE_LIST *nest_last_join(THD *thd); void add_joined_table(TABLE_LIST *table); + bool add_cross_joined_table(TABLE_LIST *left_op, TABLE_LIST *right_op, + bool straight_fl); TABLE_LIST *convert_right_join(); List<Item>* get_item_list(); ulong get_table_join_options(); @@ -2745,9 +2747,9 @@ struct LEX: public Query_tables_list return context_stack.push_front(context); } - void pop_context() + Name_resolution_context *pop_context() { - context_stack.pop(); + return context_stack.pop(); } bool copy_db_to(char **p_db, size_t *p_db_length) const; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 1d5733a646b..346f8ad5e8b 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -6390,6 +6390,7 @@ TABLE_LIST *st_select_lex::end_nested_join(THD *thd) join_list= ptr->join_list; embedding= ptr->embedding; nested_join= ptr->nested_join; + nested_join->nest_type= 0; if (nested_join->join_list.elements == 1) { TABLE_LIST *embedded= nested_join->join_list.head(); @@ -6399,6 +6400,8 @@ TABLE_LIST *st_select_lex::end_nested_join(THD *thd) join_list->push_front(embedded); ptr= embedded; embedded->lifted= 1; + if (embedded->nested_join) + embedded->nested_join->nest_type= 0; } else if (nested_join->join_list.elements == 0) { @@ -6429,6 +6432,16 @@ TABLE_LIST *st_select_lex::nest_last_join(THD *thd) List<TABLE_LIST> *embedded_list; DBUG_ENTER("nest_last_join"); + TABLE_LIST *head= join_list->head(); + if (head->nested_join && head->nested_join->nest_type & REBALANCED_NEST) + { + List_iterator<TABLE_LIST> li(*join_list); + li++; + while (li++) + li.remove(); + DBUG_RETURN(head); + } + if (!(ptr= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ sizeof(NESTED_JOIN)))) DBUG_RETURN(0); @@ -6440,6 +6453,7 @@ TABLE_LIST *st_select_lex::nest_last_join(THD *thd) ptr->alias= (char*) "(nest_last_join)"; embedded_list= &nested_join->join_list; embedded_list->empty(); + nested_join->nest_type= JOIN_OP_NEST; for (uint i=0; i < 2; i++) { @@ -6491,6 +6505,227 @@ void st_select_lex::add_joined_table(TABLE_LIST *table) /** + @brief + Create a node for JOIN/INNER JOIN/CROSS JOIN/STRAIGHT_JOIN operation + + @param left_op the node for the left operand constructed by the parser + @param right_op the node for the right operand constructed by the parser + @param straight_fl TRUE if STRAIGHT_JOIN is used + + @retval + false on success + true otherwise + + @details + + JOIN operator can be left-associative with other join operators in one + context and right-associative in another context. + + In this query + SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a (Q1) + JOIN is left-associative and the query Q1 is interpreted as + SELECT * FROM (t1 JOIN t2) LEFT JOIN t3 ON t2.a=t3.a. + While in this query + SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a ON t1.b=t2.b (Q2) + JOIN is right-associative and the query Q2 is interpreted as + SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.b=t2.b + + JOIN is right-associative if it is used with ON clause or with USING clause. + Otherwise it is left-associative. + When parsing a join expression with JOIN operator we can't determine + whether this operation left or right associative until either we read the + corresponding ON clause or we reach the end of the expression. This creates + a problem for the parser to build a proper internal representation of the + used join expression. + + For Q1 and Q2 the trees representing the used join expressions look like + + LJ - ON J - ON + / \ / \ + J t3 (TQ1) t1 LJ - ON (TQ2) + / \ / \ + t1 t2 t2 t3 + + To build TQ1 the parser has to reduce the expression for JOIN right after + it has read the reference to t2. To build TQ2 the parser reduces JOIN + when he has read the whole join expression. There is no way to determine + whether an early reduction is needed until the whole join expression is + read. + A solution here is always to do a late reduction. In this case the parser + first builds an incorrect tree TQ1* that has to be rebalanced right after + it has been constructed. + + J LJ - ON + / \ / \ + t1 LJ - ON (TQ1*) => J t3 + / \ / \ + t2 t3 t1 t2 + + Actually the transformation is performed over the nodes t1 and LJ before the + node for J is created in the function st_select_lex::add_cross_joined_table. + The function creates a node for J which replaces the node t2. Then it + attaches the nodes t1 and t2 to this newly created node. The node LJ becomes + the top node of the tree. + + For the query + SELECT * FROM t1 JOIN t2 RIGHT JOIN t3 ON t2.a=t3.a (Q3) + the transformation looks slightly differently because the parser + replaces the RIGHT JOIN tree for an equivalent LEFT JOIN tree. + + J LJ - ON + / \ / \ + t1 LJ - ON (TQ3*) => J t2 + / \ / \ + t3 t2 t1 t3 + + With several left associative JOINs + SELECT * FROM t1 JOIN t2 JOIN t3 LEFT JOIN t4 ON t3.a=t4.a (Q4) + the newly created node for JOIN replaces the left most node of the tree: + + J1 LJ - ON + / \ / \ + t1 LJ - ON J2 t4 + / \ => / \ + J2 t4 J1 t3 + / \ / \ + t2 t3 t1 t2 + + Here's another example: + SELECT * + FROM t1 JOIN t2 LEFT JOIN t3 JOIN t4 ON t3.a=t4.a ON t2.b=t3.b (Q5) + + J LJ - ON + / \ / \ + t1 LJ - ON J J - ON + / \ => / \ / \ + t2 J - ON t1 t2 t3 t4 + / \ + t3 t4 + + If the transformed nested join node node is a natural join node like in + the following query + SELECT * FROM t1 JOIN t2 LEFT JOIN t3 USING(a) (Q6) + the transformation additionally has to take care about setting proper + references in the field natural_join for both operands of the natural + join operation. + The function also has to change the name resolution context for ON + expressions used in the transformed join expression to take into + account the tables of the left_op node. +*/ + +bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op, + TABLE_LIST *right_op, + bool straight_fl) +{ + DBUG_ENTER("add_cross_joined_table"); + THD *thd= parent_lex->thd; + if (!(right_op->nested_join && + (right_op->nested_join->nest_type & JOIN_OP_NEST))) + { + /* + This handles the cases when the right operand is not a nested join. + like in queries + SELECT * FROM t1 JOIN t2; + SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a JOIN t3 + */ + right_op->straight= straight_fl; + DBUG_RETURN(false); + } + + TABLE_LIST *tbl; + List<TABLE_LIST> *jl= &right_op->nested_join->join_list; + TABLE_LIST *cj_nest; + + /* + Create the node NJ for a new nested join for the future inclusion + of left_op in it. Initially the nest is empty. + */ + if (unlikely(!(cj_nest= + (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ + sizeof(NESTED_JOIN))))) + DBUG_RETURN(true); + cj_nest->nested_join= + ((NESTED_JOIN*) ((uchar*) cj_nest + ALIGN_SIZE(sizeof(TABLE_LIST)))); + cj_nest->nested_join->nest_type= JOIN_OP_NEST; + List<TABLE_LIST> *cjl= &cj_nest->nested_join->join_list; + cjl->empty(); + + /* Look for the left most node tbl of the right_op tree */ + for ( ; ; ) + { + TABLE_LIST *pair_tbl= 0; /* useful only for operands of natural joins */ + + List_iterator<TABLE_LIST> li(*jl); + tbl= li++; + + /* Expand name resolution context */ + Name_resolution_context *on_context; + if ((on_context= tbl->on_context)) + { + on_context->first_name_resolution_table= + left_op->first_leaf_for_name_resolution(); + } + + if (!(tbl->outer_join & JOIN_TYPE_RIGHT)) + { + pair_tbl= tbl; + tbl= li++; + } + if (tbl->nested_join && + tbl->nested_join->nest_type & JOIN_OP_NEST) + { + jl= &tbl->nested_join->join_list; + continue; + } + + /* Replace the tbl node in the tree for the newly created NJ node */ + cj_nest->outer_join= tbl->outer_join; + cj_nest->on_expr= tbl->on_expr; + cj_nest->embedding= tbl->embedding; + cj_nest->join_list= jl; + cj_nest->alias= (char*) "(nest_last_join)"; + li.replace(cj_nest); + + /* + If tbl is an operand of a natural join set properly the references + in the fields natural_join for both operands of the operation. + */ + if(tbl->embedding && tbl->embedding->is_natural_join) + { + if (!pair_tbl) + pair_tbl= li++; + pair_tbl->natural_join= cj_nest; + cj_nest->natural_join= pair_tbl; + } + break; + } + + /* Attach tbl as the right operand of NJ */ + if (unlikely(cjl->push_back(tbl, thd->mem_root))) + DBUG_RETURN(true); + tbl->outer_join= 0; + tbl->on_expr= 0; + tbl->straight= straight_fl; + tbl->natural_join= 0; + tbl->embedding= cj_nest; + tbl->join_list= cjl; + + /* Add left_op as the left operand of NJ */ + if (unlikely(cjl->push_back(left_op, thd->mem_root))) + DBUG_RETURN(true); + left_op->embedding= cj_nest; + left_op->join_list= cjl; + + /* + Mark right_op as a rebalanced nested join in order not to + create a new top level nested join node. + */ + right_op->nested_join->nest_type|= REBALANCED_NEST; + DBUG_RETURN(false); +} + + +/** Convert a right join into equivalent left join. The function takes the current join list t[0],t[1] ... and diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 93ac55592bf..a0e9497f217 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1428,9 +1428,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token IMPOSSIBLE_ACTION /* To avoid warning for yyerrlab1 */ -%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT /* A dummy token to force the priority of table_ref production in a join. */ -%left TABLE_REF_PRIORITY +%left CONDITIONLESS_JOIN +%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT ON_SYM USING %left SET_VAR %left OR_OR_SYM OR_SYM OR2_SYM %left XOR @@ -9607,9 +9607,9 @@ join_table_list: and are ignored. */ esc_table_ref: - table_ref { $$=$1; } - | '{' ident table_ref '}' { $$=$3; } - ; + table_ref { $$=$1; } + | '{' ident table_ref '}' { $$=$3; } + ; /* Equivalent to <table reference list> in the SQL:2003 standard. */ /* Warning - may return NULL in case of incomplete SELECT */ @@ -9622,23 +9622,24 @@ derived_table_list: ; /* - Notice that JOIN is a left-associative operation, and it must be parsed - as such, that is, the parser must process first the left join operand - then the right one. Such order of processing ensures that the parser - produces correct join trees which is essential for semantic analysis - and subsequent optimization phases. + Notice that JOIN can be a left-associative operator in one context and + a right-associative operator in another context (see the comment for + st_select_lex::add_cross_joined_table). */ join_table: /* INNER JOIN variants */ - /* - Use %prec to evaluate production 'table_ref' before 'normal_join' - so that [INNER | CROSS] JOIN is properly nested as other - left-associative joins. - */ - table_ref normal_join table_ref %prec TABLE_REF_PRIORITY - { MYSQL_YYABORT_UNLESS($1 && ($$=$3)); } - | table_ref STRAIGHT_JOIN table_factor - { MYSQL_YYABORT_UNLESS($1 && ($$=$3)); $3->straight=1; } + table_ref normal_join table_ref %prec CONDITIONLESS_JOIN + { + MYSQL_YYABORT_UNLESS($1 && ($$=$3)); + if (unlikely(Select->add_cross_joined_table($1, $3, false))) + MYSQL_YYABORT; + } + | table_ref STRAIGHT_JOIN table_ref %prec CONDITIONLESS_JOIN + { + MYSQL_YYABORT_UNLESS($1 && ($$=$3)); + if (unlikely(Select->add_cross_joined_table($1, $3, true))) + MYSQL_YYABORT; + } | table_ref normal_join table_ref ON { @@ -9651,10 +9652,10 @@ join_table: expr { add_join_on($3,$6); - Lex->pop_context(); + $3->on_context= Lex->pop_context(); Select->parsing_place= NO_MATTER; } - | table_ref STRAIGHT_JOIN table_factor + | table_ref STRAIGHT_JOIN table_ref ON { MYSQL_YYABORT_UNLESS($1 && $3); @@ -9667,7 +9668,7 @@ join_table: { $3->straight=1; add_join_on($3,$6); - Lex->pop_context(); + $3->on_context= Lex->pop_context(); Select->parsing_place= NO_MATTER; } | table_ref normal_join table_ref @@ -9696,7 +9697,7 @@ join_table: expr { add_join_on($5,$8); - Lex->pop_context(); + $5->on_context= Lex->pop_context(); $5->outer_join|=JOIN_TYPE_LEFT; $$=$5; Select->parsing_place= NO_MATTER; @@ -9735,7 +9736,7 @@ join_table: if (!($$= lex->current_select->convert_right_join())) MYSQL_YYABORT; add_join_on($$, $8); - Lex->pop_context(); + $1->on_context= Lex->pop_context(); Select->parsing_place= NO_MATTER; } | table_ref RIGHT opt_outer JOIN_SYM table_factor diff --git a/sql/table.h b/sql/table.h index c81ee68b0a6..66a51b9d4f5 100644 --- a/sql/table.h +++ b/sql/table.h @@ -46,6 +46,7 @@ struct TABLE_LIST; class ACL_internal_schema_access; class ACL_internal_table_access; class Field; +struct Name_resolution_context; /* Used to identify NESTED_JOIN structures within a join (applicable only to @@ -1618,6 +1619,7 @@ struct TABLE_LIST char *db, *alias, *table_name, *schema_table_name; char *option; /* Used by cache index */ Item *on_expr; /* Used with outer join */ + Name_resolution_context *on_context; /* For ON expressions */ Item *sj_on_expr; /* @@ -2332,9 +2334,31 @@ public: }; +#define JOIN_OP_NEST 1 +#define REBALANCED_NEST 2 + typedef struct st_nested_join { List<TABLE_LIST> join_list; /* list of elements in the nested join */ + /* + Currently the valid values for nest type are: + JOIN_OP_NEST - for nest created for JOIN operation used as an operand in + a join expression, contains 2 elements; + JOIN_OP_NEST | REBALANCED_NEST - nest created after tree re-balancing + in st_select_lex::add_cross_joined_table(), contains 1 element; + 0 - for all other nests. + Examples: + 1. SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a; + Here the nest created for LEFT JOIN at first has nest_type==JOIN_OP_NEST. + After re-balancing in st_select_lex::add_cross_joined_table() this nest + has nest_type==JOIN_OP_NEST | REBALANCED_NEST. The nest for JOIN created + in st_select_lex::add_cross_joined_table() has nest_type== JOIN_OP_NEST. + 2. SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) + Here the nest created for LEFT JOIN has nest_type==0, because it's not + an operand in a join expression. The nest created for JOIN has nest_type + set to JOIN_OP_NEST. + */ + uint nest_type; /* Bitmap of tables within this nested join (including those embedded within its children), including tables removed by table elimination. |