diff options
author | Galina Shalygina <galashalygina@gmail.com> | 2016-05-08 23:04:41 +0300 |
---|---|---|
committer | Galina Shalygina <galashalygina@gmail.com> | 2016-05-08 23:04:41 +0300 |
commit | be1d06c8a5f843e775374e5ec148aaee56970bdc (patch) | |
tree | bd7a95e771ca3b405583dccab8b468dd6fb4509f /sql/sql_select.h | |
parent | e09b1f2a226bf2763b211f74908a6486b83ebed1 (diff) | |
download | mariadb-git-be1d06c8a5f843e775374e5ec148aaee56970bdc.tar.gz |
Merge branch '10.2' into 10.2-mdev9864
Diffstat (limited to 'sql/sql_select.h')
-rw-r--r-- | sql/sql_select.h | 376 |
1 files changed, 248 insertions, 128 deletions
diff --git a/sql/sql_select.h b/sql/sql_select.h index 86c5ef87d89..c143d58c2e8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -34,6 +34,7 @@ #include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ #include "filesort.h" +typedef struct st_join_table JOIN_TAB; /* Values in optimize */ #define KEY_OPTIMIZE_EXISTS 1 #define KEY_OPTIMIZE_REF_OR_NULL 2 @@ -184,7 +185,7 @@ enum sj_strategy_enum typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); -Next_select_func setup_end_select_func(JOIN *join); +Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); int rr_sequential_and_unpack(READ_RECORD *info); @@ -198,9 +199,11 @@ int rr_sequential_and_unpack(READ_RECORD *info); class JOIN_CACHE; class SJ_TMP_TABLE; class JOIN_TAB_RANGE; +class AGGR_OP; +class Filesort; typedef struct st_join_table { - st_join_table() {} /* Remove gcc warning */ + st_join_table() {} TABLE *table; TABLE_LIST *tab_list; KEYUSE *keyuse; /**< pointer to first used key */ @@ -238,7 +241,6 @@ typedef struct st_join_table { For join tabs that are inside an SJM bush: root of the bush */ st_join_table *bush_root_tab; - SORT_INFO *filesort; /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ bool last_leaf_in_bush; @@ -262,6 +264,7 @@ typedef struct st_join_table { */ uint packed_info; + // READ_RECORD::Setup_func materialize_table; READ_RECORD::Setup_func read_first_record; Next_select_func next_select; READ_RECORD read_record; @@ -348,6 +351,7 @@ typedef struct st_join_table { */ Item *cache_idx_cond; SQL_SELECT *cache_select; + AGGR_OP *aggr; JOIN *join; /* Embedding SJ-nest (may be not the direct parent), or NULL if none. @@ -414,6 +418,46 @@ typedef struct st_join_table { /* NestedOuterJoins: Bitmap of nested joins this table is part of */ nested_join_map embedding_map; + /* Tmp table info */ + TMP_TABLE_PARAM *tmp_table_param; + + /* Sorting related info */ + Filesort *filesort; + SORT_INFO *filesort_result; + + /* + Non-NULL value means this join_tab must do window function computation + before reading. + */ + Window_funcs_computation* window_funcs_step; + + /** + List of topmost expressions in the select list. The *next* JOIN TAB + in the plan should use it to obtain correct values. Same applicable to + all_fields. These lists are needed because after tmp tables functions + will be turned to fields. These variables are pointing to + tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp + table in the query plan. + @see JOIN::make_tmp_tables_info() + */ + List<Item> *fields; + /** List of all expressions in the select list */ + List<Item> *all_fields; + /* + Pointer to the ref array slice which to switch to before sending + records. Valid only for tmp tables. + */ + Ref_ptr_array *ref_array; + + /** Number of records saved in tmp table */ + ha_rows send_records; + + /** HAVING condition for checking prior saving a record into tmp table*/ + Item *having; + + /** TRUE <=> remove duplicates on this table. */ + bool distinct; + /* Semi-join strategy to be used for this join table. This is a copy of POSITION::sj_strategy field. This field is set up by the @@ -428,9 +472,9 @@ typedef struct st_join_table { void cleanup(); inline bool is_using_loose_index_scan() { - return (select && select->quick && - (select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); + const SQL_SELECT *sel= filesort ? filesort->select : select; + return (sel && sel->quick && + (sel->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); } bool is_using_agg_loose_index_scan () { @@ -565,16 +609,22 @@ typedef struct st_join_table { void save_explain_data(Explain_table_access *eta, table_map prefix_tables, bool distinct, struct st_join_table *first_top_tab); - void update_explain_data(uint idx); + bool use_order() const; ///< Use ordering provided by chosen index? + bool sort_table(); + bool remove_duplicates(); + } JOIN_TAB; #include "sql_join_cache.h" -enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool - end_of_records); -enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool - end_of_records); +enum_nested_loop_state +sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); @@ -869,12 +919,14 @@ typedef struct st_position Sj_materialization_picker sjmat_picker; } POSITION; +typedef Bounds_checked_array<Item_null_result*> Item_null_array; + typedef struct st_rollup { enum State { STATE_NONE, STATE_INITED, STATE_READY }; State state; - Item_null_result **null_items; - Item ***ref_pointer_arrays; + Item_null_array null_items; + Ref_ptr_array *ref_pointer_arrays; List<Item> *fields; } ROLLUP; @@ -888,6 +940,56 @@ public: class Pushdown_query; +/** + @brief + Class to perform postjoin aggregation operations + + @details + The result records are obtained on the put_record() call. + The aggrgation process is determined by the write_func, it could be: + end_write Simply store all records in tmp table. + end_write_group Perform grouping using join->group_fields, + records are expected to be sorted. + end_update Perform grouping using the key generated on tmp + table. Input records aren't expected to be sorted. + Tmp table uses the heap engine + end_update_unique Same as above, but the engine is myisam. + + Lazy table initialization is used - the table will be instantiated and + rnd/index scan started on the first put_record() call. + +*/ + +class AGGR_OP :public Sql_alloc +{ +public: + JOIN_TAB *join_tab; + + AGGR_OP(JOIN_TAB *tab) : join_tab(tab), write_func(NULL) + {}; + + enum_nested_loop_state put_record() { return put_record(false); }; + /* + Send the result of operation further (to a next operation/client) + This function is called after all records were put into tmp table. + + @return return one of enum_nested_loop_state values. + */ + enum_nested_loop_state end_send(); + /** write_func setter */ + void set_write_func(Next_select_func new_write_func) + { + write_func= new_write_func; + } + +private: + /** Write function that would be used for saving records in tmp table. */ + Next_select_func write_func; + enum_nested_loop_state put_record(bool end_of_records); + bool prepare_tmp_table(); +}; + + class JOIN :public Sql_alloc { private: @@ -956,33 +1058,11 @@ protected: public: JOIN_TAB *join_tab, **best_ref; - - /* - Saved join_tab for pre_sorting. create_sort_index() will save here.. - */ - JOIN_TAB *pre_sort_join_tab; - uint pre_sort_index; - Item *pre_sort_idx_pushed_cond; - void clean_pre_sort_join_tab(); /* List of fields that aren't under an aggregate function */ List<Item_field> non_agg_fields; - /* - For "Using temporary+Using filesort" queries, JOIN::join_tab can point to - either: - 1. array of join tabs describing how to run the select, or - 2. array of single join tab describing read from the temporary table. - - SHOW EXPLAIN code needs to read/show #1. This is why two next members are - there for saving it. - */ - JOIN_TAB *table_access_tabs; - uint top_table_access_tabs_count; - JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs - JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution - List<JOIN_TAB_RANGE> join_tab_ranges; /* @@ -1013,14 +1093,9 @@ public: We keep it here so that it is saved/restored with JOIN::restore_tmp. */ uint top_join_tab_count; + uint aggr_tables; ///< Number of post-join tmp tables uint send_group_parts; /* - This counts how many times do_select() was invoked for this JOIN. - It's used to restrict Pushdown_query::execute() only to the first - do_select() invocation. - */ - uint do_select_call_count; - /* True if the query has GROUP BY. (that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it will set this, too. It is not clear why we need a separate var from @@ -1125,6 +1200,7 @@ public: */ table_map complex_firstmatch_tables; + Next_select_func first_select; /* The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into @@ -1140,9 +1216,6 @@ public: double join_record_count; List<Item> *fields; List<Cached_item> group_fields, group_fields_cache; - TABLE *tmp_table; - /// used to store 2 possible tmp table of SELECT - TABLE *exec_tmp_table1, *exec_tmp_table2; THD *thd; Item_sum **sum_funcs, ***sum_funcs_end; /** second copy of sumfuncs (for queries with 2 temporary tables */ @@ -1151,6 +1224,8 @@ public: Item *having; Item *tmp_having; ///< To store having when processed temporary table Item *having_history; ///< Store having for explain + ORDER *group_list_for_estimates; + bool having_is_correlated; ulonglong select_options; /* Bitmap of allowed types of the join caches that @@ -1189,26 +1264,6 @@ public: */ bool filesort_found_rows; - /** - Copy of this JOIN to be used with temporary tables. - - tmp_join is used when the JOIN needs to be "reusable" (e.g. in a - subquery that gets re-executed several times) and we know will use - temporary tables for materialization. The materialization to a - temporary table overwrites the JOIN structure to point to the - temporary table after the materialization is done. This is where - tmp_join is used : it's a copy of the JOIN before the - materialization and is used in restoring before re-execution by - overwriting the current JOIN structure with the saved copy. - Because of this we should pay extra care of not freeing up helper - structures that are referenced by the original contents of the - JOIN. We can check for this by making sure the "current" join is - not the temporary copy, e.g. !tmp_join || tmp_join != join - - We should free these sub-structures at JOIN::destroy() if the - "current" join has a copy is not that copy. - */ - JOIN *tmp_join; ROLLUP rollup; ///< Used with rollup bool mixed_implicit_grouping; @@ -1230,6 +1285,19 @@ public: GROUP/ORDER BY. */ bool simple_order, simple_group; + + /* + ordered_index_usage is set if an ordered index access + should be used instead of a filesort when computing + ORDER/GROUP BY. + */ + enum + { + ordered_index_void, // No ordered index avail. + ordered_index_group_by, // Use index for GROUP BY + ordered_index_order_by // Use index for ORDER BY + } ordered_index_usage; + /** Is set only in case if we have a GROUP BY clause and no ORDER BY after constant elimination of 'order'. @@ -1282,10 +1350,19 @@ public: List<Item> exec_const_order_group_cond; SQL_SELECT *select; ///<created in optimisation phase JOIN_TAB *return_tab; ///<used only for outer joins - Item **ref_pointer_array; ///<used pointer reference for this select - // Copy of above to be used with different lists - Item **items0, **items1, **items2, **items3, **current_ref_pointer_array; - uint ref_pointer_array_size; ///< size of above in bytes + + /* + Used pointer reference for this select. + select_lex->ref_pointer_array contains five "slices" of the same length: + |========|========|========|========|========| + ref_ptrs items0 items1 items2 items3 + */ + Ref_ptr_array ref_ptrs; + // Copy of the initial slice above, to be used with different lists + Ref_ptr_array items0, items1, items2, items3; + // Used by rollup, to restore ref_ptrs after overwriting it. + Ref_ptr_array current_ref_ptrs; + const char *zero_result_cause; ///< not 0 if exec must return zero result bool union_part; ///< this subselect is part of union @@ -1312,20 +1389,12 @@ public: /* SJM nests that are executed with SJ-Materialization strategy */ List<SJ_MATERIALIZATION_INFO> sjm_info_list; - /* - storage for caching buffers allocated during query execution. - These buffers allocations need to be cached as the thread memory pool is - cleared only at the end of the execution of the whole query and not caching - allocations that occur in repetition at execution time will result in - excessive memory usage. - Note: make_simple_join always creates an execution plan that accesses - a single table, thus it is sufficient to have a one-element array for - table_reexec. - */ - SORT_FIELD *sortorder; // make_unireg_sortorder() - TABLE *table_reexec[1]; // make_simple_join() - JOIN_TAB *join_tab_reexec; // make_simple_join() - /* end of allocation caching storage */ + /** TRUE <=> ref_pointer_array is set to items3. */ + bool set_group_rpa; + /** Exec time only: TRUE <=> current group has been sent */ + bool group_sent; + + JOIN_TAB *sort_and_group_aggr_tab; JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) @@ -1337,12 +1406,13 @@ public: void init(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) { - join_tab= join_tab_save= 0; + join_tab= 0; table= 0; table_count= 0; top_join_tab_count= 0; const_tables= 0; const_table_map= 0; + aggr_tables= 0; eliminated_tables= 0; join_list= 0; implicit_grouping= FALSE; @@ -1352,25 +1422,21 @@ public: send_records= 0; found_records= 0; fetch_limit= HA_POS_ERROR; - join_examined_rows= 0; - exec_tmp_table1= 0; - exec_tmp_table2= 0; - sortorder= 0; - table_reexec[0]= 0; - join_tab_reexec= 0; thd= thd_arg; sum_funcs= sum_funcs2= 0; procedure= 0; having= tmp_having= having_history= 0; + having_is_correlated= false; + group_list_for_estimates= 0; select_options= select_options_arg; result= result_arg; lock= thd_arg->lock; select_lex= 0; //for safety - tmp_join= 0; select_distinct= MY_TEST(select_options & SELECT_DISTINCT); no_order= 0; simple_order= 0; simple_group= 0; + ordered_index_usage= ordered_index_void; need_distinct= 0; skip_sort_order= 0; need_tmp= 0; @@ -1378,8 +1444,11 @@ public: error= 0; select= 0; return_tab= 0; - ref_pointer_array= items0= items1= items2= items3= 0; - ref_pointer_array_size= 0; + ref_ptrs.reset(); + items0.reset(); + items1.reset(); + items2.reset(); + items3.reset(); zero_result_cause= 0; optimized= 0; have_query_plan= QEP_NOT_PRESENT_YET; @@ -1393,8 +1462,6 @@ public: positions= best_positions= 0; pushdown_query= 0; original_join_tab= 0; - do_select_call_count= 0; - explain= NULL; all_fields= fields_arg; @@ -1407,22 +1474,21 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; + first_select= sub_select; + set_group_rpa= false; + group_sent= 0; + outer_ref_cond= pseudo_bits_cond= NULL; in_to_exists_where= NULL; in_to_exists_having= NULL; - pre_sort_join_tab= NULL; emb_sjm_nest= NULL; sjm_lookup_tables= 0; - - /* - The following is needed because JOIN::cleanup(true) may be called for - joins for which JOIN::optimize was aborted with an error before a proper - query plan was produced - */ - table_access_tabs= NULL; } - int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, + /* True if the plan guarantees that it will be returned zero or one row */ + bool only_const_tables() { return const_tables == table_count; } + + int prepare(TABLE_LIST *tables, uint wind_num, COND *conds, uint og_num, ORDER *order, bool skip_order_by, ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select, SELECT_LEX_UNIT *unit); @@ -1432,7 +1498,9 @@ public: int reinit(); int init_execution(); void exec(); + void exec_inner(); + bool prepare_result(List<Item> **columns_list); int destroy(); void restore_tmp(); bool alloc_func_list(); @@ -1442,16 +1510,42 @@ public: bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields, bool before_group_by, bool recompute= FALSE); - inline void set_items_ref_array(Item **ptr) + /// Initialzes a slice, see comments for ref_ptrs above. + Ref_ptr_array ref_ptr_array_slice(size_t slice_num) { - memcpy((char*) ref_pointer_array, (char*) ptr, ref_pointer_array_size); - current_ref_pointer_array= ptr; + size_t slice_sz= select_lex->ref_pointer_array.size() / 5U; + DBUG_ASSERT(select_lex->ref_pointer_array.size() % 5 == 0); + DBUG_ASSERT(slice_num < 5U); + return Ref_ptr_array(&select_lex->ref_pointer_array[slice_num * slice_sz], + slice_sz); } - inline void init_items_ref_array() + + /** + Overwrites one slice with the contents of another slice. + In the normal case, dst and src have the same size(). + However: the rollup slices may have smaller size than slice_sz. + */ + void copy_ref_ptr_array(Ref_ptr_array dst_arr, Ref_ptr_array src_arr) + { + DBUG_ASSERT(dst_arr.size() >= src_arr.size()); + void *dest= dst_arr.array(); + const void *src= src_arr.array(); + memcpy(dest, src, src_arr.size() * src_arr.element_size()); + } + + /// Overwrites 'ref_ptrs' and remembers the the source as 'current'. + void set_items_ref_array(Ref_ptr_array src_arr) { - items0= ref_pointer_array + all_fields.elements; - memcpy(items0, ref_pointer_array, ref_pointer_array_size); - current_ref_pointer_array= items0; + copy_ref_ptr_array(ref_ptrs, src_arr); + current_ref_ptrs= src_arr; + } + + /// Initializes 'items0' and remembers that it is 'current'. + void init_items_ref_array() + { + items0= ref_ptr_array_slice(1); + copy_ref_ptr_array(items0, ref_ptrs); + current_ref_ptrs= items0; } bool rollup_init(); @@ -1460,18 +1554,10 @@ public: Item_sum ***func); int rollup_send_data(uint idx); int rollup_write_data(uint idx, TABLE *table); - /** - Release memory and, if possible, the open tables held by this execution - plan (and nested plans). It's used to release some tables before - the end of execution in order to increase concurrency and reduce - memory consumption. - */ void join_free(); /** Cleanup this JOIN, possibly for reuse */ void cleanup(bool full); void clear(); - bool save_join_tab(); - bool init_save_join_tab(); bool send_row_on_empty_set() { return (do_send_rows && implicit_grouping && !group_optimized_away && @@ -1490,6 +1576,8 @@ public: return (table_map(1) << table_count) - 1; } void drop_unused_derived_keys(); + bool get_best_combination(); + bool add_sorting_to_table(JOIN_TAB *tab, ORDER *order); inline void eval_select_list_used_tables(); /* Return the table for which an index scan can be used to satisfy @@ -1551,16 +1639,44 @@ public: int save_explain_data_intern(Explain_query *output, bool need_tmp_table, bool need_order, bool distinct, const char *message); - JOIN_TAB *first_breadth_first_optimization_tab() { return table_access_tabs; } - JOIN_TAB *first_breadth_first_execution_tab() { return join_tab; } + JOIN_TAB *first_breadth_first_tab() { return join_tab; } private: /** + Create a temporary table to be used for processing DISTINCT/ORDER + BY/GROUP BY. + + @note Will modify JOIN object wrt sort/group attributes + + @param tab the JOIN_TAB object to attach created table to + @param tmp_table_fields List of items that will be used to define + column types of the table. + @param tmp_table_group Group key to use for temporary table, NULL if none. + @param save_sum_fields If true, do not replace Item_sum items in + @c tmp_fields list with Item_field items referring + to fields in temporary table. + + @returns false on success, true on failure + */ + bool create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *tmp_table_fields, + ORDER *tmp_table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_ordermake); + /** + Optimize distinct when used on a subset of the tables. + + E.g.,: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b + In this case we can stop scanning t2 when we have found one t1.a + */ + void optimize_distinct(); + + /** TRUE if the query contains an aggregate function but has no GROUP BY clause. */ bool implicit_grouping; - bool make_simple_join(JOIN *join, TABLE *tmp_table); void cleanup_item_list(List<Item> &items) const; + bool make_aggr_tables_info(); }; enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; @@ -1585,7 +1701,7 @@ extern const char *join_type_str[]; void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, List<Item> &fields, bool reset_with_sum_func); bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, - Item **ref_pointer_array, + Ref_ptr_array ref_pointer_array, List<Item> &new_list1, List<Item> &new_list2, uint elements, List<Item> &fields); void copy_fields(TMP_TABLE_PARAM *param); @@ -1826,19 +1942,20 @@ int safe_index_read(JOIN_TAB *tab); int get_quick_record(SQL_SELECT *select); SORT_FIELD * make_unireg_sortorder(THD *thd, ORDER *order, uint *length, SORT_FIELD *sortorder); -int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, - List<Item> &fields, List <Item> &all_fields, ORDER *order); -int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List<Item> &fields, List <Item> &all_fields, ORDER *order, + bool search_in_all_fields= true); +int setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List<Item> &all_fields, ORDER *order, - bool *hidden_group_fields); + bool *hidden_group_fields, bool search_in_all_fields= true); bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array); + Ref_ptr_array ref_pointer_array); int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table, struct st_table_ref *table_ref); bool handle_select(THD *thd, LEX *lex, select_result *result, ulong setup_tables_done_option); -bool mysql_select(THD *thd, Item ***rref_pointer_array, +bool mysql_select(THD *thd, TABLE_LIST *tables, uint wild_num, List<Item> &list, COND *conds, uint og_num, ORDER *order, ORDER *group, Item *having, ORDER *proc_param, ulonglong select_type, @@ -2154,4 +2271,7 @@ public: int execute(JOIN *join); }; +bool test_if_order_compatible(SQL_I_List<ORDER> &a, SQL_I_List<ORDER> &b); +int test_if_group_changed(List<Cached_item> &list); +int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); #endif /* SQL_SELECT_INCLUDED */ |