diff options
author | Monty <monty@mariadb.org> | 2015-10-02 10:18:27 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2015-10-05 17:14:14 +0200 |
commit | cf50e13fbd1b4d27a3542fe2751216d274eb7493 (patch) | |
tree | 2ff477367c13e63a50c2e13b00e0be79d65bc646 /sql | |
parent | d8df2b946442e6f4bd7dd73570a603ae3e2d21f0 (diff) | |
download | mariadb-git-cf50e13fbd1b4d27a3542fe2751216d274eb7493.tar.gz |
MDEV-6080: Allowing storage engine to shortcut group by queries
This task is to allow storage engines that can execute GROUP BY or
summary queries efficiently to intercept a full query or sub query from
MariaDB and deliver the result either to the client or to a temporary
table for further processing.
- Added code in sql_select.cc to intercept GROUP BY queries.
Creation of group_by_handler is done after all optimizations to allow
storage engine to benefit of an optimized WHERE clause and suggested
indexes to use.
- Added group by handler to sequence engine and a group_by test suite as
a way to test the new interface.
- Intercept EXPLAIN with a message "Storage engine handles GROUP BY"
libmysqld/CMakeLists.txt:
Added new group_by_handler files
sql/CMakeLists.txt:
Added new group_by_handler files
sql/group_by_handler.cc:
Implementation of group_by_handler functions
sql/group_by_handler.h:
Definition of group_by_handler class
sql/handler.h:
Added handlerton function to create a group_by_handler, if the storage
engine can intercept the query.
sql/item_cmpfunc.cc:
Allow one to evaluate item_equal any time.
sql/sql_select.cc:
Added code to intercept GROUP BY queries
- If all tables are from the same storage engine and the query is
using sum functions, call create_group_by() to check if the storage
engine can intercept the query.
- If yes:
- create a temporary table to hold a GROUP_BY row or result
- In do_select() intercept normal query execution by instead
calling the group_by_handler to get the result
- Intercept EXPLAIN
sql/sql_select.h:
Added handling of group_by_handler
Added caching of the original join tab (needed for cleanup after
group_by handler)
storage/sequence/mysql-test/sequence/group_by.result:
Test group_by_handler interface
storage/sequence/mysql-test/sequence/group_by.test:
Test group_by_handler interface
storage/sequence/sequence.cc:
Added simple group_by_engine for handling COUNT(*) and
SUM(primary_key). This was done as a test of the group_by_handler
interface
Diffstat (limited to 'sql')
-rw-r--r-- | sql/CMakeLists.txt | 1 | ||||
-rw-r--r-- | sql/group_by_handler.cc | 144 | ||||
-rw-r--r-- | sql/group_by_handler.h | 133 | ||||
-rw-r--r-- | sql/handler.h | 23 | ||||
-rw-r--r-- | sql/sql_select.cc | 158 | ||||
-rw-r--r-- | sql/sql_select.h | 11 |
6 files changed, 467 insertions, 3 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 0d9016c9d6d..9e5ed096e91 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -108,6 +108,7 @@ SET (SQL_SOURCE sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc debug_sync.cc debug_sync.h sql_repl.cc sql_select.cc sql_show.cc sql_state.c + group_by_handler.cc sql_statistics.cc sql_string.cc sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc diff --git a/sql/group_by_handler.cc b/sql/group_by_handler.cc new file mode 100644 index 00000000000..db03ce9196b --- /dev/null +++ b/sql/group_by_handler.cc @@ -0,0 +1,144 @@ +/* + Copyright (c) 2014, SkySQL Ab & MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This file implements the group_by_handler code. This interface + can be used by storage handlers that can intercept summary or GROUP + BY queries from MariaDB and itself return the result to the user or + upper level. +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "sql_priv.h" +#include "sql_select.h" + +/* + Same return values as do_select(); + + @retval + 0 if ok + @retval + 1 if error is sent + @retval + -1 if error should be sent +*/ + +int group_by_handler::execute(JOIN *join) +{ + int err; + ha_rows max_limit; + ha_rows *reset_limit= 0; + Item **reset_item= 0; + DBUG_ENTER("group_by_handler"); + + if ((err= init_scan())) + goto error; + + if (store_data_in_temp_table) + { + max_limit= join->tmp_table_param.end_write_records; + reset_limit= &join->unit->select_limit_cnt; + } + else + { + max_limit= join->unit->select_limit_cnt; + if (join->unit->fake_select_lex) + reset_item= &join->unit->fake_select_lex->select_limit; + } + + while (!(err= next_row())) + { + if (thd->check_killed()) + { + thd->send_kill_message(); + (void) end_scan(); + DBUG_RETURN(-1); + } + + /* Check if we can accept the row */ + if (!having || having->val_bool()) + { + if (store_data_in_temp_table) + { + if ((err= table->file->ha_write_tmp_row(table->record[0]))) + { + bool is_duplicate; + if (!table->file->is_fatal_error(err, HA_CHECK_DUP)) + continue; // Distinct elimination + + if (create_internal_tmp_table_from_heap(thd, table, + join->tmp_table_param. + start_recinfo, + &join->tmp_table_param. + recinfo, + err, 1, &is_duplicate)) + DBUG_RETURN(1); + if (is_duplicate) + continue; + } + } + else + { + if (join->do_send_rows) + { + int error; + /* result < 0 if row was not accepted and should not be counted */ + if ((error= join->result->send_data(*join->fields))) + { + (void) end_scan(); + DBUG_RETURN(error < 0 ? 0 : -1); + } + } + } + + /* limit handling */ + if (++join->send_records >= max_limit && join->do_send_rows) + { + if (!(join->select_options & OPTION_FOUND_ROWS)) + break; // LIMIT reached + join->do_send_rows= 0; // Calculate FOUND_ROWS() + if (reset_limit) + *reset_limit= HA_POS_ERROR; + if (reset_item) + *reset_item= 0; + } + } + } + if (err != 0 && err != HA_ERR_END_OF_FILE) + goto error; + + if ((err= end_scan())) + goto error_2; + if (!store_data_in_temp_table && join->result->send_eof()) + DBUG_RETURN(1); // Don't send error to client + + DBUG_RETURN(0); + +error: + (void) end_scan(); +error_2: + print_error(err, MYF(0)); + DBUG_RETURN(-1); // Error not sent to client +} + + +void group_by_handler::print_error(int error, myf errflag) +{ + my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str); +} diff --git a/sql/group_by_handler.h b/sql/group_by_handler.h new file mode 100644 index 00000000000..425e1440d17 --- /dev/null +++ b/sql/group_by_handler.h @@ -0,0 +1,133 @@ +/* + Copyright (c) 2014, SkySQL Ab & MariaDB Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This file implements the group_by_handler interface. This interface + can be used by storage handlers that can intercept summary or GROUP + BY queries from MariaDB and itself return the result to the user or + upper level. + + Both main and sub queries are supported. Here are some examples of what the + storage engine could intersept: + + SELECT count(*) FROM t1; + SELECT a,count(*) FROM t1 group by a; + SELECT a,count(*) as sum FROM t1 where b > 10 group by a, order by sum; + SELECT a,count(*) FROM t1,t2; + SELECT a, (select sum(*) from t2 where t1.a=t2.a) from t2; + + See https://mariadb.atlassian.net/browse/MDEV-6080 for more information. +*/ + +class JOIN; + +class group_by_handler +{ +public: + /* Arguments for group_by_handler, for usage later */ + THD *thd; + SELECT_LEX *select_lex; + List<Item> *fields; + TABLE_LIST *table_list; + ORDER *group_by, *order_by; + Item *where, *having; + handlerton *ht; /* storage engine of this handler */ + + /* + Bit's of things the storage engine can do for this query. + Should be initialized on object creation. + */ + /* Temporary table where all results should be stored in record[0] */ + TABLE *table; + + bool store_data_in_temp_table; /* Set by mariadb */ + + group_by_handler(THD *thd_arg, SELECT_LEX *select_lex_arg, + List<Item> *fields_arg, + TABLE_LIST *table_list_arg, ORDER *group_by_arg, + ORDER *order_by_arg, Item *where_arg, + Item *having_arg, handlerton *ht_arg) + : thd(thd_arg), select_lex(select_lex_arg), fields(fields_arg), + table_list(table_list_arg), group_by(group_by_arg), + order_by(order_by_arg), where(where_arg), having(having_arg), + ht(ht_arg), table(0), store_data_in_temp_table(0) + {} + virtual ~group_by_handler() {} + + /* + Store pointer to temporary table and objects modified to point to + the temporary table. This will happen during the optimize phase. + + We provide new 'having' and 'order_by' elements here. The differ from the + original ones in that these are modified to point to fields in the + temporary table 'table'. + + Return 1 if the storage handler cannot handle the GROUP BY after all, + in which case we have to give an error to the end user for the query. + This is becasue we can't revert back the old having and order_by elements. + */ + + virtual bool init(TABLE *temporary_table, Item *having_arg, + ORDER *order_by_arg) + { + table= temporary_table; + having= having_arg; + order_by= order_by_arg; + return 0; + } + + /* + Result data is sorted by the storage engine according to order_by (if it + exists) else according to the group_by. If this is not specified, + MariaDB will store the result set into the temporary table and sort the + result. + */ + #define GROUP_BY_ORDER_BY 1 + /* The storage engine can handle DISTINCT */ + #define GROUP_BY_DISTINCT 2 + virtual uint flags() { return 0; } + + /* + Functions to scan data. All these returns 0 if ok, error code in case + of error + */ + + /* + Initialize group_by scan, prepare for next_row(). + If this is a sub query with group by, this can be called many times for + a query. + */ + virtual int init_scan()= 0; + + /* + Return next group by result in table->record[0]. + Return 0 if row found, HA_ERR_END_OF_FILE if last row and other error + number in case of fatal error. + */ + virtual int next_row()= 0; + + /* End scanning */ + virtual int end_scan()=0; + + /* Information for optimizer (used by EXPLAIN) */ + virtual int info(uint flag, ha_statistics *stats)= 0; + + /* Function that calls the above scan functions */ + int execute(JOIN *join); + + /* Report errors */ + virtual void print_error(int error, myf errflag); +}; diff --git a/sql/handler.h b/sql/handler.h index 749c1aaf497..674b58aacd7 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -949,6 +949,10 @@ struct handler_iterator { }; class handler; +class group_by_handler; +typedef class st_select_lex SELECT_LEX; +typedef struct st_order ORDER; + /* handlerton is a singleton structure - one instance per storage engine - to provide access to storage engine functionality that works on the @@ -1251,6 +1255,24 @@ struct handlerton */ const char **tablefile_extensions; // by default - empty list + /********************************************************************** + Functions to intercept queries + **********************************************************************/ + + /* + Create and return a group_by_handler, if the storage engine can execute + the summary / group by query. + If the storage engine can't do that, return NULL. + + This is only called for SELECT's where all tables are from the same + storage engine. + */ + group_by_handler *(*create_group_by)(THD *thd, SELECT_LEX *select_lex, + List<Item> *fields, + TABLE_LIST *table_list, ORDER *group_by, + ORDER *order_by, Item *where, + Item *having); + /********************************************************************* Table discovery API. It allows the server to "discover" tables that exist in the storage @@ -4080,6 +4102,7 @@ protected: }; #include "multi_range_read.h" +#include "group_by_handler.h" bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index a8c665d5684..3690df815ac 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -759,6 +759,12 @@ JOIN::prepare(Item ***rref_pointer_array, TABLE_LIST *tbl; List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables); + /* + If all tables comes from the same storage engine, one_storge_engine will + be set to point to the handlerton of this engine. + */ + one_storage_engine= 0; + uint table_loop_count= 0; while ((tbl= li++)) { /* @@ -770,8 +776,17 @@ JOIN::prepare(Item ***rref_pointer_array, Note: this loop doesn't touch tables inside merged semi-joins, because subquery-to-semijoin conversion has not been done yet. This is intended. */ - if (mixed_implicit_grouping && tbl->table) - tbl->table->maybe_null= 1; + if (tbl->table) + { + if (mixed_implicit_grouping) + tbl->table->maybe_null= 1; + if (!table_loop_count++) + one_storage_engine= tbl->table->file->ht; + else if (one_storage_engine != tbl->table->file->ht) + one_storage_engine= 0; + } + else + one_storage_engine= 0; } if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields, @@ -1894,6 +1909,102 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S optimize_schema_tables_reads(this); /* + All optimization is done. Check if we can use the storage engines + group by handler to evaluate the group by + */ + + if ((tmp_table_param.sum_func_count || group_list) && !procedure && + (one_storage_engine && one_storage_engine->create_group_by)) + { + /* Check if the storage engine can intercept the query */ + if ((storage_handler_for_group_by= + (one_storage_engine->create_group_by)(thd, select_lex, + &all_fields, + tables_list, + group_list, order, + conds, having))) + { + uint handler_flags= storage_handler_for_group_by->flags(); + int err; + + /* + We must store rows in the tmp table if we need to do an ORDER BY + or DISTINCT and the storage handler can't handle it. + */ + need_tmp= ((!(handler_flags & GROUP_BY_ORDER_BY) && + (order || group_list)) || + (!(handler_flags & GROUP_BY_DISTINCT) && select_distinct)); + tmp_table_param.hidden_field_count= (all_fields.elements - + fields_list.elements); + if (!(exec_tmp_table1= + create_tmp_table(thd, &tmp_table_param, all_fields, + 0, handler_flags & GROUP_BY_DISTINCT ? + 0 : select_distinct, 1, + select_options, HA_POS_ERROR, "", + !need_tmp, + (!order || + (handler_flags & GROUP_BY_ORDER_BY))))) + DBUG_RETURN(1); + + /* + Setup reference fields, used by summary functions and group by fields, + to point to the temporary table. + The actual switching to the temporary tables fields for HAVING + and ORDER BY is done in do_select() by calling + set_items_ref_array(items1). + */ + init_items_ref_array(); + items1= items0 + all_fields.elements; + if (change_to_use_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(1); + + /* Give storage engine access to temporary table */ + if ((err= storage_handler_for_group_by->init(exec_tmp_table1, + having, order))) + { + storage_handler_for_group_by->print_error(err, MYF(0)); + DBUG_RETURN(1); + } + storage_handler_for_group_by->store_data_in_temp_table= need_tmp; + /* + If there is not specified ORDER BY, we should sort things according + to the group_by + */ + if (!order) + order= group_list; + /* + Group by and having is calculated by the group_by handler. + Reset the group by and having + */ + group= 0; group_list= 0; + having= tmp_having= 0; + /* + Select distinct is handled by handler or by creating an unique index + over all fields in the temporary table + */ + select_distinct= 0; + if (handler_flags & GROUP_BY_ORDER_BY) + order= 0; + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + + /* Remember information about the original join */ + original_join_tab= join_tab; + original_table_count= table_count; + + /* Set up one join tab to get sorting to work */ + const_tables= 0; + table_count= 1; + join_tab= (JOIN_TAB*) thd->calloc(sizeof(JOIN_TAB)); + join_tab[0].table= exec_tmp_table1; + + DBUG_RETURN(thd->is_fatal_error); + } + } + + /* The loose index scan access method guarantees that all grouping or duplicate row elimination (for distinct) is already performed during data retrieval, and that all MIN/MAX functions are already @@ -1970,7 +2081,7 @@ int JOIN::init_execution() thd->lex->set_limit_rows_examined(); /* Create a tmp table if distinct or if the sort is too complicated */ - if (need_tmp) + if (need_tmp && ! storage_handler_for_group_by) { DBUG_PRINT("info",("Creating tmp table")); THD_STAGE_INFO(thd, stage_copying_to_tmp_table); @@ -2632,6 +2743,8 @@ void JOIN::exec_inner() */ curr_join->join_examined_rows= 0; + curr_join->do_select_call_count= 0; + /* Create a tmp table if distinct or if the sort is too complicated */ if (need_tmp) { @@ -11823,6 +11936,14 @@ void JOIN::cleanup(bool full) if (full) have_query_plan= QEP_DELETED; + if (original_join_tab) + { + /* Free the original optimized join created for the group_by_handler */ + join_tab= original_join_tab; + original_join_tab= 0; + table_count= original_table_count; + } + if (table) { JOIN_TAB *tab; @@ -11931,6 +12052,9 @@ void JOIN::cleanup(bool full) } tmp_table_param.cleanup(); + delete storage_handler_for_group_by; + storage_handler_for_group_by= 0; + if (!join_tab) { List_iterator<TABLE_LIST> li(*join_list); @@ -17727,6 +17851,18 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) join->procedure=procedure; join->tmp_table= table; /* Save for easy recursion */ join->fields= fields; + join->do_select_call_count++; + + if (join->storage_handler_for_group_by && + join->do_select_call_count == 1) + { + /* Select fields are in the temporary table */ + join->fields= &join->tmp_fields_list1; + /* Setup HAVING to work with fields in temporary table */ + join->set_items_ref_array(join->items1); + /* The storage engine will take care of the group by query result */ + DBUG_RETURN(join->storage_handler_for_group_by->execute(join)); + } if (table) { @@ -24192,6 +24328,22 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; output->add_node(explain); } + else if (storage_handler_for_group_by) + { + explain= new (output->mem_root) Explain_select(output->mem_root, + thd->lex->analyze_stmt); + select_lex->set_explain_type(true); + + explain->select_id= select_lex->select_number; + explain->select_type= select_lex->type; + explain->using_temporary= need_tmp; + explain->using_filesort= need_order; + explain->message= "Storage engine handles GROUP BY"; + + if (select_lex->master_unit()->derived) + explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; + output->add_node(explain); + } else { Explain_select *xpl_sel; diff --git a/sql/sql_select.h b/sql/sql_select.h index 57c66bae8e2..4e77d5b6008 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1011,6 +1011,7 @@ public: */ uint top_join_tab_count; uint send_group_parts; + uint do_select_call_count; /* True if the query has GROUP BY. (that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it @@ -1080,6 +1081,12 @@ public: /* Finally picked QEP. This is result of join optimization */ POSITION *best_positions; + /* points to a storage engine if all tables comes from the storage engine */ + handlerton *one_storage_engine; + group_by_handler *storage_handler_for_group_by; + JOIN_TAB *original_join_tab; + uint original_table_count; + /******* Join optimization state members start *******/ /* pointer - we're doing optimization for a semi-join materialization nest. @@ -1378,6 +1385,10 @@ public: group_optimized_away= 0; no_rows_in_result_called= 0; positions= best_positions= 0; + one_storage_engine= 0; + storage_handler_for_group_by= 0; + original_join_tab= 0; + do_select_call_count= 0; explain= NULL; |