summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMonty <monty@mariadb.org>2015-10-02 10:18:27 +0200
committerSergei Golubchik <serg@mariadb.org>2015-10-05 17:14:14 +0200
commitcf50e13fbd1b4d27a3542fe2751216d274eb7493 (patch)
tree2ff477367c13e63a50c2e13b00e0be79d65bc646 /sql
parentd8df2b946442e6f4bd7dd73570a603ae3e2d21f0 (diff)
downloadmariadb-git-cf50e13fbd1b4d27a3542fe2751216d274eb7493.tar.gz
MDEV-6080: Allowing storage engine to shortcut group by queries
This task is to allow storage engines that can execute GROUP BY or summary queries efficiently to intercept a full query or sub query from MariaDB and deliver the result either to the client or to a temporary table for further processing. - Added code in sql_select.cc to intercept GROUP BY queries. Creation of group_by_handler is done after all optimizations to allow storage engine to benefit of an optimized WHERE clause and suggested indexes to use. - Added group by handler to sequence engine and a group_by test suite as a way to test the new interface. - Intercept EXPLAIN with a message "Storage engine handles GROUP BY" libmysqld/CMakeLists.txt: Added new group_by_handler files sql/CMakeLists.txt: Added new group_by_handler files sql/group_by_handler.cc: Implementation of group_by_handler functions sql/group_by_handler.h: Definition of group_by_handler class sql/handler.h: Added handlerton function to create a group_by_handler, if the storage engine can intercept the query. sql/item_cmpfunc.cc: Allow one to evaluate item_equal any time. sql/sql_select.cc: Added code to intercept GROUP BY queries - If all tables are from the same storage engine and the query is using sum functions, call create_group_by() to check if the storage engine can intercept the query. - If yes: - create a temporary table to hold a GROUP_BY row or result - In do_select() intercept normal query execution by instead calling the group_by_handler to get the result - Intercept EXPLAIN sql/sql_select.h: Added handling of group_by_handler Added caching of the original join tab (needed for cleanup after group_by handler) storage/sequence/mysql-test/sequence/group_by.result: Test group_by_handler interface storage/sequence/mysql-test/sequence/group_by.test: Test group_by_handler interface storage/sequence/sequence.cc: Added simple group_by_engine for handling COUNT(*) and SUM(primary_key). This was done as a test of the group_by_handler interface
Diffstat (limited to 'sql')
-rw-r--r--sql/CMakeLists.txt1
-rw-r--r--sql/group_by_handler.cc144
-rw-r--r--sql/group_by_handler.h133
-rw-r--r--sql/handler.h23
-rw-r--r--sql/sql_select.cc158
-rw-r--r--sql/sql_select.h11
6 files changed, 467 insertions, 3 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 0d9016c9d6d..9e5ed096e91 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -108,6 +108,7 @@ SET (SQL_SOURCE
sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc
debug_sync.cc debug_sync.h
sql_repl.cc sql_select.cc sql_show.cc sql_state.c
+ group_by_handler.cc
sql_statistics.cc sql_string.cc
sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc
sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc
diff --git a/sql/group_by_handler.cc b/sql/group_by_handler.cc
new file mode 100644
index 00000000000..db03ce9196b
--- /dev/null
+++ b/sql/group_by_handler.cc
@@ -0,0 +1,144 @@
+/*
+ Copyright (c) 2014, SkySQL Ab & MariaDB Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ This file implements the group_by_handler code. This interface
+ can be used by storage handlers that can intercept summary or GROUP
+ BY queries from MariaDB and itself return the result to the user or
+ upper level.
+*/
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#include "sql_priv.h"
+#include "sql_select.h"
+
+/*
+ Same return values as do_select();
+
+ @retval
+ 0 if ok
+ @retval
+ 1 if error is sent
+ @retval
+ -1 if error should be sent
+*/
+
+int group_by_handler::execute(JOIN *join)
+{
+ int err;
+ ha_rows max_limit;
+ ha_rows *reset_limit= 0;
+ Item **reset_item= 0;
+ DBUG_ENTER("group_by_handler");
+
+ if ((err= init_scan()))
+ goto error;
+
+ if (store_data_in_temp_table)
+ {
+ max_limit= join->tmp_table_param.end_write_records;
+ reset_limit= &join->unit->select_limit_cnt;
+ }
+ else
+ {
+ max_limit= join->unit->select_limit_cnt;
+ if (join->unit->fake_select_lex)
+ reset_item= &join->unit->fake_select_lex->select_limit;
+ }
+
+ while (!(err= next_row()))
+ {
+ if (thd->check_killed())
+ {
+ thd->send_kill_message();
+ (void) end_scan();
+ DBUG_RETURN(-1);
+ }
+
+ /* Check if we can accept the row */
+ if (!having || having->val_bool())
+ {
+ if (store_data_in_temp_table)
+ {
+ if ((err= table->file->ha_write_tmp_row(table->record[0])))
+ {
+ bool is_duplicate;
+ if (!table->file->is_fatal_error(err, HA_CHECK_DUP))
+ continue; // Distinct elimination
+
+ if (create_internal_tmp_table_from_heap(thd, table,
+ join->tmp_table_param.
+ start_recinfo,
+ &join->tmp_table_param.
+ recinfo,
+ err, 1, &is_duplicate))
+ DBUG_RETURN(1);
+ if (is_duplicate)
+ continue;
+ }
+ }
+ else
+ {
+ if (join->do_send_rows)
+ {
+ int error;
+ /* result < 0 if row was not accepted and should not be counted */
+ if ((error= join->result->send_data(*join->fields)))
+ {
+ (void) end_scan();
+ DBUG_RETURN(error < 0 ? 0 : -1);
+ }
+ }
+ }
+
+ /* limit handling */
+ if (++join->send_records >= max_limit && join->do_send_rows)
+ {
+ if (!(join->select_options & OPTION_FOUND_ROWS))
+ break; // LIMIT reached
+ join->do_send_rows= 0; // Calculate FOUND_ROWS()
+ if (reset_limit)
+ *reset_limit= HA_POS_ERROR;
+ if (reset_item)
+ *reset_item= 0;
+ }
+ }
+ }
+ if (err != 0 && err != HA_ERR_END_OF_FILE)
+ goto error;
+
+ if ((err= end_scan()))
+ goto error_2;
+ if (!store_data_in_temp_table && join->result->send_eof())
+ DBUG_RETURN(1); // Don't send error to client
+
+ DBUG_RETURN(0);
+
+error:
+ (void) end_scan();
+error_2:
+ print_error(err, MYF(0));
+ DBUG_RETURN(-1); // Error not sent to client
+}
+
+
+void group_by_handler::print_error(int error, myf errflag)
+{
+ my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str);
+}
diff --git a/sql/group_by_handler.h b/sql/group_by_handler.h
new file mode 100644
index 00000000000..425e1440d17
--- /dev/null
+++ b/sql/group_by_handler.h
@@ -0,0 +1,133 @@
+/*
+ Copyright (c) 2014, SkySQL Ab & MariaDB Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ This file implements the group_by_handler interface. This interface
+ can be used by storage handlers that can intercept summary or GROUP
+ BY queries from MariaDB and itself return the result to the user or
+ upper level.
+
+ Both main and sub queries are supported. Here are some examples of what the
+ storage engine could intersept:
+
+ SELECT count(*) FROM t1;
+ SELECT a,count(*) FROM t1 group by a;
+ SELECT a,count(*) as sum FROM t1 where b > 10 group by a, order by sum;
+ SELECT a,count(*) FROM t1,t2;
+ SELECT a, (select sum(*) from t2 where t1.a=t2.a) from t2;
+
+ See https://mariadb.atlassian.net/browse/MDEV-6080 for more information.
+*/
+
+class JOIN;
+
+class group_by_handler
+{
+public:
+ /* Arguments for group_by_handler, for usage later */
+ THD *thd;
+ SELECT_LEX *select_lex;
+ List<Item> *fields;
+ TABLE_LIST *table_list;
+ ORDER *group_by, *order_by;
+ Item *where, *having;
+ handlerton *ht; /* storage engine of this handler */
+
+ /*
+ Bit's of things the storage engine can do for this query.
+ Should be initialized on object creation.
+ */
+ /* Temporary table where all results should be stored in record[0] */
+ TABLE *table;
+
+ bool store_data_in_temp_table; /* Set by mariadb */
+
+ group_by_handler(THD *thd_arg, SELECT_LEX *select_lex_arg,
+ List<Item> *fields_arg,
+ TABLE_LIST *table_list_arg, ORDER *group_by_arg,
+ ORDER *order_by_arg, Item *where_arg,
+ Item *having_arg, handlerton *ht_arg)
+ : thd(thd_arg), select_lex(select_lex_arg), fields(fields_arg),
+ table_list(table_list_arg), group_by(group_by_arg),
+ order_by(order_by_arg), where(where_arg), having(having_arg),
+ ht(ht_arg), table(0), store_data_in_temp_table(0)
+ {}
+ virtual ~group_by_handler() {}
+
+ /*
+ Store pointer to temporary table and objects modified to point to
+ the temporary table. This will happen during the optimize phase.
+
+ We provide new 'having' and 'order_by' elements here. The differ from the
+ original ones in that these are modified to point to fields in the
+ temporary table 'table'.
+
+ Return 1 if the storage handler cannot handle the GROUP BY after all,
+ in which case we have to give an error to the end user for the query.
+ This is becasue we can't revert back the old having and order_by elements.
+ */
+
+ virtual bool init(TABLE *temporary_table, Item *having_arg,
+ ORDER *order_by_arg)
+ {
+ table= temporary_table;
+ having= having_arg;
+ order_by= order_by_arg;
+ return 0;
+ }
+
+ /*
+ Result data is sorted by the storage engine according to order_by (if it
+ exists) else according to the group_by. If this is not specified,
+ MariaDB will store the result set into the temporary table and sort the
+ result.
+ */
+ #define GROUP_BY_ORDER_BY 1
+ /* The storage engine can handle DISTINCT */
+ #define GROUP_BY_DISTINCT 2
+ virtual uint flags() { return 0; }
+
+ /*
+ Functions to scan data. All these returns 0 if ok, error code in case
+ of error
+ */
+
+ /*
+ Initialize group_by scan, prepare for next_row().
+ If this is a sub query with group by, this can be called many times for
+ a query.
+ */
+ virtual int init_scan()= 0;
+
+ /*
+ Return next group by result in table->record[0].
+ Return 0 if row found, HA_ERR_END_OF_FILE if last row and other error
+ number in case of fatal error.
+ */
+ virtual int next_row()= 0;
+
+ /* End scanning */
+ virtual int end_scan()=0;
+
+ /* Information for optimizer (used by EXPLAIN) */
+ virtual int info(uint flag, ha_statistics *stats)= 0;
+
+ /* Function that calls the above scan functions */
+ int execute(JOIN *join);
+
+ /* Report errors */
+ virtual void print_error(int error, myf errflag);
+};
diff --git a/sql/handler.h b/sql/handler.h
index 749c1aaf497..674b58aacd7 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -949,6 +949,10 @@ struct handler_iterator {
};
class handler;
+class group_by_handler;
+typedef class st_select_lex SELECT_LEX;
+typedef struct st_order ORDER;
+
/*
handlerton is a singleton structure - one instance per storage engine -
to provide access to storage engine functionality that works on the
@@ -1251,6 +1255,24 @@ struct handlerton
*/
const char **tablefile_extensions; // by default - empty list
+ /**********************************************************************
+ Functions to intercept queries
+ **********************************************************************/
+
+ /*
+ Create and return a group_by_handler, if the storage engine can execute
+ the summary / group by query.
+ If the storage engine can't do that, return NULL.
+
+ This is only called for SELECT's where all tables are from the same
+ storage engine.
+ */
+ group_by_handler *(*create_group_by)(THD *thd, SELECT_LEX *select_lex,
+ List<Item> *fields,
+ TABLE_LIST *table_list, ORDER *group_by,
+ ORDER *order_by, Item *where,
+ Item *having);
+
/*********************************************************************
Table discovery API.
It allows the server to "discover" tables that exist in the storage
@@ -4080,6 +4102,7 @@ protected:
};
#include "multi_range_read.h"
+#include "group_by_handler.h"
bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index a8c665d5684..3690df815ac 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -759,6 +759,12 @@ JOIN::prepare(Item ***rref_pointer_array,
TABLE_LIST *tbl;
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+ /*
+ If all tables comes from the same storage engine, one_storge_engine will
+ be set to point to the handlerton of this engine.
+ */
+ one_storage_engine= 0;
+ uint table_loop_count= 0;
while ((tbl= li++))
{
/*
@@ -770,8 +776,17 @@ JOIN::prepare(Item ***rref_pointer_array,
Note: this loop doesn't touch tables inside merged semi-joins, because
subquery-to-semijoin conversion has not been done yet. This is intended.
*/
- if (mixed_implicit_grouping && tbl->table)
- tbl->table->maybe_null= 1;
+ if (tbl->table)
+ {
+ if (mixed_implicit_grouping)
+ tbl->table->maybe_null= 1;
+ if (!table_loop_count++)
+ one_storage_engine= tbl->table->file->ht;
+ else if (one_storage_engine != tbl->table->file->ht)
+ one_storage_engine= 0;
+ }
+ else
+ one_storage_engine= 0;
}
if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields,
@@ -1894,6 +1909,102 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S
optimize_schema_tables_reads(this);
/*
+ All optimization is done. Check if we can use the storage engines
+ group by handler to evaluate the group by
+ */
+
+ if ((tmp_table_param.sum_func_count || group_list) && !procedure &&
+ (one_storage_engine && one_storage_engine->create_group_by))
+ {
+ /* Check if the storage engine can intercept the query */
+ if ((storage_handler_for_group_by=
+ (one_storage_engine->create_group_by)(thd, select_lex,
+ &all_fields,
+ tables_list,
+ group_list, order,
+ conds, having)))
+ {
+ uint handler_flags= storage_handler_for_group_by->flags();
+ int err;
+
+ /*
+ We must store rows in the tmp table if we need to do an ORDER BY
+ or DISTINCT and the storage handler can't handle it.
+ */
+ need_tmp= ((!(handler_flags & GROUP_BY_ORDER_BY) &&
+ (order || group_list)) ||
+ (!(handler_flags & GROUP_BY_DISTINCT) && select_distinct));
+ tmp_table_param.hidden_field_count= (all_fields.elements -
+ fields_list.elements);
+ if (!(exec_tmp_table1=
+ create_tmp_table(thd, &tmp_table_param, all_fields,
+ 0, handler_flags & GROUP_BY_DISTINCT ?
+ 0 : select_distinct, 1,
+ select_options, HA_POS_ERROR, "",
+ !need_tmp,
+ (!order ||
+ (handler_flags & GROUP_BY_ORDER_BY)))))
+ DBUG_RETURN(1);
+
+ /*
+ Setup reference fields, used by summary functions and group by fields,
+ to point to the temporary table.
+ The actual switching to the temporary tables fields for HAVING
+ and ORDER BY is done in do_select() by calling
+ set_items_ref_array(items1).
+ */
+ init_items_ref_array();
+ items1= items0 + all_fields.elements;
+ if (change_to_use_tmp_fields(thd, items1,
+ tmp_fields_list1, tmp_all_fields1,
+ fields_list.elements, all_fields))
+ DBUG_RETURN(1);
+
+ /* Give storage engine access to temporary table */
+ if ((err= storage_handler_for_group_by->init(exec_tmp_table1,
+ having, order)))
+ {
+ storage_handler_for_group_by->print_error(err, MYF(0));
+ DBUG_RETURN(1);
+ }
+ storage_handler_for_group_by->store_data_in_temp_table= need_tmp;
+ /*
+ If there is not specified ORDER BY, we should sort things according
+ to the group_by
+ */
+ if (!order)
+ order= group_list;
+ /*
+ Group by and having is calculated by the group_by handler.
+ Reset the group by and having
+ */
+ group= 0; group_list= 0;
+ having= tmp_having= 0;
+ /*
+ Select distinct is handled by handler or by creating an unique index
+ over all fields in the temporary table
+ */
+ select_distinct= 0;
+ if (handler_flags & GROUP_BY_ORDER_BY)
+ order= 0;
+ tmp_table_param.field_count+= tmp_table_param.sum_func_count;
+ tmp_table_param.sum_func_count= 0;
+
+ /* Remember information about the original join */
+ original_join_tab= join_tab;
+ original_table_count= table_count;
+
+ /* Set up one join tab to get sorting to work */
+ const_tables= 0;
+ table_count= 1;
+ join_tab= (JOIN_TAB*) thd->calloc(sizeof(JOIN_TAB));
+ join_tab[0].table= exec_tmp_table1;
+
+ DBUG_RETURN(thd->is_fatal_error);
+ }
+ }
+
+ /*
The loose index scan access method guarantees that all grouping or
duplicate row elimination (for distinct) is already performed
during data retrieval, and that all MIN/MAX functions are already
@@ -1970,7 +2081,7 @@ int JOIN::init_execution()
thd->lex->set_limit_rows_examined();
/* Create a tmp table if distinct or if the sort is too complicated */
- if (need_tmp)
+ if (need_tmp && ! storage_handler_for_group_by)
{
DBUG_PRINT("info",("Creating tmp table"));
THD_STAGE_INFO(thd, stage_copying_to_tmp_table);
@@ -2632,6 +2743,8 @@ void JOIN::exec_inner()
*/
curr_join->join_examined_rows= 0;
+ curr_join->do_select_call_count= 0;
+
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
{
@@ -11823,6 +11936,14 @@ void JOIN::cleanup(bool full)
if (full)
have_query_plan= QEP_DELETED;
+ if (original_join_tab)
+ {
+ /* Free the original optimized join created for the group_by_handler */
+ join_tab= original_join_tab;
+ original_join_tab= 0;
+ table_count= original_table_count;
+ }
+
if (table)
{
JOIN_TAB *tab;
@@ -11931,6 +12052,9 @@ void JOIN::cleanup(bool full)
}
tmp_table_param.cleanup();
+ delete storage_handler_for_group_by;
+ storage_handler_for_group_by= 0;
+
if (!join_tab)
{
List_iterator<TABLE_LIST> li(*join_list);
@@ -17727,6 +17851,18 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
join->procedure=procedure;
join->tmp_table= table; /* Save for easy recursion */
join->fields= fields;
+ join->do_select_call_count++;
+
+ if (join->storage_handler_for_group_by &&
+ join->do_select_call_count == 1)
+ {
+ /* Select fields are in the temporary table */
+ join->fields= &join->tmp_fields_list1;
+ /* Setup HAVING to work with fields in temporary table */
+ join->set_items_ref_array(join->items1);
+ /* The storage engine will take care of the group by query result */
+ DBUG_RETURN(join->storage_handler_for_group_by->execute(join));
+ }
if (table)
{
@@ -24192,6 +24328,22 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
output->add_node(explain);
}
+ else if (storage_handler_for_group_by)
+ {
+ explain= new (output->mem_root) Explain_select(output->mem_root,
+ thd->lex->analyze_stmt);
+ select_lex->set_explain_type(true);
+
+ explain->select_id= select_lex->select_number;
+ explain->select_type= select_lex->type;
+ explain->using_temporary= need_tmp;
+ explain->using_filesort= need_order;
+ explain->message= "Storage engine handles GROUP BY";
+
+ if (select_lex->master_unit()->derived)
+ explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
+ output->add_node(explain);
+ }
else
{
Explain_select *xpl_sel;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 57c66bae8e2..4e77d5b6008 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1011,6 +1011,7 @@ public:
*/
uint top_join_tab_count;
uint send_group_parts;
+ uint do_select_call_count;
/*
True if the query has GROUP BY.
(that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it
@@ -1080,6 +1081,12 @@ public:
/* Finally picked QEP. This is result of join optimization */
POSITION *best_positions;
+ /* points to a storage engine if all tables comes from the storage engine */
+ handlerton *one_storage_engine;
+ group_by_handler *storage_handler_for_group_by;
+ JOIN_TAB *original_join_tab;
+ uint original_table_count;
+
/******* Join optimization state members start *******/
/*
pointer - we're doing optimization for a semi-join materialization nest.
@@ -1378,6 +1385,10 @@ public:
group_optimized_away= 0;
no_rows_in_result_called= 0;
positions= best_positions= 0;
+ one_storage_engine= 0;
+ storage_handler_for_group_by= 0;
+ original_join_tab= 0;
+ do_select_call_count= 0;
explain= NULL;