diff options
author | Monty <monty@mariadb.org> | 2015-10-02 10:18:27 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2015-10-05 17:14:14 +0200 |
commit | cf50e13fbd1b4d27a3542fe2751216d274eb7493 (patch) | |
tree | 2ff477367c13e63a50c2e13b00e0be79d65bc646 /storage/sequence | |
parent | d8df2b946442e6f4bd7dd73570a603ae3e2d21f0 (diff) | |
download | mariadb-git-cf50e13fbd1b4d27a3542fe2751216d274eb7493.tar.gz |
MDEV-6080: Allowing storage engine to shortcut group by queries
This task is to allow storage engines that can execute GROUP BY or
summary queries efficiently to intercept a full query or sub query from
MariaDB and deliver the result either to the client or to a temporary
table for further processing.
- Added code in sql_select.cc to intercept GROUP BY queries.
Creation of group_by_handler is done after all optimizations to allow
storage engine to benefit of an optimized WHERE clause and suggested
indexes to use.
- Added group by handler to sequence engine and a group_by test suite as
a way to test the new interface.
- Intercept EXPLAIN with a message "Storage engine handles GROUP BY"
libmysqld/CMakeLists.txt:
Added new group_by_handler files
sql/CMakeLists.txt:
Added new group_by_handler files
sql/group_by_handler.cc:
Implementation of group_by_handler functions
sql/group_by_handler.h:
Definition of group_by_handler class
sql/handler.h:
Added handlerton function to create a group_by_handler, if the storage
engine can intercept the query.
sql/item_cmpfunc.cc:
Allow one to evaluate item_equal any time.
sql/sql_select.cc:
Added code to intercept GROUP BY queries
- If all tables are from the same storage engine and the query is
using sum functions, call create_group_by() to check if the storage
engine can intercept the query.
- If yes:
- create a temporary table to hold a GROUP_BY row or result
- In do_select() intercept normal query execution by instead
calling the group_by_handler to get the result
- Intercept EXPLAIN
sql/sql_select.h:
Added handling of group_by_handler
Added caching of the original join tab (needed for cleanup after
group_by handler)
storage/sequence/mysql-test/sequence/group_by.result:
Test group_by_handler interface
storage/sequence/mysql-test/sequence/group_by.test:
Test group_by_handler interface
storage/sequence/sequence.cc:
Added simple group_by_engine for handling COUNT(*) and
SUM(primary_key). This was done as a test of the group_by_handler
interface
Diffstat (limited to 'storage/sequence')
-rw-r--r-- | storage/sequence/mysql-test/sequence/group_by.result | 88 | ||||
-rw-r--r-- | storage/sequence/mysql-test/sequence/group_by.test | 42 | ||||
-rw-r--r-- | storage/sequence/sequence.cc | 165 |
3 files changed, 293 insertions, 2 deletions
diff --git a/storage/sequence/mysql-test/sequence/group_by.result b/storage/sequence/mysql-test/sequence/group_by.result new file mode 100644 index 00000000000..86bb158d9fc --- /dev/null +++ b/storage/sequence/mysql-test/sequence/group_by.result @@ -0,0 +1,88 @@ +show create table seq_1_to_15_step_2; +Table Create Table +seq_1_to_15_step_2 CREATE TABLE `seq_1_to_15_step_2` ( + `seq` bigint(20) unsigned NOT NULL, + PRIMARY KEY (`seq`) +) ENGINE=SEQUENCE DEFAULT CHARSET=latin1 +select count(seq),sum(seq),1 from seq_1_to_15_step_2; +count(seq) sum(seq) 1 +8 64 1 +# +# The engine should be able to optimize the following requests +# +select count(*) from seq_1_to_15_step_2; +count(*) +8 +explain select count(*) from seq_1_to_15_step_2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select count(seq) from seq_1_to_15_step_2; +count(seq) +8 +explain select count(seq) from seq_1_to_15_step_2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select sum(seq) from seq_1_to_15_step_2; +sum(seq) +64 +explain select sum(seq) from seq_1_to_15_step_2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select count(seq),sum(seq) from seq_1_to_15_step_2; +count(seq) sum(seq) +8 64 +explain select count(seq),sum(seq) from seq_1_to_15_step_2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select count(seq) as c from seq_1_to_15_step_2 having c > 5; +c +8 +explain select count(seq) as c from seq_1_to_15_step_2 having c > 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select count(seq) as c from seq_1_to_15_step_2 having c > 1000; +c +explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select distinct count(*) from seq_1_to_15_step_2; +count(*) +8 +explain select distinct count(*) from seq_1_to_15_step_2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY +select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1; +seq count(*) +1 8 +3 8 +5 8 +7 8 +9 8 +11 8 +13 8 +15 8 +explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY seq_1_to_15_step_2 index NULL PRIMARY 8 NULL # Using index +1 PRIMARY <derived2> ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +2 DERIVED NULL NULL NULL NULL NULL NULL # Storage engine handles GROUP BY +# +# The engine can't optimize the following queries +# +select count(seq),sum(seq),1 from seq_1_to_15_step_2; +count(seq) sum(seq) 1 +8 64 1 +explain select count(seq),sum(seq),1 from seq_1_to_15_step_2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index +explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index +1 SIMPLE t2 index NULL PRIMARY 8 NULL 8 Using index; Using join buffer (flat, BNL join) +explain select count(*) from seq_1_to_15_step_2 where seq > 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE seq_1_to_15_step_2 index PRIMARY PRIMARY 8 NULL 8 Using where; Using index +explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index; Using temporary; Using filesort +drop table seq_1_to_15_step_2; diff --git a/storage/sequence/mysql-test/sequence/group_by.test b/storage/sequence/mysql-test/sequence/group_by.test new file mode 100644 index 00000000000..870afd9ed89 --- /dev/null +++ b/storage/sequence/mysql-test/sequence/group_by.test @@ -0,0 +1,42 @@ +--source inc.inc + +# Check that group by handler forks for the sequence engine. +# The sequence engine can only optimize queries with COUNT(primary_key) or +# SUM(primary_key) when there is no GROUP BY. + +show create table seq_1_to_15_step_2; + +# Get the correct results +select count(seq),sum(seq),1 from seq_1_to_15_step_2; + +--echo # +--echo # The engine should be able to optimize the following requests +--echo # +select count(*) from seq_1_to_15_step_2; +explain select count(*) from seq_1_to_15_step_2; +select count(seq) from seq_1_to_15_step_2; +explain select count(seq) from seq_1_to_15_step_2; +select sum(seq) from seq_1_to_15_step_2; +explain select sum(seq) from seq_1_to_15_step_2; +select count(seq),sum(seq) from seq_1_to_15_step_2; +explain select count(seq),sum(seq) from seq_1_to_15_step_2; +select count(seq) as c from seq_1_to_15_step_2 having c > 5; +explain select count(seq) as c from seq_1_to_15_step_2 having c > 5; +select count(seq) as c from seq_1_to_15_step_2 having c > 1000; +explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000; +select distinct count(*) from seq_1_to_15_step_2; +explain select distinct count(*) from seq_1_to_15_step_2; +select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1; +--replace_column 9 # +explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1; + +--echo # +--echo # The engine can't optimize the following queries +--echo # +select count(seq),sum(seq),1 from seq_1_to_15_step_2; +explain select count(seq),sum(seq),1 from seq_1_to_15_step_2; +explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2; +explain select count(*) from seq_1_to_15_step_2 where seq > 0; +explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2); + +drop table seq_1_to_15_step_2; diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc index 970ee7ca2f7..d6e1ed7412a 100644 --- a/storage/sequence/sequence.cc +++ b/storage/sequence/sequence.cc @@ -23,10 +23,14 @@ #include <my_config.h> #include <ctype.h> #include <mysql_version.h> +#include <item.h> +#include <item_sum.h> #include <handler.h> #include <table.h> #include <field.h> +handlerton *sequence_hton; + class Sequence_share : public Handler_share { public: const char *name; @@ -52,11 +56,11 @@ class ha_seq: public handler { private: THR_LOCK_DATA lock; - Sequence_share *seqs; Sequence_share *get_share(); ulonglong cur; public: + Sequence_share *seqs; ha_seq(handlerton *hton, TABLE_SHARE *table_arg) : handler(hton, table_arg), seqs(0) { } ulonglong table_flags() const @@ -346,9 +350,166 @@ static int discover_table_existence(handlerton *hton, const char *db, static int dummy_ret_int() { return 0; } +/***************************************************************************** + Example of a simple group by handler for queries like: + SELECT SUM(seq) from sequence_table; + + This implementation supports SUM() and COUNT() on primary key. +*****************************************************************************/ + +class ha_seq_group_by_handler: public group_by_handler +{ + bool first_row; + +public: + ha_seq_group_by_handler(THD *thd, SELECT_LEX *select_lex, + List<Item> *fields, + TABLE_LIST *table_list, ORDER *group_by, + ORDER *order_by, Item *where, + Item *having) + :group_by_handler(thd, select_lex, fields, table_list, group_by, + order_by, where, having, sequence_hton) + { + } + ~ha_seq_group_by_handler() {} + bool init(TABLE *temporary_table, Item *having_arg, + ORDER *order_by_arg); + int init_scan() { first_row= 1 ; return 0; } + int next_row(); + int end_scan() { return 0; } + int info(uint flag, ha_statistics *stats); +}; + +static group_by_handler * +create_group_by_handler(THD *thd, SELECT_LEX *select_lex, + List<Item> *fields, + TABLE_LIST *table_list, ORDER *group_by, + ORDER *order_by, Item *where, + Item *having) +{ + ha_seq_group_by_handler *handler; + Item *item; + List_iterator_fast<Item> it(*fields); + + /* check that only one table is used in FROM clause and no sub queries */ + if (table_list->next_local != 0) + return 0; + /* check that there is no where clause and no group_by */ + if (where != 0 || group_by != 0) + return 0; + + /* + Check that all fields are sum(primary_key) or count(primary_key) + For more ways to work with the field list and sum functions, see + opt_sum.cc::opt_sum_query(). + */ + while ((item= it++)) + { + Item *arg0; + Field *field; + if (item->type() != Item::SUM_FUNC_ITEM || + (((Item_sum*) item)->sum_func() != Item_sum::SUM_FUNC && + ((Item_sum*) item)->sum_func() != Item_sum::COUNT_FUNC)) + + return 0; // Not a SUM() function + arg0= ((Item_sum*) item)->get_arg(0); + if (arg0->type() != Item::FIELD_ITEM) + { + if ((((Item_sum*) item)->sum_func() == Item_sum::COUNT_FUNC) && + arg0->basic_const_item()) + continue; // Allow count(1) + return 0; + } + field= ((Item_field*) arg0)->field; + /* + Check that we are using the sequence table (the only table in the FROM + clause) and not an outer table. + */ + if (field->table != table_list->table) + return 0; + /* Check that we are using a SUM() on the primary key */ + if (strcmp(field->field_name, "seq")) + return 0; + } + + /* Create handler and return it */ + handler= new ha_seq_group_by_handler(thd, select_lex, fields, table_list, + group_by, + order_by, where, having); + return handler; +} + +bool ha_seq_group_by_handler::init(TABLE *temporary_table, Item *having_arg, + ORDER *order_by_arg) +{ + /* + Here we could add checks if the temporary table was created correctly + */ + return group_by_handler::init(temporary_table, having_arg, order_by_arg); +} + + +int ha_seq_group_by_handler::info(uint flag, ha_statistics *stats) +{ + bzero(stats, sizeof(*stats)); + /* We only return one records for a SUM(*) without a group by */ + stats->records= 1; + return 0; +} + +int ha_seq_group_by_handler::next_row() +{ + List_iterator_fast<Item> it(*fields); + Item_sum *item_sum; + Sequence_share *seqs= ((ha_seq*) table_list->table->file)->seqs; + DBUG_ENTER("ha_seq_group_by_handler"); + + /* + Check if this is the first call to the function. If not, we have already + returned all data. + */ + if (!first_row) + DBUG_RETURN(HA_ERR_END_OF_FILE); + first_row= 0; + + /* Pointer to first field in temporary table where we should store summary*/ + Field **field_ptr= table->field; + ulonglong elements= (seqs->to - seqs->from + seqs->step - 1) / seqs->step; + + while ((item_sum= (Item_sum*) it++)) + { + Field *field= *(field_ptr++); + switch (item_sum->sum_func()) { + case Item_sum::COUNT_FUNC: + { + field->store((longlong) elements, 1); + break; + } + case Item_sum::SUM_FUNC: + { + /* Calculate SUM(f, f+step, f+step*2 ... to) */ + ulonglong sum; + sum= seqs->from * elements + seqs->step * (elements*elements-elements)/2; + field->store((longlong) sum, 1); + break; + } + default: + DBUG_ASSERT(0); + } + field->set_notnull(); + } + DBUG_RETURN(0); +} + + +/***************************************************************************** + Initialize the interface between the sequence engine and MariaDB +*****************************************************************************/ + static int init(void *p) { handlerton *hton= (handlerton *)p; + sequence_hton= hton; hton->create= create_handler; hton->discover_table= discover_table; hton->discover_table_existence= discover_table_existence; @@ -356,7 +517,7 @@ static int init(void *p) (int (*)(handlerton *, THD *, bool)) &dummy_ret_int; hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release= (int (*)(handlerton *, THD *, void *)) &dummy_ret_int; - + hton->create_group_by= create_group_by_handler; return 0; } |