summaryrefslogtreecommitdiff
path: root/storage/sequence
diff options
context:
space:
mode:
authorMonty <monty@mariadb.org>2015-10-02 10:18:27 +0200
committerSergei Golubchik <serg@mariadb.org>2015-10-05 17:14:14 +0200
commitcf50e13fbd1b4d27a3542fe2751216d274eb7493 (patch)
tree2ff477367c13e63a50c2e13b00e0be79d65bc646 /storage/sequence
parentd8df2b946442e6f4bd7dd73570a603ae3e2d21f0 (diff)
downloadmariadb-git-cf50e13fbd1b4d27a3542fe2751216d274eb7493.tar.gz
MDEV-6080: Allowing storage engine to shortcut group by queries
This task is to allow storage engines that can execute GROUP BY or summary queries efficiently to intercept a full query or sub query from MariaDB and deliver the result either to the client or to a temporary table for further processing. - Added code in sql_select.cc to intercept GROUP BY queries. Creation of group_by_handler is done after all optimizations to allow storage engine to benefit of an optimized WHERE clause and suggested indexes to use. - Added group by handler to sequence engine and a group_by test suite as a way to test the new interface. - Intercept EXPLAIN with a message "Storage engine handles GROUP BY" libmysqld/CMakeLists.txt: Added new group_by_handler files sql/CMakeLists.txt: Added new group_by_handler files sql/group_by_handler.cc: Implementation of group_by_handler functions sql/group_by_handler.h: Definition of group_by_handler class sql/handler.h: Added handlerton function to create a group_by_handler, if the storage engine can intercept the query. sql/item_cmpfunc.cc: Allow one to evaluate item_equal any time. sql/sql_select.cc: Added code to intercept GROUP BY queries - If all tables are from the same storage engine and the query is using sum functions, call create_group_by() to check if the storage engine can intercept the query. - If yes: - create a temporary table to hold a GROUP_BY row or result - In do_select() intercept normal query execution by instead calling the group_by_handler to get the result - Intercept EXPLAIN sql/sql_select.h: Added handling of group_by_handler Added caching of the original join tab (needed for cleanup after group_by handler) storage/sequence/mysql-test/sequence/group_by.result: Test group_by_handler interface storage/sequence/mysql-test/sequence/group_by.test: Test group_by_handler interface storage/sequence/sequence.cc: Added simple group_by_engine for handling COUNT(*) and SUM(primary_key). This was done as a test of the group_by_handler interface
Diffstat (limited to 'storage/sequence')
-rw-r--r--storage/sequence/mysql-test/sequence/group_by.result88
-rw-r--r--storage/sequence/mysql-test/sequence/group_by.test42
-rw-r--r--storage/sequence/sequence.cc165
3 files changed, 293 insertions, 2 deletions
diff --git a/storage/sequence/mysql-test/sequence/group_by.result b/storage/sequence/mysql-test/sequence/group_by.result
new file mode 100644
index 00000000000..86bb158d9fc
--- /dev/null
+++ b/storage/sequence/mysql-test/sequence/group_by.result
@@ -0,0 +1,88 @@
+show create table seq_1_to_15_step_2;
+Table Create Table
+seq_1_to_15_step_2 CREATE TABLE `seq_1_to_15_step_2` (
+ `seq` bigint(20) unsigned NOT NULL,
+ PRIMARY KEY (`seq`)
+) ENGINE=SEQUENCE DEFAULT CHARSET=latin1
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+count(seq) sum(seq) 1
+8 64 1
+#
+# The engine should be able to optimize the following requests
+#
+select count(*) from seq_1_to_15_step_2;
+count(*)
+8
+explain select count(*) from seq_1_to_15_step_2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select count(seq) from seq_1_to_15_step_2;
+count(seq)
+8
+explain select count(seq) from seq_1_to_15_step_2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select sum(seq) from seq_1_to_15_step_2;
+sum(seq)
+64
+explain select sum(seq) from seq_1_to_15_step_2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select count(seq),sum(seq) from seq_1_to_15_step_2;
+count(seq) sum(seq)
+8 64
+explain select count(seq),sum(seq) from seq_1_to_15_step_2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+c
+8
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+c
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select distinct count(*) from seq_1_to_15_step_2;
+count(*)
+8
+explain select distinct count(*) from seq_1_to_15_step_2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
+select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+seq count(*)
+1 8
+3 8
+5 8
+7 8
+9 8
+11 8
+13 8
+15 8
+explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY seq_1_to_15_step_2 index NULL PRIMARY 8 NULL # Using index
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join)
+2 DERIVED NULL NULL NULL NULL NULL NULL # Storage engine handles GROUP BY
+#
+# The engine can't optimize the following queries
+#
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+count(seq) sum(seq) 1
+8 64 1
+explain select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index
+explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index
+1 SIMPLE t2 index NULL PRIMARY 8 NULL 8 Using index; Using join buffer (flat, BNL join)
+explain select count(*) from seq_1_to_15_step_2 where seq > 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE seq_1_to_15_step_2 index PRIMARY PRIMARY 8 NULL 8 Using where; Using index
+explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index; Using temporary; Using filesort
+drop table seq_1_to_15_step_2;
diff --git a/storage/sequence/mysql-test/sequence/group_by.test b/storage/sequence/mysql-test/sequence/group_by.test
new file mode 100644
index 00000000000..870afd9ed89
--- /dev/null
+++ b/storage/sequence/mysql-test/sequence/group_by.test
@@ -0,0 +1,42 @@
+--source inc.inc
+
+# Check that group by handler forks for the sequence engine.
+# The sequence engine can only optimize queries with COUNT(primary_key) or
+# SUM(primary_key) when there is no GROUP BY.
+
+show create table seq_1_to_15_step_2;
+
+# Get the correct results
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+
+--echo #
+--echo # The engine should be able to optimize the following requests
+--echo #
+select count(*) from seq_1_to_15_step_2;
+explain select count(*) from seq_1_to_15_step_2;
+select count(seq) from seq_1_to_15_step_2;
+explain select count(seq) from seq_1_to_15_step_2;
+select sum(seq) from seq_1_to_15_step_2;
+explain select sum(seq) from seq_1_to_15_step_2;
+select count(seq),sum(seq) from seq_1_to_15_step_2;
+explain select count(seq),sum(seq) from seq_1_to_15_step_2;
+select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+select distinct count(*) from seq_1_to_15_step_2;
+explain select distinct count(*) from seq_1_to_15_step_2;
+select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+--replace_column 9 #
+explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+
+--echo #
+--echo # The engine can't optimize the following queries
+--echo #
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+explain select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2;
+explain select count(*) from seq_1_to_15_step_2 where seq > 0;
+explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
+
+drop table seq_1_to_15_step_2;
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index 970ee7ca2f7..d6e1ed7412a 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -23,10 +23,14 @@
#include <my_config.h>
#include <ctype.h>
#include <mysql_version.h>
+#include <item.h>
+#include <item_sum.h>
#include <handler.h>
#include <table.h>
#include <field.h>
+handlerton *sequence_hton;
+
class Sequence_share : public Handler_share {
public:
const char *name;
@@ -52,11 +56,11 @@ class ha_seq: public handler
{
private:
THR_LOCK_DATA lock;
- Sequence_share *seqs;
Sequence_share *get_share();
ulonglong cur;
public:
+ Sequence_share *seqs;
ha_seq(handlerton *hton, TABLE_SHARE *table_arg)
: handler(hton, table_arg), seqs(0) { }
ulonglong table_flags() const
@@ -346,9 +350,166 @@ static int discover_table_existence(handlerton *hton, const char *db,
static int dummy_ret_int() { return 0; }
+/*****************************************************************************
+ Example of a simple group by handler for queries like:
+ SELECT SUM(seq) from sequence_table;
+
+ This implementation supports SUM() and COUNT() on primary key.
+*****************************************************************************/
+
+class ha_seq_group_by_handler: public group_by_handler
+{
+ bool first_row;
+
+public:
+ ha_seq_group_by_handler(THD *thd, SELECT_LEX *select_lex,
+ List<Item> *fields,
+ TABLE_LIST *table_list, ORDER *group_by,
+ ORDER *order_by, Item *where,
+ Item *having)
+ :group_by_handler(thd, select_lex, fields, table_list, group_by,
+ order_by, where, having, sequence_hton)
+ {
+ }
+ ~ha_seq_group_by_handler() {}
+ bool init(TABLE *temporary_table, Item *having_arg,
+ ORDER *order_by_arg);
+ int init_scan() { first_row= 1 ; return 0; }
+ int next_row();
+ int end_scan() { return 0; }
+ int info(uint flag, ha_statistics *stats);
+};
+
+static group_by_handler *
+create_group_by_handler(THD *thd, SELECT_LEX *select_lex,
+ List<Item> *fields,
+ TABLE_LIST *table_list, ORDER *group_by,
+ ORDER *order_by, Item *where,
+ Item *having)
+{
+ ha_seq_group_by_handler *handler;
+ Item *item;
+ List_iterator_fast<Item> it(*fields);
+
+ /* check that only one table is used in FROM clause and no sub queries */
+ if (table_list->next_local != 0)
+ return 0;
+ /* check that there is no where clause and no group_by */
+ if (where != 0 || group_by != 0)
+ return 0;
+
+ /*
+ Check that all fields are sum(primary_key) or count(primary_key)
+ For more ways to work with the field list and sum functions, see
+ opt_sum.cc::opt_sum_query().
+ */
+ while ((item= it++))
+ {
+ Item *arg0;
+ Field *field;
+ if (item->type() != Item::SUM_FUNC_ITEM ||
+ (((Item_sum*) item)->sum_func() != Item_sum::SUM_FUNC &&
+ ((Item_sum*) item)->sum_func() != Item_sum::COUNT_FUNC))
+
+ return 0; // Not a SUM() function
+ arg0= ((Item_sum*) item)->get_arg(0);
+ if (arg0->type() != Item::FIELD_ITEM)
+ {
+ if ((((Item_sum*) item)->sum_func() == Item_sum::COUNT_FUNC) &&
+ arg0->basic_const_item())
+ continue; // Allow count(1)
+ return 0;
+ }
+ field= ((Item_field*) arg0)->field;
+ /*
+ Check that we are using the sequence table (the only table in the FROM
+ clause) and not an outer table.
+ */
+ if (field->table != table_list->table)
+ return 0;
+ /* Check that we are using a SUM() on the primary key */
+ if (strcmp(field->field_name, "seq"))
+ return 0;
+ }
+
+ /* Create handler and return it */
+ handler= new ha_seq_group_by_handler(thd, select_lex, fields, table_list,
+ group_by,
+ order_by, where, having);
+ return handler;
+}
+
+bool ha_seq_group_by_handler::init(TABLE *temporary_table, Item *having_arg,
+ ORDER *order_by_arg)
+{
+ /*
+ Here we could add checks if the temporary table was created correctly
+ */
+ return group_by_handler::init(temporary_table, having_arg, order_by_arg);
+}
+
+
+int ha_seq_group_by_handler::info(uint flag, ha_statistics *stats)
+{
+ bzero(stats, sizeof(*stats));
+ /* We only return one records for a SUM(*) without a group by */
+ stats->records= 1;
+ return 0;
+}
+
+int ha_seq_group_by_handler::next_row()
+{
+ List_iterator_fast<Item> it(*fields);
+ Item_sum *item_sum;
+ Sequence_share *seqs= ((ha_seq*) table_list->table->file)->seqs;
+ DBUG_ENTER("ha_seq_group_by_handler");
+
+ /*
+ Check if this is the first call to the function. If not, we have already
+ returned all data.
+ */
+ if (!first_row)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ first_row= 0;
+
+ /* Pointer to first field in temporary table where we should store summary*/
+ Field **field_ptr= table->field;
+ ulonglong elements= (seqs->to - seqs->from + seqs->step - 1) / seqs->step;
+
+ while ((item_sum= (Item_sum*) it++))
+ {
+ Field *field= *(field_ptr++);
+ switch (item_sum->sum_func()) {
+ case Item_sum::COUNT_FUNC:
+ {
+ field->store((longlong) elements, 1);
+ break;
+ }
+ case Item_sum::SUM_FUNC:
+ {
+ /* Calculate SUM(f, f+step, f+step*2 ... to) */
+ ulonglong sum;
+ sum= seqs->from * elements + seqs->step * (elements*elements-elements)/2;
+ field->store((longlong) sum, 1);
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ field->set_notnull();
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*****************************************************************************
+ Initialize the interface between the sequence engine and MariaDB
+*****************************************************************************/
+
static int init(void *p)
{
handlerton *hton= (handlerton *)p;
+ sequence_hton= hton;
hton->create= create_handler;
hton->discover_table= discover_table;
hton->discover_table_existence= discover_table_existence;
@@ -356,7 +517,7 @@ static int init(void *p)
(int (*)(handlerton *, THD *, bool)) &dummy_ret_int;
hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release=
(int (*)(handlerton *, THD *, void *)) &dummy_ret_int;
-
+ hton->create_group_by= create_group_by_handler;
return 0;
}