MDEV-6080: Allowing storage engine to shortcut group by queries

This task is to allow storage engines that can execute GROUP BY or summary queries efficiently to intercept a full query or sub query from MariaDB and deliver the result either to the client or to a temporary table for further processing. - Added code in sql_select.cc to intercept GROUP BY queries. Creation of group_by_handler is done after all optimizations to allow storage engine to benefit of an optimized WHERE clause and suggested indexes to use. - Added group by handler to sequence engine and a group_by test suite as a way to test the new interface. - Intercept EXPLAIN with a message "Storage engine handles GROUP BY" libmysqld/CMakeLists.txt: Added new group_by_handler files sql/CMakeLists.txt: Added new group_by_handler files sql/group_by_handler.cc: Implementation of group_by_handler functions sql/group_by_handler.h: Definition of group_by_handler class sql/handler.h: Added handlerton function to create a group_by_handler, if the storage engine can intercept the query. sql/item_cmpfunc.cc: Allow one to evaluate item_equal any time. sql/sql_select.cc: Added code to intercept GROUP BY queries - If all tables are from the same storage engine and the query is using sum functions, call create_group_by() to check if the storage engine can intercept the query. - If yes: - create a temporary table to hold a GROUP_BY row or result - In do_select() intercept normal query execution by instead calling the group_by_handler to get the result - Intercept EXPLAIN sql/sql_select.h: Added handling of group_by_handler Added caching of the original join tab (needed for cleanup after group_by handler) storage/sequence/mysql-test/sequence/group_by.result: Test group_by_handler interface storage/sequence/mysql-test/sequence/group_by.test: Test group_by_handler interface storage/sequence/sequence.cc: Added simple group_by_engine for handling COUNT(*) and SUM(primary_key). This was done as a test of the group_by_handler interface
author: Monty <monty@mariadb.org> 2015-10-02 10:18:27 +0200
committer: Sergei Golubchik <serg@mariadb.org> 2015-10-05 17:14:14 +0200
commit: cf50e13fbd1b4d27a3542fe2751216d274eb7493 (patch)
tree: 2ff477367c13e63a50c2e13b00e0be79d65bc646 /storage/sequence
parent: d8df2b946442e6f4bd7dd73570a603ae3e2d21f0 (diff)
download: mariadb-git-cf50e13fbd1b4d27a3542fe2751216d274eb7493.tar.gz
3 files changed, 293 insertions, 2 deletions
diff --git a/storage/sequence/mysql-test/sequence/group_by.result b/storage/sequence/mysql-test/sequence/group_by.result
new file mode 100644
index 00000000000..86bb158d9fc
--- /dev/null
+++ b/storage/sequence/mysql-test/sequence/group_by.result
@@ -0,0 +1,88 @@
+show create table seq_1_to_15_step_2;
+Table	Create Table
+seq_1_to_15_step_2	CREATE TABLE `seq_1_to_15_step_2` (
+  `seq` bigint(20) unsigned NOT NULL,
+  PRIMARY KEY (`seq`)
+) ENGINE=SEQUENCE DEFAULT CHARSET=latin1
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+count(seq)	sum(seq)	1
+8	64	1
+#
+# The engine should be able to optimize the following requests
+#
+select count(*) from seq_1_to_15_step_2;
+count(*)
+8
+explain select count(*) from seq_1_to_15_step_2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select count(seq) from seq_1_to_15_step_2;
+count(seq)
+8
+explain select count(seq) from seq_1_to_15_step_2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select sum(seq) from seq_1_to_15_step_2;
+sum(seq)
+64
+explain select sum(seq) from seq_1_to_15_step_2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select count(seq),sum(seq) from seq_1_to_15_step_2;
+count(seq)	sum(seq)
+8	64
+explain select count(seq),sum(seq) from seq_1_to_15_step_2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+c
+8
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+c
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select distinct count(*) from seq_1_to_15_step_2;
+count(*)
+8
+explain select distinct count(*) from seq_1_to_15_step_2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Storage engine handles GROUP BY
+select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+seq	count(*)
+1	8
+3	8
+5	8
+7	8
+9	8
+11	8
+13	8
+15	8
+explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	seq_1_to_15_step_2	index	NULL	PRIMARY	8	NULL	#	Using index
+1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	#	Using join buffer (flat, BNL join)
+2	DERIVED	NULL	NULL	NULL	NULL	NULL	NULL	#	Storage engine handles GROUP BY
+#
+# The engine can't optimize the following queries
+#
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+count(seq)	sum(seq)	1
+8	64	1
+explain select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	seq_1_to_15_step_2	index	NULL	PRIMARY	8	NULL	8	Using index
+explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	seq_1_to_15_step_2	index	NULL	PRIMARY	8	NULL	8	Using index
+1	SIMPLE	t2	index	NULL	PRIMARY	8	NULL	8	Using index; Using join buffer (flat, BNL join)
+explain select count(*) from seq_1_to_15_step_2 where seq > 0;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	seq_1_to_15_step_2	index	PRIMARY	PRIMARY	8	NULL	8	Using where; Using index
+explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	seq_1_to_15_step_2	index	NULL	PRIMARY	8	NULL	8	Using index; Using temporary; Using filesort
+drop table seq_1_to_15_step_2;
diff --git a/storage/sequence/mysql-test/sequence/group_by.test b/storage/sequence/mysql-test/sequence/group_by.test
new file mode 100644
index 00000000000..870afd9ed89
--- /dev/null
+++ b/storage/sequence/mysql-test/sequence/group_by.test
@@ -0,0 +1,42 @@
+--source inc.inc
+
+# Check that group by handler forks for the sequence engine.
+# The sequence engine can only optimize queries with COUNT(primary_key) or
+# SUM(primary_key) when there is no GROUP BY.
+
+show create table seq_1_to_15_step_2;
+
+# Get the correct results
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+
+--echo #
+--echo # The engine should be able to optimize the following requests
+--echo #
+select count(*) from seq_1_to_15_step_2;
+explain select count(*) from seq_1_to_15_step_2;
+select count(seq) from seq_1_to_15_step_2;
+explain select count(seq) from seq_1_to_15_step_2;
+select sum(seq) from seq_1_to_15_step_2;
+explain select sum(seq) from seq_1_to_15_step_2;
+select count(seq),sum(seq) from seq_1_to_15_step_2;
+explain select count(seq),sum(seq) from seq_1_to_15_step_2;
+select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 5;
+select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
+select distinct count(*) from seq_1_to_15_step_2;
+explain select distinct count(*) from seq_1_to_15_step_2;
+select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+--replace_column 9 #
+explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
+
+--echo #
+--echo # The engine can't optimize the following queries
+--echo #
+select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+explain select count(seq),sum(seq),1 from seq_1_to_15_step_2;
+explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2;
+explain select count(*) from seq_1_to_15_step_2 where seq > 0;
+explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
+
+drop table seq_1_to_15_step_2;
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index 970ee7ca2f7..d6e1ed7412a 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -23,10 +23,14 @@
 #include <my_config.h>
 #include <ctype.h>
 #include <mysql_version.h>
+#include <item.h>
+#include <item_sum.h>
 #include <handler.h>
 #include <table.h>
 #include <field.h>
 
+handlerton *sequence_hton;
+
 class Sequence_share : public Handler_share {
 public:
   const char *name;
@@ -52,11 +56,11 @@ class ha_seq: public handler
 {
 private:
   THR_LOCK_DATA lock;
-  Sequence_share *seqs;
   Sequence_share *get_share();
   ulonglong cur;
 
 public:
+  Sequence_share *seqs;
   ha_seq(handlerton *hton, TABLE_SHARE *table_arg)
     : handler(hton, table_arg), seqs(0) { }
   ulonglong table_flags() const
@@ -346,9 +350,166 @@ static int discover_table_existence(handlerton *hton, const char *db,
 
 static int dummy_ret_int() { return 0; }
 
+/*****************************************************************************
+  Example of a simple group by handler for queries like:
+  SELECT SUM(seq) from sequence_table;
+
+  This implementation supports SUM() and COUNT() on primary key.
+*****************************************************************************/
+
+class ha_seq_group_by_handler: public group_by_handler
+{
+  bool first_row;
+
+public:
+  ha_seq_group_by_handler(THD *thd, SELECT_LEX *select_lex,
+                          List<Item> *fields,
+                          TABLE_LIST *table_list, ORDER *group_by,
+                          ORDER *order_by, Item *where,
+                          Item *having)
+    :group_by_handler(thd, select_lex, fields, table_list, group_by,
+                      order_by, where, having, sequence_hton)
+  {
+  }
+  ~ha_seq_group_by_handler() {}
+  bool init(TABLE *temporary_table, Item *having_arg,
+            ORDER *order_by_arg);
+  int init_scan() { first_row= 1 ; return 0; }
+  int next_row();
+  int end_scan()  { return 0; }
+  int info(uint flag, ha_statistics *stats);
+};
+
+static group_by_handler *
+create_group_by_handler(THD *thd, SELECT_LEX *select_lex,
+                        List<Item> *fields,
+                        TABLE_LIST *table_list, ORDER *group_by,
+                        ORDER *order_by, Item *where,
+                        Item *having)
+{
+  ha_seq_group_by_handler *handler;
+  Item *item;
+  List_iterator_fast<Item> it(*fields);
+
+  /* check that only one table is used in FROM clause and no sub queries */
+  if (table_list->next_local != 0)
+    return 0;
+  /* check that there is no where clause and no group_by */
+  if (where != 0 || group_by != 0)
+    return 0;
+
+  /*
+    Check that all fields are sum(primary_key) or count(primary_key)
+    For more ways to work with the field list and sum functions, see
+    opt_sum.cc::opt_sum_query().
+  */
+  while ((item= it++))
+  {
+    Item *arg0;
+    Field *field;
+    if (item->type() != Item::SUM_FUNC_ITEM ||
+        (((Item_sum*) item)->sum_func() != Item_sum::SUM_FUNC &&
+         ((Item_sum*) item)->sum_func() != Item_sum::COUNT_FUNC))
+
+      return 0;                                  // Not a SUM() function
+    arg0= ((Item_sum*) item)->get_arg(0);
+    if (arg0->type() != Item::FIELD_ITEM)
+    {
+      if ((((Item_sum*) item)->sum_func() == Item_sum::COUNT_FUNC) &&
+          arg0->basic_const_item())
+        continue;                               // Allow count(1)
+      return 0;
+    }
+    field= ((Item_field*) arg0)->field;
+    /*
+      Check that we are using the sequence table (the only table in the FROM
+      clause) and not an outer table.
+    */
+    if (field->table != table_list->table)
+      return 0;
+    /* Check that we are using a SUM() on the primary key */
+    if (strcmp(field->field_name, "seq"))
+      return 0;
+  }
+
+  /* Create handler and return it */
+  handler= new ha_seq_group_by_handler(thd, select_lex, fields, table_list,
+                                       group_by,
+                                       order_by, where, having);
+  return handler;
+}
+
+bool ha_seq_group_by_handler::init(TABLE *temporary_table, Item *having_arg,
+                                  ORDER *order_by_arg)
+{
+  /*
+    Here we could add checks if the temporary table was created correctly
+  */
+  return group_by_handler::init(temporary_table, having_arg, order_by_arg);
+}
+
+
+int ha_seq_group_by_handler::info(uint flag, ha_statistics *stats)
+{
+  bzero(stats, sizeof(*stats));
+  /* We only return one records for a SUM(*) without a group by */
+  stats->records= 1;
+  return 0;
+}
+
+int ha_seq_group_by_handler::next_row()
+{
+  List_iterator_fast<Item> it(*fields);
+  Item_sum *item_sum;
+  Sequence_share *seqs= ((ha_seq*) table_list->table->file)->seqs;
+  DBUG_ENTER("ha_seq_group_by_handler");
+
+  /*
+    Check if this is the first call to the function. If not, we have already
+    returned all data.
+  */
+  if (!first_row)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  first_row= 0;
+
+  /* Pointer to first field in temporary table where we should store summary*/
+  Field **field_ptr= table->field;
+  ulonglong elements= (seqs->to - seqs->from + seqs->step - 1) / seqs->step;
+
+  while ((item_sum= (Item_sum*) it++))
+  {
+    Field *field= *(field_ptr++);
+    switch (item_sum->sum_func()) {
+    case Item_sum::COUNT_FUNC:
+    {
+      field->store((longlong) elements, 1);
+      break;
+    }
+    case Item_sum::SUM_FUNC:
+    {
+      /* Calculate SUM(f, f+step, f+step*2 ... to) */
+      ulonglong sum;
+      sum= seqs->from * elements + seqs->step * (elements*elements-elements)/2;
+      field->store((longlong) sum, 1);
+      break;
+    }
+    default:
+      DBUG_ASSERT(0);
+    }
+    field->set_notnull();
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*****************************************************************************
+  Initialize the interface between the sequence engine and MariaDB
+*****************************************************************************/
+
 static int init(void *p)
 {
   handlerton *hton= (handlerton *)p;
+  sequence_hton= hton;
   hton->create= create_handler;
   hton->discover_table= discover_table;
   hton->discover_table_existence= discover_table_existence;
@@ -356,7 +517,7 @@ static int init(void *p)
    (int (*)(handlerton *, THD *, bool)) &dummy_ret_int;
   hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release=
    (int  (*)(handlerton *, THD *, void *)) &dummy_ret_int;
-    
+  hton->create_group_by= create_group_by_handler;
   return 0;
 }
author	Monty <monty@mariadb.org>	2015-10-02 10:18:27 +0200
committer	Sergei Golubchik <serg@mariadb.org>	2015-10-05 17:14:14 +0200
commit	cf50e13fbd1b4d27a3542fe2751216d274eb7493 (patch)
tree	2ff477367c13e63a50c2e13b00e0be79d65bc646 /storage/sequence
parent	d8df2b946442e6f4bd7dd73570a603ae3e2d21f0 (diff)
download	mariadb-git-cf50e13fbd1b4d27a3542fe2751216d274eb7493.tar.gz