diff options
author | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2015-06-25 08:33:15 +0300 |
---|---|---|
committer | Vicentiu Ciorbaru <vicentiu@mariadb.org> | 2015-07-13 16:01:25 +0000 |
commit | c64a3e9290db188c649f4c857352dcb34821d9f1 (patch) | |
tree | 28130ad44d06071bc8a4a3d73ff09b128d8cf752 | |
parent | 33745a407b3557f2914ebf8119e8ea3435481cde (diff) | |
download | mariadb-git-10.1-window.tar.gz |
Initial window func attempt10.1-window
-rw-r--r-- | mysql-test/t/win.test | 18 | ||||
-rw-r--r-- | sql/item_windowfunc.cc | 19 | ||||
-rw-r--r-- | sql/item_windowfunc.h | 35 | ||||
-rw-r--r-- | sql/protocol.cc | 2 | ||||
-rw-r--r-- | sql/sql_select.cc | 152 | ||||
-rw-r--r-- | sql/sql_select.h | 1 | ||||
-rw-r--r-- | sql/sql_window.h | 8 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 2 |
8 files changed, 221 insertions, 16 deletions
diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test new file mode 100644 index 00000000000..1276eddca3d --- /dev/null +++ b/mysql-test/t/win.test @@ -0,0 +1,18 @@ +create table t1(a int, b int, x char(32)); +insert into t1 values (2, 10, 'xx'); +insert into t1 values (2, 10, 'zz'); +insert into t1 values (2, 20, 'yy'); +insert into t1 values (3, 10, 'xxx'); +insert into t1 values (3, 20, 'vvv'); +# Uncommenting this line causes a crash in setup_group when executing the second +# select. +#select row_number() over (order by b) from t1; +select a, b, x, row_number() over (partition by a,b order by x), + row_number() over (partition by a), + row_number() over (partition by a order by x) + +from t1; +# Uncommenting this line causes a crash in filesort during init_for_filesort. +#select a, b, x, row_number() over (partition by a order by x) from t1; + +drop table t1; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 273a0831116..3d3cea59450 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -1,4 +1,7 @@ #include "item_windowfunc.h" +#include "my_dbug.h" +#include "my_global.h" +#include "sql_select.h" // test if group changed bool Item_window_func::fix_fields(THD *thd, Item **ref) @@ -8,6 +11,22 @@ Item_window_func::fix_fields(THD *thd, Item **ref) if (window_func->fix_fields(thd, ref)) return TRUE; + for (ORDER * curr = window_spec->partition_list.first; curr; curr=curr->next) { + curr->item_ptr->fix_fields(thd, curr->item); + Cached_item *tmp= new_Cached_item(thd, curr->item_ptr, TRUE); + partition_fields.push_back(tmp); + } + fixed= 1; return FALSE; } + +void Item_window_func::advance_window() { + + int changed = test_if_group_changed(partition_fields); + + if (changed > -1) { + window_func->clear(); + } + window_func->add(); +} diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index b2de0e9ca00..70f8038e75c 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -3,19 +3,25 @@ #include "my_global.h" #include "item.h" +#include "sql_window.h" -class Window_spec; class Item_sum_row_number: public Item_sum_int { longlong count; - void clear() {} - bool add() { return false; } - void update_field() {} - public: +public: + void clear() { + count= 0; + } + bool add() + { + count++; + return false; + } + Item_sum_row_number() : Item_sum_int(), count(0) {} @@ -24,11 +30,17 @@ class Item_sum_row_number: public Item_sum_int return ROW_NUMBER_FUNC; } + longlong val_int() + { + return count; + } + const char*func_name() const { return "row_number"; } - + + void update_field() {} }; class Item_sum_rank: public Item_sum_int @@ -136,16 +148,21 @@ class Item_sum_cume_dist: public Item_sum_num class Item_window_func : public Item_result_field { private: + List<Cached_item> partition_fields; +public: Item_sum *window_func; LEX_STRING *window_name; Window_spec *window_spec; -public: Item_window_func(Item_sum *win_func, LEX_STRING *win_name) - : window_func(win_func), window_name(win_name), window_spec(NULL) {} + : window_func(win_func), window_name(win_name), window_spec(NULL) { + } Item_window_func(Item_sum *win_func, Window_spec *win_spec) - : window_func(win_func), window_name(NULL), window_spec(win_spec) {} + : window_func(win_func), window_name(NULL), window_spec(win_spec) { + } + + void advance_window(); enum Item::Type type() const { return Item::WINDOW_FUNC_ITEM; } diff --git a/sql/protocol.cc b/sql/protocol.cc index 5970568b66c..09df0ed1b3d 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -30,6 +30,8 @@ #include "unireg.h" // REQUIRED: for other includes #include "protocol.h" #include "sql_class.h" // THD +#include "sql_window.h" +#include "item_windowfunc.h" #include <stdarg.h> static const unsigned int PACKET_BUFFER_EXTRA_ALLOC= 1024; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 3b1940fcb9f..f058eb11317 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -54,6 +54,7 @@ #include "sql_derived.h" #include "sql_statistics.h" #include "sql_window.h" +#include "item_windowfunc.h" #include "debug_sync.h" // DEBUG_SYNC #include <m_ctype.h> @@ -172,7 +173,6 @@ end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); static enum_nested_loop_state end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); -static int test_if_group_changed(List<Cached_item> &list); static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos); static int join_read_system(JOIN_TAB *tab); static int join_read_const(JOIN_TAB *tab); @@ -2406,6 +2406,18 @@ void JOIN::exec() ); } +bool test_if_order_compatible(SQL_I_List<ORDER> &a, SQL_I_List<ORDER> &b) +{ + for (ORDER *curr_a = a.first, *curr_b = b.first; curr_a && curr_b; + curr_a=curr_a->next, curr_b=curr_b->next) + { + // Test if the items are the same. + if (!curr_a->item_ptr->eq(curr_b->item_ptr, TRUE)) { + return false; + } + } + return true; +} /** Exec select. @@ -3092,6 +3104,132 @@ void JOIN::exec_inner() curr_join->fields= curr_fields_list; curr_join->procedure= procedure; + + /* + TODO Get this code to set can_compute_window_function during preparation, + not during execution. + + The reason for this is the following: + Our single scan optimization for window functions without tmp table, + is valid, if and only if, we only need to perform one sorting operation, + via filesort. The cases where we need to perform one sorting operation only: + + * A select with only one window function. + * A select with multiple window functions, but they must have their + partition and order by clauses compatible. This means that one ordering + is acceptable for both window functions. + + For example: + partition by a, b, c; order by d, e results in sorting by a b c d e. + partition by a; order by d results in sorting by a d. + + This kind of sorting is compatible. The less specific partition does + not care for the order of b and c columns so it is valid if we sort + by those in case of equality over a. + + partition by a, b; order by d, e results in sorting by a b d e + partition by a; order by e results in sorting by a e + + This sorting is incompatible due to the order by clause. The partition by + clause is compatible, (partition by a) is a prefix for (partition by a, b) + However, order by e is not a prefix for order by d, e, thus it is not + compatible. + + The rule for having compatible sorting is thus: + Each partition order must contain the other window functions partitions + prefixes, or be a prefix itself. This must hold true for all partitions. + Analog for the order by clause. + + */ + + List<Item_window_func> window_functions; + SQL_I_List<ORDER> largest_partition; + SQL_I_List<ORDER> largest_order_by; + List_iterator_fast<Item> it(*curr_fields_list); + bool can_compute_window_live = !need_tmp; + + Item *item; + // Construct the window_functions item list and check if they can be + // computed using only one sorting. + // + // TODO: Perhaps group functions into compatible sorting bins + // to minimize the number of sorting passes required to compute all of them. + while ((item= it++)) + { + if (item->type() == Item::WINDOW_FUNC_ITEM) + { + Item_window_func *item_win = (Item_window_func *) item; + window_functions.push_back(item_win); + if (!can_compute_window_live) + continue; // No point checking since we have to perform multiple sorts. + Window_spec *spec = item_win->window_spec; + // Having an empty partition list on one window function and a + // not empty list on a separate window function causes the sorting + // to be incompatible. + // + // Example: + // over (partition by a, order by x) && over (order by x). + // + // The first function requires an ordering by a first and then by x, + // while the seond function requires an ordering by x first. + // The same restriction is not required for the order by clause. + if (largest_partition.elements && !spec->partition_list.elements) + { + can_compute_window_live= FALSE; + continue; + } + can_compute_window_live= test_if_order_compatible(largest_partition, + spec->partition_list); + if (!can_compute_window_live) + continue; + + can_compute_window_live= test_if_order_compatible(largest_order_by, + spec->order_list); + if (!can_compute_window_live) + continue; + + if (largest_partition.elements < spec->partition_list.elements) + largest_partition = spec->partition_list; + if (largest_order_by.elements < spec->order_list.elements) + largest_order_by = spec->order_list; + } + } + + if (can_compute_window_live && window_functions.elements && table_count == 1) + { + ha_rows examined_rows = 0; + ha_rows found_rows = 0; + ha_rows filesort_retval; + SORT_FIELD *s_order= (SORT_FIELD *) my_malloc(sizeof(SORT_FIELD) * + (largest_partition.elements + largest_order_by.elements) + 1, + MYF(MY_WME | MY_ZEROFILL | MY_THREAD_SPECIFIC)); + + size_t pos= 0; + for (ORDER* curr = largest_partition.first; curr; curr=curr->next, pos++) + s_order[pos].item = *curr->item; + + for (ORDER* curr = largest_order_by.first; curr; curr=curr->next, pos++) + s_order[pos].item = *curr->item; + + table[0]->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), + MYF(MY_WME | MY_ZEROFILL| + MY_THREAD_SPECIFIC)); + + + filesort_retval= filesort(thd, table[0], s_order, + (largest_partition.elements + largest_order_by.elements), + this->select, HA_POS_ERROR, FALSE, + &examined_rows, &found_rows, + this->explain->ops_tracker.report_sorting(thd)); + table[0]->sort.found_records= filesort_retval; + + join_tab->read_first_record = join_init_read_record; + join_tab->records= found_rows; + + my_free(s_order); + + } + THD_STAGE_INFO(thd, stage_sending_data); DBUG_PRINT("info", ("%s", thd->proc_info)); result->send_result_set_metadata((procedure ? curr_join->procedure_fields_list : @@ -17665,6 +17803,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) { List<Item> *columns_list= (procedure ? &join->procedure_fields_list : fields); + rc= join->result->send_data(*columns_list) > 0; } } @@ -19130,6 +19269,15 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_ENTER("end_send"); if (!end_of_records) { + /* Advance window functions to the next row before sending their values. */ + List_iterator_fast<Item> it(*join->fields); + for (Item *item= it++; item; item= it++) + { + if (item->type() == Item::WINDOW_FUNC_ITEM) + { + ((Item_window_func *) item)->advance_window(); + } + } if (join->table_count && (join->join_tab->is_using_loose_index_scan() || /* @@ -22309,7 +22457,7 @@ int test_if_item_cache_changed(List<Cached_item> &list) -static int +int test_if_group_changed(List<Cached_item> &list) { DBUG_ENTER("test_if_group_changed"); diff --git a/sql/sql_select.h b/sql/sql_select.h index 9cb7e0e61b1..a0dfd42e88b 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1944,5 +1944,6 @@ ulong check_selectivity(THD *thd, TABLE *table, List<COND_STATISTIC> *conds); +int test_if_group_changed(List<Cached_item> &list); #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_window.h b/sql/sql_window.h index b300c13136f..e6a6e6b1e3f 100644 --- a/sql/sql_window.h +++ b/sql/sql_window.h @@ -1,7 +1,7 @@ - #ifndef SQL_WINDOW_INCLUDED #define SQL_WINDOW_INCLUDED + #include "my_global.h" #include "item.h" @@ -9,7 +9,7 @@ class Window_frame_bound : public Sql_alloc { public: - + enum Bound_precedence_type { PRECEDING, @@ -25,7 +25,7 @@ public: class Window_frame : public Sql_alloc { - + public: enum Frame_units @@ -71,7 +71,7 @@ class Window_spec : public Sql_alloc Window_frame *window_frame; - Window_spec(LEX_STRING *win_ref, + Window_spec(LEX_STRING *win_ref, SQL_I_List<ORDER> part_list, SQL_I_List<ORDER> ord_list, Window_frame *win_frame) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 8546da866a7..35521abf1a2 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1010,7 +1010,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); Currently there are 168 shift/reduce conflicts. We should not introduce new conflicts any more. */ -%expect 168 +%expect 167 /* Comments for TOKENS. |