diff options
author | Galina Shalygina <galashalygina@gmail.com> | 2016-05-08 23:04:41 +0300 |
---|---|---|
committer | Galina Shalygina <galashalygina@gmail.com> | 2016-05-08 23:04:41 +0300 |
commit | be1d06c8a5f843e775374e5ec148aaee56970bdc (patch) | |
tree | bd7a95e771ca3b405583dccab8b468dd6fb4509f /sql | |
parent | e09b1f2a226bf2763b211f74908a6486b83ebed1 (diff) | |
download | mariadb-git-be1d06c8a5f843e775374e5ec148aaee56970bdc.tar.gz |
Merge branch '10.2' into 10.2-mdev9864
Diffstat (limited to 'sql')
88 files changed, 7780 insertions, 3032 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 6136c39fc9c..062e59bee5b 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -123,7 +123,7 @@ SET (SQL_SOURCE sql_profile.cc event_parse_data.cc sql_alter.cc sql_signal.cc rpl_handler.cc mdl.cc sql_admin.cc transaction.cc sys_vars.cc sql_truncate.cc datadict.cc - sql_reload.cc sql_cmd.h item_inetfunc.cc + sql_reload.cc sql_cmd.h item_inetfunc.cc # added in MariaDB: sql_explain.h sql_explain.cc @@ -138,6 +138,7 @@ SET (SQL_SOURCE my_json_writer.cc my_json_writer.h rpl_gtid.cc rpl_parallel.cc sql_type.cc sql_type.h + item_windowfunc.cc sql_window.cc sql_cte.cc sql_cte.h ${WSREP_SOURCES} table_cache.cc encryption.cc diff --git a/sql/derror.cc b/sql/derror.cc index bc4b89493aa..5a1bee23f4a 100644 --- a/sql/derror.cc +++ b/sql/derror.cc @@ -30,16 +30,19 @@ #include "derror.h" // read_texts #include "sql_class.h" // THD +uint errors_per_range[MAX_ERROR_RANGES+1]; + static bool check_error_mesg(const char *file_name, const char **errmsg); static void init_myfunc_errs(void); C_MODE_START -static const char **get_server_errmsgs() +static const char **get_server_errmsgs(int nr) { + int section= (nr-ER_ERROR_FIRST) / ERRORS_PER_RANGE; if (!current_thd) - return DEFAULT_ERRMSGS; - return CURRENT_THD_ERRMSGS; + return DEFAULT_ERRMSGS[section]; + return CURRENT_THD_ERRMSGS[section]; } C_MODE_END @@ -60,61 +63,88 @@ C_MODE_END TRUE Error */ +static const char ***original_error_messages; + bool init_errmessage(void) { - const char **errmsgs, **ptr, **org_errmsgs; + const char **errmsgs; bool error= FALSE; DBUG_ENTER("init_errmessage"); - /* - Get a pointer to the old error messages pointer array. - read_texts() tries to free it. - */ - org_errmsgs= my_error_unregister(ER_ERROR_FIRST, ER_ERROR_LAST); + free_error_messages(); + my_free(original_error_messages); + original_error_messages= 0; + + error_message_charset_info= system_charset_info; /* Read messages from file. 
*/ if (read_texts(ERRMSG_FILE, my_default_lc_messages->errmsgs->language, - &errmsgs, ER_ERROR_LAST - ER_ERROR_FIRST + 1) && - !errmsgs) + &original_error_messages)) { - my_free(errmsgs); - - if (org_errmsgs) - { - /* Use old error messages */ - errmsgs= org_errmsgs; - } - else + /* + No error messages. Create a temporary empty error message so + that we don't get a crash if some code wrongly tries to access + a non existing error message. + */ + if (!(original_error_messages= (const char***) + my_malloc(MAX_ERROR_RANGES * sizeof(char**) + + (ERRORS_PER_RANGE * sizeof(char*)), + MYF(0)))) + DBUG_RETURN(TRUE); + errmsgs= (const char**) (original_error_messages + MAX_ERROR_RANGES); + + for (uint i=0 ; i < MAX_ERROR_RANGES ; i++) { - /* - No error messages. Create a temporary empty error message so - that we don't get a crash if some code wrongly tries to access - a non existing error message. - */ - if (!(errmsgs= (const char**) my_malloc((ER_ERROR_LAST-ER_ERROR_FIRST+1)* - sizeof(char*), MYF(0)))) - DBUG_RETURN(TRUE); - for (ptr= errmsgs; ptr < errmsgs + ER_ERROR_LAST - ER_ERROR_FIRST; ptr++) - *ptr= ""; - error= TRUE; + original_error_messages[i]= errmsgs; + errors_per_range[i]= ERRORS_PER_RANGE; } + errors_per_range[2]= 0; // MYSYS error messages + + for (const char **ptr= errmsgs; + ptr < errmsgs + ERRORS_PER_RANGE ; + ptr++) + *ptr= ""; + + error= TRUE; } - else - my_free(org_errmsgs); // Free old language /* Register messages for use with my_error(). 
*/ - if (my_error_register(get_server_errmsgs, ER_ERROR_FIRST, ER_ERROR_LAST)) + for (uint i=0 ; i < MAX_ERROR_RANGES ; i++) { - my_free(errmsgs); - DBUG_RETURN(TRUE); + if (errors_per_range[i]) + { + if (my_error_register(get_server_errmsgs, (i+1)*ERRORS_PER_RANGE, + (i+1)*ERRORS_PER_RANGE + + errors_per_range[i]-1)) + { + my_free(original_error_messages); + original_error_messages= 0; + DBUG_RETURN(TRUE); + } + } } - - DEFAULT_ERRMSGS= errmsgs; /* Init global variable */ + DEFAULT_ERRMSGS= original_error_messages; init_myfunc_errs(); /* Init myfunc messages */ DBUG_RETURN(error); } +void free_error_messages() +{ + /* We don't need to free errmsg as it's done in cleanup_errmsg */ + for (uint i= 0 ; i < MAX_ERROR_RANGES ; i++) + { + if (errors_per_range[i]) + { + my_error_unregister((i+1)*ERRORS_PER_RANGE, + (i+1)*ERRORS_PER_RANGE + + errors_per_range[i]-1); + errors_per_range[i]= 0; + } + } +} + + /** Check the error messages array contains all relevant error messages */ @@ -125,11 +155,17 @@ static bool check_error_mesg(const char *file_name, const char **errmsg) The last MySQL error message can't be an empty string; If it is, it means that the error file doesn't contain all MySQL messages and is probably from an older version of MySQL / MariaDB. + We also check that each section has enough error messages. 
*/ - if (errmsg[ER_LAST_MYSQL_ERROR_MESSAGE -1 - ER_ERROR_FIRST][0] == 0) + if (errmsg[ER_LAST_MYSQL_ERROR_MESSAGE -1 - ER_ERROR_FIRST][0] == 0 || + (errors_per_range[0] < ER_ERROR_LAST_SECTION_2 - ER_ERROR_FIRST + 1) || + errors_per_range[1] != 0 || + (errors_per_range[2] < ER_ERROR_LAST_SECTION_4 - + ER_ERROR_FIRST_SECTION_4 +1) || + (errors_per_range[3] < ER_ERROR_LAST - ER_ERROR_FIRST_SECTION_5 + 1)) { sql_print_error("Error message file '%s' is probably from and older " - "version of MariaDB / MYSQL as it doesn't contain all " + "version of MariaDB as it doesn't contain all " "error messages", file_name); return 1; } @@ -137,27 +173,28 @@ static bool check_error_mesg(const char *file_name, const char **errmsg) } -/** - Read text from packed textfile in language-directory. +struct st_msg_file +{ + uint sections; + uint max_error; + uint errors; + size_t text_length; +}; - If we can't read messagefile then it's panic- we can't continue. +/** + Open file for packed textfile in language-directory. */ -bool read_texts(const char *file_name, const char *language, - const char ***point, uint error_messages) +static File open_error_msg_file(const char *file_name, const char *language, + uint error_messages, struct st_msg_file *ret) { - register uint i; - uint count,funktpos; - size_t offset, length; + int error_pos= 0; File file; char name[FN_REFLEN]; char lang_path[FN_REFLEN]; - uchar *UNINIT_VAR(buff); - uchar head[32],*pos; - DBUG_ENTER("read_texts"); + uchar head[32]; + DBUG_ENTER("open_error_msg_file"); - *point= 0; - funktpos=0; convert_dirname(lang_path, language, NullS); (void) my_load_path(lang_path, lang_path, lc_messages_dir); if ((file= mysql_file_open(key_file_ERRMSG, @@ -168,69 +205,121 @@ bool read_texts(const char *file_name, const char *language, /* Trying pre-5.4 sematics of the --language parameter. 
It included the language-specific part, e.g.: - --language=/path/to/english/ */ if ((file= mysql_file_open(key_file_ERRMSG, - fn_format(name, file_name, lc_messages_dir, "", 4), + fn_format(name, file_name, lc_messages_dir, "", + 4), O_RDONLY | O_SHARE | O_BINARY, MYF(0))) < 0) goto err; sql_print_warning("An old style --language or -lc-message-dir value with language specific part detected: %s", lc_messages_dir); sql_print_warning("Use --lc-messages-dir without language specific part instead."); } - - funktpos=1; + error_pos=1; if (mysql_file_read(file, (uchar*) head, 32, MYF(MY_NABP))) goto err; - funktpos=2; + error_pos=2; if (head[0] != (uchar) 254 || head[1] != (uchar) 254 || - head[2] != 2 || head[3] != 3) + head[2] != 2 || head[3] != 4) goto err; /* purecov: inspected */ - error_message_charset_info= system_charset_info; - length=uint4korr(head+6); count=uint2korr(head+10); + ret->text_length= uint4korr(head+6); + ret->max_error= uint2korr(head+10); + ret->errors= uint2korr(head+12); + ret->sections= uint2korr(head+14); - if (count < error_messages) + if (ret->max_error < error_messages || ret->sections != MAX_ERROR_RANGES) { sql_print_error("\ Error message file '%s' had only %d error messages, but it should contain at least %d error messages.\nCheck that the above file is the right version for this program!", - name,count,error_messages); + name,ret->errors,error_messages); (void) mysql_file_close(file, MYF(MY_WME)); - DBUG_RETURN(1); + DBUG_RETURN(FERR); } + DBUG_RETURN(file); - if (!(*point= (const char**) - my_malloc((size_t) (MY_MAX(length,count*2)+count*sizeof(char*)),MYF(0)))) - { - funktpos=3; /* purecov: inspected */ +err: + sql_print_error((error_pos == 2) ? + "Incompatible header in messagefile '%s'. Probably from " + "another version of MariaDB" : + ((error_pos == 1) ? 
"Can't read from messagefile '%s'" : + "Can't find messagefile '%s'"), name); + if (file != FERR) + (void) mysql_file_close(file, MYF(MY_WME)); + DBUG_RETURN(FERR); +} + + +/* + Define the number of normal and extra error messages in the errmsg.sys + file +*/ + +static const uint error_messages= ER_ERROR_LAST - ER_ERROR_FIRST+1; + +/** + Read text from packed textfile in language-directory. +*/ + +bool read_texts(const char *file_name, const char *language, + const char ****data) +{ + uint i, range_size; + const char **point; + size_t offset; + File file; + uchar *buff, *pos; + struct st_msg_file msg_file; + DBUG_ENTER("read_texts"); + + if ((file= open_error_msg_file(file_name, language, error_messages, + &msg_file)) == FERR) + DBUG_RETURN(1); + + if (!(*data= (const char***) + my_malloc((size_t) ((MAX_ERROR_RANGES+1) * sizeof(char**) + + MY_MAX(msg_file.text_length, msg_file.errors * 2)+ + msg_file.errors * sizeof(char*)), + MYF(MY_WME)))) goto err; /* purecov: inspected */ - } - buff= (uchar*) (*point + count); - if (mysql_file_read(file, buff, (size_t) count*2, MYF(MY_NABP))) + point= (const char**) ((*data) + MAX_ERROR_RANGES); + buff= (uchar*) (point + msg_file.errors); + + if (mysql_file_read(file, buff, + (size_t) (msg_file.errors + msg_file.sections) * 2, + MYF(MY_NABP | MY_WME))) goto err; - for (i=0, offset=0, pos= buff ; i< count ; i++) + + pos= buff; + /* read in sections */ + for (i= 0, offset= 0; i < msg_file.sections ; i++) { - (*point)[i]= (char*) buff+offset; - offset+= uint2korr(pos); + (*data)[i]= point + offset; + errors_per_range[i]= range_size= uint2korr(pos); + offset+= range_size; + pos+= 2; + } + + /* Calculate pointers to text data */ + for (i=0, offset=0 ; i < msg_file.errors ; i++) + { + point[i]= (char*) buff+offset; + offset+=uint2korr(pos); pos+=2; } - if (mysql_file_read(file, buff, length, MYF(MY_NABP))) + + /* Read error message texts */ + if (mysql_file_read(file, buff, msg_file.text_length, MYF(MY_NABP | MY_WME))) goto err; - 
(void) mysql_file_close(file, MYF(0)); + (void) mysql_file_close(file, MYF(MY_WME)); - i= check_error_mesg(file_name, *point); - DBUG_RETURN(i); + DBUG_RETURN(check_error_mesg(file_name, point)); err: - sql_print_error((funktpos == 3) ? "Not enough memory for messagefile '%s'" : - (funktpos == 2) ? "Incompatible header in messagefile '%s'. Probably from another version of MariaDB" : - ((funktpos == 1) ? "Can't read from messagefile '%s'" : - "Can't find messagefile '%s'"), name); - if (file != FERR) - (void) mysql_file_close(file, MYF(MY_WME)); + (void) mysql_file_close(file, MYF(0)); DBUG_RETURN(1); } /* read_texts */ diff --git a/sql/derror.h b/sql/derror.h index b2f6331e048..9f2aee71c7e 100644 --- a/sql/derror.h +++ b/sql/derror.h @@ -19,7 +19,8 @@ #include "my_global.h" /* uint */ bool init_errmessage(void); +void free_error_messages(); bool read_texts(const char *file_name, const char *language, - const char ***point, uint error_messages); + const char ****data); #endif /* DERROR_INCLUDED */ diff --git a/sql/field.cc b/sql/field.cc index cf5be38398f..a5d2d759edc 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1224,7 +1224,8 @@ bool Field::test_if_equality_guarantees_uniqueness(const Item *item) const for temporal columns, so the query: WHERE temporal_column='string' cannot return multiple distinct temporal values. - QQ: perhaps we could allow INT/DECIMAL/DOUBLE types for temporal items. + + TODO: perhaps we could allow INT/DECIMAL/DOUBLE types for temporal items. 
*/ return result_type() == item->result_type(); } diff --git a/sql/filesort.cc b/sql/filesort.cc index 54a79421d2e..82a5c90d0bb 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -74,7 +74,6 @@ static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info, TABLE *table, ha_rows records, ulong memory_available); - void Sort_param::init_for_filesort(uint sortlen, TABLE *table, ulong max_length_for_sort_data, ha_rows maxrows, bool sort_positions) @@ -124,28 +123,21 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, @param thd Current thread @param table Table to sort - @param sortorder How to sort the table - @param s_length Number of elements in sortorder - @param select Condition to apply to the rows - @param max_rows Return only this many rows - @param sort_positions Set to TRUE if we want to force sorting by - position - (Needed by UPDATE/INSERT or ALTER TABLE or - when rowids are required by executor) + @param filesort How to sort the table + @param[out] found_rows Store the number of found rows here. + This is the number of found rows after + applying WHERE condition. @note - If we sort by position (like if sort_positions is 1) filesort() will - call table->prepare_for_position(). + If we sort by position (like if filesort->sort_positions==true) + filesort() will call table->prepare_for_position(). 
@retval 0 Error # SORT_INFO */ -SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, - uint s_length, - SQL_SELECT *select, ha_rows max_rows, - bool sort_positions, - Filesort_tracker* tracker) +SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, + Filesort_tracker* tracker) { int error; size_t memory_available= thd->variables.sortbuff_size; @@ -156,9 +148,16 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, Sort_param param; bool multi_byte_charset; Bounded_queue<uchar, uchar> pq; + SQL_SELECT *const select= filesort->select; + ha_rows max_rows= filesort->limit; + uint s_length= 0; DBUG_ENTER("filesort"); - DBUG_EXECUTE("info",TEST_filesort(sortorder,s_length);); + + if (!(s_length= filesort->make_sortorder(thd))) + DBUG_RETURN(NULL); /* purecov: inspected */ + + DBUG_EXECUTE("info",TEST_filesort(filesort->sortorder,s_length);); #ifdef SKIP_DBUG_IN_FILESORT DBUG_PUSH(""); /* No DBUG here */ #endif @@ -194,11 +193,11 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, error= 1; sort->found_rows= HA_POS_ERROR; - param.init_for_filesort(sortlength(thd, sortorder, s_length, + param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length, &multi_byte_charset), table, thd->variables.max_length_for_sort_data, - max_rows, sort_positions); + max_rows, filesort->sort_positions); sort->addon_buf= param.addon_buf; sort->addon_field= param.addon_field; @@ -275,7 +274,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, goto err; param.sort_form= table; - param.end=(param.local_sortorder=sortorder)+s_length; + param.end=(param.local_sortorder=filesort->sortorder)+s_length; num_rows= find_all_keys(thd, ¶m, select, sort, &buffpek_pointers, @@ -429,6 +428,55 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, } /* filesort */ +void Filesort::cleanup() +{ + if (select && own_select) + { + select->cleanup(); + select= NULL; + } +} + + +uint Filesort::make_sortorder(THD *thd) 
+{ + uint count; + SORT_FIELD *sort,*pos; + ORDER *ord; + DBUG_ENTER("make_sortorder"); + + + count=0; + for (ord = order; ord; ord= ord->next) + count++; + if (!sortorder) + sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * (count + 1)); + pos= sort= sortorder; + + if (!pos) + DBUG_RETURN(0); + + for (ord= order; ord; ord= ord->next, pos++) + { + Item *item= ord->item[0]->real_item(); + pos->field= 0; pos->item= 0; + if (item->type() == Item::FIELD_ITEM) + pos->field= ((Item_field*) item)->field; + else if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item()) + pos->field= ((Item_sum*) item)->get_tmp_table_field(); + else if (item->type() == Item::COPY_STR_ITEM) + { // Blob patch + pos->item= ((Item_copy*) item)->get_item(); + } + else + pos->item= *ord->item; + pos->reverse= (ord->direction == ORDER::ORDER_DESC); + DBUG_ASSERT(pos->field != NULL || pos->item != NULL); + } + DBUG_RETURN(count); + } + + /** Read 'count' number of buffer pointers into memory. */ static uchar *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count, diff --git a/sql/filesort.h b/sql/filesort.h index 454c745b5c0..18029a10c14 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -17,13 +17,67 @@ #define FILESORT_INCLUDED #include "my_base.h" /* ha_rows */ +#include "sql_list.h" /* Sql_alloc */ #include "filesort_utils.h" class SQL_SELECT; class THD; struct TABLE; -struct SORT_FIELD; class Filesort_tracker; +struct SORT_FIELD; +typedef struct st_order ORDER; + + +/** + Sorting related info. + To be extended by another WL to include complete filesort implementation. 
+*/ +class Filesort: public Sql_alloc +{ +public: + /** List of expressions to order the table by */ + ORDER *order; + /** Number of records to return */ + ha_rows limit; + /** ORDER BY list with some precalculated info for filesort */ + SORT_FIELD *sortorder; + /** select to use for getting records */ + SQL_SELECT *select; + /** TRUE <=> free select on destruction */ + bool own_select; + /** true means we are using Priority Queue for order by with limit. */ + bool using_pq; + + /* + TRUE means sort operation must produce table rowids. + FALSE means that it halso has an option of producing {sort_key, + addon_fields} pairs. + */ + bool sort_positions; + + Filesort_tracker *tracker; + + Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg, + SQL_SELECT *select_arg): + order(order_arg), + limit(limit_arg), + sortorder(NULL), + select(select_arg), + own_select(false), + using_pq(false), + sort_positions(sort_positions_arg) + { + DBUG_ASSERT(order); + }; + + ~Filesort() { cleanup(); } + /* Prepare ORDER BY list for sorting. 
*/ + uint make_sortorder(THD *thd); + +private: + void cleanup(); +}; + class SORT_INFO { @@ -97,19 +151,12 @@ public: size_t sort_buffer_size() const { return filesort_buffer.sort_buffer_size(); } - friend SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, - uint s_length, - SQL_SELECT *select, ha_rows max_rows, - bool sort_positions, - Filesort_tracker* tracker); + friend SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, + Filesort_tracker* tracker); }; - -SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, - uint s_length, - SQL_SELECT *select, ha_rows max_rows, - bool sort_positions, - Filesort_tracker* tracker); +SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, + Filesort_tracker* tracker); void change_double_for_sort(double nr,uchar *to); diff --git a/sql/handler.cc b/sql/handler.cc index 748a51f5c59..2186d389056 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -295,7 +295,7 @@ handler *get_ha_partition(partition_info *part_info) static const char **handler_errmsgs; C_MODE_START -static const char **get_handler_errmsgs() +static const char **get_handler_errmsgs(int nr) { return handler_errmsgs; } @@ -386,12 +386,10 @@ int ha_init_errors(void) */ static int ha_finish_errors(void) { - const char **errmsgs; - /* Allocate a pointer array for the error message strings. */ - if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST))) - return 1; - my_free(errmsgs); + my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST); + my_free(handler_errmsgs); + handler_errmsgs= 0; return 0; } @@ -3079,6 +3077,7 @@ int handler::update_auto_increment() if (unlikely(nr == ULONGLONG_MAX)) DBUG_RETURN(HA_ERR_AUTOINC_ERANGE); + DBUG_ASSERT(nr != 0); DBUG_PRINT("info",("auto_increment: %llu nb_reserved_values: %llu", nr, append ? 
nb_reserved_values : 0)); diff --git a/sql/item.cc b/sql/item.cc index 7cdb2d2e7e4..65fb00d4757 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -455,7 +455,7 @@ Item::Item(THD *thd): { DBUG_ASSERT(thd); marker= 0; - maybe_null=null_value=with_sum_func=with_field=0; + maybe_null=null_value=with_sum_func=with_window_func=with_field=0; in_rollup= 0; with_subselect= 0; /* Initially this item is not attached to any JOIN_TAB. */ @@ -500,6 +500,7 @@ Item::Item(THD *thd, Item *item): in_rollup(item->in_rollup), null_value(item->null_value), with_sum_func(item->with_sum_func), + with_window_func(item->with_window_func), with_field(item->with_field), fixed(item->fixed), is_autogenerated_name(item->is_autogenerated_name), @@ -1749,7 +1750,7 @@ public: thd->fatal_error() may be called if we are out of memory */ -void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, +void Item::split_sum_func2(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, Item **ref, uint split_flags) { @@ -1760,6 +1761,14 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, ((Item_sum *) this)->ref_by) return; } + else if (type() == WINDOW_FUNC_ITEM) + { + /* + Skip the else part, window functions are very special functions: + they need to have their own fields in the temp. table, but they + need to be proceessed differently than regular aggregate functions + */ + } else { /* Not a SUM() function */ @@ -1800,7 +1809,7 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, Exception is Item_direct_view_ref which we need to convert to Item_ref to allow fields from view being stored in tmp table. 
*/ - Item_aggregate_ref *item_ref; + Item_ref *item_ref; uint el= fields.elements; /* If this is an item_ref, get the original item @@ -1810,13 +1819,24 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, Item *real_itm= real_item(); ref_pointer_array[el]= real_itm; - if (!(item_ref= (new (thd->mem_root) - Item_aggregate_ref(thd, - &thd->lex->current_select->context, - ref_pointer_array + el, 0, name)))) - return; // fatal_error is set + if (type() == WINDOW_FUNC_ITEM) + { + if (!(item_ref= (new (thd->mem_root) + Item_direct_ref(thd, + &thd->lex->current_select->context, + &ref_pointer_array[el], 0, name)))) + return; // fatal_error is set + } + else + { + if (!(item_ref= (new (thd->mem_root) + Item_aggregate_ref(thd, + &thd->lex->current_select->context, + &ref_pointer_array[el], 0, name)))) + return; // fatal_error is set + } if (type() == SUM_FUNC_ITEM) - item_ref->depended_from= ((Item_sum *) this)->depended_from(); + item_ref->depended_from= ((Item_sum *) this)->depended_from(); fields.push_front(real_itm); thd->change_item_tree(ref, item_ref); } @@ -3755,16 +3775,18 @@ Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it) str_value.charset()); collation.set(str_value.charset(), DERIVATION_COERCIBLE); decimals= 0; - + item_type= Item::STRING_ITEM; break; } case REAL_RESULT: set_double(arg->val_real()); + item_type= Item::REAL_ITEM; break; case INT_RESULT: set_int(arg->val_int(), arg->max_length); + item_type= Item::INT_ITEM; break; case DECIMAL_RESULT: @@ -3776,6 +3798,7 @@ Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it) return TRUE; set_decimal(dv); + item_type= Item::DECIMAL_ITEM; break; } @@ -3785,11 +3808,11 @@ Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it) DBUG_ASSERT(TRUE); // Abort in debug mode. set_null(); // Set to NULL in release mode. 
+ item_type= Item::NULL_ITEM; return FALSE; } set_handler_by_result_type(arg->result_type()); - item_type= arg->type(); return FALSE; } @@ -4489,7 +4512,7 @@ resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select) return NULL; } DBUG_ASSERT((*select_ref)->fixed); - return (select->ref_pointer_array + counter); + return &select->ref_pointer_array[counter]; } if (group_by_ref) return group_by_ref; @@ -6516,15 +6539,14 @@ Item *Item_field::update_value_transformer(THD *thd, uchar *select_arg) type() != Item::TRIGGER_FIELD_ITEM) { List<Item> *all_fields= &select->join->all_fields; - Item **ref_pointer_array= select->ref_pointer_array; - DBUG_ASSERT(all_fields->elements <= select->ref_pointer_array_size); + Ref_ptr_array &ref_pointer_array= select->ref_pointer_array; int el= all_fields->elements; Item_ref *ref; ref_pointer_array[el]= (Item*)this; all_fields->push_front((Item*)this, thd->mem_root); ref= new (thd->mem_root) - Item_ref(thd, &select->context, ref_pointer_array + el, + Item_ref(thd, &select->context, &ref_pointer_array[el], table_name, field_name); return ref; } @@ -6928,6 +6950,7 @@ void Item_ref::set_properties() split_sum_func() doesn't try to change the reference. 
*/ with_sum_func= (*ref)->with_sum_func; + with_window_func= (*ref)->with_window_func; with_field= (*ref)->with_field; fixed= 1; if (alias_name_used) diff --git a/sql/item.h b/sql/item.h index e42442aa301..674ff6e99dc 100644 --- a/sql/item.h +++ b/sql/item.h @@ -65,6 +65,8 @@ class RANGE_OPT_PARAM; class SEL_TREE; +typedef Bounds_checked_array<Item*> Ref_ptr_array; + static inline uint32 char_to_byte_length_safe(uint32 char_length_arg, uint32 mbmaxlen_arg) { @@ -626,7 +628,8 @@ public: static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); } static void operator delete(void *ptr, MEM_ROOT *mem_root) {} - enum Type {FIELD_ITEM= 0, FUNC_ITEM, SUM_FUNC_ITEM, STRING_ITEM, + enum Type {FIELD_ITEM= 0, FUNC_ITEM, SUM_FUNC_ITEM, + WINDOW_FUNC_ITEM, STRING_ITEM, INT_ITEM, REAL_ITEM, NULL_ITEM, VARBIN_ITEM, COPY_STR_ITEM, FIELD_AVG_ITEM, DEFAULT_VALUE_ITEM, PROC_ITEM,COND_ITEM, REF_ITEM, FIELD_STD_ITEM, @@ -692,6 +695,7 @@ public: of a query with ROLLUP */ bool null_value; /* if item is null */ bool with_sum_func; /* True if item contains a sum func */ + bool with_window_func; /* True if item contains a window func */ /** True if any item except Item_sum contains a field. Set during parsing. 
*/ @@ -1180,7 +1184,7 @@ public: void print_item_w_name(String *, enum_query_type query_type); void print_value(String *); virtual void update_used_tables() {} - virtual COND *build_equal_items(THD *thd, COND_EQUAL *inherited, + virtual COND *build_equal_items(THD *thd, COND_EQUAL *inheited, bool link_item_fields, COND_EQUAL **cond_equal_ref) { @@ -1216,10 +1220,11 @@ public: { return false; } - virtual void split_sum_func(THD *thd, Item **ref_pointer_array, + virtual void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, uint flags) {} /* Called for items that really have to be split */ - void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields, + void split_sum_func2(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, Item **ref, uint flags); virtual bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate); bool get_time(MYSQL_TIME *ltime) @@ -4768,17 +4773,10 @@ public: - cmp() method that compares the saved value with the current value of the source item, and if they were not equal saves item's value into the saved value. -*/ -/* - Cached_item_XXX objects are not exactly caches. They do the following: - - Each Cached_item_XXX object has - - its source item - - saved value of the source item - - cmp() method that compares the saved value with the current value of the - source item, and if they were not equal saves item's value into the saved - value. + TODO: add here: + - a way to save the new value w/o comparison + - a way to do less/equal/greater comparison */ class Cached_item :public Sql_alloc @@ -4786,48 +4784,75 @@ class Cached_item :public Sql_alloc public: bool null_value; Cached_item() :null_value(0) {} + /* + Compare the cached value with the source value. If not equal, copy + the source value to the cache. 
+ @return + true - Not equal + false - Equal + */ virtual bool cmp(void)=0; + + /* Compare the cached value with the source value, without copying */ + virtual int cmp_read_only()=0; + virtual ~Cached_item(); /*line -e1509 */ }; -class Cached_item_str :public Cached_item +class Cached_item_item : public Cached_item { +protected: Item *item; + + Cached_item_item(Item *arg) : item(arg) {} +public: + void fetch_value_from(Item *new_item) + { + Item *save= item; + item= new_item; + cmp(); + item= save; + } +}; + +class Cached_item_str :public Cached_item_item +{ uint32 value_max_length; String value,tmp_value; public: Cached_item_str(THD *thd, Item *arg); bool cmp(void); + int cmp_read_only(); ~Cached_item_str(); // Deallocate String:s }; -class Cached_item_real :public Cached_item +class Cached_item_real :public Cached_item_item { - Item *item; double value; public: - Cached_item_real(Item *item_par) :item(item_par),value(0.0) {} + Cached_item_real(Item *item_par) :Cached_item_item(item_par),value(0.0) {} bool cmp(void); + int cmp_read_only(); }; -class Cached_item_int :public Cached_item +class Cached_item_int :public Cached_item_item { - Item *item; longlong value; public: - Cached_item_int(Item *item_par) :item(item_par),value(0) {} + Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {} bool cmp(void); + int cmp_read_only(); }; -class Cached_item_decimal :public Cached_item +class Cached_item_decimal :public Cached_item_item { - Item *item; my_decimal value; public: Cached_item_decimal(Item *item_par); bool cmp(void); + int cmp_read_only(); }; class Cached_item_field :public Cached_item @@ -4844,6 +4869,7 @@ public: buff= (uchar*) thd_calloc(thd, length= field->pack_length()); } bool cmp(void); + int cmp_read_only(); }; class Item_default_value : public Item_field @@ -5129,7 +5155,7 @@ public: return (this->*processor)(arg); } virtual Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs); - void split_sum_func2_example(THD *thd, Item 
**ref_pointer_array, + void split_sum_func2_example(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, uint flags) { example->split_sum_func2(thd, ref_pointer_array, fields, &example, flags); diff --git a/sql/item_buff.cc b/sql/item_buff.cc index 62c2f76dc2e..488eb52fb77 100644 --- a/sql/item_buff.cc +++ b/sql/item_buff.cc @@ -71,7 +71,7 @@ Cached_item::~Cached_item() {} */ Cached_item_str::Cached_item_str(THD *thd, Item *arg) - :item(arg), + :Cached_item_item(arg), value_max_length(MY_MIN(arg->max_length, thd->variables.max_sort_length)), value(value_max_length) {} @@ -98,6 +98,25 @@ bool Cached_item_str::cmp(void) return tmp; } + +int Cached_item_str::cmp_read_only() +{ + String *res= item->val_str(&tmp_value); + + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + + return sortcmp(&value, res, item->collation.collation); +} + + Cached_item_str::~Cached_item_str() { item=0; // Safety @@ -115,6 +134,23 @@ bool Cached_item_real::cmp(void) return FALSE; } + +int Cached_item_real::cmp_read_only() +{ + double nr= item->val_real(); + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + return (nr == value)? 0 : ((nr < value)? 1: -1); +} + + bool Cached_item_int::cmp(void) { longlong nr=item->val_int(); @@ -128,6 +164,22 @@ bool Cached_item_int::cmp(void) } +int Cached_item_int::cmp_read_only() +{ + longlong nr= item->val_int(); + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + return (nr == value)? 0 : ((nr < value)? 
1: -1); +} + + bool Cached_item_field::cmp(void) { bool tmp= FALSE; // Value is identical @@ -148,8 +200,24 @@ bool Cached_item_field::cmp(void) } +int Cached_item_field::cmp_read_only() +{ + if (null_value) + { + if (field->is_null()) + return 0; + else + return -1; + } + if (field->is_null()) + return 1; + + return field->cmp(buff); +} + + Cached_item_decimal::Cached_item_decimal(Item *it) - :item(it) + :Cached_item_item(it) { my_decimal_set_zero(&value); } @@ -174,3 +242,20 @@ bool Cached_item_decimal::cmp() return FALSE; } + +int Cached_item_decimal::cmp_read_only() +{ + my_decimal tmp; + my_decimal *ptmp= item->val_decimal(&tmp); + if (null_value) + { + if (item->null_value) + return 0; + else + return -1; + } + if (item->null_value) + return 1; + return my_decimal_cmp(&value, ptmp); +} + diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 579cdc10057..335228c37fa 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2482,7 +2482,7 @@ bool Item_func_if::date_op(MYSQL_TIME *ltime, uint fuzzydate) } -void Item_func_nullif::split_sum_func(THD *thd, Item **ref_pointer_array, +void Item_func_nullif::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, uint flags) { if (m_cache) @@ -4791,7 +4791,7 @@ void Item_cond::traverse_cond(Cond_traverser traverser, that have or refer (HAVING) to a SUM expression. 
*/ -void Item_cond::split_sum_func(THD *thd, Item **ref_pointer_array, +void Item_cond::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, uint flags) { List_iterator<Item> li(list); diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 2d197a86d9b..5789186dbe8 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1030,8 +1030,8 @@ public: uint decimal_precision() const { return args[2]->decimal_precision(); } const char *func_name() const { return "nullif"; } void print(String *str, enum_query_type query_type); - void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields, - uint flags); + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, uint flags); void update_used_tables(); table_map not_null_tables() const { return 0; } bool is_null(); @@ -2055,8 +2055,8 @@ public: SARGABLE_PARAM **sargables); SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); virtual void print(String *str, enum_query_type query_type); - void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields, - uint flags); + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, uint flags); friend int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, COND **conds); void top_level_item() { abort_on_null=1; } @@ -2083,7 +2083,7 @@ template <template<class> class LI, class T> class Item_equal_iterator; All equality predicates of the form field1=field2 contained in a conjunction are substituted for a sequence of items of this class. An item of this class Item_equal(f1,f2,...fk) represents a - multiple equality f1=f2=...=fk. 
+ multiple equality f1=f2=...=fk.l If a conjunction contains predicates f1=f2 and f2=f3, a new item of this class is created Item_equal(f1,f2,f3) representing the multiple diff --git a/sql/item_func.cc b/sql/item_func.cc index 50b6f4a6b68..57bd004cf88 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -132,6 +132,7 @@ void Item_func::sync_with_sum_func_and_with_field(List<Item> &list) while ((item= li++)) { with_sum_func|= item->with_sum_func; + with_window_func|= item->with_window_func; with_field|= item->with_field; } } @@ -226,6 +227,7 @@ Item_func::fix_fields(THD *thd, Item **ref) maybe_null=1; with_sum_func= with_sum_func || item->with_sum_func; + with_window_func= with_window_func || item->with_window_func; with_field= with_field || item->with_field; used_tables_and_const_cache_join(item); with_subselect|= item->has_subquery(); @@ -431,7 +433,7 @@ void Item_args::propagate_equal_fields(THD *thd, See comments in Item_cond::split_sum_func() */ -void Item_func::split_sum_func(THD *thd, Item **ref_pointer_array, +void Item_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, uint flags) { Item **arg, **arg_end; @@ -4904,9 +4906,16 @@ Item_func_set_user_var::update_hash(void *ptr, uint length, If we set a variable explicitely to NULL then keep the old result type of the variable */ - if ((null_value= args[0]->null_value) && null_item) + if (args[0]->type() == Item::FIELD_ITEM) + { + /* args[0]->null_value may be outdated */ + null_value= ((Item_field*)args[0])->field->is_null(); + } + else + null_value= args[0]->null_value; + if (null_value && null_item) res_type= m_var_entry->type; // Don't change type of item - if (::update_hash(m_var_entry, (null_value= args[0]->null_value), + if (::update_hash(m_var_entry, null_value, ptr, length, res_type, cs, unsigned_arg)) { null_value= 1; diff --git a/sql/item_func.h b/sql/item_func.h index 2ce199b3565..5c21535adbe 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -151,8 +151,8 @@ 
public: sync_with_sum_func_and_with_field(list); list.empty(); // Fields are used } - void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields, - uint flags); + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, uint flags); virtual void print(String *str, enum_query_type query_type); void print_op(String *str, enum_query_type query_type); void print_args(String *str, uint from, enum_query_type query_type); diff --git a/sql/item_row.cc b/sql/item_row.cc index b1575b81087..56d73f7b759 100644 --- a/sql/item_row.cc +++ b/sql/item_row.cc @@ -100,7 +100,7 @@ void Item_row::cleanup() } -void Item_row::split_sum_func(THD *thd, Item **ref_pointer_array, +void Item_row::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, uint flags) { Item **arg, **arg_end; diff --git a/sql/item_row.h b/sql/item_row.h index 153a6f085b3..ddb6f0835f2 100644 --- a/sql/item_row.h +++ b/sql/item_row.h @@ -85,8 +85,8 @@ public: bool fix_fields(THD *thd, Item **ref); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void cleanup(); - void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields, - uint flags); + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, uint flags); table_map used_tables() const { return used_tables_cache; }; bool const_item() const { return const_item_cache; }; enum Item_result result_type() const { return ROW_RESULT; } diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 8745baa8c69..94e7bc98618 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -739,7 +739,7 @@ int Item_in_subselect::optimize(double *out_rows, double *cost) } /* Now with grouping */ - if (join->group_list) + if (join->group_list_for_estimates) { DBUG_PRINT("info",("Materialized join has grouping, trying to estimate it")); double output_rows= get_post_group_estimate(join, *out_rows); @@ -1896,7 +1896,8 @@ bool 
Item_allany_subselect::transform_into_max_min(JOIN *join) (ALL && (> || =>)) || (ANY && (< || =<)) for ALL condition is inverted */ - item= new (thd->mem_root) Item_sum_max(thd, *select_lex->ref_pointer_array); + item= new (thd->mem_root) Item_sum_max(thd, + select_lex->ref_pointer_array[0]); } else { @@ -1904,11 +1905,12 @@ bool Item_allany_subselect::transform_into_max_min(JOIN *join) (ALL && (< || =<)) || (ANY && (> || =>)) for ALL condition is inverted */ - item= new (thd->mem_root) Item_sum_min(thd, *select_lex->ref_pointer_array); + item= new (thd->mem_root) Item_sum_min(thd, + select_lex->ref_pointer_array[0]); } if (upper_item) upper_item->set_sum_test(item); - thd->change_item_tree(select_lex->ref_pointer_array, item); + thd->change_item_tree(&select_lex->ref_pointer_array[0], item); { List_iterator<Item> it(select_lex->item_list); it++; @@ -2054,8 +2056,8 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN *join, thd, &select_lex->context, this, - select_lex-> - ref_pointer_array, + &select_lex-> + ref_pointer_array[0], (char *)"<ref>", this->full_name())); if (!abort_on_null && left_expr->maybe_null) @@ -2130,7 +2132,7 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN *join, new (thd->mem_root) Item_ref_null_helper(thd, &select_lex->context, this, - select_lex->ref_pointer_array, + &select_lex->ref_pointer_array[0], (char *)"<no matter>", (char *)"<result>")); if (!abort_on_null && left_expr->maybe_null) @@ -2317,7 +2319,7 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, (char *)in_left_expr_name), new (thd->mem_root) Item_ref(thd, &select_lex->context, - select_lex->ref_pointer_array + i, + &select_lex->ref_pointer_array[i], (char *)"<no matter>", (char *)"<list ref>")); Item *item_isnull= @@ -2325,7 +2327,7 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, Item_func_isnull(thd, new (thd->mem_root) Item_ref(thd, &select_lex->context, - select_lex->ref_pointer_array+i, + &select_lex->ref_pointer_array[i], 
(char *)"<no matter>", (char *)"<list ref>")); Item *col_item= new (thd->mem_root) @@ -2343,8 +2345,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, Item_is_not_null_test(thd, this, new (thd->mem_root) Item_ref(thd, &select_lex->context, - select_lex-> - ref_pointer_array + i, + &select_lex-> + ref_pointer_array[i], (char *)"<no matter>", (char *)"<list ref>")); if (!abort_on_null && left_expr->element_index(i)->maybe_null) @@ -2382,8 +2384,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, (char *)in_left_expr_name), new (thd->mem_root) Item_direct_ref(thd, &select_lex->context, - select_lex-> - ref_pointer_array+i, + &select_lex-> + ref_pointer_array[i], (char *)"<no matter>", (char *)"<list ref>")); if (!abort_on_null && select_lex->ref_pointer_array[i]->maybe_null) @@ -2393,7 +2395,7 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, Item_is_not_null_test(thd, this, new (thd->mem_root) Item_ref(thd, &select_lex->context, - select_lex->ref_pointer_array + i, + &select_lex->ref_pointer_array[i], (char *)"<no matter>", (char *)"<list ref>")); @@ -2402,8 +2404,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join, Item_func_isnull(thd, new (thd->mem_root) Item_direct_ref(thd, &select_lex->context, - select_lex-> - ref_pointer_array+i, + &select_lex-> + ref_pointer_array[i], (char *)"<no matter>", (char *)"<list ref>")); item= new (thd->mem_root) Item_cond_or(thd, item, item_isnull); @@ -3533,8 +3535,7 @@ int subselect_single_select_engine::prepare() prepared= 1; SELECT_LEX *save_select= thd->lex->current_select; thd->lex->current_select= select_lex; - if (join->prepare(&select_lex->ref_pointer_array, - select_lex->table_list.first, + if (join->prepare(select_lex->table_list.first, select_lex->with_wild, select_lex->where, select_lex->order_list.elements + @@ -3683,14 +3684,6 @@ int subselect_single_select_engine::exec() */ select_lex->uncacheable|= UNCACHEABLE_EXPLAIN; select_lex->master_unit()->uncacheable|= 
UNCACHEABLE_EXPLAIN; - /* - Force join->join_tmp creation, because this subquery will be replaced - by a simple select from the materialization temp table by optimize() - called by EXPLAIN and we need to preserve the initial query structure - so we can display it. - */ - if (join->need_tmp && join->init_save_join_tab()) - DBUG_RETURN(1); /* purecov: inspected */ } } if (item->engine_changed(this)) @@ -5231,7 +5224,7 @@ double get_post_group_estimate(JOIN* join, double join_op_rows) table_map tables_in_group_list= table_map(0); /* Find out which tables are used in GROUP BY list */ - for (ORDER *order= join->group_list; order; order= order->next) + for (ORDER *order= join->group_list_for_estimates; order; order= order->next) { Item *item= order->item[0]; table_map item_used_tables= item->used_tables(); diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 0c85cf53e18..f774ee5a561 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1243,7 +1243,8 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table) Item_sum_sum::Item_sum_sum(THD *thd, Item_sum_sum *item) :Item_sum_num(thd, item), Type_handler_hybrid_field_type(item), - curr_dec_buff(item->curr_dec_buff) + curr_dec_buff(item->curr_dec_buff), + count(item->count) { /* TODO: check if the following assignments are really needed */ if (Item_sum_sum::result_type() == DECIMAL_RESULT) @@ -1265,6 +1266,7 @@ void Item_sum_sum::clear() { DBUG_ENTER("Item_sum_sum::clear"); null_value=1; + count= 0; if (Item_sum_sum::result_type() == DECIMAL_RESULT) { curr_dec_buff= 0; @@ -1318,25 +1320,57 @@ void Item_sum_sum::fix_length_and_dec() bool Item_sum_sum::add() { DBUG_ENTER("Item_sum_sum::add"); + add_helper(false); + DBUG_RETURN(0); +} + +void Item_sum_sum::add_helper(bool perform_removal) +{ + DBUG_ENTER("Item_sum_sum::add_helper"); + if (Item_sum_sum::result_type() == DECIMAL_RESULT) { my_decimal value; const my_decimal *val= aggr->arg_val_decimal(&value); if (!aggr->arg_is_null(true)) { - 
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1), - val, dec_buffs + curr_dec_buff); + if (perform_removal) + { + DBUG_ASSERT(count > 0); + my_decimal_sub(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff ^ 1), + dec_buffs + curr_dec_buff, val); + count--; + } + else + { + count++; + my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff ^ 1), + val, dec_buffs + curr_dec_buff); + } curr_dec_buff^= 1; - null_value= 0; + null_value= (count > 0) ? 0 : 1; } } else { - sum+= aggr->arg_val_real(); + if (perform_removal) + sum-= aggr->arg_val_real(); + else + sum+= aggr->arg_val_real(); if (!aggr->arg_is_null(true)) - null_value= 0; + { + if (perform_removal) + { + DBUG_ASSERT(count > 0); + count--; + } + else + count++; + + null_value= (count > 0) ? 0 : 1; + } } - DBUG_RETURN(0); + DBUG_VOID_RETURN; } @@ -1386,6 +1420,13 @@ my_decimal *Item_sum_sum::val_decimal(my_decimal *val) return val_decimal_from_real(val); } +void Item_sum_sum::remove() +{ + DBUG_ENTER("Item_sum_sum::remove"); + add_helper(true); + DBUG_VOID_RETURN; +} + /** Aggregate a distinct row from the distinct hash table. @@ -1531,6 +1572,19 @@ bool Item_sum_count::add() return 0; } + +/* + Remove a row. This is used by window functions. 
+*/ + +void Item_sum_count::remove() +{ + DBUG_ASSERT(aggr->Aggrtype() == Aggregator::SIMPLE_AGGREGATOR); + if (aggr->arg_is_null(false)) + return; + count--; +} + longlong Item_sum_count::val_int() { DBUG_ASSERT(fixed == 1); @@ -1626,6 +1680,16 @@ bool Item_sum_avg::add() return FALSE; } +void Item_sum_avg::remove() +{ + Item_sum_sum::remove(); + if (!aggr->arg_is_null(true)) + { + DBUG_ASSERT(count > 0); + count--; + } +} + double Item_sum_avg::val_real() { DBUG_ASSERT(fixed == 1); @@ -2086,6 +2150,8 @@ longlong Item_sum_bit::val_int() void Item_sum_bit::clear() { bits= reset_bits; + if (as_window_function) + clear_as_window(); } Item *Item_sum_or::copy_or_same(THD* thd) @@ -2093,15 +2159,79 @@ Item *Item_sum_or::copy_or_same(THD* thd) return new (thd->mem_root) Item_sum_or(thd, this); } +bool Item_sum_bit::clear_as_window() +{ + memset(bit_counters, 0, sizeof(bit_counters)); + num_values_added= 0; + set_bits_from_counters(); + return 0; +} + +bool Item_sum_bit::remove_as_window(ulonglong value) +{ + DBUG_ASSERT(as_window_function); + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + if (!bit_counters[i]) + { + // Don't attempt to remove values that were never added. + DBUG_ASSERT((value & (1 << i)) == 0); + continue; + } + bit_counters[i]-= (value & (1 << i)) ? 1 : 0; + } + DBUG_ASSERT(num_values_added > 0); + // Prevent overflow; + num_values_added = std::min(num_values_added, num_values_added - 1); + set_bits_from_counters(); + return 0; +} + +bool Item_sum_bit::add_as_window(ulonglong value) +{ + DBUG_ASSERT(as_window_function); + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + bit_counters[i]+= (value & (1 << i)) ? 1 : 0; + } + // Prevent overflow; + num_values_added = std::max(num_values_added, num_values_added + 1); + set_bits_from_counters(); + return 0; +} + +void Item_sum_or::set_bits_from_counters() +{ + ulonglong value= 0; + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + value|= bit_counters[i] > 0 ? 
(1 << i) : 0; + } + bits= value | reset_bits; +} bool Item_sum_or::add() { ulonglong value= (ulonglong) args[0]->val_int(); if (!args[0]->null_value) + { + if (as_window_function) + return add_as_window(value); bits|=value; + } return 0; } +void Item_sum_xor::set_bits_from_counters() +{ + ulonglong value= 0; + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + value|= (bit_counters[i] % 2) ? (1 << i) : 0; + } + bits= value ^ reset_bits; +} + Item *Item_sum_xor::copy_or_same(THD* thd) { return new (thd->mem_root) Item_sum_xor(thd, this); @@ -2112,10 +2242,31 @@ bool Item_sum_xor::add() { ulonglong value= (ulonglong) args[0]->val_int(); if (!args[0]->null_value) + { + if (as_window_function) + return add_as_window(value); bits^=value; + } return 0; } +void Item_sum_and::set_bits_from_counters() +{ + ulonglong value= 0; + if (!num_values_added) + { + bits= reset_bits; + return; + } + + for (int i= 0; i < NUM_BIT_COUNTERS; i++) + { + // We've only added values of 1 for this bit. + if (bit_counters[i] == num_values_added) + value|= (1 << i); + } + bits= value & reset_bits; +} Item *Item_sum_and::copy_or_same(THD* thd) { return new (thd->mem_root) Item_sum_and(thd, this); @@ -2126,7 +2277,11 @@ bool Item_sum_and::add() { ulonglong value= (ulonglong) args[0]->val_int(); if (!args[0]->null_value) + { + if (as_window_function) + return add_as_window(value); bits&=value; + } return 0; } @@ -2314,6 +2469,10 @@ void Item_sum_bit::reset_field() void Item_sum_bit::update_field() { + // We never call update_field when computing the function as a window + // function. Setting bits to a random value invalidates the bits counters and + // the result of the bit function becomes erroneous. 
+ DBUG_ASSERT(!as_window_function); uchar *res=result_field->ptr; bits= uint8korr(res); add(); @@ -2896,7 +3055,7 @@ int group_concat_key_cmp_with_order(void* arg, const void* key1, field->table->s->null_bytes); int res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset); if (res) - return (*order_item)->asc ? res : -res; + return ((*order_item)->direction == ORDER::ORDER_ASC) ? res : -res; } /* We can't return 0 because in that case the tree class would remove this @@ -3372,8 +3531,8 @@ bool Item_func_group_concat::setup(THD *thd) if (!ref_pointer_array) DBUG_RETURN(TRUE); memcpy(ref_pointer_array, args, arg_count * sizeof(Item*)); - if (setup_order(thd, ref_pointer_array, context->table_list, list, - all_fields, *order)) + if (setup_order(thd, Ref_ptr_array(ref_pointer_array, n_elems), + context->table_list, list, all_fields, *order)) DBUG_RETURN(TRUE); } @@ -3507,9 +3666,9 @@ void Item_func_group_concat::print(String *str, enum_query_type query_type) if (i) str->append(','); orig_args[i + arg_count_field]->print(str, query_type); - if (order[i]->asc) + if (order[i]->direction == ORDER::ORDER_ASC) str->append(STRING_WITH_LEN(" ASC")); - else + else str->append(STRING_WITH_LEN(" DESC")); } } diff --git a/sql/item_sum.h b/sql/item_sum.h index 811e9d5c59c..e766e69a1c5 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -109,6 +109,7 @@ public: class st_select_lex; +class Window_spec; /** Class Item_sum is the base class used for special expressions that SQL calls @@ -347,7 +348,9 @@ public: enum Sumfunctype { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC, AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC, - VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC + VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC, + ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC, + CUME_DIST_FUNC, NTILE_FUNC }; Item **ref_by; /* pointer to a ref to the object used to register it */ @@ -540,12 +543,16 @@ public: virtual 
void clear()= 0; virtual bool add()= 0; virtual bool setup(THD *thd) { return false; } + + virtual void remove() { DBUG_ASSERT(0); } virtual void cleanup(); bool check_vcol_func_processor(uchar *int_arg) { return trace_unsupported_by_check_vcol_func_processor(func_name()); } + + virtual void setup_window_func(THD *thd, Window_spec *window_spec) {} }; @@ -710,6 +717,7 @@ public: class Item_sum_int :public Item_sum_num { public: + Item_sum_int(THD *thd): Item_sum_num(thd) {} Item_sum_int(THD *thd, Item *item_par): Item_sum_num(thd, item_par) {} Item_sum_int(THD *thd, List<Item> &list): Item_sum_num(thd, list) {} Item_sum_int(THD *thd, Item_sum_int *item) :Item_sum_num(thd, item) {} @@ -724,7 +732,7 @@ public: class Item_sum_sum :public Item_sum_num, - public Type_handler_hybrid_field_type + public Type_handler_hybrid_field_type { protected: double sum; @@ -763,6 +771,11 @@ public: return has_with_distinct() ? "sum(distinct " : "sum("; } Item *copy_or_same(THD* thd); + void remove(); + +private: + void add_helper(bool perform_removal); + ulonglong count; }; @@ -775,6 +788,7 @@ class Item_sum_count :public Item_sum_int void clear(); bool add(); void cleanup(); + void remove(); public: Item_sum_count(THD *thd, Item *item_par): @@ -821,6 +835,8 @@ class Item_sum_count :public Item_sum_int class Item_sum_avg :public Item_sum_sum { public: + // TODO-cvicentiu given that Item_sum_sum now uses a counter of its own, in + // order to implement remove(), it is possible to remove this member. 
ulonglong count; uint prec_increment; uint f_precision, f_scale, dec_bin_size; @@ -839,6 +855,7 @@ public: } void clear(); bool add(); + void remove(); double val_real(); // In SPs we might force the "wrong" type with select into a declare variable longlong val_int() { return val_int_from_real(); } @@ -1019,14 +1036,18 @@ public: class Item_sum_bit :public Item_sum_int { -protected: - ulonglong reset_bits,bits; - public: Item_sum_bit(THD *thd, Item *item_par, ulonglong reset_arg): - Item_sum_int(thd, item_par), reset_bits(reset_arg), bits(reset_arg) {} + Item_sum_int(thd, item_par), reset_bits(reset_arg), bits(reset_arg), + as_window_function(FALSE), num_values_added(0) {} Item_sum_bit(THD *thd, Item_sum_bit *item): - Item_sum_int(thd, item), reset_bits(item->reset_bits), bits(item->bits) {} + Item_sum_int(thd, item), reset_bits(item->reset_bits), bits(item->bits), + as_window_function(item->as_window_function), + num_values_added(item->num_values_added) + { + if (as_window_function) + memcpy(bit_counters, item->bit_counters, sizeof(bit_counters)); + } enum Sumfunctype sum_func () const {return SUM_BIT_FUNC;} void clear(); longlong val_int(); @@ -1037,8 +1058,42 @@ public: void cleanup() { bits= reset_bits; + if (as_window_function) + clear_as_window(); Item_sum_int::cleanup(); } + void setup_window_func(THD *thd __attribute__((unused)), + Window_spec *window_spec __attribute__((unused))) + { + as_window_function= TRUE; + clear_as_window(); + } + void remove() + { + if (as_window_function) + { + remove_as_window(args[0]->val_int()); + return; + } + // Unless we're counting bits, we can not remove anything. + DBUG_ASSERT(0); + } + +protected: + static const int NUM_BIT_COUNTERS= 64; + ulonglong reset_bits,bits; + /* + Marks whether the function is to be computed as a window function. + */ + bool as_window_function; + // When used as an aggregate window function, we need to store + // this additional information. 
+ ulonglong num_values_added; + ulonglong bit_counters[NUM_BIT_COUNTERS]; + bool add_as_window(ulonglong value); + bool remove_as_window(ulonglong value); + bool clear_as_window(); + virtual void set_bits_from_counters()= 0; }; @@ -1050,28 +1105,37 @@ public: bool add(); const char *func_name() const { return "bit_or("; } Item *copy_or_same(THD* thd); + +private: + void set_bits_from_counters(); }; class Item_sum_and :public Item_sum_bit { - public: +public: Item_sum_and(THD *thd, Item *item_par): Item_sum_bit(thd, item_par, ULONGLONG_MAX) {} Item_sum_and(THD *thd, Item_sum_and *item) :Item_sum_bit(thd, item) {} bool add(); const char *func_name() const { return "bit_and("; } Item *copy_or_same(THD* thd); + +private: + void set_bits_from_counters(); }; class Item_sum_xor :public Item_sum_bit { - public: +public: Item_sum_xor(THD *thd, Item *item_par): Item_sum_bit(thd, item_par, 0) {} Item_sum_xor(THD *thd, Item_sum_xor *item) :Item_sum_bit(thd, item) {} bool add(); const char *func_name() const { return "bit_xor("; } Item *copy_or_same(THD* thd); + +private: + void set_bits_from_counters(); }; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc new file mode 100644 index 00000000000..d157d545dad --- /dev/null +++ b/sql/item_windowfunc.cc @@ -0,0 +1,242 @@ +#include "item_windowfunc.h" +#include "my_dbug.h" +#include "my_global.h" +#include "sql_select.h" // test if group changed + + +bool +Item_window_func::resolve_window_name(THD *thd) +{ + if (window_spec) + { + /* The window name has been already resolved */ + return false; + } + DBUG_ASSERT(window_name != NULL && window_spec == NULL); + char *ref_name= window_name->str; + + /* !TODO: Add the code to resolve ref_name in outer queries */ + /* + First look for the deinition of the window with 'window_name' + in the current select + */ + List<Window_spec> curr_window_specs= + List<Window_spec> (thd->lex->current_select->window_specs); + List_iterator_fast<Window_spec> it(curr_window_specs); + Window_spec 
*win_spec; + while((win_spec= it++)) + { + char *win_spec_name= win_spec->name(); + if (win_spec_name && + my_strcasecmp(system_charset_info, ref_name, win_spec_name) == 0) + { + window_spec= win_spec; + break; + } + } + + if (!window_spec) + { + my_error(ER_WRONG_WINDOW_SPEC_NAME, MYF(0), ref_name); + return true; + } + + return false; +} + + +void +Item_window_func::update_used_tables() +{ + used_tables_cache= 0; + window_func()->update_used_tables(); + used_tables_cache|= window_func()->used_tables(); + for (ORDER *ord= window_spec->partition_list->first; ord; ord=ord->next) + { + Item *item= *ord->item; + item->update_used_tables(); + used_tables_cache|= item->used_tables(); + } + for (ORDER *ord= window_spec->order_list->first; ord; ord=ord->next) + { + Item *item= *ord->item; + item->update_used_tables(); + used_tables_cache|= item->used_tables(); + } +} + + +bool +Item_window_func::fix_fields(THD *thd, Item **ref) +{ + DBUG_ASSERT(fixed == 0); + + enum_parsing_place place= thd->lex->current_select->parsing_place; + + if (!(place == SELECT_LIST || place == IN_ORDER_BY)) + { + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + return true; + } + + if (window_name && resolve_window_name(thd)) + return true; + + if (window_spec->window_frame && is_frame_prohibited()) + { + my_error(ER_NOT_ALLOWED_WINDOW_FRAME, MYF(0), window_func()->func_name()); + return true; + } + + if (window_spec->order_list->elements == 0 && is_order_list_mandatory()) + { + my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); + return true; + } + /* + TODO: why the last parameter is 'ref' in this call? What if window_func + decides to substitute itself for something else and does *ref=.... ? + This will substitute *this (an Item_window_func object) with Item_sum + object. Is this the intent? 
+ */ + if (window_func()->fix_fields(thd, ref)) + return true; + + const_item_cache= false; + with_window_func= true; + with_sum_func= false; + + fix_length_and_dec(); + + max_length= window_func()->max_length; + maybe_null= window_func()->maybe_null; + + fixed= 1; + set_phase_to_initial(); + return false; +} + + +/* + @detail + Window function evaluates its arguments when it is scanning the temporary + table in partition/order-by order. That is, arguments should be read from + the temporary table, not from the original base columns. + + In order for this to work, we need to call "split_sum_func" for each + argument. The effect of the call is: + 1. the argument is added into ref_pointer_array. This will cause the + argument to be saved in the temp.table + 2. argument item is replaced with an Item_ref object. this object refers + the argument through the ref_pointer_array. + + then, change_to_use_tmp_fields() will replace ref_pointer_array with an + array that points to the temp.table fields. + This way, when window_func attempts to evaluate its arguments, it will use + Item_ref objects which will read data from the temp.table. + + Note: Before window functions, aggregate functions never needed to do such + transformations on their arguments. This is because grouping operation + does not need to read from the temp.table. + (Q: what happens when we first sort and then do grouping in a + group-after-group mode? dont group by items read from temp.table, then?) +*/ + +void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, uint flags) +{ + for (uint i=0; i < window_func()->argument_count(); i++) + { + Item **p_item= &window_func()->arguments()[i]; + (*p_item)->split_sum_func2(thd, ref_pointer_array, fields, p_item, flags); + } +} + + +/* + This must be called before advance_window() can be called. + + @detail + If we attempt to do it in fix_fields(), partition_fields will refer + to the original window function arguments. 
+ We need it to refer to temp.table columns. +*/ + +void Item_window_func::setup_partition_border_check(THD *thd) +{ + partition_tracker.init(thd, window_spec->partition_list); + window_func()->setup_window_func(thd, window_spec); +} + + +void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: move this into Item_window_func? */ + peer_tracker.init(thd, window_spec->order_list); + clear(); +} + +void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: consider moving this && Item_sum_rank's implementation */ + peer_tracker.init(thd, window_spec->order_list); + clear(); +} + +bool Item_sum_dense_rank::add() +{ + if (peer_tracker.check_if_next_group() || first_add) + { + first_add= false; + dense_rank++; + } + + return false; +} + + +bool Item_sum_rank::add() +{ + row_number++; + if (peer_tracker.check_if_next_group()) + { + /* Row value changed */ + cur_rank= row_number; + } + return false; +} + +bool Item_window_func::check_if_partition_changed() +{ + return partition_tracker.check_if_next_group(); +} + +void Item_window_func::advance_window() +{ + if (check_if_partition_changed()) + { + /* Next partition */ + window_func()->clear(); + } + window_func()->add(); +} + +bool Item_sum_percent_rank::add() +{ + row_number++; + if (peer_tracker.check_if_next_group()) + { + /* Row value changed. */ + cur_rank= row_number; + } + return false; +} + +void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec) +{ + /* TODO: move this into Item_window_func? 
*/ + peer_tracker.init(thd, window_spec->order_list); + clear(); +} + + diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h new file mode 100644 index 00000000000..40f48cc7dc5 --- /dev/null +++ b/sql/item_windowfunc.h @@ -0,0 +1,757 @@ +#ifndef ITEM_WINDOWFUNC_INCLUDED +#define ITEM_WINDOWFUNC_INCLUDED + +#include "my_global.h" +#include "item.h" + +class Window_spec; + + +int test_if_group_changed(List<Cached_item> &list); + +/* A wrapper around test_if_group_changed */ +class Group_bound_tracker +{ + List<Cached_item> group_fields; + /* + During the first check_if_next_group, the list of cached_items is not + initialized. The compare function will return that the items match if + the field's value is the same as the Cached_item's default value (0). + This flag makes sure that we always return true during the first check. + + XXX This is better to be implemented within test_if_group_changed, but + since it is used in other parts of the codebase, we keep it here for now. + */ + bool first_check; +public: + void init(THD *thd, SQL_I_List<ORDER> *list) + { + for (ORDER *curr = list->first; curr; curr=curr->next) + { + Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); + group_fields.push_back(tmp); + } + first_check= true; + } + + void cleanup() + { + group_fields.empty(); + } + + /* + Check if the current row is in a different group than the previous row + this function was called for. + XXX: Side-effect: The new row's group becomes the current row's group. + + Returns true if there is a change between the current_group and the cached + value, or if it is the first check after a call to init. + */ + bool check_if_next_group() + { + if (test_if_group_changed(group_fields) > -1 || first_check) + { + first_check= false; + return true; + } + return false; + } + + /* + Check if the current row is in a different group than the previous row + check_if_next_group was called for. 
+ + Compares the groups without the additional side effect of updating the + current cached values. + */ + int compare_with_cache() + { + List_iterator<Cached_item> li(group_fields); + Cached_item *ptr; + int res; + while ((ptr= li++)) + { + if ((res= ptr->cmp_read_only())) + return res; + } + return 0; + } +}; + +/* + ROW_NUMBER() OVER (...) + + @detail + - This is a Window function (not just an aggregate) + - It can be computed by doing one pass over select output, provided + the output is sorted according to the window definition. +*/ + +class Item_sum_row_number: public Item_sum_int +{ + longlong count; + +public: + void clear() + { + count= 0; + } + bool add() + { + count++; + return false; + } + void update_field() {} + + Item_sum_row_number(THD *thd) + : Item_sum_int(thd), count(0) {} + + enum Sumfunctype sum_func() const + { + return ROW_NUMBER_FUNC; + } + + longlong val_int() + { + return count; + } + const char*func_name() const + { + return "row_number"; + } + +}; + + +/* + RANK() OVER (...) Windowing function + + @detail + - This is a Window function (not just an aggregate) + - It can be computed by doing one pass over select output, provided + the output is sorted according to the window definition. + + The function is defined as: + + "The rank of row R is defined as 1 (one) plus the number of rows that + precede R and are not peers of R" + + "This implies that if two or more rows are not distinct with respect to + the window ordering, then there will be one or more" +*/ + +class Item_sum_rank: public Item_sum_int +{ +protected: + longlong row_number; // just ROW_NUMBER() + longlong cur_rank; // current value + + Group_bound_tracker peer_tracker; +public: + void clear() + { + /* This is called on partition start */ + cur_rank= 1; + row_number= 0; + } + + bool add(); + + longlong val_int() + { + return cur_rank; + } + + void update_field() {} + /* + void reset_field(); + TODO: ^^ what does this do ? It is not called ever? 
+ */ + +public: + Item_sum_rank(THD *thd) + : Item_sum_int(thd) {} + + enum Sumfunctype sum_func () const + { + return RANK_FUNC; + } + + const char*func_name() const + { + return "rank"; + } + + void setup_window_func(THD *thd, Window_spec *window_spec); + void cleanup() + { + peer_tracker.cleanup(); + Item_sum_int::cleanup(); + } +}; + + +/* + DENSE_RANK() OVER (...) Windowing function + + @detail + - This is a Window function (not just an aggregate) + - It can be computed by doing one pass over select output, provided + the output is sorted according to the window definition. + + The function is defined as: + + "If DENSE_RANK is specified, then the rank of row R is defined as the + number of rows preceding and including R that are distinct with respect + to the window ordering" + + "This implies that there are no gaps in the sequential rank numbering of + rows in each window partition." +*/ + + +class Item_sum_dense_rank: public Item_sum_int +{ + longlong dense_rank; + bool first_add; + Group_bound_tracker peer_tracker; + public: + /* + XXX(cvicentiu) This class could potentially be implemented in the rank + class, with a switch for the DENSE case. + */ + void clear() + { + dense_rank= 0; + first_add= true; + } + bool add(); + void update_field() {} + longlong val_int() + { + return dense_rank; + } + + Item_sum_dense_rank(THD *thd) + : Item_sum_int(thd), dense_rank(0), first_add(true) {} + enum Sumfunctype sum_func () const + { + return DENSE_RANK_FUNC; + } + + const char*func_name() const + { + return "dense_rank"; + } + + void setup_window_func(THD *thd, Window_spec *window_spec); + + void cleanup() + { + peer_tracker.cleanup(); + Item_sum_int::cleanup(); + } +}; + +/* + A base window function (aggregate) that also holds a counter for the number + of rows. 
+*/ +class Item_sum_window_with_row_count : public Item_sum_num +{ + public: + Item_sum_window_with_row_count(THD *thd) : Item_sum_num(thd), + partition_row_count_(0) {} + + Item_sum_window_with_row_count(THD *thd, Item *arg) : + Item_sum_num(thd, arg), partition_row_count_(0) {}; + + void set_row_count(ulonglong count) { partition_row_count_ = count; } + + protected: + longlong get_row_count() { return partition_row_count_; } + private: + ulonglong partition_row_count_; +}; + +/* + @detail + "The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is + defined to be the RANK of R and NR is defined to be the number of rows in + the window partition of R." + + Computation of this function requires two passes: + - First pass to find #rows in the partition + This is held within the row_count context. + - Second pass to compute rank of current row and the value of the function +*/ +class Item_sum_percent_rank: public Item_sum_window_with_row_count +{ + public: + Item_sum_percent_rank(THD *thd) + : Item_sum_window_with_row_count(thd), cur_rank(1) {} + + longlong val_int() + { + /* + Percent rank is a real value so calling the integer value should never + happen. It makes no sense as it gets truncated to either 0 or 1. + */ + DBUG_ASSERT(0); + return 0; + } + + double val_real() + { + /* + We can not get the real value without knowing the number of rows + in the partition. Don't divide by 0. + */ + ulonglong partition_rows = get_row_count(); + null_value= partition_rows > 0 ? false : true; + + return partition_rows > 1 ? 
+ static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0; + } + + enum Sumfunctype sum_func () const + { + return PERCENT_RANK_FUNC; + } + + const char*func_name() const + { + return "percent_rank"; + } + + void update_field() {} + + void clear() + { + cur_rank= 1; + row_number= 0; + } + bool add(); + enum Item_result result_type () const { return REAL_RESULT; } + enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; } + + void fix_length_and_dec() + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + } + + void setup_window_func(THD *thd, Window_spec *window_spec); + + private: + longlong cur_rank; // Current rank of the current row. + longlong row_number; // Value if this were ROW_NUMBER() function. + + Group_bound_tracker peer_tracker; + + void cleanup() + { + peer_tracker.cleanup(); + Item_sum_num::cleanup(); + } +}; + + + + +/* + @detail + "The relative rank of a row R is defined as NP/NR, where + - NP is defined to be the number of rows preceding or peer with R in the + window ordering of the window partition of R + - NR is defined to be the number of rows in the window partition of R. + + Just like with Item_sum_percent_rank, computation of this function requires + two passes. 
+*/ + +class Item_sum_cume_dist: public Item_sum_window_with_row_count +{ + public: + Item_sum_cume_dist(THD *thd) : Item_sum_window_with_row_count(thd), + current_row_count_(0) {} + + double val_real() + { + if (get_row_count() == 0) + { + null_value= true; + return 0; + } + ulonglong partition_row_count= get_row_count(); + null_value= false; + return static_cast<double>(current_row_count_) / partition_row_count; + } + + bool add() + { + current_row_count_++; + return false; + } + + enum Sumfunctype sum_func() const + { + return CUME_DIST_FUNC; + } + + void clear() + { + current_row_count_= 0; + set_row_count(0); + } + + const char*func_name() const + { + return "cume_dist"; + } + + void update_field() {} + enum Item_result result_type () const { return REAL_RESULT; } + enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; } + + void fix_length_and_dec() + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + } + + private: + ulonglong current_row_count_; +}; + +class Item_sum_ntile : public Item_sum_window_with_row_count +{ + public: + Item_sum_ntile(THD* thd, Item* num_quantiles_expr) : + Item_sum_window_with_row_count(thd, num_quantiles_expr), + current_row_count_(0) {}; + + double val_real() + { + return val_int(); + } + + longlong val_int() + { + if (get_row_count() == 0) + { + null_value= true; + return 0; + } + + longlong num_quantiles= get_num_quantiles(); + + if (num_quantiles <= 0) { + my_error(ER_INVALID_NTILE_ARGUMENT, MYF(0)); + return true; + } + + null_value= false; + ulonglong quantile_size = get_row_count() / num_quantiles; + ulonglong extra_rows = get_row_count() - quantile_size * num_quantiles; + + if (current_row_count_ <= extra_rows * (quantile_size + 1)) + return (current_row_count_ - 1) / (quantile_size + 1) + 1; + + return (current_row_count_ - 1 - extra_rows) / quantile_size + 1; + } + + bool add() + { + current_row_count_++; + return false; + } + + enum Sumfunctype sum_func() const + { + 
return NTILE_FUNC; + } + + void clear() + { + current_row_count_= 0; + set_row_count(0); + } + + const char*func_name() const + { + return "ntile"; + } + + void update_field() {} + + enum Item_result result_type () const { return INT_RESULT; } + enum_field_types field_type() const { return MYSQL_TYPE_LONGLONG; } + + private: + longlong get_num_quantiles() { return args[0]->val_int(); } + ulong current_row_count_; +}; + + +class Item_window_func : public Item_func_or_sum +{ + /* Window function parameters as we've got them from the parser */ +public: + LEX_STRING *window_name; +public: + Window_spec *window_spec; + + /* + This stores the data about the partition we're currently in. + advance_window() uses this to tell when we've left one partition and + entered another + */ + Group_bound_tracker partition_tracker; +public: + Item_window_func(THD *thd, Item_sum *win_func, LEX_STRING *win_name) + : Item_func_or_sum(thd, (Item *) win_func), + window_name(win_name), window_spec(NULL), + force_return_blank(true), + read_value_from_result_field(false) {} + + Item_window_func(THD *thd, Item_sum *win_func, Window_spec *win_spec) + : Item_func_or_sum(thd, (Item *) win_func), + window_name(NULL), window_spec(win_spec), + force_return_blank(true), + read_value_from_result_field(false) {} + + Item_sum *window_func() const { return (Item_sum *) args[0]; } + + void update_used_tables(); + + /* + This is used by filesort to mark the columns it needs to read (because they + participate in the sort criteria and/or row retrieval. Window functions can + only be used in sort criteria). + + Sorting by window function value is only done after the window functions + have been computed. In that case, window function will need to read its + temp.table field. In order to allow that, mark that field in the read_set. 
+ */ + bool register_field_in_read_map(uchar *arg) + { + TABLE *table= (TABLE*) arg; + if (result_field && (result_field->table == table || !table)) + { + bitmap_set_bit(result_field->table->read_set, result_field->field_index); + } + return 0; + } + + bool is_frame_prohibited() const + { + switch (window_func()->sum_func()) { + case Item_sum::ROW_NUMBER_FUNC: + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: + return true; + default: + return false; + } + } + + bool requires_partition_size() const + { + switch (window_func()->sum_func()) { + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: + return true; + default: + return false; + } + } + + bool requires_peer_size() const + { + switch (window_func()->sum_func()) { + case Item_sum::CUME_DIST_FUNC: + return true; + default: + return false; + } + } + + bool is_order_list_mandatory() const + { + switch (window_func()->sum_func()) { + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + return true; + default: + return false; + } + } + + /* + Computation functions. + TODO: consoder merging these with class Group_bound_tracker. + */ + void setup_partition_border_check(THD *thd); + + void advance_window(); + bool check_if_partition_changed(); + + enum_field_types field_type() const + { + return ((Item_sum *) args[0])->field_type(); + } + enum Item::Type type() const { return Item::WINDOW_FUNC_ITEM; } + +private: + /* + Window functions are very special functions, so val_() methods have + special meaning for them: + + - Phase#1, "Initial" we run the join and put its result into temporary + table. For window functions, we write the default value (NULL?) as + a placeholder. + + - Phase#2: "Computation": executor does the scan in {PARTITION, ORDER BY} + order of this window function. 
It calls appropriate methods to inform + the window function about rows entering/leaving the window. + It calls window_func()->val_int() so that current window function value + can be saved and stored in the temp.table. + + - Phase#3: "Retrieval" the temporary table is read and passed to query + output. However, Item_window_func still remains in the select list, + so item_windowfunc->val_int() will be called. + During Phase#3, read_value_from_result_field= true. + */ + bool force_return_blank; + bool read_value_from_result_field; + +public: + void set_phase_to_initial() + { + force_return_blank= true; + read_value_from_result_field= false; + } + void set_phase_to_computation() + { + force_return_blank= false; + read_value_from_result_field= false; + } + void set_phase_to_retrieval() + { + force_return_blank= false; + read_value_from_result_field= true; + } + + double val_real() + { + double res; + if (force_return_blank) + { + res= 0.0; + null_value= false; + } + else if (read_value_from_result_field) + { + res= result_field->val_real(); + null_value= result_field->is_null(); + } + else + { + res= window_func()->val_real(); + null_value= window_func()->null_value; + } + return res; + } + + longlong val_int() + { + longlong res; + if (force_return_blank) + { + res= 0; + null_value= false; + } + else if (read_value_from_result_field) + { + res= result_field->val_int(); + null_value= result_field->is_null(); + } + else + { + res= window_func()->val_int(); + null_value= window_func()->null_value; + } + return res; + } + + String* val_str(String* str) + { + String *res; + if (force_return_blank) + { + null_value= false; + str->length(0); + res= str; + } + else if (read_value_from_result_field) + { + if ((null_value= result_field->is_null())) + res= NULL; + else + res= result_field->val_str(str); + } + else + { + res= window_func()->val_str(str); + null_value= window_func()->null_value; + } + return res; + } + + my_decimal* val_decimal(my_decimal* dec) + { + my_decimal 
*res; + if (force_return_blank) + { + my_decimal_set_zero(dec); + null_value= false; + res= dec; + } + else if (read_value_from_result_field) + { + if ((null_value= result_field->is_null())) + res= NULL; + else + res= result_field->val_decimal(dec); + } + else + { + res= window_func()->val_decimal(dec); + null_value= window_func()->null_value; + } + return res; + } + + void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, + List<Item> &fields, uint flags); + void fix_length_and_dec() + { + decimals = window_func()->decimals; + } + + const char* func_name() const { return "WF"; } + + bool fix_fields(THD *thd, Item **ref); + + bool resolve_window_name(THD *thd); + +}; + +#endif /* ITEM_WINDOWFUNC_INCLUDED */ diff --git a/sql/lex.h b/sql/lex.h index da5fa2de137..f7a183e1862 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -221,6 +221,7 @@ static SYMBOL symbols[] = { { "EVERY", SYM(EVERY_SYM)}, { "EXAMINED", SYM(EXAMINED_SYM)}, { "EXCHANGE", SYM(EXCHANGE_SYM)}, + { "EXCLUDE", SYM(EXCLUDE_SYM)}, { "EXECUTE", SYM(EXECUTE_SYM)}, { "EXISTS", SYM(EXISTS)}, { "EXIT", SYM(EXIT_SYM)}, @@ -241,6 +242,7 @@ static SYMBOL symbols[] = { { "FLOAT4", SYM(FLOAT_SYM)}, { "FLOAT8", SYM(DOUBLE_SYM)}, { "FLUSH", SYM(FLUSH_SYM)}, + { "FOLLOWING", SYM(FOLLOWING_SYM)}, { "FOR", SYM(FOR_SYM)}, { "FORCE", SYM(FORCE_SYM)}, { "FOREIGN", SYM(FOREIGN)}, @@ -425,9 +427,11 @@ static SYMBOL symbols[] = { { "OPTIONALLY", SYM(OPTIONALLY)}, { "OR", SYM(OR_SYM)}, { "ORDER", SYM(ORDER_SYM)}, + { "OTHERS", SYM(OTHERS_SYM)}, { "OUT", SYM(OUT_SYM)}, { "OUTER", SYM(OUTER)}, { "OUTFILE", SYM(OUTFILE)}, + { "OVER", SYM(OVER_SYM)}, { "OWNER", SYM(OWNER_SYM)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, { "PAGE", SYM(PAGE_SYM)}, @@ -446,6 +450,7 @@ static SYMBOL symbols[] = { { "POINT", SYM(POINT_SYM)}, { "POLYGON", SYM(POLYGON)}, { "PORT", SYM(PORT_SYM)}, + { "PRECEDING", SYM(PRECEDING_SYM)}, { "PRECISION", SYM(PRECISION)}, { "PREPARE", SYM(PREPARE_SYM)}, { "PRESERVE", SYM(PRESERVE_SYM)}, @@ -602,6 +607,7 @@ static 
SYMBOL symbols[] = { { "TEXT", SYM(TEXT_SYM)}, { "THAN", SYM(THAN_SYM)}, { "THEN", SYM(THEN_SYM)}, + { "TIES", SYM(TIES_SYM)}, { "TIME", SYM(TIME_SYM)}, { "TIMESTAMP", SYM(TIMESTAMP)}, { "TIMESTAMPADD", SYM(TIMESTAMP_ADD)}, @@ -619,6 +625,7 @@ static SYMBOL symbols[] = { { "TRUNCATE", SYM(TRUNCATE_SYM)}, { "TYPE", SYM(TYPE_SYM)}, { "TYPES", SYM(TYPES_SYM)}, + { "UNBOUNDED", SYM(UNBOUNDED_SYM)}, { "UNCOMMITTED", SYM(UNCOMMITTED_SYM)}, { "UNDEFINED", SYM(UNDEFINED_SYM)}, { "UNDO_BUFFER_SIZE", SYM(UNDO_BUFFER_SIZE_SYM)}, @@ -660,6 +667,7 @@ static SYMBOL symbols[] = { { "WHEN", SYM(WHEN_SYM)}, { "WHERE", SYM(WHERE)}, { "WHILE", SYM(WHILE_SYM)}, + { "WINDOW", SYM(WINDOW_SYM)}, { "WITH", SYM(WITH)}, { "WORK", SYM(WORK_SYM)}, { "WRAPPER", SYM(WRAPPER_SYM)}, @@ -682,17 +690,23 @@ static SYMBOL sql_functions[] = { { "BIT_XOR", SYM(BIT_XOR)}, { "CAST", SYM(CAST_SYM)}, { "COUNT", SYM(COUNT_SYM)}, + { "CUME_DIST", SYM(CUME_DIST_SYM)}, { "CURDATE", SYM(CURDATE)}, { "CURTIME", SYM(CURTIME)}, { "DATE_ADD", SYM(DATE_ADD_INTERVAL)}, { "DATE_SUB", SYM(DATE_SUB_INTERVAL)}, + { "DENSE_RANK", SYM(DENSE_RANK_SYM)}, { "EXTRACT", SYM(EXTRACT_SYM)}, { "GROUP_CONCAT", SYM(GROUP_CONCAT_SYM)}, { "MAX", SYM(MAX_SYM)}, { "MID", SYM(SUBSTRING)}, /* unireg function */ { "MIN", SYM(MIN_SYM)}, { "NOW", SYM(NOW_SYM)}, + { "NTILE", SYM(NTILE_SYM)}, { "POSITION", SYM(POSITION_SYM)}, + { "PERCENT_RANK", SYM(PERCENT_RANK_SYM)}, + { "RANK", SYM(RANK_SYM)}, + { "ROW_NUMBER", SYM(ROW_NUMBER_SYM)}, { "SESSION_USER", SYM(USER)}, { "STD", SYM(STD_SYM)}, { "STDDEV", SYM(STD_SYM)}, diff --git a/sql/log.cc b/sql/log.cc index dc8c08bfd36..f04b1432229 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -139,6 +139,12 @@ static bool start_binlog_background_thread(); static rpl_binlog_state rpl_global_gtid_binlog_state; +void setup_log_handling() +{ + rpl_global_gtid_binlog_state.init(); +} + + /** purge logs, master and slave sides both, related error code convertor. 
diff --git a/sql/log.h b/sql/log.h index 9bf80d6e603..e556ef91399 100644 --- a/sql/log.h +++ b/sql/log.h @@ -26,6 +26,7 @@ class Relay_log_info; class Format_description_log_event; +void setup_log_handling(); bool trans_has_updated_trans_table(const THD* thd); bool stmt_has_updated_trans_table(const THD *thd); bool use_trans_cache(const THD* thd, bool is_transactional); diff --git a/sql/log_event.cc b/sql/log_event.cc index b56a9e2aee3..3715f0cc4d4 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -4523,7 +4523,7 @@ compare_errors: "Error on master: message (format)='%s' error code=%d ; " "Error on slave: actual message='%s', error code=%d. " "Default database: '%s'. Query: '%s'", - ER_SAFE_THD(thd, expected_error), + ER_THD(thd, expected_error), expected_error, actual_error ? thd->get_stmt_da()->message() : "no error", actual_error, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e7d7f90d44e..845d114bc7a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -367,7 +367,7 @@ static my_bool opt_short_log_format= 0, opt_silent_startup= 0; uint kill_cached_threads; static uint wake_thread; ulong max_used_connections; -static volatile ulong cached_thread_count= 0; +volatile ulong cached_thread_count= 0; static char *mysqld_user, *mysqld_chroot; static char *default_character_set_name; static char *character_set_filesystem_name; @@ -1987,8 +1987,6 @@ static void __cdecl kill_server(int sig_ptr) if (wsrep_inited == 1) wsrep_deinit(true); - wsrep_thr_deinit(); - if (sig != MYSQL_KILL_SIGNAL && sig != 0) unireg_abort(1); /* purecov: inspected */ @@ -2144,6 +2142,9 @@ static void mysqld_exit(int exit_code) set_malloc_size_cb(NULL); cleanup_tls(); DBUG_LEAVE; + if (opt_endinfo && global_status_var.global_memory_used) + fprintf(stderr, "Warning: Memory not freed: %ld\n", + (long) global_status_var.global_memory_used); sd_notify(0, "STATUS=MariaDB server is down"); exit(exit_code); /* purecov: inspected */ } @@ -2213,11 +2214,12 @@ void clean_up(bool print_message) 
free_global_client_stats(); free_global_table_stats(); free_global_index_stats(); - delete_dynamic(&all_options); + delete_dynamic(&all_options); // This should be empty free_all_rpl_filters(); #ifdef HAVE_REPLICATION end_slave_list(); #endif + wsrep_thr_deinit(); my_uuid_end(); delete binlog_filter; delete global_rpl_filter; @@ -2234,13 +2236,12 @@ void clean_up(bool print_message) if (print_message && my_default_lc_messages && server_start_time) sql_print_information(ER_DEFAULT(ER_SHUTDOWN_COMPLETE),my_progname); - cleanup_errmsgs(); MYSQL_CALLBACK(thread_scheduler, end, ()); thread_scheduler= 0; mysql_library_end(); finish_client_errs(); - (void) my_error_unregister(ER_ERROR_FIRST, ER_ERROR_LAST); // finish server errs - DBUG_PRINT("quit", ("Error messages freed")); + cleanup_errmsgs(); + free_error_messages(); /* Tell main we are ready */ logger.cleanup_end(); sys_var_end(); @@ -4088,40 +4089,45 @@ extern "C" { static void my_malloc_size_cb_func(long long size, my_bool is_thread_specific) { THD *thd= current_thd; - /* If thread specific memory */ - if (likely(is_thread_specific)) + + if (likely(is_thread_specific)) /* If thread specific memory */ { - if (mysqld_server_initialized || thd) - { - /* - THD may not be set if we are called from my_net_init() before THD - thread has started. - However, this should never happen, so better to assert and - fix this. 
- */ - DBUG_ASSERT(thd); - if (thd) - { - DBUG_PRINT("info", ("memory_used: %lld size: %lld", - (longlong) thd->status_var.local_memory_used, - size)); - thd->status_var.local_memory_used+= size; - DBUG_ASSERT((longlong) thd->status_var.local_memory_used >= 0 || - !debug_assert_on_not_freed_memory); - } - } + /* + When thread specfic is set, both mysqld_server_initialized and thd + must be set + */ + DBUG_ASSERT(mysqld_server_initialized && thd); + + DBUG_PRINT("info", ("thd memory_used: %lld size: %lld", + (longlong) thd->status_var.local_memory_used, + size)); + thd->status_var.local_memory_used+= size; + DBUG_ASSERT((longlong) thd->status_var.local_memory_used >= 0 || + !debug_assert_on_not_freed_memory); } else if (likely(thd)) + { + DBUG_PRINT("info", ("global thd memory_used: %lld size: %lld", + (longlong) thd->status_var.global_memory_used, + size)); thd->status_var.global_memory_used+= size; + } else { - // workaround for gcc 4.2.4-1ubuntu4 -fPIE (from DEB_BUILD_HARDENING=1) - int64 volatile * volatile ptr=&global_status_var.global_memory_used; - my_atomic_add64_explicit(ptr, size, MY_MEMORY_ORDER_RELAXED); + update_global_memory_status(size); +#ifndef EMBEDDED_LIBRARY + /* + Check if we have missed some mallocs. THis can't be done for embedded + server as the main code may have done calls to malloc before starting + the embedded library. + */ + DBUG_ASSERT(global_status_var.global_memory_used >= 0); +#endif } } } + /** Create a replication file name or base for file names. 
@@ -4158,6 +4164,22 @@ rpl_make_log_name(const char *opt, DBUG_RETURN(NULL); } +/* We have to setup my_malloc_size_cb_func early to catch all mallocs */ + +static int init_early_variables() +{ + if (pthread_key_create(&THR_THD, NULL)) + { + fprintf(stderr, "Fatal error: Can't create thread-keys\n"); + return 1; + } + set_current_thd(0); + set_malloc_size_cb(my_malloc_size_cb_func); + global_status_var.global_memory_used= 0; + return 0; +} + + static int init_common_variables() { umask(((~my_umask) & 0666)); @@ -4169,15 +4191,6 @@ static int init_common_variables() connection_errors_peer_addr= 0; my_decimal_set_zero(&decimal_zero); // set decimal_zero constant; - if (pthread_key_create(&THR_THD, NULL)) - { - sql_print_error("Can't create thread-keys"); - return 1; - } - - set_current_thd(0); - set_malloc_size_cb(my_malloc_size_cb_func); - init_libstrings(); tzset(); // Set tzname @@ -4714,6 +4727,7 @@ static int init_thread_environment() mysql_mutex_init(key_LOCK_global_system_variables, &LOCK_global_system_variables, MY_MUTEX_INIT_FAST); mysql_mutex_record_order(&LOCK_active_mi, &LOCK_global_system_variables); + mysql_mutex_record_order(&LOCK_status, &LOCK_thread_count); mysql_rwlock_init(key_rwlock_LOCK_system_variables_hash, &LOCK_system_variables_hash); mysql_mutex_init(key_LOCK_prepared_stmt_count, @@ -5020,6 +5034,8 @@ static int init_server_components() /* Setup logs */ + setup_log_handling(); + /* Enable old-fashioned error log, except when the user has requested help information. Since the implementation of plugin server @@ -5187,7 +5203,12 @@ static int init_server_components() variables even when a wsrep provider is not loaded. 
*/ + /* It's now safe to use thread specific memory */ + mysqld_server_initialized= 1; + +#ifndef EMBEDDED_LIBRARY wsrep_thr_init(); +#endif if (WSREP_ON && !wsrep_recovery && !opt_abort) /* WSREP BEFORE SE */ { @@ -5637,6 +5658,9 @@ int mysqld_main(int argc, char **argv) sf_leaking_memory= 1; // no safemalloc memory leak reports if we exit early mysqld_server_started= mysqld_server_initialized= 0; + if (init_early_variables()) + exit(1); + #ifdef HAVE_NPTL ld_assume_kernel_is_set= (getenv("LD_ASSUME_KERNEL") != 0); #endif @@ -5939,9 +5963,6 @@ int mysqld_main(int argc, char **argv) if (Events::init((THD*) 0, opt_noacl || opt_bootstrap)) unireg_abort(1); - /* It's now safe to use thread specific memory */ - mysqld_server_initialized= 1; - if (WSREP_ON) { if (opt_bootstrap) @@ -8310,15 +8331,16 @@ static int show_default_keycache(THD *thd, SHOW_VAR *var, char *buff, static int show_memory_used(THD *thd, SHOW_VAR *var, char *buff, + struct system_status_var *status_var, enum enum_var_type scope) { var->type= SHOW_LONGLONG; var->value= buff; if (scope == OPT_GLOBAL) - *(longlong*) buff= (global_status_var.local_memory_used + - global_status_var.global_memory_used); + *(longlong*) buff= (status_var->global_memory_used + + status_var->local_memory_used); else - *(longlong*) buff= thd->status_var.local_memory_used; + *(longlong*) buff= status_var->local_memory_used; return 0; } @@ -8747,7 +8769,9 @@ static int mysql_init_variables(void) prepared_stmt_count= 0; mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS; bzero((uchar*) &mysql_tmpdir_list, sizeof(mysql_tmpdir_list)); - bzero((char *) &global_status_var, sizeof(global_status_var)); + /* Clear all except global_memory_used */ + bzero((char*) &global_status_var, offsetof(STATUS_VAR, + last_cleared_system_status_var)); opt_large_pages= 0; opt_super_large_pages= 0; #if defined(ENABLED_DEBUG_SYNC) @@ -9428,7 +9452,8 @@ static int get_options(int *argc_ptr, char ***argv_ptr) /* prepare all_options array */ 
my_init_dynamic_array(&all_options, sizeof(my_option), - array_elements(my_long_options), + array_elements(my_long_options) + + sys_var_elements(), array_elements(my_long_options)/4, MYF(0)); add_many_options(&all_options, my_long_options, array_elements(my_long_options)); sys_var_add_options(&all_options, 0); @@ -9992,6 +10017,7 @@ void refresh_status(THD *thd) /* Reset thread's status variables */ thd->set_status_var_init(); + thd->status_var.global_memory_used= 0; bzero((uchar*) &thd->org_status_var, sizeof(thd->org_status_var)); thd->start_bytes_received= 0; diff --git a/sql/mysqld.h b/sql/mysqld.h index ef4a0d6a47a..e538cbd724e 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -203,6 +203,7 @@ extern LEX_CSTRING reason_slave_blocked; extern ulong slave_trans_retries; extern uint slave_net_timeout; extern int max_user_connections; +extern volatile ulong cached_thread_count; extern ulong what_to_log,flush_time; extern ulong max_prepared_stmt_count, prepared_stmt_count; extern ulong open_files_limit; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 7169a3eda81..a69709bbf03 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -1533,7 +1533,7 @@ end: if (!head->no_keyread) { doing_key_read= 1; - head->enable_keyread(); + head->set_keyread(true); } head->prepare_for_position(); @@ -10643,7 +10643,7 @@ int read_keys_and_merge_scans(THD *thd, if (!head->key_read) { enabled_keyread= 1; - head->enable_keyread(); + head->set_keyread(true); } head->prepare_for_position(); @@ -10736,7 +10736,7 @@ int read_keys_and_merge_scans(THD *thd, index merge currently doesn't support "using index" at all */ if (enabled_keyread) - head->disable_keyread(); + head->set_keyread(false); if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, &unique->sort, 1 , 1, TRUE)) result= 1; @@ -10744,7 +10744,7 @@ int read_keys_and_merge_scans(THD *thd, err: if (enabled_keyread) - head->disable_keyread(); + head->set_keyread(false); DBUG_RETURN(1); } @@ -12071,9 +12071,6 @@ 
get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) DBUG_RETURN(NULL); /* Cannot execute with correlated conditions. */ /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/ - if (join->make_sum_func_list(join->all_fields, join->fields_list, 1)) - DBUG_RETURN(NULL); - List_iterator<Item> select_items_it(join->fields_list); is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds); @@ -13108,7 +13105,17 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, num_blocks= (ha_rows)(table_records / keys_per_block) + 1; /* Compute the number of keys in a group. */ - keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts - 1); + if (!group_key_parts) + { + /* Summary over the whole table */ + keys_per_group= table_records; + } + else + { + keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts - + 1); + } + if (keys_per_group == 0) /* If there is no statistics try to guess */ /* each group contains 10% of all records */ keys_per_group= (table_records / 10) + 1; @@ -13456,7 +13463,7 @@ QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT() { DBUG_ASSERT(file == head->file); if (doing_key_read) - head->disable_keyread(); + head->set_keyread(false); /* There may be a code path when the same table was first accessed by index, then the index is closed, and the table is scanned (order by + loose scan). 
@@ -13649,7 +13656,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void) if (!head->key_read) { doing_key_read= 1; - head->enable_keyread(); /* We need only the key attributes */ + head->set_keyread(true); /* We need only the key attributes */ } if ((result= file->ha_index_init(index,1))) { diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 79b28a5994e..55c6c075f48 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -445,7 +445,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); static bool convert_subq_to_jtbm(JOIN *parent_join, Item_in_subselect *subq_pred, bool *remove); static TABLE_LIST *alloc_join_nest(THD *thd); -static uint get_tmp_table_rec_length(Item **p_list, uint elements); +static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements); static double get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size); static double get_tmp_table_write_cost(THD *thd, double row_count, @@ -1342,8 +1342,8 @@ static bool replace_where_subcondition(JOIN *join, Item **expr, static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, void *arg) { - return (el1->sj_convert_priority > el2->sj_convert_priority) ? 1 : - ( (el1->sj_convert_priority == el2->sj_convert_priority)? 0 : -1); + return (el1->sj_convert_priority > el2->sj_convert_priority) ? -1 : + ( (el1->sj_convert_priority == el2->sj_convert_priority)? 
0 : 1); } @@ -2236,13 +2236,9 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) JOIN_TAB *tab= join->best_positions[i].table; join->map2table[tab->table->tablenr]= tab; } - //List_iterator<Item> it(right_expr_list); - Item **ref_array= subq_select->ref_pointer_array; - Item **ref_array_end= ref_array + subq_select->item_list.elements; table_map map= 0; - //while ((item= it++)) - for (;ref_array < ref_array_end; ref_array++) - map |= (*ref_array)->used_tables(); + for (uint i=0; i < subq_select->item_list.elements; i++) + map|= subq_select->ref_pointer_array[i]->used_tables(); map= map & ~PSEUDO_TABLE_BITS; Table_map_iterator tm_it(map); int tableno; @@ -2305,15 +2301,14 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) Length of the temptable record, in bytes */ -static uint get_tmp_table_rec_length(Item **p_items, uint elements) +static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements) { uint len= 0; Item *item; //List_iterator<Item> it(items); - Item **p_item; - for (p_item= p_items; p_item < p_items + elements ; p_item++) + for (uint i= 0; i < elements ; i++) { - item = *p_item; + item = p_items[i]; switch (item->result_type()) { case REAL_RESULT: len += sizeof(double); @@ -3562,13 +3557,10 @@ bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab) */ sjm->sjm_table_param.init(); sjm->sjm_table_param.bit_fields_as_long= TRUE; - //List_iterator<Item> it(item_list); SELECT_LEX *subq_select= emb_sj_nest->sj_subq_pred->unit->first_select(); - Item **p_item= subq_select->ref_pointer_array; - Item **p_end= p_item + subq_select->item_list.elements; - //while((right_expr= it++)) - for(;p_item != p_end; p_item++) - sjm->sjm_table_cols.push_back(*p_item, thd->mem_root); + Ref_ptr_array p_items= subq_select->ref_pointer_array; + for (uint i= 0; i < subq_select->item_list.elements; i++) + sjm->sjm_table_cols.push_back(p_items[i], thd->mem_root); sjm->sjm_table_param.field_count= subq_select->item_list.elements; 
sjm->sjm_table_param.force_not_null_cols= TRUE; @@ -3724,13 +3716,13 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) */ sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements]; //it.rewind(); - Item **p_item= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array; + Ref_ptr_array p_items= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array; for (uint i=0; i < sjm->sjm_table_cols.elements; i++) { bool dummy; Item_equal *item_eq; //Item *item= (it++)->real_item(); - Item *item= (*(p_item++))->real_item(); + Item *item= p_items[i]->real_item(); DBUG_ASSERT(item->type() == Item::FIELD_ITEM); Field *copy_to= ((Item_field*)item)->field; /* @@ -5602,7 +5594,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) */ /* C.1 Compute the cost of the materialization strategy. */ //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list); - uint rowlen= get_tmp_table_rec_length(ref_pointer_array, + uint rowlen= get_tmp_table_rec_length(ref_ptrs, select_lex->item_list.elements); /* The cost of writing one row into the temporary table. 
*/ double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1, diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc index f36887eb137..ad3f5aed112 100644 --- a/sql/opt_sum.cc +++ b/sql/opt_sum.cc @@ -406,7 +406,7 @@ int opt_sum_query(THD *thd, if (!error && reckey_in_range(is_max, &ref, item_field->field, conds, range_fl, prefix_len)) error= HA_ERR_KEY_NOT_FOUND; - table->disable_keyread(); + table->set_keyread(false); table->file->ha_index_end(); if (error) { @@ -968,7 +968,7 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, converted (for example to upper case) */ if (field->part_of_key.is_set(idx)) - table->enable_keyread(); + table->set_keyread(true); DBUG_RETURN(TRUE); } } diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc index 912ef4a7df7..6835594ca07 100644 --- a/sql/opt_table_elimination.cc +++ b/sql/opt_table_elimination.cc @@ -1105,7 +1105,7 @@ int compare_field_values(Dep_value_field *a, Dep_value_field *b, void *unused) uint b_ratio= b->field->table->tablenr*MAX_FIELDS + b->field->field_index; - return (a_ratio < b_ratio)? -1 : ((a_ratio == b_ratio)? 0 : 1); + return (a_ratio < b_ratio)? 1 : ((a_ratio == b_ratio)? 
0 : -1); } diff --git a/sql/records.cc b/sql/records.cc index 3995bea6569..e7a4ab836c0 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -39,7 +39,7 @@ int rr_sequential(READ_RECORD *info); static int rr_from_tempfile(READ_RECORD *info); static int rr_unpack_from_tempfile(READ_RECORD *info); static int rr_unpack_from_buffer(READ_RECORD *info); -static int rr_from_pointers(READ_RECORD *info); +int rr_from_pointers(READ_RECORD *info); static int rr_from_cache(READ_RECORD *info); static int init_rr_cache(THD *thd, READ_RECORD *info); static int rr_cmp(uchar *a,uchar *b); @@ -316,7 +316,7 @@ void end_read_record(READ_RECORD *info) } if (info->table) { - if (info->table->created) + if (info->table->is_created()) (void) info->table->file->extra(HA_EXTRA_NO_CACHE); if (info->read_record != rr_quick) // otherwise quick_range does it (void) info->table->file->ha_index_or_rnd_end(); @@ -535,7 +535,7 @@ static int rr_unpack_from_tempfile(READ_RECORD *info) return 0; } -static int rr_from_pointers(READ_RECORD *info) +int rr_from_pointers(READ_RECORD *info) { int tmp; uchar *cache_pos; diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index f54ef2b0081..5a94e1c5b54 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -1127,7 +1127,7 @@ rpl_slave_state::is_empty() } -rpl_binlog_state::rpl_binlog_state() +void rpl_binlog_state::init() { my_hash_init(&hash, &my_charset_bin, 32, offsetof(element, domain_id), sizeof(uint32), NULL, my_free, HASH_UNIQUE); @@ -1137,7 +1137,6 @@ rpl_binlog_state::rpl_binlog_state() initialized= 1; } - void rpl_binlog_state::reset_nolock() { diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index ece6effbef6..f638a084e38 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -231,9 +231,10 @@ struct rpl_binlog_state /* Auxiliary buffer to sort gtid list. 
*/ DYNAMIC_ARRAY gtid_sort_array; - rpl_binlog_state(); + rpl_binlog_state() :initialized(0) {} ~rpl_binlog_state(); + void init(); void reset_nolock(); void reset(); void free(); diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index df721342d1d..02dbac46eb5 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -205,43 +205,56 @@ void init_master_log_pos(Master_info* mi) /** Parses the IO_CACHE for "key=" and returns the "key". + If no '=' found, returns the whole line (for END_MARKER). @param key [OUT] Key buffer @param max_size [IN] Maximum buffer size @param f [IN] IO_CACHE file + @param found_equal [OUT] Set true if a '=' was found. @retval 0 Either "key=" or '\n' found @retval 1 EOF */ -static int read_mi_key_from_file(char *key, int max_size, IO_CACHE *f) +static int +read_mi_key_from_file(char *key, int max_size, IO_CACHE *f, bool *found_equal) { int i= 0, c; - char *last_p; DBUG_ENTER("read_key_from_file"); - while (((c= my_b_get(f)) != '\n') && (c != my_b_EOF)) + *found_equal= false; + if (max_size <= 0) + DBUG_RETURN(1); + for (;;) { - last_p= key + i; - - if (i < max_size) + if (i >= max_size-1) { - if (c == '=') - { - /* We found '=', replace it by 0 and return. */ - *last_p= 0; - DBUG_RETURN(0); - } - else - *last_p= c; + key[i] = '\0'; + DBUG_RETURN(0); + } + c= my_b_get(f); + if (c == my_b_EOF) + { + DBUG_RETURN(1); + } + else if (c == '\n') + { + key[i]= '\0'; + DBUG_RETURN(0); + } + else if (c == '=') + { + key[i]= '\0'; + *found_equal= true; + DBUG_RETURN(0); + } + else + { + key[i]= c; + ++i; } - ++i; } - - if (c == my_b_EOF) - DBUG_RETURN(1); - - DBUG_RETURN(0); + /* NotReached */ } enum { @@ -539,6 +552,10 @@ file '%s')", fname); if (lines >= LINE_FOR_LAST_MYSQL_FUTURE) { uint i; + bool got_eq; + bool seen_using_gtid= false; + bool seen_do_domain_ids=false, seen_ignore_domain_ids=false; + /* Skip lines used by / reserved for MySQL >= 5.6. 
*/ for (i= LINE_FOR_FIRST_MYSQL_5_6; i <= LINE_FOR_LAST_MYSQL_FUTURE; ++i) { @@ -551,11 +568,12 @@ file '%s')", fname); for "key=" and returns the "key" if found. The "value" can then the parsed on case by case basis. The "unknown" lines would be ignored to facilitate downgrades. + 10.0 does not have the END_MARKER before any left-overs at the end + of the file. So ignore any but the first occurrence of a key. */ - while (!read_mi_key_from_file(buf, sizeof(buf), &mi->file)) + while (!read_mi_key_from_file(buf, sizeof(buf), &mi->file, &got_eq)) { - /* using_gtid */ - if (!strncmp(buf, STRING_WITH_LEN("using_gtid"))) + if (got_eq && !seen_using_gtid && !strcmp(buf, "using_gtid")) { int val; if (!init_intvar_from_file(&val, &mi->file, 0)) @@ -566,15 +584,13 @@ file '%s')", fname); mi->using_gtid= Master_info::USE_GTID_SLAVE_POS; else mi->using_gtid= Master_info::USE_GTID_NO; - continue; + seen_using_gtid= true; } else { sql_print_error("Failed to initialize master info using_gtid"); goto errwithmsg; } } - - /* DO_DOMAIN_IDS */ - if (!strncmp(buf, STRING_WITH_LEN("do_domain_ids"))) + else if (got_eq && !seen_do_domain_ids && !strcmp(buf, "do_domain_ids")) { if (mi->domain_id_filter.init_ids(&mi->file, Domain_id_filter::DO_DOMAIN_IDS)) @@ -582,11 +598,10 @@ file '%s')", fname); sql_print_error("Failed to initialize master info do_domain_ids"); goto errwithmsg; } - continue; + seen_do_domain_ids= true; } - - /* IGNORE_DOMAIN_IDS */ - if (!strncmp(buf, STRING_WITH_LEN("ignore_domain_ids"))) + else if (got_eq && !seen_ignore_domain_ids && + !strcmp(buf, "ignore_domain_ids")) { if (mi->domain_id_filter.init_ids(&mi->file, Domain_id_filter::IGNORE_DOMAIN_IDS)) @@ -595,9 +610,9 @@ file '%s')", fname); "ignore_domain_ids"); goto errwithmsg; } - continue; + seen_ignore_domain_ids= true; } - else if (!strncmp(buf, STRING_WITH_LEN("END_MARKER"))) + else if (!got_eq && !strcmp(buf, "END_MARKER")) { /* Guard agaist extra left-overs at the end of file, in case a later diff --git 
a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index acdedd6e0a0..df036d0e23f 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -987,7 +987,6 @@ handle_rpl_parallel_thread(void *arg) thd->client_capabilities = CLIENT_LOCAL_FILES; thd->net.reading_or_writing= 0; thd_proc_info(thd, "Waiting for work from main SQL threads"); - thd->set_time(); thd->variables.lock_wait_timeout= LONG_TIMEOUT; thd->system_thread_info.rpl_sql_info= &sql_info; /* diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 2dd6f7d7afc..f9d0910a2a7 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -70,6 +70,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) DBUG_ENTER("Relay_log_info::Relay_log_info"); relay_log.is_relay_log= TRUE; + relay_log_state.init(); #ifdef HAVE_PSI_INTERFACE relay_log.set_psi_keys(key_RELAYLOG_LOCK_index, key_RELAYLOG_update_cond, diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index e896c189a8e..3eaee90d0f6 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -347,10 +347,11 @@ public: rpl_parallel parallel; /* - The relay_log_state keeps track of the current binlog state of the execution - of the relay log. This is used to know where to resume current GTID position - if the slave thread is stopped and restarted. - It is only accessed from the SQL thread, so it does not need any locking. + The relay_log_state keeps track of the current binlog state of the + execution of the relay log. This is used to know where to resume + current GTID position if the slave thread is stopped and + restarted. It is only accessed from the SQL thread, so it does + not need any locking. */ rpl_binlog_state relay_log_state; /* diff --git a/sql/set_var.cc b/sql/set_var.cc index b5430c56865..5392a0065ac 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -64,7 +64,7 @@ int sys_var_init() /* Must be already initialized. 
*/ DBUG_ASSERT(system_charset_info != NULL); - if (my_hash_init(&system_variable_hash, system_charset_info, 100, 0, + if (my_hash_init(&system_variable_hash, system_charset_info, 700, 0, 0, (my_hash_get_key) get_sys_var_length, 0, HASH_UNIQUE)) goto error; @@ -78,6 +78,11 @@ error: DBUG_RETURN(1); } +uint sys_var_elements() +{ + return system_variable_hash.records; +} + int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags) { uint saved_elements= long_options->elements; diff --git a/sql/set_var.h b/sql/set_var.h index cf86ecf18fa..060a4e1a57c 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -414,6 +414,7 @@ extern sys_var *Sys_autocommit_ptr; CHARSET_INFO *get_old_charset_by_name(const char *old_name); int sys_var_init(); +uint sys_var_elements(); int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags); void sys_var_end(void); diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 0d168e937a9..376c1eb9d0d 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7154,3 +7154,50 @@ ER_WRONG_ORDER_IN_WITH_CLAUSE eng "The definition of the table '%s' refers to the table '%s' defined later in a non-recursive WITH clause" ER_RECURSIVE_QUERY_IN_WITH_CLAUSE eng "Recursive queries in WITH clause are not supported yet" + +# +# Internal errors, not used +# +skip-to-error-number 2000 + +# MySQL 5.7 error numbers starts here +skip-to-error-number 3000 + +ER_MYSQL_57_TEST + eng "5.7 test" + +# MariaDB extra error numbers starts from 4000 +skip-to-error-number 4000 + +ER_WRONG_WINDOW_SPEC_NAME + eng "Window specification with name '%s' is not defined" +ER_DUP_WINDOW_NAME + eng "Multiple window specifications with the same name '%s'" +ER_PARTITION_LIST_IN_REFERENCING_WINDOW_SPEC + eng "Window specification referencing another one '%s' cannot contain partition list" +ER_ORDER_LIST_IN_REFERENCING_WINDOW_SPEC + eng "Referenced window specification '%s' already contains order list" 
+ER_WINDOW_FRAME_IN_REFERENCED_WINDOW_SPEC + eng "Referenced window specification '%s' cannot contain window frame" +ER_BAD_COMBINATION_OF_WINDOW_FRAME_BOUND_SPECS + eng "Unacceptable combination of window frame bound specifications" +ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION + eng "Window function is allowed only in SELECT list and ORDER BY clause" +ER_WINDOW_FUNCTION_IN_WINDOW_SPEC + eng "Window function is not allowed in window specification" +ER_NOT_ALLOWED_WINDOW_FRAME + eng "Window frame is not allowed with '%s'" +ER_NO_ORDER_LIST_IN_WINDOW_SPEC + eng "No order list in window specification for '%s'" +ER_RANGE_FRAME_NEEDS_SIMPLE_ORDERBY + eng "RANGE-type frame requires ORDER BY clause with single sort key" +ER_WRONG_TYPE_FOR_ROWS_FRAME + eng "Integer is required for ROWS-type frame" +ER_WRONG_TYPE_FOR_RANGE_FRAME + eng "Numeric datatype is required for RANGE-type frame" +ER_FRAME_EXCLUSION_NOT_SUPPORTED + eng "Frame exclusion is not supported yet" +ER_WINDOW_FUNCTION_DONT_HAVE_FRAME + eng "This window function may not have a window frame" +ER_INVALID_NTILE_ARGUMENT + eng "Argument of NTILE must be greater than 0" diff --git a/sql/slave.cc b/sql/slave.cc index 93506bc2ccd..30a0018f490 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4955,7 +4955,6 @@ err_during_init: delete serial_rgi; mysql_mutex_unlock(&LOCK_thread_count); - THD_CHECK_SENTRY(thd); delete thd; thread_safe_decrement32(&service_thread_count); signal_thd_deleted(); diff --git a/sql/sql_analyze_stmt.cc b/sql/sql_analyze_stmt.cc index d11c93229b0..58f72d6b8de 100644 --- a/sql/sql_analyze_stmt.cc +++ b/sql/sql_analyze_stmt.cc @@ -69,75 +69,3 @@ void Filesort_tracker::print_json_members(Json_writer *writer) } } - -/* - Report that we are doing a filesort. - @return - Tracker object to be used with filesort -*/ - -Filesort_tracker *Sort_and_group_tracker::report_sorting(THD *thd) -{ - DBUG_ASSERT(cur_action < MAX_QEP_ACTIONS); - - if (total_actions) - { - /* This is not the first execution. 
Check */ - if (qep_actions[cur_action] != EXPL_ACTION_FILESORT) - { - varied_executions= true; - cur_action++; - if (!dummy_fsort_tracker) - dummy_fsort_tracker= new (thd->mem_root) Filesort_tracker(is_analyze); - return dummy_fsort_tracker; - } - return qep_actions_data[cur_action++].filesort_tracker; - } - - Filesort_tracker *fs_tracker= new(thd->mem_root)Filesort_tracker(is_analyze); - qep_actions_data[cur_action].filesort_tracker= fs_tracker; - qep_actions[cur_action++]= EXPL_ACTION_FILESORT; - - return fs_tracker; -} - - -void Sort_and_group_tracker::report_tmp_table(TABLE *tbl) -{ - DBUG_ASSERT(cur_action < MAX_QEP_ACTIONS); - if (total_actions) - { - /* This is not the first execution. Check if the steps match. */ - // todo: should also check that tmp.table kinds are the same. - if (qep_actions[cur_action] != EXPL_ACTION_TEMPTABLE) - varied_executions= true; - } - - if (!varied_executions) - { - qep_actions[cur_action]= EXPL_ACTION_TEMPTABLE; - // qep_actions_data[cur_action]= .... - } - - cur_action++; -} - - -void Sort_and_group_tracker::report_duplicate_removal() -{ - DBUG_ASSERT(cur_action < MAX_QEP_ACTIONS); - if (total_actions) - { - /* This is not the first execution. Check if the steps match. 
*/ - if (qep_actions[cur_action] != EXPL_ACTION_REMOVE_DUPS) - varied_executions= true; - } - - if (!varied_executions) - { - qep_actions[cur_action]= EXPL_ACTION_REMOVE_DUPS; - } - - cur_action++; -} - diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h index d7634bdfb85..2a08a842dfc 100644 --- a/sql/sql_analyze_stmt.h +++ b/sql/sql_analyze_stmt.h @@ -284,174 +284,3 @@ private: ulonglong sort_buffer_size; }; - -typedef enum -{ - EXPL_NO_TMP_TABLE=0, - EXPL_TMP_TABLE_BUFFER, - EXPL_TMP_TABLE_GROUP, - EXPL_TMP_TABLE_DISTINCT -} enum_tmp_table_use; - - -typedef enum -{ - EXPL_ACTION_EOF, /* not-an-action */ - EXPL_ACTION_FILESORT, - EXPL_ACTION_TEMPTABLE, - EXPL_ACTION_REMOVE_DUPS, -} enum_qep_action; - - -/* - This is to track how a JOIN object has resolved ORDER/GROUP BY/DISTINCT - - We are not tied to the query plan at all, because query plan does not have - sufficient information. *A lot* of decisions about ordering/grouping are - made at very late stages (in JOIN::exec, JOIN::init_execution, in - create_sort_index and even in create_tmp_table). - - The idea is that operations that happen during select execution will report - themselves. We have these operations: - - Sorting with filesort() - - Duplicate row removal (the one done by remove_duplicates()). - - Use of temporary table to buffer the result. - - There is also "Selection" operation, done by do_select(). It reads rows, - there are several distinct cases: - 1. doing the join operation on the base tables - 2. reading the temporary table - 3. reading the filesort output - it would be nice to build execution graph, e.g. - - Select(JOIN op) -> temp.table -> filesort -> Select(filesort result) - - the problem is that there is no way to tell what a do_select() call will do. - - Our solution is not to have explicit selection operations. We make these - assumptions about the query plan: - - Select(JOIN op) is the first operation in the query plan - - Unless the first recorded operation is filesort(). 
filesort() is unable - read result of a select, so when we find it first, the query plan is: - - filesort(first join table) -> Select(JOIN op) -> ... - - the other popular query plan is: - - Select (JOIN op) -> temp.table -> filesort() -> ... - -///TODO: handle repeated execution with subselects! -*/ - -class Sort_and_group_tracker : public Sql_alloc -{ - enum { MAX_QEP_ACTIONS = 5 }; - - /* Query actions in the order they were made. */ - enum_qep_action qep_actions[MAX_QEP_ACTIONS]; - - /* Number for the next action */ - int cur_action; - - /* - Non-zero means there was already an execution which had - #total_actions actions - */ - int total_actions; - - int get_n_actions() - { - return total_actions? total_actions: cur_action; - } - - /* - TRUE<=>there were executions which took different sort/buffer/de-duplicate - routes. The counter values are not meaningful. - */ - bool varied_executions; - - /* Details about query actions */ - union - { - Filesort_tracker *filesort_tracker; - enum_tmp_table_use tmp_table; - } - qep_actions_data[MAX_QEP_ACTIONS]; - - Filesort_tracker *dummy_fsort_tracker; - bool is_analyze; -public: - Sort_and_group_tracker(bool is_analyze_arg) : - cur_action(0), total_actions(0), varied_executions(false), - dummy_fsort_tracker(NULL), - is_analyze(is_analyze_arg) - {} - - /*************** Reporting interface ***************/ - /* Report that join execution is started */ - void report_join_start() - { - if (!total_actions && cur_action != 0) - { - /* This is a second execution */ - total_actions= cur_action; - } - cur_action= 0; - } - - /* - Report that a temporary table is created. The next step is to write to the - this tmp. table - */ - void report_tmp_table(TABLE *tbl); - - /* - Report that we are doing a filesort. - @return - Tracker object to be used with filesort - */ - Filesort_tracker *report_sorting(THD *thd); - - /* - Report that remove_duplicates() is invoked [on a temp. table]. 
- We don't collect any statistics on this operation, yet. - */ - void report_duplicate_removal(); - - friend class Iterator; - /*************** Statistics retrieval interface ***************/ - bool had_varied_executions() { return varied_executions; } - - class Iterator - { - Sort_and_group_tracker *owner; - int idx; - public: - Iterator(Sort_and_group_tracker *owner_arg) : - owner(owner_arg), idx(owner_arg->get_n_actions() - 1) - {} - - enum_qep_action get_next(Filesort_tracker **tracker/*, - enum_tmp_table_use *tmp_table_use*/) - { - /* Walk back through the array... */ - if (idx < 0) - return EXPL_ACTION_EOF; - switch (owner->qep_actions[idx]) - { - case EXPL_ACTION_FILESORT: - *tracker= owner->qep_actions_data[idx].filesort_tracker; - break; - case EXPL_ACTION_TEMPTABLE: - //*tmp_table_use= tmp_table_kind[tmp_table_idx++]; - break; - default: - break; - } - return owner->qep_actions[idx--]; - } - - bool is_last_element() { return idx == -1; } - }; -}; - diff --git a/sql/sql_array.h b/sql/sql_array.h index 159951e26a6..bbaa653b177 100644 --- a/sql/sql_array.h +++ b/sql/sql_array.h @@ -85,6 +85,15 @@ public: Element_type *array() const { return m_array; } + bool operator==(const Bounds_checked_array<Element_type>&rhs) const + { + return m_array == rhs.m_array && m_size == rhs.m_size; + } + bool operator!=(const Bounds_checked_array<Element_type>&rhs) const + { + return m_array != rhs.m_array || m_size != rhs.m_size; + } + private: Element_type *m_array; size_t m_size; @@ -230,6 +239,11 @@ public: delete_dynamic(&array); } + void free_memory() + { + delete_dynamic(&array); + } + typedef int (*CMP_FUNC)(const Elem *el1, const Elem *el2); void sort(CMP_FUNC cmp_func) diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 08509a0e2bf..18e7ee950e6 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -2551,8 +2551,13 @@ retry_share: (void) ot_ctx->request_backoff_action(Open_table_context::OT_DISCOVER, table_list); else if (share->crashed) - (void) 
ot_ctx->request_backoff_action(Open_table_context::OT_REPAIR, - table_list); + { + if (!(flags & MYSQL_OPEN_IGNORE_REPAIR)) + (void) ot_ctx->request_backoff_action(Open_table_context::OT_REPAIR, + table_list); + else + table_list->crashed= 1; /* Mark that table was crashed */ + } goto err_lock; } if (open_table_entry_fini(thd, share, table)) @@ -6824,6 +6829,8 @@ find_field_in_tables(THD *thd, Item_ident *item, or as a field name without alias, or as a field hidden by alias, or ignoring alias) + limit How many items in the list to check + (if limit==0 then all items are to be checked) RETURN VALUES 0 Item is not found or item is not unique, @@ -6841,9 +6848,10 @@ Item **not_found_item= (Item**) 0x1; Item ** find_item_in_list(Item *find, List<Item> &items, uint *counter, find_item_error_report_type report_error, - enum_resolution_type *resolution) + enum_resolution_type *resolution, uint limit) { List_iterator<Item> li(items); + uint n_items= limit == 0 ? items.elements : limit; Item **found=0, **found_unaliased= 0, *item; const char *db_name=0; const char *field_name=0; @@ -6867,8 +6875,9 @@ find_item_in_list(Item *find, List<Item> &items, uint *counter, db_name= ((Item_ident*) find)->db_name; } - for (uint i= 0; (item=li++); i++) + for (uint i= 0; i < n_items; i++) { + item= li++; if (field_name && (item->real_item()->type() == Item::FIELD_ITEM || ((item->type() == Item::REF_ITEM) && @@ -7749,11 +7758,13 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, List<Item> *sum_func_list, uint wild_num) { + if (!wild_num) + return(0); + Item *item; List_iterator<Item> it(fields); Query_arena *arena, backup; DBUG_ENTER("setup_wild"); - DBUG_ASSERT(wild_num != 0); /* Don't use arena if we are not in prepared statements or stored procedures @@ -7832,7 +7843,7 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, ** Check that all given fields exists and fill struct with current data 
****************************************************************************/ -bool setup_fields(THD *thd, Item **ref_pointer_array, +bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &fields, enum_mark_columns mark_used_columns, List<Item> *sum_func_list, bool allow_sum_func) { @@ -7842,7 +7853,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, List_iterator<Item> it(fields); bool save_is_item_list_lookup; DBUG_ENTER("setup_fields"); - DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array)); + DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array.array())); thd->mark_used_columns= mark_used_columns; DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns)); @@ -7864,8 +7875,11 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, TODO: remove it when (if) we made one list for allfields and ref_pointer_array */ - if (ref_pointer_array) - bzero(ref_pointer_array, sizeof(Item *) * fields.elements); + if (!ref_pointer_array.is_null()) + { + DBUG_ASSERT(ref_pointer_array.size() >= fields.elements); + memset(ref_pointer_array.array(), 0, sizeof(Item *) * fields.elements); + } /* We call set_entry() there (before fix_fields() of the whole list of field @@ -7883,7 +7897,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, while ((var= li++)) var->set_entry(thd, FALSE); - Item **ref= ref_pointer_array; + Ref_ptr_array ref= ref_pointer_array; thd->lex->current_select->cur_pos_in_select_list= 0; while ((item= it++)) { @@ -7896,12 +7910,20 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns)); DBUG_RETURN(TRUE); /* purecov: inspected */ } - if (ref) - *(ref++)= item; - if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM && - sum_func_list) + if (!ref.is_null()) + { + ref[0]= item; + ref.pop_front(); + } + /* + split_sum_func() must be called for Window Function items, see + Item_window_func::split_sum_func. 
+ */ + if ((item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM && + sum_func_list) || item->with_window_func) item->split_sum_func(thd, ref_pointer_array, *sum_func_list, SPLIT_SUM_SELECT); + thd->lex->current_select->select_list_tables|= item->used_tables(); thd->lex->used_tables|= item->used_tables(); thd->lex->current_select->cur_pos_in_select_list++; } @@ -8320,7 +8342,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, views and natural joins this update is performed inside the loop below. */ if (table) + { thd->lex->used_tables|= table->map; + thd->lex->current_select->select_list_tables|= table->map; + } /* Initialize a generic field iterator for the current table reference. @@ -8412,6 +8437,8 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, if (field_table) { thd->lex->used_tables|= field_table->map; + thd->lex->current_select->select_list_tables|= + field_table->map; field_table->covering_keys.intersect(field->part_of_key); field_table->merge_keys.merge(field->part_of_key); field_table->used_fields++; diff --git a/sql/sql_base.h b/sql/sql_base.h index b6e135b6feb..d6bd0e2ace7 100644 --- a/sql/sql_base.h +++ b/sql/sql_base.h @@ -107,6 +107,10 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type update, table flush, wait on thr_lock.c locks) while opening and locking table. */ #define MYSQL_OPEN_IGNORE_KILLED 0x8000 +/** + Don't try to auto-repair table +*/ +#define MYSQL_OPEN_IGNORE_REPAIR 0x10000 /** Please refer to the internals manual. 
*/ #define MYSQL_OPEN_REOPEN (MYSQL_OPEN_IGNORE_FLUSH |\ @@ -168,7 +172,7 @@ void make_leaves_list(THD *thd, List<TABLE_LIST> &list, TABLE_LIST *tables, bool full_table_list, TABLE_LIST *boundary); int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, List<Item> *sum_func_list, uint wild_num); -bool setup_fields(THD *thd, Item** ref_pointer_array, +bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &item, enum_mark_columns mark_used_columns, List<Item> *sum_func_list, bool allow_sum_func); void unfix_fields(List<Item> &items); @@ -197,7 +201,7 @@ Field * find_field_in_table_sef(TABLE *table, const char *name); Item ** find_item_in_list(Item *item, List<Item> &items, uint *counter, find_item_error_report_type report_error, - enum_resolution_type *resolution); + enum_resolution_type *resolution, uint limit= 0); bool setup_tables(THD *thd, Name_resolution_context *context, List<TABLE_LIST> *from_clause, TABLE_LIST *tables, List<TABLE_LIST> &leaves, bool select_insert, @@ -382,7 +386,7 @@ inline TABLE_LIST *find_table_in_local_list(TABLE_LIST *table, } -inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array, +inline bool setup_fields_with_no_wrap(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &item, enum_mark_columns mark_used_columns, List<Item> *sum_func_list, diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index 91dd8ad7325..42b80d9b143 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -354,7 +354,7 @@ const uchar *query_state_map; #include "emb_qcache.h" #endif -#if !defined(EXTRA_DBUG) && !defined(DBUG_OFF) +#if defined(EXTRA_DEBUG) && !defined(DBUG_OFF) #define RW_WLOCK(M) {DBUG_PRINT("lock", ("rwlock wlock 0x%lx",(ulong)(M))); \ if (!mysql_rwlock_wrlock(M)) DBUG_PRINT("lock", ("rwlock wlock ok")); \ else DBUG_PRINT("lock", ("rwlock wlock FAILED %d", errno)); } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index e3b70566597..b1217cb1f9f 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ 
-908,7 +908,8 @@ THD::THD(bool is_wsrep_applier) */ THD *old_THR_THD= current_thd; set_current_thd(this); - status_var.local_memory_used= status_var.global_memory_used= 0; + status_var.local_memory_used= sizeof(THD); + status_var.global_memory_used= 0; main_da.init(); /* @@ -1641,6 +1642,8 @@ THD::~THD() that memory allocation counting is done correctly */ set_current_thd(this); + if (!status_in_global) + add_status_to_global(); /* Ensure that no one is using THD */ mysql_mutex_lock(&LOCK_thd_data); @@ -1703,6 +1706,7 @@ THD::~THD() if (xid_hash_pins) lf_hash_put_pins(xid_hash_pins); /* Ensure everything is freed */ + status_var.local_memory_used-= sizeof(THD); if (status_var.local_memory_used != 0) { DBUG_PRINT("error", ("memory_used: %lld", status_var.local_memory_used)); @@ -1710,7 +1714,7 @@ THD::~THD() DBUG_ASSERT(status_var.local_memory_used == 0 || !debug_assert_on_not_freed_memory); } - + update_global_memory_status(status_var.global_memory_used); set_current_thd(orig_thd == this ? 0 : orig_thd); DBUG_VOID_RETURN; } @@ -1748,12 +1752,17 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var) to_var->binlog_bytes_written+= from_var->binlog_bytes_written; to_var->cpu_time+= from_var->cpu_time; to_var->busy_time+= from_var->busy_time; - to_var->local_memory_used+= from_var->local_memory_used; /* Update global_memory_used. We have to do this with atomic_add as the global value can change outside of LOCK_status. 
*/ + if (to_var == &global_status_var) + { + DBUG_PRINT("info", ("global memory_used: %lld size: %lld", + (longlong) global_status_var.global_memory_used, + (longlong) from_var->global_memory_used)); + } // workaround for gcc 4.2.4-1ubuntu4 -fPIE (from DEB_BUILD_HARDENING=1) int64 volatile * volatile ptr= &to_var->global_memory_used; my_atomic_add64_explicit(ptr, from_var->global_memory_used, diff --git a/sql/sql_class.h b/sql/sql_class.h index be652e6dd01..e0792a4059f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -658,7 +658,7 @@ typedef struct system_variables /* Error messages */ MY_LOCALE *lc_messages; - const char **errmsgs; /* lc_messages->errmsg->errmsgs */ + const char ***errmsgs; /* lc_messages->errmsg->errmsgs */ /* Locale Support */ MY_LOCALE *lc_time_names; @@ -824,6 +824,20 @@ void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var); void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var, STATUS_VAR *dec_var); +/* + Update global_memory_used. We have to do this with atomic_add as the + global value can change outside of LOCK_status. +*/ +inline void update_global_memory_status(int64 size) +{ + DBUG_PRINT("info", ("global memory_used: %lld size: %lld", + (longlong) global_status_var.global_memory_used, + size)); + // workaround for gcc 4.2.4-1ubuntu4 -fPIE (from DEB_BUILD_HARDENING=1) + int64 volatile * volatile ptr= &global_status_var.global_memory_used; + my_atomic_add64_explicit(ptr, size, MY_MEMORY_ORDER_RELAXED); +} + /** Get collation by name, send error to client on failure. 
@param name Collation name @@ -3846,9 +3860,11 @@ public: void add_status_to_global() { + DBUG_ASSERT(status_in_global == 0); mysql_mutex_lock(&LOCK_status); add_to_status(&global_status_var, &status_var); /* Mark that this THD status has already been added in global status */ + status_var.global_memory_used= 0; status_in_global= 1; mysql_mutex_unlock(&LOCK_status); } @@ -4527,16 +4543,9 @@ inline uint tmp_table_max_key_parts() { return MI_MAX_KEY_SEG; } class TMP_TABLE_PARAM :public Sql_alloc { -private: - /* Prevent use of these (not safe because of lists and copy_field) */ - TMP_TABLE_PARAM(const TMP_TABLE_PARAM &); - void operator=(TMP_TABLE_PARAM &); - public: List<Item> copy_funcs; - List<Item> save_copy_funcs; Copy_field *copy_field, *copy_field_end; - Copy_field *save_copy_field, *save_copy_field_end; uchar *group_buff; Item **items_to_copy; /* Fields in tmp table */ TMP_ENGINE_COLUMNDEF *recinfo, *start_recinfo; @@ -4571,7 +4580,13 @@ public: uint hidden_field_count; uint group_parts,group_length,group_null_parts; uint quick_group; - bool using_indirect_summary_function; + /** + Enabled when we have atleast one outer_sum_func. Needed when used + along with distinct. + + @see create_tmp_table + */ + bool using_outer_summary_function; CHARSET_INFO *table_charset; bool schema_table; /* TRUE if the temp table is created for subquery materialization. 
*/ @@ -4601,9 +4616,10 @@ public: TMP_TABLE_PARAM() :copy_field(0), group_parts(0), group_length(0), group_null_parts(0), - schema_table(0), materialized_subquery(0), force_not_null_cols(0), - precomputed_group_by(0), - force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0) + using_outer_summary_function(0), + schema_table(0), materialized_subquery(0), force_not_null_cols(0), + precomputed_group_by(0), + force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0) {} ~TMP_TABLE_PARAM() { @@ -4615,8 +4631,8 @@ public: if (copy_field) /* Fix for Intel compiler */ { delete [] copy_field; - save_copy_field= copy_field= NULL; - save_copy_field_end= copy_field_end= NULL; + copy_field= NULL; + copy_field_end= NULL; } } }; diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index ea114bf40a5..66564bd5e94 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -1192,7 +1192,6 @@ void prepare_new_connection_state(THD* thd) */ thd->proc_info= 0; thd->set_command(COM_SLEEP); - thd->set_time(); thd->init_for_queries(); if (opt_init_connect.length && !(sctx->master_access & SUPER_ACL)) @@ -1234,7 +1233,6 @@ void prepare_new_connection_state(THD* thd) } thd->proc_info=0; - thd->set_time(); thd->init_for_queries(); } } diff --git a/sql/sql_cte.cc b/sql/sql_cte.cc index 1203a4ce0c8..77f0bcf04ba 100644 --- a/sql/sql_cte.cc +++ b/sql/sql_cte.cc @@ -512,7 +512,10 @@ With_element *st_select_lex::find_table_def_in_with_clauses(TABLE_LIST *table) { With_clause *with_clause=sl->get_with_clause(); if (with_clause && (found= with_clause->find_table_def(table))) - return found; + return found; + /* Do not look for the table's definition beyond the scope of the view */ + if (sl->master_unit()->is_view) + break; } return found; } diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 42e7f6c3569..95aee805c7f 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -263,7 +263,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, 
my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE"); DBUG_RETURN(TRUE); } - if (!(table= table_list->table) || !table->created) + if (!(table= table_list->table) || !table->is_created()) { my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), table_list->view_db.str, table_list->view_name.str); @@ -490,27 +490,31 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (query_plan.using_filesort) { - uint length= 0; - SORT_FIELD *sortorder; { + Filesort fsort(order, HA_POS_ERROR, true, select); DBUG_ASSERT(query_plan.index == MAX_KEY); + Filesort_tracker *fs_tracker= thd->lex->explain->get_upd_del_plan()->filesort_tracker; - if (!(sortorder= make_unireg_sortorder(thd, order, &length, NULL)) || - !(file_sort= filesort(thd, table, sortorder, length, - select, HA_POS_ERROR, - true, - fs_tracker))) + if (!(file_sort= filesort(thd, table, &fsort, fs_tracker))) goto got_error; + thd->inc_examined_row_count(file_sort->examined_rows); /* Filesort has already found and selected the rows we want to delete, so we don't need the where clause */ delete select; - free_underlaid_joins(thd, select_lex); + + /* + If we are not in DELETE ... RETURNING, we can free subqueries. (in + DELETE ... 
RETURNING we can't, because the RETURNING part may have + a subquery in it) + */ + if (!with_select) + free_underlaid_joins(thd, select_lex); select= 0; } } @@ -737,7 +741,7 @@ got_error: wild_num - number of wildcards used in optional SELECT clause field_list - list of items in optional SELECT clause conds - conditions - +l RETURN VALUE FALSE OK TRUE error @@ -758,7 +762,8 @@ got_error: DELETE_ACL, SELECT_ACL, TRUE)) DBUG_RETURN(TRUE); if ((wild_num && setup_wild(thd, table_list, field_list, NULL, wild_num)) || - setup_fields(thd, NULL, field_list, MARK_COLUMNS_READ, NULL, 0) || + setup_fields(thd, Ref_ptr_array(), + field_list, MARK_COLUMNS_READ, NULL, 0) || setup_conds(thd, table_list, select_lex->leaf_tables, conds) || setup_ftfuncs(select_lex)) DBUG_RETURN(TRUE); diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 1ef83b3bf1f..79e57cded81 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -477,7 +477,7 @@ exit_merge: unconditional_materialization: derived->change_refs_to_fields(); derived->set_materialized_derived(); - if (!derived->table || !derived->table->created) + if (!derived->table || !derived->table->is_created()) res= mysql_derived_create(thd, lex, derived); if (!res) res= mysql_derived_fill(thd, lex, derived); @@ -859,7 +859,7 @@ bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived) TABLE *table= derived->table; SELECT_LEX_UNIT *unit= derived->get_unit(); - if (table->created) + if (table->is_created()) DBUG_RETURN(FALSE); select_union *result= (select_union*)unit->result; if (table->s->db_type() == TMP_ENGINE_HTON) @@ -912,7 +912,7 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived) if (unit->executed && !unit->uncacheable && !unit->describe) DBUG_RETURN(FALSE); /*check that table creation passed without problems. 
*/ - DBUG_ASSERT(derived->table && derived->table->created); + DBUG_ASSERT(derived->table && derived->table->is_created()); SELECT_LEX *first_select= unit->first_select(); select_union *derived_result= derived->derived_result; SELECT_LEX *save_current_select= lex->current_select; @@ -928,7 +928,7 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived) first_select->options&= ~OPTION_FOUND_ROWS; lex->current_select= first_select; - res= mysql_select(thd, &first_select->ref_pointer_array, + res= mysql_select(thd, first_select->table_list.first, first_select->with_wild, first_select->item_list, first_select->where, diff --git a/sql/sql_do.cc b/sql/sql_do.cc index 9e58031f6a4..6d86ece6a9f 100644 --- a/sql/sql_do.cc +++ b/sql/sql_do.cc @@ -29,7 +29,7 @@ bool mysql_do(THD *thd, List<Item> &values) List_iterator<Item> li(values); Item *value; DBUG_ENTER("mysql_do"); - if (setup_fields(thd, 0, values, MARK_COLUMNS_NONE, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_NONE, 0, 0)) DBUG_RETURN(TRUE); while ((value = li++)) (void) value->is_null(); diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index dae5127cbf8..1f8b4f2dcb1 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -30,6 +30,7 @@ const char * STR_IMPOSSIBLE_WHERE= "Impossible WHERE"; const char * STR_NO_ROWS_AFTER_PRUNING= "No matching rows after partition pruning"; static void write_item(Json_writer *writer, Item *item); +static void append_item_to_str(String *out, Item *item); Explain_query::Explain_query(THD *thd_arg, MEM_ROOT *root) : mem_root(root), upd_del_plan(NULL), insert_plan(NULL), @@ -697,13 +698,6 @@ bool Explain_node::print_explain_json_cache(Json_writer *writer, } -void Explain_select::replace_table(uint idx, Explain_table_access *new_tab) -{ - delete join_tabs[idx]; - join_tabs[idx]= new_tab; -} - - Explain_basic_join::~Explain_basic_join() { if (join_tabs) @@ -754,35 +748,23 @@ int Explain_select::print_explain(Explain_query *query, } else { - bool 
using_tmp; - bool using_fs; + bool using_tmp= false; + bool using_fs= false; - if (is_analyze) + for (Explain_aggr_node *node= aggr_tree; node; node= node->child) { - /* - Get the data about "Using temporary; Using filesort" from execution - tracking system. - */ - using_tmp= false; - using_fs= false; - Sort_and_group_tracker::Iterator iter(&ops_tracker); - enum_qep_action action; - Filesort_tracker *dummy; - - while ((action= iter.get_next(&dummy)) != EXPL_ACTION_EOF) + switch (node->get_type()) { - if (action == EXPL_ACTION_FILESORT) - using_fs= true; - else if (action == EXPL_ACTION_TEMPTABLE) + case AGGR_OP_TEMP_TABLE: using_tmp= true; + break; + case AGGR_OP_FILESORT: + using_fs= true; + break; + default: + break; } } - else - { - /* Use imprecise "estimates" we got with the query plan */ - using_tmp= using_temporary; - using_fs= using_filesort; - } for (uint i=0; i< n_join_tabs; i++) { @@ -882,88 +864,40 @@ void Explain_select::print_explain_json(Explain_query *query, } } - Filesort_tracker *first_table_sort= NULL; - bool first_table_sort_used= false; int started_objects= 0; + + Explain_aggr_node *node= aggr_tree; - if (is_analyze) + for (; node; node= node->child) { - /* ANALYZE has collected this part of query plan independently */ - if (ops_tracker.had_varied_executions()) + switch (node->get_type()) { - writer->add_member("varied-sort-and-tmp").start_object(); - started_objects++; - } - else - { - Sort_and_group_tracker::Iterator iter(&ops_tracker); - enum_qep_action action; - Filesort_tracker *fs_tracker; - - while ((action= iter.get_next(&fs_tracker)) != EXPL_ACTION_EOF) + case AGGR_OP_TEMP_TABLE: + writer->add_member("temporary_table").start_object(); + break; + case AGGR_OP_FILESORT: { - if (action == EXPL_ACTION_FILESORT) - { - if (iter.is_last_element()) - { - first_table_sort= fs_tracker; - break; - } - writer->add_member("filesort").start_object(); - started_objects++; - fs_tracker->print_json_members(writer); - } - else if (action == 
EXPL_ACTION_TEMPTABLE) - { - writer->add_member("temporary_table").start_object(); - started_objects++; - /* - if (tmp == EXPL_TMP_TABLE_BUFFER) - func= "buffer"; - else if (tmp == EXPL_TMP_TABLE_GROUP) - func= "group-by"; - else - func= "distinct"; - writer->add_member("function").add_str(func); - */ - } - else if (action == EXPL_ACTION_REMOVE_DUPS) - { - writer->add_member("duplicate_removal").start_object(); - started_objects++; - } - else - DBUG_ASSERT(0); + writer->add_member("filesort").start_object(); + ((Explain_aggr_filesort*)node)->print_json_members(writer, is_analyze); + break; } - } - - if (first_table_sort) - first_table_sort_used= true; - } - else - { - /* This is just EXPLAIN. Try to produce something meaningful */ - if (using_temporary) - { - started_objects= 1; - if (using_filesort) + case AGGR_OP_REMOVE_DUPLICATES: + writer->add_member("duplicate_removal").start_object(); + break; + case AGGR_OP_WINDOW_FUNCS: { - started_objects++; - writer->add_member("filesort").start_object(); + //TODO: make print_json_members virtual? 
+ writer->add_member("window_functions_computation").start_object(); + ((Explain_aggr_window_funcs*)node)->print_json_members(writer, is_analyze); + break; } - writer->add_member("temporary_table").start_object(); - writer->add_member("function").add_str("buffer"); - } - else - { - if (using_filesort) - first_table_sort_used= true; + default: + DBUG_ASSERT(0); } + started_objects++; } - Explain_basic_join::print_explain_json_interns(query, writer, is_analyze, - first_table_sort, - first_table_sort_used); + Explain_basic_join::print_explain_json_interns(query, writer, is_analyze); for (;started_objects; started_objects--) writer->end_object(); @@ -976,6 +910,64 @@ void Explain_select::print_explain_json(Explain_query *query, } +Explain_aggr_filesort::Explain_aggr_filesort(MEM_ROOT *mem_root, + bool is_analyze, + Filesort *filesort) + : tracker(is_analyze) +{ + child= NULL; + for (ORDER *ord= filesort->order; ord; ord= ord->next) + { + sort_items.push_back(ord->item[0], mem_root); + } + filesort->tracker= &tracker; +} + + +void Explain_aggr_filesort::print_json_members(Json_writer *writer, + bool is_analyze) +{ + char item_buf[256]; + String str(item_buf, sizeof(item_buf), &my_charset_bin); + str.length(0); + + List_iterator_fast<Item> it(sort_items); + Item *item; + bool first= true; + while ((item= it++)) + { + if (first) + first= false; + else + { + str.append(", "); + } + append_item_to_str(&str, item); + } + + writer->add_member("sort_key").add_str(str.c_ptr_safe()); + + if (is_analyze) + tracker.print_json_members(writer); +} + + +void Explain_aggr_window_funcs::print_json_members(Json_writer *writer, + bool is_analyze) +{ + Explain_aggr_filesort *srt; + List_iterator<Explain_aggr_filesort> it(sorts); + writer->add_member("sorts").start_object(); + while ((srt= it++)) + { + writer->add_member("filesort").start_object(); + srt->print_json_members(writer, is_analyze); + writer->end_object(); // filesort + } + writer->end_object(); // sorts +} + + void 
Explain_basic_join::print_explain_json(Explain_query *query, Json_writer *writer, bool is_analyze) @@ -983,7 +975,7 @@ void Explain_basic_join::print_explain_json(Explain_query *query, writer->add_member("query_block").start_object(); writer->add_member("select_id").add_ll(select_id); - print_explain_json_interns(query, writer, is_analyze, NULL, false); + print_explain_json_interns(query, writer, is_analyze); writer->end_object(); } @@ -992,9 +984,7 @@ void Explain_basic_join::print_explain_json(Explain_query *query, void Explain_basic_join:: print_explain_json_interns(Explain_query *query, Json_writer *writer, - bool is_analyze, - Filesort_tracker *first_table_sort, - bool first_table_sort_used) + bool is_analyze) { Json_writer_nesting_guard guard(writer); for (uint i=0; i< n_join_tabs; i++) @@ -1002,12 +992,7 @@ print_explain_json_interns(Explain_query *query, if (join_tabs[i]->start_dups_weedout) writer->add_member("duplicates_removal").start_object(); - join_tabs[i]->print_explain_json(query, writer, is_analyze, - first_table_sort, - first_table_sort_used); - - first_table_sort= NULL; - first_table_sort_used= false; + join_tabs[i]->print_explain_json(query, writer, is_analyze); if (join_tabs[i]->end_dups_weedout) writer->end_object(); @@ -1299,7 +1284,7 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai extra_buf.append(STRING_WITH_LEN("Using temporary")); } - if (using_filesort) + if (using_filesort || this->pre_join_sort) { if (first) first= false; @@ -1359,6 +1344,15 @@ static void write_item(Json_writer *writer, Item *item) writer->add_str(str.c_ptr_safe()); } +static void append_item_to_str(String *out, Item *item) +{ + THD *thd= current_thd; + ulonglong save_option_bits= thd->variables.option_bits; + thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE; + + item->print(out, QT_EXPLAIN); + thd->variables.option_bits= save_option_bits; +} void Explain_table_access::tag_to_json(Json_writer *writer, enum explain_extra_tag 
tag) { @@ -1486,25 +1480,14 @@ void add_json_keyset(Json_writer *writer, const char *elem_name, print_json_array(writer, elem_name, *keyset); } -/* - @param fs_tracker Normally NULL. When not NULL, it means that the join tab - used filesort to pre-sort the data. Then, sorted data - was read and the rest of the join was executed. - - @note - EXPLAIN command will check whether fs_tracker is present, but it can't use - any value from fs_tracker (these are only valid for ANALYZE). -*/ void Explain_table_access::print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, - Filesort_tracker *fs_tracker, - bool first_table_sort_used) + bool is_analyze) { Json_writer_nesting_guard guard(writer); - if (first_table_sort_used) + if (pre_join_sort) { /* filesort was invoked on this join tab before doing the join with the rest */ writer->add_member("read_sorted_file").start_object(); @@ -1531,8 +1514,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, } } writer->add_member("filesort").start_object(); - if (is_analyze) - fs_tracker->print_json_members(writer); + pre_join_sort->print_json_members(writer, is_analyze); } if (bka_type.is_using_jbuf()) @@ -1610,11 +1592,11 @@ void Explain_table_access::print_explain_json(Explain_query *query, if (is_analyze) { writer->add_member("r_rows"); - if (fs_tracker) + if (pre_join_sort) { /* Get r_rows value from filesort */ - if (fs_tracker->get_r_loops()) - writer->add_double(fs_tracker->get_avg_examined_rows()); + if (pre_join_sort->tracker.get_r_loops()) + writer->add_double(pre_join_sort->tracker.get_avg_examined_rows()); else writer->add_null(); } @@ -1641,11 +1623,11 @@ void Explain_table_access::print_explain_json(Explain_query *query, if (is_analyze) { writer->add_member("r_filtered"); - if (fs_tracker) + if (pre_join_sort) { /* Get r_filtered value from filesort */ - if (fs_tracker->get_r_loops()) - writer->add_double(fs_tracker->get_r_filtered()); + if 
(pre_join_sort->tracker.get_r_loops()) + writer->add_double(pre_join_sort->tracker.get_r_filtered()); else writer->add_null(); } @@ -1723,7 +1705,7 @@ void Explain_table_access::print_explain_json(Explain_query *query, writer->end_object(); } - if (first_table_sort_used) + if (pre_join_sort) { writer->end_object(); // filesort writer->end_object(); // read_sorted_file diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 844773c4a47..abdb1bb978b 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -176,9 +176,7 @@ public: bool is_analyze); void print_explain_json_interns(Explain_query *query, Json_writer *writer, - bool is_analyze, - Filesort_tracker *first_table_sort, - bool first_table_sort_used); + bool is_analyze); /* A flat array of Explain structs for tables. */ Explain_table_access** join_tabs; @@ -186,6 +184,7 @@ public: }; +class Explain_aggr_node; /* EXPLAIN structure for a SELECT. @@ -212,15 +211,9 @@ public: having(NULL), having_value(Item::COND_UNDEF), using_temporary(false), using_filesort(false), time_tracker(is_analyze), - ops_tracker(is_analyze) + aggr_tree(NULL) {} - /* - This is used to save the results of "late" test_if_skip_sort_order() calls - that are made from JOIN::exec - */ - void replace_table(uint idx, Explain_table_access *new_tab); - public: const char *select_type; @@ -244,9 +237,13 @@ public: /* ANALYZE members */ Time_and_counter_tracker time_tracker; - - Sort_and_group_tracker ops_tracker; + /* + Part of query plan describing sorting, temp.table usage, and duplicate + removal + */ + Explain_aggr_node* aggr_tree; + int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); void print_explain_json(Explain_query *query, Json_writer *writer, @@ -260,6 +257,64 @@ private: Table_access_tracker using_temporary_read_tracker; }; +///////////////////////////////////////////////////////////////////////////// +// EXPLAIN structures for ORDER/GROUP operations. 
+///////////////////////////////////////////////////////////////////////////// +typedef enum +{ + AGGR_OP_TEMP_TABLE, + AGGR_OP_FILESORT, + //AGGR_OP_READ_SORTED_FILE, // need this? + AGGR_OP_REMOVE_DUPLICATES, + AGGR_OP_WINDOW_FUNCS + //AGGR_OP_JOIN // Need this? +} enum_explain_aggr_node_type; + + +class Explain_aggr_node : public Sql_alloc +{ +public: + virtual enum_explain_aggr_node_type get_type()= 0; + virtual ~Explain_aggr_node() {} + Explain_aggr_node *child; +}; + +class Explain_aggr_filesort : public Explain_aggr_node +{ + List<Item> sort_items; +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_FILESORT; } + Filesort_tracker tracker; + + Explain_aggr_filesort(MEM_ROOT *mem_root, bool is_analyze, + Filesort *filesort); + + void print_json_members(Json_writer *writer, bool is_analyze); +}; + +class Explain_aggr_tmp_table : public Explain_aggr_node +{ +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_TEMP_TABLE; } +}; + +class Explain_aggr_remove_dups : public Explain_aggr_node +{ +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_REMOVE_DUPLICATES; } +}; + +class Explain_aggr_window_funcs : public Explain_aggr_node +{ + List<Explain_aggr_filesort> sorts; +public: + enum_explain_aggr_node_type get_type() { return AGGR_OP_WINDOW_FUNCS; } + + void print_json_members(Json_writer *writer, bool is_analyze); + friend class Window_funcs_computation; +}; + +///////////////////////////////////////////////////////////////////////////// /* Explain structure for a UNION. @@ -617,7 +672,8 @@ public: where_cond(NULL), cache_cond(NULL), pushed_index_cond(NULL), - sjm_nest(NULL) + sjm_nest(NULL), + pre_join_sort(NULL) {} ~Explain_table_access() { delete sjm_nest; } @@ -710,6 +766,12 @@ public: Item *pushed_index_cond; Explain_basic_join *sjm_nest; + + /* + This describes a possible filesort() call that is done before doing the + join operation. 
+ */ + Explain_aggr_filesort *pre_join_sort; /* ANALYZE members */ @@ -723,9 +785,7 @@ public: uint select_id, const char *select_type, bool using_temporary, bool using_filesort); void print_explain_json(Explain_query *query, Json_writer *writer, - bool is_analyze, - Filesort_tracker *fs_tracker, - bool first_table_sort_used); + bool is_analyze); private: void append_tag_name(String *str, enum explain_extra_tag tag); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 9817b882bdd..65af14b62f6 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -258,7 +258,7 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, if (table_list->is_view()) unfix_fields(fields); - res= setup_fields(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0); + res= setup_fields(thd, Ref_ptr_array(), fields, MARK_COLUMNS_WRITE, 0, 0); /* Restore the current context. */ ctx_state.restore_state(context, table_list); @@ -346,7 +346,8 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list, } /* Check the fields we are going to modify */ - if (setup_fields(thd, 0, update_fields, MARK_COLUMNS_WRITE, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), + update_fields, MARK_COLUMNS_WRITE, 0, 0)) return -1; if (insert_table_list->is_view() && @@ -771,7 +772,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), counter); goto abort; } - if (setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), *values, MARK_COLUMNS_READ, 0, 0)) goto abort; switch_to_nullable_trigger_fields(*values, table); } @@ -1466,7 +1467,8 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, table_list->next_local= 0; context->resolve_in_table_list_only(table_list); - res= (setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0) || + res= (setup_fields(thd, Ref_ptr_array(), + *values, MARK_COLUMNS_READ, 0, 0) || check_insert_fields(thd, context->table_list, fields, *values, !insert_into_view, 0, &map)); @@ 
-1482,7 +1484,8 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, } if (!res) - res= setup_fields(thd, 0, update_values, MARK_COLUMNS_READ, 0, 0); + res= setup_fields(thd, Ref_ptr_array(), + update_values, MARK_COLUMNS_READ, 0, 0); if (!res && duplic == DUP_UPDATE) { @@ -2014,6 +2017,7 @@ public: mysql_cond_t cond, cond_client; volatile uint tables_in_use,stacked_inserts; volatile bool status; + bool retry; /** When the handler thread starts, it clones a metadata lock ticket which protects against GRL and ticket for the table to be inserted. @@ -2038,7 +2042,7 @@ public: Delayed_insert(SELECT_LEX *current_select) :locks_in_memory(0), table(0),tables_in_use(0),stacked_inserts(0), - status(0), handler_thread_initialized(FALSE), group_count(0) + status(0), retry(0), handler_thread_initialized(FALSE), group_count(0) { DBUG_ENTER("Delayed_insert constructor"); thd.security_ctx->user=(char*) delayed_user; @@ -2297,7 +2301,7 @@ bool delayed_get_table(THD *thd, MDL_request *grl_protection_request, } if (di->thd.killed) { - if (di->thd.is_error()) + if (di->thd.is_error() && ! di->retry) { /* Copy the error message. Note that we don't treat fatal @@ -2523,7 +2527,7 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd) copy->vcol_set= copy->def_vcol_set; } copy->tmp_set.bitmap= 0; // To catch errors - bzero((char*) bitmap, share->column_bitmap_size + (share->vfields ? 3 : 2)); + bzero((char*) bitmap, share->column_bitmap_size * (share->vfields ? 3 : 2)); copy->read_set= ©->def_read_set; copy->write_set= ©->def_write_set; @@ -2532,7 +2536,6 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd) /* Got fatal error */ error: tables_in_use--; - status=1; mysql_cond_signal(&cond); // Inform thread about abort DBUG_RETURN(0); } @@ -2774,13 +2777,20 @@ bool Delayed_insert::open_and_lock_table() /* Use special prelocking strategy to get ER_DELAYED_NOT_SUPPORTED error for tables with engines which don't support delayed inserts. 
+ + We can't do auto-repair in insert delayed thread, as it would hang + when trying to an exclusive MDL_LOCK on the table during repair + as the connection thread has a SHARED_WRITE lock. */ if (!(table= open_n_lock_single_table(&thd, &table_list, TL_WRITE_DELAYED, - MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK, + MYSQL_OPEN_IGNORE_GLOBAL_READ_LOCK | + MYSQL_OPEN_IGNORE_REPAIR, &prelocking_strategy))) { - thd.fatal_error(); // Abort waiting inserts + /* If table was crashed, then upper level should retry open+repair */ + retry= table_list.crashed; + thd.fatal_error(); // Abort waiting inserts return TRUE; } @@ -3440,7 +3450,7 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u) */ lex->current_select= &lex->select_lex; - res= (setup_fields(thd, 0, values, MARK_COLUMNS_READ, 0, 0) || + res= (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_READ, 0, 0) || check_insert_fields(thd, table_list, *fields, values, !insert_into_view, 1, &map)); @@ -3493,7 +3503,7 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u) table_list->next_name_resolution_table= ctx_state.get_first_name_resolution_table(); - res= res || setup_fields(thd, 0, *info.update_values, + res= res || setup_fields(thd, Ref_ptr_array(), *info.update_values, MARK_COLUMNS_READ, 0, 0); if (!res) { @@ -3622,7 +3632,7 @@ void select_insert::cleanup() select_insert::~select_insert() { DBUG_ENTER("~select_insert"); - if (table && table->created) + if (table && table->is_created()) { table->next_number_field=0; table->auto_increment_field_not_null= FALSE; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 65257c9b2ce..de345b4dd1c 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -748,6 +748,14 @@ void lex_start(THD *thd) lex->stmt_var_list.empty(); lex->proc_list.elements=0; + lex->save_group_list.empty(); + lex->save_order_list.empty(); + lex->win_ref= NULL; + lex->win_frame= NULL; + lex->frame_top_bound= NULL; + lex->frame_bottom_bound= NULL; + lex->win_spec= NULL; + lex->is_lex_started= 
TRUE; DBUG_VOID_RETURN; } @@ -2070,6 +2078,7 @@ void st_select_lex_unit::init_query() found_rows_for_union= 0; insert_table_with_stored_vcol= 0; derived= 0; + is_view= false; with_clause= 0; with_element= 0; columns_are_renamed= false; @@ -2103,8 +2112,7 @@ void st_select_lex::init_query() parent_lex->push_context(&context, parent_lex->thd->mem_root); cond_count= between_count= with_wild= 0; max_equal_elems= 0; - ref_pointer_array= 0; - ref_pointer_array_size= 0; + ref_pointer_array.reset(); select_n_where_fields= 0; select_n_reserved= 0; select_n_having_items= 0; @@ -2122,8 +2130,11 @@ void st_select_lex::init_query() prep_leaf_list_state= UNINIT; have_merged_subqueries= FALSE; bzero((char*) expr_cache_may_be_used, sizeof(expr_cache_may_be_used)); + select_list_tables= 0; m_non_agg_field_used= false; m_agg_func_used= false; + window_specs.empty(); + window_funcs.empty(); } void st_select_lex::init_select() @@ -2650,7 +2661,7 @@ bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num) select_n_having_items + select_n_where_fields + order_group_num) * 5; - if (ref_pointer_array != NULL) + if (!ref_pointer_array.is_null()) { /* We need to take 'n_sum_items' into account when allocating the array, @@ -2659,17 +2670,24 @@ bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num) In the usual case we can reuse the array from the prepare phase. If we need a bigger array, we must allocate a new one. */ - if (ref_pointer_array_size >= n_elems) - { - DBUG_PRINT("info", ("reusing old ref_array")); + if (ref_pointer_array.size() == n_elems) return false; - } - } - ref_pointer_array= static_cast<Item**>(arena->alloc(sizeof(Item*) * n_elems)); - if (ref_pointer_array != NULL) - ref_pointer_array_size= n_elems; - return ref_pointer_array == NULL; + /* + We need to take 'n_sum_items' into account when allocating the array, + and this may actually increase during the optimization phase due to + MIN/MAX rewrite in Item_in_subselect::single_value_transformer. 
+ In the usual case we can reuse the array from the prepare phase. + If we need a bigger array, we must allocate a new one. + */ + if (ref_pointer_array.size() == n_elems) + return false; + } + Item **array= static_cast<Item**>(arena->alloc(sizeof(Item*) * n_elems)); + if (array != NULL) + ref_pointer_array= Ref_ptr_array(array, n_elems); + + return array == NULL; } @@ -2734,8 +2752,8 @@ void st_select_lex::print_order(String *str, else (*order->item)->print(str, query_type); } - if (!order->asc) - str->append(STRING_WITH_LEN(" desc")); + if (order->direction == ORDER::ORDER_DESC) + str->append(STRING_WITH_LEN(" desc")); if (order->next) str->append(','); } @@ -4177,9 +4195,11 @@ void SELECT_LEX::update_used_tables() Item *item; List_iterator_fast<Item> it(join->fields_list); + select_list_tables= 0; while ((item= it++)) { item->update_used_tables(); + select_list_tables|= item->used_tables(); } Item_outer_ref *ref; List_iterator_fast<Item_outer_ref> ref_it(inner_refs_list); @@ -4229,6 +4249,8 @@ void st_select_lex::update_correlated_cache() if (join->conds) is_correlated|= MY_TEST(join->conds->used_tables() & OUTER_REF_TABLE_BIT); + is_correlated|= join->having_is_correlated; + if (join->having) is_correlated|= MY_TEST(join->having->used_tables() & OUTER_REF_TABLE_BIT); diff --git a/sql/sql_lex.h b/sql/sql_lex.h index c64ed6b8d5c..10247bd33a2 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -28,6 +28,7 @@ #include "mem_root_array.h" #include "sql_cmd.h" #include "sql_alter.h" // Alter_info +#include "sql_window.h" /* YACC and LEX Definitions */ @@ -47,6 +48,7 @@ class sys_var; class Item_func_match; class File_parser; class Key_part_spec; +class Item_window_func; struct sql_digest_state; class With_clause; @@ -643,6 +645,7 @@ public: derived tables/views handling. 
*/ TABLE_LIST *derived; + bool is_view; /* With clause attached to this unit (if any) */ With_clause *with_clause; /* With element where this unit is used as the specification (if any) */ @@ -721,6 +724,7 @@ public: }; typedef class st_select_lex_unit SELECT_LEX_UNIT; +typedef Bounds_checked_array<Item*> Ref_ptr_array; /* SELECT_LEX - store information of parsed SELECT statment @@ -799,9 +803,9 @@ public: SQL_I_List<ORDER> order_list; /* ORDER clause */ SQL_I_List<ORDER> gorder_list; Item *select_limit, *offset_limit; /* LIMIT clause parameters */ - // Arrays of pointers to top elements of all_fields list - Item **ref_pointer_array; - size_t ref_pointer_array_size; // Number of elements in array. + + /// Array of pointers to top elements of all_fields list + Ref_ptr_array ref_pointer_array; /* number of items in select_list and HAVING clause used to get number @@ -898,6 +902,12 @@ public: */ List<String> *prev_join_using; + /** + The set of those tables whose fields are referenced in the select list of + this select level. 
+ */ + table_map select_list_tables; + /* namp of nesting SELECT visibility (for aggregate functions check) */ nesting_map name_visibility_map; @@ -1088,6 +1098,24 @@ public: } With_element *find_table_def_in_with_clauses(TABLE_LIST *table); + List<Window_spec> window_specs; + void prepare_add_window_spec(THD *thd); + bool add_window_def(THD *thd, LEX_STRING *win_name, LEX_STRING *win_ref, + SQL_I_List<ORDER> win_partition_list, + SQL_I_List<ORDER> win_order_list, + Window_frame *win_frame); + bool add_window_spec(THD *thd, LEX_STRING *win_ref, + SQL_I_List<ORDER> win_partition_list, + SQL_I_List<ORDER> win_order_list, + Window_frame *win_frame); + List<Item_window_func> window_funcs; + bool add_window_func(Item_window_func *win_func) + { + return window_funcs.push_back(win_func); + } + + bool have_window_funcs() const { return (window_funcs.elements !=0); } + private: bool m_non_agg_field_used; bool m_agg_func_used; @@ -2756,6 +2784,14 @@ public: } + SQL_I_List<ORDER> save_group_list; + SQL_I_List<ORDER> save_order_list; + LEX_STRING *win_ref; + Window_frame *win_frame; + Window_frame_bound *frame_top_bound; + Window_frame_bound *frame_bottom_bound; + Window_spec *win_spec; + inline void free_set_stmt_mem_root() { DBUG_ASSERT(!is_arena_for_set_stmt()); diff --git a/sql/sql_list.h b/sql/sql_list.h index 113af35bad7..94e97e55aa7 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -451,6 +451,11 @@ public: el= ¤t->next; return current->info; } + /* Get what calling next() would return, without moving the iterator */ + inline void *peek() + { + return (*el)->info; + } inline void *next_fast(void) { list_node *tmp; @@ -503,6 +508,10 @@ public: { return el == &list->last_ref()->next; } + inline bool at_end() + { + return current == &end_of_list; + } friend class error_list_iterator; }; @@ -550,6 +559,7 @@ public: List_iterator() : base_list_iterator() {} inline void init(List<T> &a) { base_list_iterator::init(a); } inline T* operator++(int) { return (T*) 
base_list_iterator::next(); } + inline T* peek() { return (T*) base_list_iterator::peek(); } inline T *replace(T *a) { return (T*) base_list_iterator::replace(a); } inline T *replace(List<T> &a) { return (T*) base_list_iterator::replace(a); } inline void rewind(void) { base_list_iterator::rewind(); } @@ -607,7 +617,7 @@ inline void bubble_sort(List<T> *list_to_sort, swap= FALSE; while ((item2= it++) && (ref2= it.ref()) != last_ref) { - if (sort_func(item1, item2, arg) < 0) + if (sort_func(item1, item2, arg) > 0) { *ref1= item2; *ref2= item1; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index c70e545675d..a4044dd0d59 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -61,6 +61,39 @@ XML_TAG::XML_TAG(int l, String f, String v) } +/* + Field and line terminators must be interpreted as sequence of unsigned char. + Otherwise, non-ascii terminators will be negative on some platforms, + and positive on others (depending on the implementation of char). +*/ +class Term_string +{ + const uchar *m_ptr; + uint m_length; + int m_initial_byte; +public: + Term_string(const String &str) : + m_ptr(static_cast<const uchar*>(static_cast<const void*>(str.ptr()))), + m_length(str.length()), + m_initial_byte((uchar) (str.length() ? str.ptr()[0] : INT_MAX)) + { } + void set(const uchar *str, uint length, int initial_byte) + { + m_ptr= str; + m_length= length; + m_initial_byte= initial_byte; + } + void reset() { set(NULL, 0, INT_MAX); } + const uchar *ptr() const { return m_ptr; } + uint length() const { return m_length; } + int initial_byte() const { return m_initial_byte; } + bool eq(const Term_string &other) const + { + return length() == other.length() && !memcmp(ptr(), other.ptr(), length()); + } +}; + + #define GET (stack_pos != stack ? 
*--stack_pos : my_b_get(&cache)) #define PUSH(A) *(stack_pos++)=(A) @@ -69,10 +102,10 @@ class READ_INFO { String data; /* Read buffer */ uint fixed_length; /* Length of the fixed length record */ uint max_length; /* Max length of row */ - const uchar *field_term_ptr,*line_term_ptr; - const char *line_start_ptr,*line_start_end; - uint field_term_length,line_term_length,enclosed_length; - int field_term_char,line_term_char,enclosed_char,escape_char; + Term_string m_field_term; /* FIELDS TERMINATED BY 'string' */ + Term_string m_line_term; /* LINES TERMINATED BY 'string' */ + Term_string m_line_start; /* LINES STARTING BY 'string' */ + int enclosed_char,escape_char; int *stack,*stack_pos; bool found_end_of_line,start_of_line,eof; NET *io_net; @@ -86,6 +119,70 @@ class READ_INFO { *to= chr; return false; } + + /** + Read a tail of a multi-byte character. + The first byte of the character is assumed to be already + read from the file and appended to "str". + + @returns true - if EOF happened unexpectedly + @returns false - no EOF happened: found a good multi-byte character, + or a bad byte sequence + + Note: + The return value depends only on EOF: + - read_mbtail() returns "false" is a good character was read, but also + - read_mbtail() returns "false" if an incomplete byte sequence was found + and no EOF happened. + + For example, suppose we have an ujis file with bytes 0x8FA10A, where: + - 0x8FA1 is an incomplete prefix of a 3-byte character + (it should be [8F][A1-FE][A1-FE] to make a full 3-byte character) + - 0x0A is a line demiliter + This file has some broken data, the trailing [A1-FE] is missing. 
+ + In this example it works as follows: + - 0x8F is read from the file and put into "data" before the call + for read_mbtail() + - 0xA1 is read from the file and put into "data" by read_mbtail() + - 0x0A is kept in the read queue, so the next read iteration after + the current read_mbtail() call will normally find it and recognize as + a line delimiter + - the current call for read_mbtail() returns "false", + because no EOF happened + */ + bool read_mbtail(String *str) + { + int chlen; + if ((chlen= my_charlen(read_charset, str->end() - 1, str->end())) == 1) + return false; // Single byte character found + for (uint32 length0= str->length() - 1 ; MY_CS_IS_TOOSMALL(chlen); ) + { + int chr= GET; + if (chr == my_b_EOF) + { + DBUG_PRINT("info", ("read_mbtail: chlen=%d; unexpected EOF", chlen)); + return true; // EOF + } + str->append(chr); + chlen= my_charlen(read_charset, str->ptr() + length0, str->end()); + if (chlen == MY_CS_ILSEQ) + { + /** + It has been an incomplete (but a valid) sequence so far, + but the last byte turned it into a bad byte sequence. + Unget the very last byte. 
+ */ + str->length(str->length() - 1); + PUSH(chr); + DBUG_PRINT("info", ("read_mbtail: ILSEQ")); + return false; // Bad byte sequence + } + } + DBUG_PRINT("info", ("read_mbtail: chlen=%d", chlen)); + return false; // Good multi-byte character + } + public: bool error,line_cuted,found_null,enclosed; uchar *row_start, /* Found row starts here */ @@ -101,7 +198,11 @@ public: int read_fixed_length(void); int next_line(void); char unescape(char chr); - int terminator(const uchar *ptr, uint length); + bool terminator(const uchar *ptr, uint length); + bool terminator(const Term_string &str) + { return terminator(str.ptr(), str.length()); } + bool terminator(int chr, const Term_string &str) + { return str.initial_byte() == chr && terminator(str); } bool find_start_of_fields(); /* load xml */ List<XML_TAG> taglist; @@ -284,22 +385,25 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, Let us also prepare SET clause, altough it is probably empty in this case. */ - if (setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) || - setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), + set_fields, MARK_COLUMNS_WRITE, 0, 0) || + setup_fields(thd, Ref_ptr_array(), set_values, MARK_COLUMNS_READ, 0, 0)) DBUG_RETURN(TRUE); } else { // Part field list /* TODO: use this conds for 'WITH CHECK OPTIONS' */ - if (setup_fields(thd, 0, fields_vars, MARK_COLUMNS_WRITE, 0, 0) || - setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) || + if (setup_fields(thd, Ref_ptr_array(), + fields_vars, MARK_COLUMNS_WRITE, 0, 0) || + setup_fields(thd, Ref_ptr_array(), + set_fields, MARK_COLUMNS_WRITE, 0, 0) || check_that_all_fields_are_given_values(thd, table, table_list)) DBUG_RETURN(TRUE); /* Add all fields with default functions to table->write_set. 
*/ if (table->default_field) table->mark_default_fields_for_write(); /* Fix the expressions in SET clause */ - if (setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), set_values, MARK_COLUMNS_READ, 0, 0)) DBUG_RETURN(TRUE); } switch_to_nullable_trigger_fields(fields_vars, table); @@ -1348,8 +1452,9 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs, String &field_term, String &line_start, String &line_term, String &enclosed_par, int escape, bool get_it_from_net, bool is_fifo) - :file(file_par), fixed_length(tot_length), escape_char(escape), - found_end_of_line(false), eof(false), + :file(file_par), fixed_length(tot_length), + m_field_term(field_term), m_line_term(line_term), m_line_start(line_start), + escape_char(escape), found_end_of_line(false), eof(false), error(false), line_cuted(false), found_null(false), read_charset(cs) { data.set_thread_specific(); @@ -1358,39 +1463,17 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs, Otherwise, non-ascii terminators will be negative on some platforms, and positive on others (depending on the implementation of char). 
*/ - field_term_ptr= - static_cast<const uchar*>(static_cast<const void*>(field_term.ptr())); - field_term_length= field_term.length(); - line_term_ptr= - static_cast<const uchar*>(static_cast<const void*>(line_term.ptr())); - line_term_length= line_term.length(); level= 0; /* for load xml */ - if (line_start.length() == 0) - { - line_start_ptr=0; - start_of_line= 0; - } - else - { - line_start_ptr= line_start.ptr(); - line_start_end=line_start_ptr+line_start.length(); - start_of_line= 1; - } + start_of_line= line_start.length() != 0; /* If field_terminator == line_terminator, don't use line_terminator */ - if (field_term_length == line_term_length && - !memcmp(field_term_ptr,line_term_ptr,field_term_length)) - { - line_term_length=0; - line_term_ptr= NULL; - } - enclosed_char= (enclosed_length=enclosed_par.length()) ? - (uchar) enclosed_par[0] : INT_MAX; - field_term_char= field_term_length ? field_term_ptr[0] : INT_MAX; - line_term_char= line_term_length ? line_term_ptr[0] : INT_MAX; + if (m_field_term.eq(m_line_term)) + m_line_term.reset(); + enclosed_char= enclosed_par.length() ? 
(uchar) enclosed_par[0] : INT_MAX; /* Set of a stack for unget if long terminators */ - uint length= MY_MAX(cs->mbmaxlen, MY_MAX(field_term_length, line_term_length)) + 1; + uint length= MY_MAX(cs->mbmaxlen, MY_MAX(m_field_term.length(), + m_line_term.length())) + 1; set_if_bigger(length,line_start.length()); stack= stack_pos= (int*) thd->alloc(sizeof(int) * length); @@ -1432,7 +1515,7 @@ READ_INFO::~READ_INFO() } -inline int READ_INFO::terminator(const uchar *ptr,uint length) +inline bool READ_INFO::terminator(const uchar *ptr, uint length) { int chr=0; // Keep gcc happy uint i; @@ -1444,11 +1527,11 @@ inline int READ_INFO::terminator(const uchar *ptr,uint length) } } if (i == length) - return 1; + return true; PUSH(chr); while (i-- > 1) PUSH(*--ptr); - return 0; + return false; } @@ -1516,12 +1599,12 @@ int READ_INFO::read_field() chr= escape_char; } #ifdef ALLOW_LINESEPARATOR_IN_STRINGS - if (chr == line_term_char) + if (chr == m_line_term.initial_byte()) #else - if (chr == line_term_char && found_enclosed_char == INT_MAX) + if (chr == m_line_term.initial_byte() && found_enclosed_char == INT_MAX) #endif { - if (terminator(line_term_ptr,line_term_length)) + if (terminator(m_line_term)) { // Maybe unexpected linefeed enclosed=0; found_end_of_line=1; @@ -1538,9 +1621,7 @@ int READ_INFO::read_field() continue; } // End of enclosed field if followed by field_term or line_term - if (chr == my_b_EOF || - (chr == line_term_char && terminator(line_term_ptr, - line_term_length))) + if (chr == my_b_EOF || terminator(chr, m_line_term)) { /* Maybe unexpected linefeed */ enclosed=1; @@ -1549,8 +1630,7 @@ int READ_INFO::read_field() row_end= (uchar *) data.end(); return 0; } - if (chr == field_term_char && - terminator(field_term_ptr,field_term_length)) + if (terminator(chr, m_field_term)) { enclosed=1; row_start= (uchar *) data.ptr() + 1; @@ -1565,9 +1645,10 @@ int READ_INFO::read_field() /* copy the found term character to 'to' */ chr= found_enclosed_char; } - else if (chr 
== field_term_char && found_enclosed_char == INT_MAX) + else if (chr == m_field_term.initial_byte() && + found_enclosed_char == INT_MAX) { - if (terminator(field_term_ptr,field_term_length)) + if (terminator(m_field_term)) { enclosed=0; row_start= (uchar *) data.ptr(); @@ -1575,38 +1656,9 @@ int READ_INFO::read_field() return 0; } } -#ifdef USE_MB - if (my_mbcharlen(read_charset, chr) > 1) - { - uint32 length0= data.length(); - int ml= my_mbcharlen(read_charset, chr); - data.append(chr); - - for (int i= 1; i < ml; i++) - { - chr= GET; - if (chr == my_b_EOF) - { - /* - Need to back up the bytes already ready from illformed - multi-byte char - */ - data.length(length0); - goto found_eof; - } - data.append(chr); - } - if (my_ismbchar(read_charset, - (const char *) data.ptr() + length0, - (const char *) data.end())) - continue; - for (int i= 0; i < ml; i++) - PUSH(data.end()[-1 - i]); - data.length(length0); - chr= GET; - } -#endif data.append(chr); + if (use_mb(read_charset) && read_mbtail(&data)) + goto found_eof; } /* ** We come here if buffer is too small. Enlarge it and continue @@ -1665,13 +1717,10 @@ int READ_INFO::read_fixed_length() data.append((uchar) unescape((char) chr)); continue; } - if (chr == line_term_char) - { - if (terminator(line_term_ptr,line_term_length)) - { // Maybe unexpected linefeed - found_end_of_line=1; - break; - } + if (terminator(chr, m_line_term)) + { // Maybe unexpected linefeed + found_end_of_line= true; + break; } data.append(chr); } @@ -1690,14 +1739,14 @@ found_eof: int READ_INFO::next_line() { line_cuted=0; - start_of_line= line_start_ptr != 0; + start_of_line= m_line_start.length() != 0; if (found_end_of_line || eof) { found_end_of_line=0; return eof; } found_end_of_line=0; - if (!line_term_length) + if (!m_line_term.length()) return 0; // No lines for (;;) { @@ -1725,10 +1774,11 @@ int READ_INFO::next_line() or a broken byte sequence was found. Check if the sequence is a prefix of the "LINES TERMINATED BY" string. 
*/ - if ((uchar) buf[0] == line_term_char && i <= line_term_length && - !memcmp(buf, line_term_ptr, i)) + if ((uchar) buf[0] == m_line_term.initial_byte() && + i <= m_line_term.length() && + !memcmp(buf, m_line_term.ptr(), i)) { - if (line_term_length == i) + if (m_line_term.length() == i) { /* We found a "LINES TERMINATED BY" string that consists @@ -1742,10 +1792,11 @@ int READ_INFO::next_line() that still needs to be checked is (line_term_length - i). Note, READ_INFO::terminator() assumes that the leftmost byte of the argument is already scanned from the file and is checked to - be a known prefix (e.g. against line_term_char). + be a known prefix (e.g. against line_term.initial_char()). So we need to pass one extra byte. */ - if (terminator(line_term_ptr + i - 1, line_term_length - i + 1)) + if (terminator(m_line_term.ptr() + i - 1, + m_line_term.length() - i + 1)) return 0; } /* @@ -1768,7 +1819,7 @@ int READ_INFO::next_line() return 1; continue; } - if (buf[0] == line_term_char && terminator(line_term_ptr,line_term_length)) + if (terminator(buf[0], m_line_term)) return 0; line_cuted= true; } @@ -1777,30 +1828,12 @@ int READ_INFO::next_line() bool READ_INFO::find_start_of_fields() { - int chr; - try_again: - do - { - if ((chr=GET) == my_b_EOF) - { - found_end_of_line=eof=1; - return 1; - } - } while ((char) chr != line_start_ptr[0]); - for (const char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++) + for (int chr= GET ; chr != my_b_EOF ; chr= GET) { - chr=GET; // Eof will be checked later - if ((char) chr != *ptr) - { // Can't be line_start - PUSH(chr); - while (--ptr != line_start_ptr) - { // Restart with next char - PUSH( *ptr); - } - goto try_again; - } + if (terminator(chr, m_line_start)) + return false; } - return 0; + return (found_end_of_line= eof= true); } @@ -1881,26 +1914,8 @@ int READ_INFO::read_value(int delim, String *val) int chr; String tmp; - for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;) + for (chr= GET; my_tospace(chr) != 
delim && chr != my_b_EOF; chr= GET) { -#ifdef USE_MB - if (my_mbcharlen(read_charset, chr) > 1) - { - DBUG_PRINT("read_xml",("multi byte")); - int i, ml= my_mbcharlen(read_charset, chr); - for (i= 1; i < ml; i++) - { - val->append(chr); - /* - Don't use my_tospace() in the middle of a multi-byte character - TODO: check that the multi-byte sequence is valid. - */ - chr= GET; - if (chr == my_b_EOF) - return chr; - } - } -#endif if(chr == '&') { tmp.length(0); @@ -1920,8 +1935,11 @@ int READ_INFO::read_value(int delim, String *val) } } else + { val->append(chr); - chr= GET; + if (use_mb(read_charset) && read_mbtail(val)) + return my_b_EOF; + } } return my_tospace(chr); } @@ -1990,11 +2008,11 @@ int READ_INFO::read_xml(THD *thd) } // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term - if((tag.length() == line_term_length -2) && - (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0)) + if((tag.length() == m_line_term.length() - 2) && + (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0)) { DBUG_PRINT("read_xml", ("start-of-row: %i %s %s", - level,tag.c_ptr_safe(), line_term_ptr)); + level,tag.c_ptr_safe(), m_line_term.ptr())); } if(chr == ' ' || chr == '>') @@ -2061,8 +2079,8 @@ int READ_INFO::read_xml(THD *thd) chr= my_tospace(GET); } - if((tag.length() == line_term_length -2) && - (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0)) + if((tag.length() == m_line_term.length() - 2) && + (memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0)) { DBUG_PRINT("read_xml", ("found end-of-row %i %s", level, tag.c_ptr_safe())); diff --git a/sql/sql_locale.h b/sql/sql_locale.h index 8357a9ecba4..e231393eec6 100644 --- a/sql/sql_locale.h +++ b/sql/sql_locale.h @@ -19,7 +19,7 @@ typedef struct my_locale_errmsgs { const char *language; - const char **errmsgs; + const char ***errmsgs; } MY_LOCALE_ERRMSGS; #include "my_global.h" /* uint */ diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 7c50e4ed680..a6bb89f05df 100644 --- 
a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1122,7 +1122,6 @@ static enum enum_server_command fetch_command(THD *thd, char *packet) { enum enum_server_command command= (enum enum_server_command) (uchar) packet[0]; - NET *net= &thd->net; DBUG_ENTER("fetch_command"); if (command >= COM_END || @@ -1130,7 +1129,7 @@ static enum enum_server_command fetch_command(THD *thd, char *packet) command= COM_END; // Wrong command DBUG_PRINT("info",("Command on %s = %d (%s)", - vio_description(net->vio), command, + vio_description(thd->net.vio), command, command_name[command].str)); DBUG_RETURN(command); } @@ -4521,7 +4520,7 @@ end_with_restore_list: lex->table_count); if (result) { - res= mysql_select(thd, &select_lex->ref_pointer_array, + res= mysql_select(thd, select_lex->get_table_list(), select_lex->with_wild, select_lex->item_list, @@ -5309,6 +5308,7 @@ end_with_restore_list: } case SQLCOM_SHUTDOWN: #ifndef EMBEDDED_LIBRARY + DBUG_EXECUTE_IF("crash_shutdown", DBUG_SUICIDE();); if (check_global_access(thd,SHUTDOWN_ACL)) goto error; kill_mysql(thd); @@ -7789,7 +7789,6 @@ add_proc_to_list(THD* thd, Item *item) item_ptr = (Item**) (order+1); *item_ptr= item; order->item=item_ptr; - order->free_me=0; thd->lex->proc_list.link_in_list(order, &order->next); return 0; } @@ -7807,8 +7806,7 @@ bool add_to_list(THD *thd, SQL_I_List<ORDER> &list, Item *item,bool asc) DBUG_RETURN(1); order->item_ptr= item; order->item= &order->item_ptr; - order->asc = asc; - order->free_me=0; + order->direction= (asc ? 
ORDER::ORDER_ASC : ORDER::ORDER_DESC); order->used=0; order->counter_used= 0; order->fast_field_copier_setup= 0; @@ -8225,6 +8223,65 @@ TABLE_LIST *st_select_lex::convert_right_join() DBUG_RETURN(tab1); } + +void st_select_lex::prepare_add_window_spec(THD *thd) +{ + LEX *lex= thd->lex; + lex->save_group_list= group_list; + lex->save_order_list= order_list; + lex->win_ref= NULL; + lex->win_frame= NULL; + lex->frame_top_bound= NULL; + lex->frame_bottom_bound= NULL; + group_list.empty(); + order_list.empty(); +} + +bool st_select_lex::add_window_def(THD *thd, + LEX_STRING *win_name, + LEX_STRING *win_ref, + SQL_I_List<ORDER> win_partition_list, + SQL_I_List<ORDER> win_order_list, + Window_frame *win_frame) +{ + SQL_I_List<ORDER> *win_part_list_ptr= + new (thd->mem_root) SQL_I_List<ORDER> (win_partition_list); + SQL_I_List<ORDER> *win_order_list_ptr= + new (thd->mem_root) SQL_I_List<ORDER> (win_order_list); + if (!(win_part_list_ptr && win_order_list_ptr)) + return true; + Window_def *win_def= new (thd->mem_root) Window_def(win_name, + win_ref, + win_part_list_ptr, + win_order_list_ptr, + win_frame); + group_list= thd->lex->save_group_list; + order_list= thd->lex->save_order_list; + return (win_def == NULL || window_specs.push_back(win_def)); +} + +bool st_select_lex::add_window_spec(THD *thd, + LEX_STRING *win_ref, + SQL_I_List<ORDER> win_partition_list, + SQL_I_List<ORDER> win_order_list, + Window_frame *win_frame) +{ + SQL_I_List<ORDER> *win_part_list_ptr= + new (thd->mem_root) SQL_I_List<ORDER> (win_partition_list); + SQL_I_List<ORDER> *win_order_list_ptr= + new (thd->mem_root) SQL_I_List<ORDER> (win_order_list); + if (!(win_part_list_ptr && win_order_list_ptr)) + return true; + Window_spec *win_spec= new (thd->mem_root) Window_spec(win_ref, + win_part_list_ptr, + win_order_list_ptr, + win_frame); + group_list= thd->lex->save_group_list; + order_list= thd->lex->save_order_list; + thd->lex->win_spec= win_spec; + return (win_spec == NULL || 
window_specs.push_back(win_spec)); +} + /** Set lock for all tables in current select level. diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index dbe19674cf2..f540c268923 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -1544,22 +1544,26 @@ int plugin_init(int *argc, char **argv, int flags) init_alloc_root(&plugin_vars_mem_root, 4096, 4096, MYF(0)); init_alloc_root(&tmp_root, 4096, 4096, MYF(0)); - if (my_hash_init(&bookmark_hash, &my_charset_bin, 16, 0, 0, + if (my_hash_init(&bookmark_hash, &my_charset_bin, 32, 0, 0, get_bookmark_hash_key, NULL, HASH_UNIQUE)) goto err; mysql_mutex_init(key_LOCK_plugin, &LOCK_plugin, MY_MUTEX_INIT_FAST); + /* + The 80 is from 2016-04-27 when we had 71 default plugins + Big enough to avoid many mallocs even in future + */ if (my_init_dynamic_array(&plugin_dl_array, sizeof(struct st_plugin_dl *), 16, 16, MYF(0)) || my_init_dynamic_array(&plugin_array, - sizeof(struct st_plugin_int *), 16, 16, MYF(0))) + sizeof(struct st_plugin_int *), 80, 32, MYF(0))) goto err; for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) { - if (my_hash_init(&plugin_hash[i], system_charset_info, 16, 0, 0, + if (my_hash_init(&plugin_hash[i], system_charset_info, 32, 0, 0, get_plugin_hash_key, NULL, HASH_UNIQUE)) goto err; } diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 8e5ab71288d..2d6a7302afc 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -1316,7 +1316,7 @@ static bool mysql_test_insert(Prepared_statement *stmt, my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), counter); goto error; } - if (setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), *values, MARK_COLUMNS_NONE, 0, 0)) goto error; } } @@ -1406,7 +1406,8 @@ static int mysql_test_update(Prepared_statement *stmt, table_list->register_want_access(want_privilege); #endif thd->lex->select_lex.no_wrap_view_item= TRUE; - res= setup_fields(thd, 0, select->item_list, MARK_COLUMNS_READ, 0, 0); + res= setup_fields(thd, Ref_ptr_array(), + 
select->item_list, MARK_COLUMNS_READ, 0, 0); thd->lex->select_lex.no_wrap_view_item= FALSE; if (res) goto error; @@ -1417,7 +1418,8 @@ static int mysql_test_update(Prepared_statement *stmt, (SELECT_ACL & ~table_list->table->grant.privilege); table_list->register_want_access(SELECT_ACL); #endif - if (setup_fields(thd, 0, stmt->lex->value_list, MARK_COLUMNS_NONE, 0, 0) || + if (setup_fields(thd, Ref_ptr_array(), + stmt->lex->value_list, MARK_COLUMNS_NONE, 0, 0) || check_unique_table(thd, table_list)) goto error; /* TODO: here we should send types of placeholders to the client. */ @@ -1463,7 +1465,7 @@ static bool mysql_test_delete(Prepared_statement *stmt, my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE"); goto error; } - if (!table_list->table || !table_list->table->created) + if (!table_list->table || !table_list->table->is_created()) { my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), table_list->view_db.str, table_list->view_name.str); @@ -1589,7 +1591,8 @@ static bool mysql_test_do_fields(Prepared_statement *stmt, if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL, DT_PREPARE | DT_CREATE)) DBUG_RETURN(TRUE); - DBUG_RETURN(setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0)); + DBUG_RETURN(setup_fields(thd, Ref_ptr_array(), + *values, MARK_COLUMNS_NONE, 0, 0)); } diff --git a/sql/sql_priv.h b/sql/sql_priv.h index cc56daacf3e..b15a80a889a 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -344,6 +344,7 @@ enum enum_parsing_place IN_WHERE, IN_ON, IN_GROUP_BY, + IN_ORDER_BY, PARSING_PLACE_SIZE /* always should be the last */ }; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c9e2b3a586d..36f0cd84cbf 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -182,6 +182,7 @@ struct binlog_send_info { { error_text[0] = 0; bzero(&error_gtid, sizeof(error_gtid)); + until_binlog_state.init(); } }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 3134f8eb007..6c4d2e1fc9c 100644 --- a/sql/sql_select.cc +++ 
b/sql/sql_select.cc @@ -54,6 +54,7 @@ #include "sql_derived.h" #include "sql_statistics.h" #include "sql_cte.h" +#include "sql_window.h" #include "debug_sync.h" // DEBUG_SYNC #include <m_ctype.h> @@ -117,7 +118,6 @@ static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2); C_MODE_END static uint cache_record_length(JOIN *join,uint index); -bool get_best_combination(JOIN *join); static store_key *get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, KEY_PART_INFO *key_part, uchar *key_buff, @@ -164,8 +164,11 @@ static COND *optimize_cond(JOIN *join, COND *conds, COND_EQUAL **cond_equal, int flags= 0); bool const_expression_in_where(COND *conds,Item *item, Item **comp_item); -static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table, - Procedure *proc); +static int do_select(JOIN *join, Procedure *procedure); +static bool instantiate_tmp_table(TABLE *table, KEY *keyinfo, + MARIA_COLUMNDEF *start_recinfo, + MARIA_COLUMNDEF **recinfo, + ulonglong options); static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int); static enum_nested_loop_state @@ -179,7 +182,6 @@ end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); static enum_nested_loop_state end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); -static int test_if_group_changed(List<Cached_item> &list); static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos); static int join_read_system(JOIN_TAB *tab); static int join_read_const(JOIN_TAB *tab); @@ -232,11 +234,7 @@ static bool list_contains_unique_index(TABLE *table, bool (*find_func) (Field *, void *), void *data); static bool find_field_in_item_list (Field *field, void *data); static bool find_field_in_order_list (Field *field, void *data); -static int create_sort_index(THD *thd, JOIN *join, ORDER *order, - ha_rows filesort_limit, ha_rows select_limit, - bool 
is_order_by); -static int remove_duplicates(JOIN *join,TABLE *entry,List<Item> &fields, - Item *having); +int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field, Item *having); static int remove_dup_with_hash_index(THD *thd,TABLE *table, @@ -245,7 +243,7 @@ static int remove_dup_with_hash_index(THD *thd,TABLE *table, static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref); static bool setup_new_fields(THD *thd, List<Item> &fields, List<Item> &all_fields, ORDER *new_order); -static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array, +static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array, ORDER *order, List<Item> &fields, List<Item> &all_fields, bool *all_order_by_fields_used); @@ -256,12 +254,12 @@ static void calc_group_buffer(JOIN *join,ORDER *group); static bool make_group_fields(JOIN *main_join, JOIN *curr_join); static bool alloc_group_fields(JOIN *join,ORDER *group); // Create list for using with tempory table -static bool change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, +static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &new_list1, List<Item> &new_list2, uint elements, List<Item> &items); // Create list for using with tempory table -static bool change_refs_to_tmp_fields(THD *thd, Item **ref_pointer_array, +static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &new_list1, List<Item> &new_list2, uint elements, List<Item> &items); @@ -288,7 +286,7 @@ static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab, uint n_top_tabs_count, JOIN_TAB *tab); static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, table_map rem_tables); - +void set_postjoin_aggr_write_func(JOIN_TAB *tab); #ifndef DBUG_OFF /* @@ -364,7 +362,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, every PS/SP execution new, 
we will not need reset this flag if setup_tables_done_option changed for next rexecution */ - res= mysql_select(thd, &select_lex->ref_pointer_array, + res= mysql_select(thd, select_lex->table_list.first, select_lex->with_wild, select_lex->item_list, select_lex->where, @@ -455,7 +453,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array) + Ref_ptr_array ref_pointer_array) { Item_outer_ref *ref; @@ -484,10 +482,9 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, existing one. The change will lead to less operations for copying fields, smaller temporary tables and less data passed through filesort. */ - if (ref_pointer_array && !ref->found_in_select_list) + if (!ref_pointer_array.is_null() && !ref->found_in_select_list) { int el= all_fields.elements; - DBUG_ASSERT(all_fields.elements <= select->ref_pointer_array_size); ref_pointer_array[el]= item; /* Add the field item to the select list of the current select. */ all_fields.push_front(item, thd->mem_root); @@ -495,7 +492,7 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, If it's needed reset each Item_ref item that refers this field with a new reference taken from ref_pointer_array. */ - item_ref= ref_pointer_array + el; + item_ref= &ref_pointer_array[el]; } if (ref->in_sum_func) @@ -533,6 +530,7 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, if (!ref->fixed && ref->fix_fields(thd, 0)) return TRUE; thd->lex->used_tables|= item->used_tables(); + thd->lex->current_select->select_list_tables|= item->used_tables(); } return false; } @@ -615,22 +613,26 @@ void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex) /** Function to setup clauses without sum functions. 
*/ -inline int setup_without_group(THD *thd, Item **ref_pointer_array, - TABLE_LIST *tables, - List<TABLE_LIST> &leaves, - List<Item> &fields, - List<Item> &all_fields, - COND **conds, - ORDER *order, - ORDER *group, - bool *hidden_group_fields, - uint *reserved) +static inline int +setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array, + TABLE_LIST *tables, + List<TABLE_LIST> &leaves, + List<Item> &fields, + List<Item> &all_fields, + COND **conds, + ORDER *order, + ORDER *group, + List<Window_spec> &win_specs, + List<Item_window_func> &win_funcs, + bool *hidden_group_fields, + uint *reserved) { int res; + enum_parsing_place save_place; st_select_lex *const select= thd->lex->current_select; nesting_map save_allow_sum_func= thd->lex->allow_sum_func; /* - Need to save the value, so we can turn off only any new non_agg_field_used + Need to stave the value, so we can turn off only any new non_agg_field_used additions coming from the WHERE */ const bool saved_non_agg_field_used= select->non_agg_field_used(); @@ -650,11 +652,21 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array, select->set_non_agg_field_used(saved_non_agg_field_used); thd->lex->allow_sum_func|= (nesting_map)1 << select->nest_level; + + save_place= thd->lex->current_select->parsing_place; + thd->lex->current_select->parsing_place= IN_ORDER_BY; res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields, order); - thd->lex->allow_sum_func&= ~((nesting_map)1 << select->nest_level); + thd->lex->current_select->parsing_place= save_place; + thd->lex->allow_sum_func&= ~((nesting_map)1 << select->nest_level); + save_place= thd->lex->current_select->parsing_place; + thd->lex->current_select->parsing_place= IN_GROUP_BY; res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields, group, hidden_group_fields); + thd->lex->current_select->parsing_place= save_place; + thd->lex->allow_sum_func|= (nesting_map)1 << select->nest_level; + res= res || 
setup_windows(thd, ref_pointer_array, tables, fields, all_fields, + win_specs, win_funcs); thd->lex->allow_sum_func= save_allow_sum_func; DBUG_RETURN(res); } @@ -678,8 +690,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array, 0 on success */ int -JOIN::prepare(Item ***rref_pointer_array, - TABLE_LIST *tables_init, +JOIN::prepare(TABLE_LIST *tables_init, uint wild_num, COND *conds_init, uint og_num, ORDER *order_init, bool skip_order_by, ORDER *group_init, Item *having_init, @@ -783,24 +794,33 @@ JOIN::prepare(Item ***rref_pointer_array, tbl->table->maybe_null= 1; } - if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields, - wild_num)) || - select_lex->setup_ref_array(thd, og_num) || - setup_fields(thd, (*rref_pointer_array), fields_list, MARK_COLUMNS_READ, - &all_fields, 1) || - setup_without_group(thd, (*rref_pointer_array), tables_list, - select_lex->leaf_tables, fields_list, - all_fields, &conds, order, group_list, - &hidden_group_fields, &select_lex->select_n_reserved)) - DBUG_RETURN(-1); /* purecov: inspected */ + if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num)) + DBUG_RETURN(-1); + if (select_lex->setup_ref_array(thd, og_num)) + DBUG_RETURN(-1); - ref_pointer_array= *rref_pointer_array; + ref_ptrs= ref_ptr_array_slice(0); + + enum_parsing_place save_place= thd->lex->current_select->parsing_place; + thd->lex->current_select->parsing_place= SELECT_LIST; + if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ, + &all_fields, 1)) + DBUG_RETURN(-1); + thd->lex->current_select->parsing_place= save_place; + if (setup_without_group(thd, ref_ptrs, tables_list, + select_lex->leaf_tables, fields_list, + all_fields, &conds, order, group_list, + select_lex->window_specs, + select_lex->window_funcs, + &hidden_group_fields, + &select_lex->select_n_reserved)) + DBUG_RETURN(-1); /* Resolve the ORDER BY that was skipped, then remove it. 
*/ if (skip_order_by && select_lex != select_lex->master_unit()->global_parameters()) { - if (setup_order(thd, (*rref_pointer_array), tables_list, fields_list, + if (setup_order(thd, ref_ptrs, tables_list, fields_list, all_fields, select_lex->order_list.first)) DBUG_RETURN(-1); select_lex->order_list.empty(); @@ -828,6 +848,12 @@ JOIN::prepare(Item ***rref_pointer_array, if (having_fix_rc || thd->is_error()) DBUG_RETURN(-1); /* purecov: inspected */ thd->lex->allow_sum_func= save_allow_sum_func; + + if (having->with_window_func) + { + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + DBUG_RETURN(-1); + } } With_clause *with_clause=select_lex->get_with_clause(); @@ -867,14 +893,14 @@ JOIN::prepare(Item ***rref_pointer_array, real_order= TRUE; if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM) - item->split_sum_func(thd, ref_pointer_array, all_fields, 0); + item->split_sum_func(thd, ref_ptrs, all_fields, 0); } if (!real_order) order= NULL; } if (having && having->with_sum_func) - having->split_sum_func2(thd, ref_pointer_array, all_fields, + having->split_sum_func2(thd, ref_ptrs, all_fields, &having, SPLIT_SUM_SKIP_REGISTERED); if (select_lex->inner_sum_func_list) { @@ -883,13 +909,13 @@ JOIN::prepare(Item ***rref_pointer_array, do { item_sum= item_sum->next; - item_sum->split_sum_func2(thd, ref_pointer_array, + item_sum->split_sum_func2(thd, ref_ptrs, all_fields, item_sum->ref_by, 0); } while (item_sum != end); } if (select_lex->inner_refs_list.elements && - fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array)) + fix_inner_refs(thd, all_fields, select_lex, ref_ptrs)) DBUG_RETURN(-1); if (group_list) @@ -907,10 +933,9 @@ JOIN::prepare(Item ***rref_pointer_array, { Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item); int el= all_fields.elements; - DBUG_ASSERT(all_fields.elements <= select_lex->ref_pointer_array_size); - ref_pointer_array[el]= field; + ref_ptrs[el]= field; all_fields.push_front(field, 
thd->mem_root); - ord->item= ref_pointer_array + el; + ord->item= &ref_ptrs[el]; } } } @@ -963,6 +988,12 @@ JOIN::prepare(Item ***rref_pointer_array, } if (thd->lex->derived_tables) { + /* + Queries with derived tables and PROCEDURE are not allowed. + Many of such queries are disallowed grammatically, but there + are still some complex cases: + SELECT 1 FROM (SELECT 1) a PROCEDURE ANALYSE() + */ my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", thd->lex->derived_tables & DERIVED_VIEW ? "view" : "subquery"); @@ -970,6 +1001,7 @@ JOIN::prepare(Item ***rref_pointer_array, } if (thd->lex->sql_command != SQLCOM_SELECT) { + // EXPLAIN SELECT * FROM t1 PROCEDURE ANALYSE() my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT"); goto err; } @@ -1006,11 +1038,14 @@ bool JOIN::prepare_stage2() /* Init join struct */ count_field_types(select_lex, &tmp_table_param, all_fields, 0); - ref_pointer_array_size= all_fields.elements*sizeof(Item*); this->group= group_list != 0; if (tmp_table_param.sum_func_count && !group_list) + { implicit_grouping= TRUE; + // Result will contain zero or one row - ordering is meaningless + order= NULL; + } #ifdef RESTRICTED_GROUP if (implicit_grouping) @@ -1056,6 +1091,24 @@ int JOIN::optimize() need_tmp, !skip_sort_order && !no_order && (order || group_list), select_distinct); + uint select_nr= select_lex->select_number; + JOIN_TAB *curr_tab= join_tab + top_join_tab_count; + for (uint i= 0; i < aggr_tables; i++, curr_tab++) + { + if (select_nr == INT_MAX) + { + /* this is a fake_select_lex of a union */ + select_nr= select_lex->master_unit()->first_select()->select_number; + curr_tab->tracker= thd->lex->explain->get_union(select_nr)-> + get_tmptable_read_tracker(); + } + else + { + curr_tab->tracker= thd->lex->explain->get_select(select_nr)-> + get_using_temporary_read_tracker(); + } + } + } return res; } @@ -1344,6 +1397,8 @@ JOIN::optimize_inner() { DBUG_PRINT("info",("No tables")); error= 0; + if (make_aggr_tables_info()) + DBUG_RETURN(1); goto 
setup_subq_exit; } error= -1; // Error is sent to client @@ -1355,7 +1410,6 @@ JOIN::optimize_inner() calling make_join_statistics() as this may call get_best_group_min_max() which needs a simplfied group_list. */ - simple_group= 1; if (group_list && table_count == 1) { group_list= remove_const(this, group_list, conds, @@ -1612,7 +1666,8 @@ JOIN::optimize_inner() (!join_tab[const_tables].select || !join_tab[const_tables].select->quick || join_tab[const_tables].select->quick->get_type() != - QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) + QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) && + !select_lex->have_window_funcs()) { if (group && rollup.state == ROLLUP::STATE_NONE && list_contains_unique_index(join_tab[const_tables].table, @@ -1662,11 +1717,13 @@ JOIN::optimize_inner() } if (group || tmp_table_param.sum_func_count) { - if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE) + if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE + && !select_lex->have_window_funcs()) select_distinct=0; } else if (select_distinct && table_count - const_tables == 1 && - rollup.state == ROLLUP::STATE_NONE) + rollup.state == ROLLUP::STATE_NONE && + !select_lex->have_window_funcs()) { /* We are only using one table. 
In this case we change DISTINCT to a @@ -1688,16 +1745,20 @@ JOIN::optimize_inner() tab= &join_tab[const_tables]; if (order) { - skip_sort_order= test_if_skip_sort_order(tab, order, select_limit, 1, - &tab->table->keys_in_use_for_order_by); + skip_sort_order= + test_if_skip_sort_order(tab, order, select_limit, + true, // no_changes + &tab->table->keys_in_use_for_order_by); } if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array, order, fields_list, all_fields, &all_order_fields_used))) { - bool skip_group= (skip_sort_order && - test_if_skip_sort_order(tab, group_list, select_limit, 1, - &tab->table->keys_in_use_for_group_by) != 0); + const bool skip_group= + skip_sort_order && + test_if_skip_sort_order(tab, group_list, select_limit, + true, // no_changes + &tab->table->keys_in_use_for_group_by); count_field_types(select_lex, &tmp_table_param, all_fields, 0); if ((skip_group && all_order_fields_used) || select_limit == HA_POS_ERROR || @@ -1726,6 +1787,7 @@ JOIN::optimize_inner() else if (thd->is_fatal_error) // End of memory DBUG_RETURN(1); } + simple_group= rollup.state == ROLLUP::STATE_NONE; if (group) { /* @@ -1749,6 +1811,7 @@ JOIN::optimize_inner() group_optimized_away= 1; } } + calc_group_buffer(this, group_list); send_group_parts= tmp_table_param.group_parts; /* Save org parts */ if (procedure && procedure->group) @@ -1790,6 +1853,11 @@ JOIN::optimize_inner() } need_tmp= test_if_need_tmp_table(); + //TODO this could probably go in test_if_need_tmp_table. 
+ if (this->select_lex->window_specs.elements > 0) { + need_tmp= TRUE; + simple_order= FALSE; + } /* If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table @@ -1813,6 +1881,31 @@ JOIN::optimize_inner() if (!(select_options & SELECT_DESCRIBE)) init_ftfuncs(thd, select_lex, MY_TEST(order)); + /* + It's necessary to check const part of HAVING cond as + there is a chance that some cond parts may become + const items after make_join_statisctics(for example + when Item is a reference to cost table field from + outer join). + This check is performed only for those conditions + which do not use aggregate functions. In such case + temporary table may not be used and const condition + elements may be lost during further having + condition transformation in JOIN::exec. + */ + if (having && const_table_map && !having->with_sum_func) + { + having->update_used_tables(); + having= having->remove_eq_conds(thd, &select_lex->having_value, true); + if (select_lex->having_value == Item::COND_FALSE) + { + having= new (thd->mem_root) Item_int(thd, (longlong) 0,1); + zero_result_cause= "Impossible HAVING noticed after reading const tables"; + error= 0; + DBUG_RETURN(0); + } + } + if (optimize_unflattened_subqueries()) DBUG_RETURN(1); @@ -1839,8 +1932,28 @@ JOIN::optimize_inner() DBUG_EXECUTE("info",TEST_join(this);); - if (const_tables != table_count) + if (!only_const_tables()) { + JOIN_TAB *tab= &join_tab[const_tables]; + + if (order) + { + /* + Force using of tmp table if sorting by a SP or UDF function due to + their expensive and probably non-deterministic nature. + */ + for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next) + { + Item *item= *tmp_order->item; + if (item->is_expensive()) + { + /* Force tmp table without sort */ + need_tmp=1; simple_order=simple_group=0; + break; + } + } + } + /* Because filesort always does a full table scan or a quick range scan we must add the removed reference to the select for the table. 
@@ -1848,72 +1961,197 @@ JOIN::optimize_inner() as in other cases the join is done before the sort. */ if ((order || group_list) && - join_tab[const_tables].type != JT_ALL && - join_tab[const_tables].type != JT_FT && - join_tab[const_tables].type != JT_REF_OR_NULL && + tab->type != JT_ALL && + tab->type != JT_FT && + tab->type != JT_REF_OR_NULL && ((order && simple_order) || (group_list && simple_group))) { - if (add_ref_to_table_cond(thd,&join_tab[const_tables])) { + if (add_ref_to_table_cond(thd,tab)) { DBUG_RETURN(1); } } /* - Calculate a possible 'limit' of table rows for 'GROUP BY': 'need_tmp' - implies that there will be more postprocessing so the specified - 'limit' should not be enforced yet in the call to - 'test_if_skip_sort_order'. + Investigate whether we may use an ordered index as part of either + DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be + used for only the first of any of these terms to be executed. This + is reflected in the order which we check for test_if_skip_sort_order() + below. However we do not check for DISTINCT here, as it would have + been transformed to a GROUP BY at this stage if it is a candidate for + ordered index optimization. + If a decision was made to use an ordered index, the availability + of such an access path is stored in 'ordered_index_usage' for later + use by 'execute' or 'explain' */ - const ha_rows limit = need_tmp ? 
HA_POS_ERROR : unit->select_limit_cnt; + DBUG_ASSERT(ordered_index_usage == ordered_index_void); - if (!(select_options & SELECT_BIG_RESULT) && - ((group_list && - (!simple_group || - !test_if_skip_sort_order(&join_tab[const_tables], group_list, - limit, 0, - &join_tab[const_tables].table-> - keys_in_use_for_group_by))) || - select_distinct) && - tmp_table_param.quick_group && !procedure) + if (group_list) // GROUP BY honoured first + // (DISTINCT was rewritten to GROUP BY if skippable) { - need_tmp=1; simple_order=simple_group=0; // Force tmp table without sort - } - if (order) - { - /* - Do we need a temporary table due to the ORDER BY not being equal to - the GROUP BY? The call to test_if_skip_sort_order above tests for the - GROUP BY clause only and hence is not valid in this case. So the - estimated number of rows to be read from the first table is not valid. - We clear it here so that it doesn't show up in EXPLAIN. - */ - if (need_tmp && (select_options & SELECT_DESCRIBE) != 0) - join_tab[const_tables].limit= 0; /* - Force using of tmp table if sorting by a SP or UDF function due to - their expensive and probably non-deterministic nature. + When there is SQL_BIG_RESULT do not sort using index for GROUP BY, + and thus force sorting on disk unless a group min-max optimization + is going to be used as it is applied now only for one table queries + with covering indexes. */ - for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next) - { - Item *item= *tmp_order->item; - if (item->is_expensive()) + if (!(select_options & SELECT_BIG_RESULT) || + (tab->select && + tab->select->quick && + tab->select->quick->get_type() == + QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) + { + if (simple_group && // GROUP BY is possibly skippable + !select_distinct) // .. 
if not preceded by a DISTINCT { - /* Force tmp table without sort */ - need_tmp=1; simple_order=simple_group=0; - break; + /* + Calculate a possible 'limit' of table rows for 'GROUP BY': + A specified 'LIMIT' is relative to the final resultset. + 'need_tmp' implies that there will be more postprocessing + so the specified 'limit' should not be enforced yet. + */ + const ha_rows limit = need_tmp ? HA_POS_ERROR : select_limit; + if (test_if_skip_sort_order(tab, group_list, limit, false, + &tab->table->keys_in_use_for_group_by)) + { + ordered_index_usage= ordered_index_group_by; + } + } + + /* + If we are going to use semi-join LooseScan, it will depend + on the selected index scan to be used. If index is not used + for the GROUP BY, we risk that sorting is put on the LooseScan + table. In order to avoid this, force use of temporary table. + TODO: Explain the quick_group part of the test below. + */ + if ((ordered_index_usage != ordered_index_group_by) && + ((tmp_table_param.quick_group && !procedure) || + (tab->emb_sj_nest && + best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN))) + { + need_tmp=1; + simple_order= simple_group= false; // Force tmp table without sort } } } - } + else if (order && // ORDER BY wo/ preceeding GROUP BY + (simple_order || skip_sort_order)) // which is possibly skippable + { + if (test_if_skip_sort_order(tab, order, select_limit, false, + &tab->table->keys_in_use_for_order_by)) + { + ordered_index_usage= ordered_index_order_by; + } + } + } + + if (having) + having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT); + tmp_having= having; if ((select_lex->options & OPTION_SCHEMA_TABLE)) optimize_schema_tables_reads(this); /* + The loose index scan access method guarantees that all grouping or + duplicate row elimination (for distinct) is already performed + during data retrieval, and that all MIN/MAX functions are already + computed for each group. 
Thus all MIN/MAX functions should be + treated as regular functions, and there is no need to perform + grouping in the main execution loop. + Notice that currently loose index scan is applicable only for + single table queries, thus it is sufficient to test only the first + join_tab element of the plan for its access method. + */ + if (join_tab->is_using_loose_index_scan()) + { + tmp_table_param.precomputed_group_by= TRUE; + if (join_tab->is_using_agg_loose_index_scan()) + { + need_distinct= FALSE; + tmp_table_param.precomputed_group_by= FALSE; + } + } + + if (make_aggr_tables_info()) + DBUG_RETURN(1); + + error= 0; + + if (select_options & SELECT_DESCRIBE) + goto derived_exit; + + DBUG_RETURN(0); + +setup_subq_exit: + /* Choose an execution strategy for this JOIN. */ + if (!tables_list || !table_count) + choose_tableless_subquery_plan(); + /* + Even with zero matching rows, subqueries in the HAVING clause may + need to be evaluated if there are aggregate functions in the query. + */ + if (optimize_unflattened_subqueries()) + DBUG_RETURN(1); + error= 0; + +derived_exit: + + select_lex->mark_const_derived(zero_result_cause); + DBUG_RETURN(0); +} + + +/** + Set info for aggregation tables + + @details + This function finalizes execution plan by taking following actions: + .) aggregation temporary tables are created, but not instantiated + (this is done during execution). + JOIN_TABs for aggregation tables are set appropriately + (see JOIN::create_postjoin_aggr_table). + .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for + each required stage of execution. These fields lists are set for + working tables' tabs and for the tab of last table in the join. + .) info for sorting/grouping/dups removal is prepared and saved in + appropriate tabs. 
Here is an example: + + @returns + false - Ok + true - Error +*/ + +bool JOIN::make_aggr_tables_info() +{ + List<Item> *curr_all_fields= &all_fields; + List<Item> *curr_fields_list= &fields_list; + JOIN_TAB *curr_tab= join_tab + const_tables; + TABLE *exec_tmp_table= NULL; + bool distinct= false; + bool keep_row_order= false; + DBUG_ENTER("JOIN::make_aggr_tables_info"); + + const bool has_group_by= this->group; + + sort_and_group_aggr_tab= NULL; + + + /* + Setup last table to provide fields and all_fields lists to the next + node in the plan. + */ + if (join_tab) + { + join_tab[top_join_tab_count - 1].fields= &fields_list; + join_tab[top_join_tab_count - 1].all_fields= &all_fields; + } + + /* All optimization is done. Check if we can use the storage engines group by handler to evaluate the group by */ - - if ((tmp_table_param.sum_func_count || group_list) && !procedure) + if (tables_list && (tmp_table_param.sum_func_count || group_list) && + !procedure) { /* At the moment we only support push down for queries where @@ -1933,24 +2171,39 @@ JOIN::optimize_inner() Query query= {&all_fields, select_distinct, tables_list, conds, group_list, order ? order : group_list, having}; group_by_handler *gbh= ht->create_group_by(thd, &query); + if (gbh) { pushdown_query= new (thd->mem_root) Pushdown_query(select_lex, gbh); - /* We must store rows in the tmp table if we need to do an ORDER BY or DISTINCT and the storage handler can't handle it. 
*/ need_tmp= query.order_by || query.group_by || query.distinct; - tmp_table_param.hidden_field_count= (all_fields.elements - - fields_list.elements); - if (!(exec_tmp_table1= - create_tmp_table(thd, &tmp_table_param, all_fields, 0, - query.distinct, 1, - select_options, HA_POS_ERROR, "", - !need_tmp, query.order_by || query.group_by))) + distinct= query.distinct; + keep_row_order= query.order_by || query.group_by; + + order= query.order_by; + + aggr_tables++; + curr_tab= join_tab + top_join_tab_count; + bzero(curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + curr_tab->join= this; + + curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param); + TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param, + all_fields, + NULL, query.distinct, + TRUE, select_options, HA_POS_ERROR, + "", !need_tmp, + query.order_by || query.group_by); + if (!table) DBUG_RETURN(1); + curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab); + curr_tab->aggr->set_write_func(::end_send); + curr_tab->table= table; /* Setup reference fields, used by summary functions and group by fields, to point to the temporary table. @@ -1959,17 +2212,18 @@ JOIN::optimize_inner() set_items_ref_array(items1). */ init_items_ref_array(); - items1= items0 + all_fields.elements; + items1= ref_ptr_array_slice(2); + //items1= items0 + all_fields.elements; if (change_to_use_tmp_fields(thd, items1, tmp_fields_list1, tmp_all_fields1, fields_list.elements, all_fields)) DBUG_RETURN(1); /* Give storage engine access to temporary table */ - gbh->table= exec_tmp_table1; - + gbh->table= table; pushdown_query->store_data_in_temp_table= need_tmp; pushdown_query->having= having; + /* Group by and having is calculated by the group_by handler. 
Reset the group by and having @@ -1986,21 +2240,19 @@ JOIN::optimize_inner() tmp_table_param.field_count+= tmp_table_param.sum_func_count; tmp_table_param.sum_func_count= 0; - /* Remember information about the original join */ - original_join_tab= join_tab; - original_table_count= table_count; + fields= curr_fields_list; - /* Set up one join tab to get sorting to work */ - const_tables= 0; - table_count= 1; - join_tab= (JOIN_TAB*) thd->calloc(sizeof(JOIN_TAB)); - join_tab[0].table= exec_tmp_table1; + //todo: new: + curr_tab->ref_array= &items1; + curr_tab->all_fields= &tmp_all_fields1; + curr_tab->fields= &tmp_fields_list1; DBUG_RETURN(thd->is_fatal_error); } } } + /* The loose index scan access method guarantees that all grouping or duplicate row elimination (for distinct) is already performed @@ -2012,194 +2264,584 @@ JOIN::optimize_inner() single table queries, thus it is sufficient to test only the first join_tab element of the plan for its access method. */ - if (join_tab->is_using_loose_index_scan()) + if (join_tab && join_tab->is_using_loose_index_scan()) + tmp_table_param.precomputed_group_by= + !join_tab->is_using_agg_loose_index_scan(); + + group_list_for_estimates= group_list; + /* Create a tmp table if distinct or if the sort is too complicated */ + if (need_tmp) { - tmp_table_param.precomputed_group_by= TRUE; - if (join_tab->is_using_agg_loose_index_scan()) + aggr_tables++; + curr_tab= join_tab + top_join_tab_count; + bzero(curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; + if (only_const_tables()) + first_select= sub_select_postjoin_aggr; + + /* + Create temporary table on first execution of this join. + (Will be reused if this is a subquery that is executed several times.) 
+ */ + init_items_ref_array(); + + ORDER *tmp_group= (ORDER *) 0; + if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP)) + tmp_group= group_list; + + tmp_table_param.hidden_field_count= + all_fields.elements - fields_list.elements; + + distinct= select_distinct && !group_list && + !select_lex->have_window_funcs(); + keep_row_order= false; + if (create_postjoin_aggr_table(curr_tab, + &all_fields, tmp_group, + group_list && simple_group, + distinct, keep_row_order)) + DBUG_RETURN(true); + exec_tmp_table= curr_tab->table; + + if (exec_tmp_table->distinct) + optimize_distinct(); + + /* + We don't have to store rows in temp table that doesn't match HAVING if: + - we are sorting the table and writing complete group rows to the + temp table. + - We are using DISTINCT without resolving the distinct as a GROUP BY + on all columns. + + If having is not handled here, it will be checked before the row + is sent to the client. + */ + if (having && + (sort_and_group || (exec_tmp_table->distinct && !group_list))) { - need_distinct= FALSE; - tmp_table_param.precomputed_group_by= FALSE; + // Attach HAVING to tmp table's condition + curr_tab->having= having; + having= NULL; // Already done } - } - error= 0; + /* Change sum_fields reference to calculated fields in tmp_table */ + items1= ref_ptr_array_slice(2); + if (sort_and_group || curr_tab->table->group || + tmp_table_param.precomputed_group_by) + { + if (change_to_use_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(true); + } + else + { + if (change_refs_to_tmp_fields(thd, items1, + tmp_fields_list1, tmp_all_fields1, + fields_list.elements, all_fields)) + DBUG_RETURN(true); + } + curr_all_fields= &tmp_all_fields1; + curr_fields_list= &tmp_fields_list1; + // Need to set them now for correct group_fields setup, reset at the end. 
+ set_items_ref_array(items1); + curr_tab->ref_array= &items1; + curr_tab->all_fields= &tmp_all_fields1; + curr_tab->fields= &tmp_fields_list1; + set_postjoin_aggr_write_func(curr_tab); - tmp_having= having; - if (select_options & SELECT_DESCRIBE) - goto derived_exit; - having= 0; + tmp_table_param.func_count= 0; + tmp_table_param.field_count+= tmp_table_param.func_count; + if (sort_and_group || curr_tab->table->group) + { + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + } - DBUG_RETURN(0); + if (exec_tmp_table->group) + { // Already grouped + if (!order && !no_order && !skip_sort_order) + order= group_list; /* order by group */ + group_list= NULL; + } -setup_subq_exit: - /* Choose an execution strategy for this JOIN. */ - if (!tables_list || !table_count) - choose_tableless_subquery_plan(); - /* - Even with zero matching rows, subqueries in the HAVING clause may - need to be evaluated if there are aggregate functions in the query. - */ - if (optimize_unflattened_subqueries()) - DBUG_RETURN(1); - error= 0; + /* + If we have different sort & group then we must sort the data by group + and copy it to another tmp table + This code is also used if we are using distinct something + we haven't been able to store in the temporary table yet + like SEC_TO_TIME(SUM(...)). 
+ */ + if ((group_list && + (!test_if_subpart(group_list, order) || select_distinct)) || + (select_distinct && tmp_table_param.using_outer_summary_function)) + { /* Must copy to another table */ + DBUG_PRINT("info",("Creating group table")); + + calc_group_buffer(this, group_list); + count_field_types(select_lex, &tmp_table_param, tmp_all_fields1, + select_distinct && !group_list); + tmp_table_param.hidden_field_count= + tmp_all_fields1.elements - tmp_fields_list1.elements; + + curr_tab++; + aggr_tables++; + bzero(curr_tab, sizeof(JOIN_TAB)); + curr_tab->ref.key= -1; -derived_exit: + /* group data to new table */ + /* + If the access method is loose index scan then all MIN/MAX + functions are precomputed, and should be treated as regular + functions. See extended comment above. + */ + if (join_tab->is_using_loose_index_scan()) + tmp_table_param.precomputed_group_by= TRUE; - select_lex->mark_const_derived(zero_result_cause); - DBUG_RETURN(0); -} + tmp_table_param.hidden_field_count= + curr_all_fields->elements - curr_fields_list->elements; + ORDER *dummy= NULL; //TODO can use table->group here also + if (create_postjoin_aggr_table(curr_tab, + curr_all_fields, dummy, true, + distinct, keep_row_order)) + DBUG_RETURN(true); -/** - Create and initialize objects neeed for the execution of a query plan. - Evaluate constant expressions not evaluated during optimization. 
-*/ + if (group_list) + { + if (!only_const_tables()) // No need to sort a single row + { + if (add_sorting_to_table(curr_tab - 1, group_list)) + DBUG_RETURN(true); + } -int JOIN::init_execution() -{ - DBUG_ENTER("JOIN::init_execution"); + if (make_group_fields(this, this)) + DBUG_RETURN(true); + } - DBUG_ASSERT(optimized); - DBUG_ASSERT(!(select_options & SELECT_DESCRIBE)); - initialized= true; + // Setup sum funcs only when necessary, otherwise we might break info + // for the first table + if (group_list || tmp_table_param.sum_func_count) + { + if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true)) + DBUG_RETURN(true); + if (prepare_sum_aggregators(sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + DBUG_RETURN(true); + group_list= NULL; + if (setup_sum_funcs(thd, sum_funcs)) + DBUG_RETURN(true); + } + // No sum funcs anymore + DBUG_ASSERT(items2.is_null()); - /* - Enable LIMIT ROWS EXAMINED during query execution if: - (1) This JOIN is the outermost query (not a subquery or derived table) - This ensures that the limit is enabled when actual execution begins, - and not if a subquery is evaluated during optimization of the outer - query. - (2) This JOIN is not the result of a UNION. In this case do not apply the - limit in order to produce the partial query result stored in the - UNION temp table. 
- */ - if (!select_lex->outer_select() && // (1) - select_lex != select_lex->master_unit()->fake_select_lex) // (2) - thd->lex->set_limit_rows_examined(); + items2= ref_ptr_array_slice(3); + if (change_to_use_tmp_fields(thd, items2, + tmp_fields_list2, tmp_all_fields2, + fields_list.elements, tmp_all_fields1)) + DBUG_RETURN(true); - /* Create a tmp table if distinct or if the sort is too complicated */ - if (need_tmp && !exec_tmp_table1) - { - DBUG_PRINT("info",("Creating tmp table")); - THD_STAGE_INFO(thd, stage_creating_tmp_table); + curr_fields_list= &tmp_fields_list2; + curr_all_fields= &tmp_all_fields2; + set_items_ref_array(items2); + curr_tab->ref_array= &items2; + curr_tab->all_fields= &tmp_all_fields2; + curr_tab->fields= &tmp_fields_list2; + set_postjoin_aggr_write_func(curr_tab); - init_items_ref_array(); + tmp_table_param.field_count+= tmp_table_param.sum_func_count; + tmp_table_param.sum_func_count= 0; + } + if (curr_tab->table->distinct) + select_distinct= false; /* Each row is unique */ - tmp_table_param.hidden_field_count= (all_fields.elements - - fields_list.elements); - ORDER *tmp_group= ((!simple_group && !procedure && - !(test_flags & TEST_NO_KEY_GROUP)) ? group_list : - (ORDER*) 0); - /* - Pushing LIMIT to the temporary table creation is not applicable - when there is ORDER BY or GROUP BY or there is no GROUP BY, but - there are aggregate functions, because in all these cases we need - all result rows. - */ - ha_rows tmp_rows_limit= ((order == 0 || skip_sort_order) && - !tmp_group && - !thd->lex->current_select->with_sum_func) ? - select_limit : HA_POS_ERROR; - - if (!(exec_tmp_table1= - create_tmp_table(thd, &tmp_table_param, all_fields, - tmp_group, group_list ? 
0 : select_distinct, - group_list && simple_group, - select_options, tmp_rows_limit, ""))) - DBUG_RETURN(1); - explain->ops_tracker.report_tmp_table(exec_tmp_table1); - /* - We don't have to store rows in temp table that doesn't match HAVING if: - - we are sorting the table and writing complete group rows to the - temp table. - - We are using DISTINCT without resolving the distinct as a GROUP BY - on all columns. - - If having is not handled here, it will be checked before the row - is sent to the client. - */ - if (tmp_having && - (sort_and_group || (exec_tmp_table1->distinct && !group_list))) - having= tmp_having; - - /* if group or order on first table, sort first */ - if (group_list && simple_group) - { - DBUG_PRINT("info",("Sorting for group")); - THD_STAGE_INFO(thd, stage_sorting_for_group); - if (create_sort_index(thd, this, group_list, - HA_POS_ERROR, HA_POS_ERROR, FALSE) || - alloc_group_fields(this, group_list) || - make_sum_func_list(all_fields, fields_list, 1) || - prepare_sum_aggregators(sum_funcs, need_distinct) || - setup_sum_funcs(thd, sum_funcs)) + if (select_distinct && !group_list) + { + if (having) { - DBUG_RETURN(1); + curr_tab->having= having; + having->update_used_tables(); } - group_list=0; + curr_tab->distinct= true; + having= NULL; + select_distinct= false; + } + /* Clean tmp_table_param for the next tmp table. */ + tmp_table_param.field_count= tmp_table_param.sum_func_count= + tmp_table_param.func_count= 0; + + tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + first_record= sort_and_group=0; + + if (!group_optimized_away) + { + group= false; } else { - if (make_sum_func_list(all_fields, fields_list, 0) || - prepare_sum_aggregators(sum_funcs, need_distinct) || - setup_sum_funcs(thd, sum_funcs)) + /* + If grouping has been optimized away, a temporary table is + normally not needed unless we're explicitly requested to create + one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT). 
+ + In this case (grouping was optimized away), temp_table was + created without a grouping expression and JOIN::exec() will not + perform the necessary grouping (by the use of end_send_group() + or end_write_group()) if JOIN::group is set to false. + */ + // the temporary table was explicitly requested + DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT)); + // the temporary table does not have a grouping expression + DBUG_ASSERT(!curr_tab->table->group); + } + calc_group_buffer(this, group_list); + count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false); + } + + if (group || implicit_grouping || tmp_table_param.sum_func_count) + { + if (make_group_fields(this, this)) + DBUG_RETURN(true); + + DBUG_ASSERT(items3.is_null()); + + if (items0.is_null()) + init_items_ref_array(); + items3= ref_ptr_array_slice(4); + setup_copy_fields(thd, &tmp_table_param, + items3, tmp_fields_list3, tmp_all_fields3, + curr_fields_list->elements, *curr_all_fields); + + curr_fields_list= &tmp_fields_list3; + curr_all_fields= &tmp_all_fields3; + set_items_ref_array(items3); + if (join_tab) + { + JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1; + // Set grouped fields on the last table + last_tab->ref_array= &items3; + last_tab->all_fields= &tmp_all_fields3; + last_tab->fields= &tmp_fields_list3; + } + if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true)) + DBUG_RETURN(true); + if (prepare_sum_aggregators(sum_funcs, + !join_tab || + !join_tab-> is_using_agg_loose_index_scan())) + DBUG_RETURN(true); + if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error) + DBUG_RETURN(true); + } + if (group_list || order) + { + DBUG_PRINT("info",("Sorting for send_result_set_metadata")); + THD_STAGE_INFO(thd, stage_sorting_result); + /* If we have already done the group, add HAVING to sorted table */ + if (tmp_having && !group_list && !sort_and_group) + { + // Some tables may have been const + tmp_having->update_used_tables(); + table_map 
used_tables= (const_table_map | curr_tab->table->map); + + Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables, + (table_map) 0, false, + false, false); + if (sort_table_cond) { - DBUG_RETURN(1); + if (!curr_tab->select) + if (!(curr_tab->select= new SQL_SELECT)) + DBUG_RETURN(true); + if (!curr_tab->select->cond) + curr_tab->select->cond= sort_table_cond; + else + { + if (!(curr_tab->select->cond= + new (thd->mem_root) Item_cond_and(thd, curr_tab->select->cond, + sort_table_cond))) + DBUG_RETURN(true); + } + if (curr_tab->pre_idx_push_select_cond) + { + if (sort_table_cond->type() == Item::COND_ITEM) + sort_table_cond= sort_table_cond->copy_andor_structure(thd); + if (!(curr_tab->pre_idx_push_select_cond= + new (thd->mem_root) Item_cond_and(thd, + curr_tab->pre_idx_push_select_cond, + sort_table_cond))) + DBUG_RETURN(true); + } + if (curr_tab->select->cond && !curr_tab->select->cond->fixed) + curr_tab->select->cond->fix_fields(thd, 0); + if (curr_tab->pre_idx_push_select_cond && + !curr_tab->pre_idx_push_select_cond->fixed) + curr_tab->pre_idx_push_select_cond->fix_fields(thd, 0); + curr_tab->select->pre_idx_push_select_cond= + curr_tab->pre_idx_push_select_cond; + curr_tab->set_select_cond(curr_tab->select->cond, __LINE__); + curr_tab->select_cond->top_level_item(); + DBUG_EXECUTE("where",print_where(curr_tab->select->cond, + "select and having", + QT_ORDINARY);); + + having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0, + ~used_tables, false, false, false); + DBUG_EXECUTE("where", + print_where(having, "having after sort", QT_ORDINARY);); } + } - if (!group_list && ! exec_tmp_table1->distinct && order && simple_order) + if (group) + select_limit= HA_POS_ERROR; + else if (!need_tmp) + { + /* + We can abort sorting after thd->select_limit rows if there are no + filter conditions for any tables after the sorted one. + Filter conditions come in several forms: + 1. as a condition item attached to the join_tab, or + 2. 
as a keyuse attached to the join_tab (ref access). + */ + for (uint i= const_tables + 1; i < top_join_tab_count; i++) { - DBUG_PRINT("info",("Sorting for order")); - THD_STAGE_INFO(thd, stage_sorting_for_order); - if (create_sort_index(thd, this, order, - HA_POS_ERROR, HA_POS_ERROR, TRUE)) + JOIN_TAB *const tab= join_tab + i; + if (tab->select_cond || // 1 + (tab->keyuse && !tab->first_inner)) // 2 { - DBUG_RETURN(1); + /* We have to sort all rows */ + select_limit= HA_POS_ERROR; + break; } - order=0; } } - /* - Optimize distinct when used on some of the tables - SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b - In this case we can stop scanning t2 when we have found one t1.a + Here we add sorting stage for ORDER BY/GROUP BY clause, if the + optimiser chose FILESORT to be faster than INDEX SCAN or there is + no suitable index present. + OPTION_FOUND_ROWS supersedes LIMIT and is taken into account. */ + DBUG_PRINT("info",("Sorting for order by/group by")); + ORDER *order_arg= group_list ? group_list : order; + if (join_tab && + ordered_index_usage != + (group_list ? ordered_index_group_by : ordered_index_order_by) && + curr_tab->type != JT_CONST && + curr_tab->type != JT_EQ_REF) // Don't sort 1 row + { + // Sort either first non-const table or the last tmp table + JOIN_TAB *sort_tab= curr_tab; + + if (add_sorting_to_table(sort_tab, order_arg)) + DBUG_RETURN(true); + /* + filesort_limit: Return only this many rows from filesort(). + We can use select_limit_cnt only if we have no group_by and 1 table. + This allows us to use Bounded_queue for queries like: + "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;" + m_select_limit == HA_POS_ERROR (we need a full table scan) + unit->select_limit_cnt == 1 (we only need one row in the result set) + */ + sort_tab->filesort->limit= + (has_group_by || (join_tab + table_count > curr_tab + 1)) ? 
+ select_limit : unit->select_limit_cnt; + } + if (!only_const_tables() && + !join_tab[const_tables].filesort && + !(select_options & SELECT_DESCRIBE)) + { + /* + If no IO cache exists for the first table then we are using an + INDEX SCAN and no filesort. Thus we should not remove the sorted + attribute on the INDEX SCAN. + */ + skip_sort_order= true; + } + } + + /* + Window functions computation step should be attached to the last join_tab + that's doing aggregation. + The last join_tab reads the data from the temp. table. It also may do + - sorting + - duplicate value removal + Both of these operations are done after window function computation step. + */ + curr_tab= join_tab + top_join_tab_count + aggr_tables - 1; + if (select_lex->window_funcs.elements) + { + curr_tab->window_funcs_step= new Window_funcs_computation; + if (curr_tab->window_funcs_step->setup(thd, &select_lex->window_funcs, + curr_tab)) + DBUG_RETURN(true); + } + + fields= curr_fields_list; + // Reset before execution + set_items_ref_array(items0); + if (join_tab) + join_tab[top_join_tab_count + aggr_tables - 1].next_select= + setup_end_select_func(this, NULL); + group= has_group_by; + + DBUG_RETURN(false); +} + - if (exec_tmp_table1->distinct) + +bool +JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields, + ORDER *table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_order) +{ + DBUG_ENTER("JOIN::create_postjoin_aggr_table"); + THD_STAGE_INFO(thd, stage_creating_tmp_table); + + /* + Pushing LIMIT to the post-join temporary table creation is not applicable + when there is ORDER BY or GROUP BY or there is no GROUP BY, but + there are aggregate functions, because in all these cases we need + all result rows. + */ + ha_rows table_rows_limit= ((order == NULL || skip_sort_order) && + !table_group && + !select_lex->with_sum_func) ? 
+ select_limit : HA_POS_ERROR; + + tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param); + tab->tmp_table_param->skip_create_table= true; + TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields, + table_group, distinct, + save_sum_fields, select_options, table_rows_limit, + "", true, keep_row_order); + if (!table) + DBUG_RETURN(true); + tmp_table_param.using_outer_summary_function= + tab->tmp_table_param->using_outer_summary_function; + tab->join= this; + DBUG_ASSERT(tab > tab->join->join_tab); + (tab - 1)->next_select= sub_select_postjoin_aggr; + tab->aggr= new (thd->mem_root) AGGR_OP(tab); + if (!tab->aggr) + goto err; + tab->table= table; + table->reginfo.join_tab= tab; + + /* if group or order on first table, sort first */ + if (group_list && simple_group) + { + DBUG_PRINT("info",("Sorting for group")); + THD_STAGE_INFO(thd, stage_sorting_for_group); + + if (ordered_index_usage != ordered_index_group_by && + (join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row + add_sorting_to_table(join_tab + const_tables, group_list)) + goto err; + + if (alloc_group_fields(this, group_list)) + goto err; + if (make_sum_func_list(all_fields, fields_list, true)) + goto err; + if (prepare_sum_aggregators(sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + goto err; + if (setup_sum_funcs(thd, sum_funcs)) + goto err; + group_list= NULL; + } + else + { + if (make_sum_func_list(all_fields, fields_list, false)) + goto err; + if (prepare_sum_aggregators(sum_funcs, + !join_tab->is_using_agg_loose_index_scan())) + goto err; + if (setup_sum_funcs(thd, sum_funcs)) + goto err; + + if (!group_list && !table->distinct && order && simple_order) { - table_map used_tables= select_list_used_tables; - JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; - do - { - if (used_tables & last_join_tab->table->map || - last_join_tab->use_join_cache) - break; - last_join_tab->shortcut_for_distinct= true; - } while (last_join_tab-- != join_tab); - /* 
Optimize "select distinct b from t1 order by key_part_1 limit #" */ - if (order && skip_sort_order) - { - /* Should always succeed */ - if (test_if_skip_sort_order(&join_tab[const_tables], - order, unit->select_limit_cnt, 0, - &join_tab[const_tables].table-> - keys_in_use_for_order_by)) - order=0; - join_tab[const_tables].update_explain_data(const_tables); - } + DBUG_PRINT("info",("Sorting for order")); + THD_STAGE_INFO(thd, stage_sorting_for_order); + + if (ordered_index_usage != ordered_index_order_by && + add_sorting_to_table(join_tab + const_tables, order)) + goto err; + order= NULL; } + } - /* If this join belongs to an uncacheable query save the original join */ - if (select_lex->uncacheable && init_save_join_tab()) - DBUG_RETURN(-1); /* purecov: inspected */ + DBUG_RETURN(false); + +err: + if (table != NULL) + free_tmp_table(thd, table); + DBUG_RETURN(true); +} + + +void +JOIN::optimize_distinct() +{ + for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;) + { + if (select_lex->select_list_tables & last_join_tab->table->map || + last_join_tab->use_join_cache) + break; + last_join_tab->shortcut_for_distinct= true; + if (last_join_tab == join_tab) + break; + --last_join_tab; } - DBUG_RETURN(0); + /* Optimize "select distinct b from t1 order by key_part_1 limit #" */ + if (order && skip_sort_order) + { + /* Should already have been optimized away */ + DBUG_ASSERT(ordered_index_usage == ordered_index_order_by); + if (ordered_index_usage == ordered_index_order_by) + { + order= NULL; + } + } } /** + @brief Add Filesort object to the given table to sort if with filesort + + @param tab the JOIN_TAB object to attach created Filesort object to + @param order List of expressions to sort the table by + + @note This function moves tab->select, if any, to filesort->select + + @return false on success, true on OOM +*/ + +bool +JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) +{ + tab->filesort= + new (thd->mem_root) Filesort(order, HA_POS_ERROR, 
tab->keep_current_rowid, + tab->select); + if (!tab->filesort) + return true; + /* + Select was moved to filesort->select to force join_init_read_record to use + sorted result instead of reading table through select. + */ + if (tab->select) + { + tab->select= NULL; + tab->set_select_cond(NULL, __LINE__); + } + tab->read_first_record= join_init_read_record; + return false; +} + + + + +/** Setup expression caches for subqueries that need them @details @@ -2291,17 +2933,6 @@ bool JOIN::setup_subquery_caches() } -/** - Restore values in temporary join. -*/ -void JOIN::restore_tmp() -{ - DBUG_PRINT("info", ("restore_tmp this %p tmp_join %p", this, tmp_join)); - DBUG_ASSERT(tmp_join != this); - memcpy(tmp_join, this, (size_t) sizeof(JOIN)); -} - - /* Shrink join buffers used for preceding tables to reduce the occupied space @@ -2366,25 +2997,29 @@ JOIN::reinit() unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ? select_lex->offset_limit->val_uint() : 0); - first_record= 0; + first_record= false; + group_sent= false; cleaned= false; - if (exec_tmp_table1) - { - exec_tmp_table1->file->extra(HA_EXTRA_RESET_STATE); - exec_tmp_table1->file->ha_delete_all_rows(); - } - if (exec_tmp_table2) + if (aggr_tables) { - exec_tmp_table2->file->extra(HA_EXTRA_RESET_STATE); - exec_tmp_table2->file->ha_delete_all_rows(); + JOIN_TAB *curr_tab= join_tab + top_join_tab_count; + JOIN_TAB *end_tab= curr_tab + aggr_tables; + for ( ; curr_tab < end_tab; curr_tab++) + { + TABLE *tmp_table= curr_tab->table; + if (!tmp_table->is_created()) + continue; + tmp_table->file->extra(HA_EXTRA_RESET_STATE); + tmp_table->file->ha_delete_all_rows(); + } } clear_sj_tmp_tables(this); - if (items0) + if (current_ref_ptrs != items0) + { set_items_ref_array(items0); - - if (join_tab_save) - memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * table_count); + set_group_rpa= false; + } /* need to reset ref access state (see join_read_key) */ if (join_tab) @@ -2397,9 +3032,6 @@ JOIN::reinit() } } - if 
(tmp_join) - restore_tmp(); - /* Reset of sum functions */ if (sum_funcs) { @@ -2424,38 +3056,40 @@ JOIN::reinit() DBUG_RETURN(0); } + /** - @brief Save the original join layout - - @details Saves the original join layout so it can be reused in - re-execution and for EXPLAIN. - - @return Operation status - @retval 0 success. - @retval 1 error occurred. + Prepare join result. + + @details Prepare join result prior to join execution or describing. + Instantiate derived tables and get schema tables result if necessary. + + @return + TRUE An error during derived or schema tables instantiation. + FALSE Ok */ -bool -JOIN::init_save_join_tab() +bool JOIN::prepare_result(List<Item> **columns_list) { - if (!(tmp_join= (JOIN*)thd->alloc(sizeof(JOIN)))) - return 1; /* purecov: inspected */ - error= 0; // Ensure that tmp_join.error= 0 - restore_tmp(); - return 0; -} + DBUG_ENTER("JOIN::prepare_result"); + error= 0; + /* Create result tables for materialized views. */ + if (!zero_result_cause && + select_lex->handle_derived(thd->lex, DT_CREATE)) + goto err; -bool -JOIN::save_join_tab() -{ - if (!join_tab_save && select_lex->master_unit()->uncacheable) - { - if (!(join_tab_save= (JOIN_TAB*)thd->memdup((uchar*) join_tab, - sizeof(JOIN_TAB) * table_count))) - return 1; - } - return 0; + if (result->prepare2()) + goto err; + + if ((select_lex->options & OPTION_SCHEMA_TABLE) && + get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC)) + goto err; + + DBUG_RETURN(FALSE); + +err: + error= 1; + DBUG_RETURN(TRUE); } @@ -2496,6 +3130,14 @@ void JOIN::save_explain_data(Explain_query *output, bool can_overwrite, Explain_union *eu= output->get_union(nr); explain= &eu->fake_select_lex_explain; join_tab[0].tracker= eu->get_fake_select_lex_tracker(); + for (uint i=0 ; i < top_join_tab_count + aggr_tables; i++) + { + if (join_tab[i].filesort) + { + join_tab[i].filesort->tracker= + new Filesort_tracker(thd->lex->analyze_stmt); + } + } } } @@ -2509,7 +3151,6 @@ void JOIN::exec() 
dbug_serve_apcs(thd, 1); ); ANALYZE_START_TRACKING(&explain->time_tracker); - explain->ops_tracker.report_join_start(); exec_inner(); ANALYZE_STOP_TRACKING(&explain->time_tracker); @@ -2522,29 +3163,26 @@ void JOIN::exec() } -/** - Exec select. - - @todo - Note, that create_sort_index calls test_if_skip_sort_order and may - finally replace sorting with index scan if there is a LIMIT clause in - the query. It's never shown in EXPLAIN! - - @todo - When can we have here thd->net.report_error not zero? -*/ - void JOIN::exec_inner() { List<Item> *columns_list= &fields_list; - int tmp_error; + DBUG_ENTER("JOIN::exec_inner"); - DBUG_ENTER("JOIN::exec"); + THD_STAGE_INFO(thd, stage_executing); - const bool has_group_by= this->group; + /* + Enable LIMIT ROWS EXAMINED during query execution if: + (1) This JOIN is the outermost query (not a subquery or derived table) + This ensures that the limit is enabled when actual execution begins, and + not if a subquery is evaluated during optimization of the outer query. + (2) This JOIN is not the result of a UNION. In this case do not apply the + limit in order to produce the partial query result stored in the + UNION temp table. + */ + if (!select_lex->outer_select() && // (1) + select_lex != select_lex->master_unit()->fake_select_lex) // (2) + thd->lex->set_limit_rows_examined(); - THD_STAGE_INFO(thd, stage_executing); - error= 0; if (procedure) { procedure_fields_list= fields_list; @@ -2565,13 +3203,16 @@ void JOIN::exec_inner() if (select_options & SELECT_DESCRIBE) select_describe(this, FALSE, FALSE, FALSE, (zero_result_cause?zero_result_cause:"No tables used")); + else { if (result->send_result_set_metadata(*columns_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) { DBUG_VOID_RETURN; } + /* We have to test for 'conds' here as the WHERE may not be constant even if we don't have any tables for prepared statements or if @@ -2638,7 +3279,7 @@ void JOIN::exec_inner() having ? 
having : tmp_having, all_fields); DBUG_VOID_RETURN; } - + /* Evaluate all constant expressions with subqueries in the ORDER/GROUP clauses to make sure that all subqueries return a @@ -2667,42 +3308,6 @@ void JOIN::exec_inner() if (select_options & SELECT_DESCRIBE) { - /* - Check if we managed to optimize ORDER BY away and don't use temporary - table to resolve ORDER BY: in that case, we only may need to do - filesort for GROUP BY. - */ - if (!order && !no_order && (!skip_sort_order || !need_tmp)) - { - /* - Reset 'order' to 'group_list' and reinit variables describing - 'order' - */ - order= group_list; - simple_order= simple_group; - skip_sort_order= 0; - } - if (order && join_tab) - { - bool made_call= false; - SQL_SELECT *tmp_select= join_tab[const_tables].select; - if ((order != group_list || - !(select_options & SELECT_BIG_RESULT) || - (tmp_select && tmp_select->quick && - tmp_select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) && - (const_tables == table_count || - ((simple_order || skip_sort_order) && - (made_call=true) && - test_if_skip_sort_order(&join_tab[const_tables], order, - select_limit, 0, - &join_tab[const_tables].table-> - keys_in_use_for_query)))) - order=0; - if (made_call) - join_tab[const_tables].update_explain_data(const_tables); - } - having= tmp_having; select_describe(this, need_tmp, order != 0 && !skip_sort_order, select_distinct, @@ -2715,537 +3320,31 @@ void JOIN::exec_inner() select_lex->mark_const_derived(zero_result_cause); } - if (!initialized && init_execution()) - DBUG_VOID_RETURN; - - JOIN *curr_join= this; - List<Item> *curr_all_fields= &all_fields; - List<Item> *curr_fields_list= &fields_list; - TABLE *curr_tmp_table= 0; - /* - curr_join->join_free() will call JOIN::cleanup(full=TRUE). It will not - be safe to call update_used_tables() after that. 
- */ - if (curr_join->tmp_having) - curr_join->tmp_having->update_used_tables(); - /* Initialize examined rows here because the values from all join parts must be accumulated in examined_row_count. Hence every join iteration must count from zero. */ - curr_join->join_examined_rows= 0; - - curr_join->do_select_call_count= 0; - - /* Create a tmp table if distinct or if the sort is too complicated */ - if (need_tmp) - { - if (tmp_join) - { - /* - We are in a non cacheable sub query. Get the saved join structure - after optimization. - (curr_join may have been modified during last exection and we need - to reset it) - */ - curr_join= tmp_join; - } - curr_tmp_table= exec_tmp_table1; + join_examined_rows= 0; - /* Copy data to the temporary table */ - THD_STAGE_INFO(thd, stage_copying_to_tmp_table); - DBUG_PRINT("info", ("%s", thd->proc_info)); - if (!curr_join->sort_and_group && - curr_join->const_tables != curr_join->table_count) - { - JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables; - first_tab->sorted= MY_TEST(first_tab->loosescan_match_tab); - } - - Procedure *save_proc= curr_join->procedure; - tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0); - curr_join->procedure= save_proc; - if (tmp_error) - { - error= tmp_error; - DBUG_VOID_RETURN; - } - curr_tmp_table->file->info(HA_STATUS_VARIABLE); - - if (curr_join->having) - curr_join->having= curr_join->tmp_having= 0; // Allready done - - /* Change sum_fields reference to calculated fields in tmp_table */ -#ifdef HAVE_valgrind - if (curr_join != this) -#endif - curr_join->all_fields= *curr_all_fields; - if (!items1) - { - items1= items0 + all_fields.elements; - if (sort_and_group || curr_tmp_table->group || - tmp_table_param.precomputed_group_by) - { - if (change_to_use_tmp_fields(thd, items1, - tmp_fields_list1, tmp_all_fields1, - fields_list.elements, all_fields)) - DBUG_VOID_RETURN; - } - else - { - if (change_refs_to_tmp_fields(thd, items1, - tmp_fields_list1, tmp_all_fields1, - 
fields_list.elements, all_fields)) - DBUG_VOID_RETURN; - } -#ifdef HAVE_valgrind - if (curr_join != this) -#endif - { - curr_join->tmp_all_fields1= tmp_all_fields1; - curr_join->tmp_fields_list1= tmp_fields_list1; - } - curr_join->items1= items1; - } - curr_all_fields= &tmp_all_fields1; - curr_fields_list= &tmp_fields_list1; - curr_join->set_items_ref_array(items1); - - if (sort_and_group || curr_tmp_table->group) - { - curr_join->tmp_table_param.field_count+= - curr_join->tmp_table_param.sum_func_count+ - curr_join->tmp_table_param.func_count; - curr_join->tmp_table_param.sum_func_count= - curr_join->tmp_table_param.func_count= 0; - } - else - { - curr_join->tmp_table_param.field_count+= - curr_join->tmp_table_param.func_count; - curr_join->tmp_table_param.func_count= 0; - } - - // procedure can't be used inside subselect => we do nothing special for it - if (procedure) - procedure->update_refs(); - - if (curr_tmp_table->group) - { // Already grouped - if (!curr_join->order && !curr_join->no_order && !skip_sort_order) - curr_join->order= curr_join->group_list; /* order by group */ - curr_join->group_list= 0; - } - - /* - If we have different sort & group then we must sort the data by group - and copy it to another tmp table - This code is also used if we are using distinct something - we haven't been able to store in the temporary table yet - like SEC_TO_TIME(SUM(...)). - */ - - if ((curr_join->group_list && (!test_if_subpart(curr_join->group_list, - curr_join->order) || - curr_join->select_distinct)) || - (curr_join->select_distinct && - curr_join->tmp_table_param.using_indirect_summary_function)) - { /* Must copy to another table */ - DBUG_PRINT("info",("Creating group table")); - - /* Free first data from old join */ - - /* - psergey-todo: this is the place of pre-mature JOIN::free call. 
- */ - curr_join->join_free(); - if (curr_join->make_simple_join(this, curr_tmp_table)) - DBUG_VOID_RETURN; - calc_group_buffer(curr_join, group_list); - count_field_types(select_lex, &curr_join->tmp_table_param, - curr_join->tmp_all_fields1, - curr_join->select_distinct && !curr_join->group_list); - curr_join->tmp_table_param.hidden_field_count= - (curr_join->tmp_all_fields1.elements- - curr_join->tmp_fields_list1.elements); - - if (exec_tmp_table2) - curr_tmp_table= exec_tmp_table2; - else - { - /* group data to new table */ - - /* - If the access method is loose index scan then all MIN/MAX - functions are precomputed, and should be treated as regular - functions. See extended comment in JOIN::exec. - */ - if (curr_join->join_tab->is_using_loose_index_scan()) - curr_join->tmp_table_param.precomputed_group_by= TRUE; - - if (!(curr_tmp_table= - exec_tmp_table2= create_tmp_table(thd, - &curr_join->tmp_table_param, - *curr_all_fields, - (ORDER*) 0, - curr_join->select_distinct && - !curr_join->group_list, - 1, curr_join->select_options, - HA_POS_ERROR, ""))) - DBUG_VOID_RETURN; - curr_join->exec_tmp_table2= exec_tmp_table2; - explain->ops_tracker.report_tmp_table(exec_tmp_table2); - } - if (curr_join->group_list) - { - if (curr_join->join_tab == join_tab && save_join_tab()) - { - DBUG_VOID_RETURN; - } - DBUG_PRINT("info",("Sorting for index")); - THD_STAGE_INFO(thd, stage_creating_sort_index); - if (create_sort_index(thd, curr_join, curr_join->group_list, - HA_POS_ERROR, HA_POS_ERROR, FALSE) || - make_group_fields(this, curr_join)) - { - DBUG_VOID_RETURN; - } - sortorder= curr_join->sortorder; - } - - THD_STAGE_INFO(thd, stage_copying_to_group_table); - DBUG_PRINT("info", ("%s", thd->proc_info)); - if (curr_join != this) - { - if (sum_funcs2) - { - curr_join->sum_funcs= sum_funcs2; - curr_join->sum_funcs_end= sum_funcs_end2; - } - else - { - curr_join->alloc_func_list(); - sum_funcs2= curr_join->sum_funcs; - sum_funcs_end2= curr_join->sum_funcs_end; - } - } - if 
(curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list, - 1, TRUE) || - prepare_sum_aggregators(curr_join->sum_funcs, - !curr_join->join_tab->is_using_agg_loose_index_scan())) - DBUG_VOID_RETURN; - curr_join->group_list= 0; - if (!curr_join->sort_and_group && - curr_join->const_tables != curr_join->table_count) - { - JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables; - first_tab->sorted= MY_TEST(first_tab->loosescan_match_tab); - } - tmp_error= -1; - if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || - (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, - 0))) - { - error= tmp_error; - DBUG_VOID_RETURN; - } - end_read_record(&curr_join->join_tab->read_record); - curr_join->const_tables= curr_join->table_count; // Mark free for cleanup() - curr_join->join_tab[0].table= 0; // Table is freed - - // No sum funcs anymore - if (!items2) - { - items2= items1 + all_fields.elements; - if (change_to_use_tmp_fields(thd, items2, - tmp_fields_list2, tmp_all_fields2, - fields_list.elements, tmp_all_fields1)) - DBUG_VOID_RETURN; -#ifdef HAVE_valgrind - /* - Some GCCs use memcpy() for struct assignment, even for x=x. - GCC bug 19410: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410 - */ - if (curr_join != this) -#endif - { - curr_join->tmp_fields_list2= tmp_fields_list2; - curr_join->tmp_all_fields2= tmp_all_fields2; - } - } - curr_fields_list= &curr_join->tmp_fields_list2; - curr_all_fields= &curr_join->tmp_all_fields2; - curr_join->set_items_ref_array(items2); - curr_join->tmp_table_param.field_count+= - curr_join->tmp_table_param.sum_func_count; - curr_join->tmp_table_param.sum_func_count= 0; - } - if (curr_tmp_table->distinct) - curr_join->select_distinct=0; /* Each row is unique */ - - curr_join->join_free(); /* Free quick selects */ - - if (curr_join->select_distinct && ! 
curr_join->group_list) - { - THD_STAGE_INFO(thd, stage_removing_duplicates); - if (remove_duplicates(curr_join, curr_tmp_table, - *curr_fields_list, curr_join->tmp_having)) - DBUG_VOID_RETURN; - curr_join->tmp_having=0; - curr_join->select_distinct=0; - } - curr_tmp_table->reginfo.lock_type= TL_UNLOCK; - // psergey-todo: here is one place where we switch to - if (curr_join->make_simple_join(this, curr_tmp_table)) - DBUG_VOID_RETURN; - calc_group_buffer(curr_join, curr_join->group_list); - count_field_types(select_lex, &curr_join->tmp_table_param, - *curr_all_fields, 0); - - } - if (procedure) - count_field_types(select_lex, &curr_join->tmp_table_param, - *curr_all_fields, 0); - - if (curr_join->group || curr_join->implicit_grouping || - curr_join->tmp_table_param.sum_func_count || - (procedure && (procedure->flags & PROC_GROUP))) - { - if (make_group_fields(this, curr_join)) - { - DBUG_VOID_RETURN; - } - if (!items3) - { - if (!items0) - init_items_ref_array(); - items3= ref_pointer_array + (all_fields.elements*4); - setup_copy_fields(thd, &curr_join->tmp_table_param, - items3, tmp_fields_list3, tmp_all_fields3, - curr_fields_list->elements, *curr_all_fields); - tmp_table_param.save_copy_funcs= curr_join->tmp_table_param.copy_funcs; - tmp_table_param.save_copy_field= curr_join->tmp_table_param.copy_field; - tmp_table_param.save_copy_field_end= - curr_join->tmp_table_param.copy_field_end; -#ifdef HAVE_valgrind - if (curr_join != this) -#endif - { - curr_join->tmp_all_fields3= tmp_all_fields3; - curr_join->tmp_fields_list3= tmp_fields_list3; - } - } - else - { - curr_join->tmp_table_param.copy_funcs= tmp_table_param.save_copy_funcs; - curr_join->tmp_table_param.copy_field= tmp_table_param.save_copy_field; - curr_join->tmp_table_param.copy_field_end= - tmp_table_param.save_copy_field_end; - } - curr_fields_list= &tmp_fields_list3; - curr_all_fields= &tmp_all_fields3; - curr_join->set_items_ref_array(items3); - - if (curr_join->make_sum_func_list(*curr_all_fields, 
*curr_fields_list, - 1, TRUE) || - prepare_sum_aggregators(curr_join->sum_funcs, - !curr_join->join_tab || - !curr_join->join_tab-> - is_using_agg_loose_index_scan()) || - setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || - thd->is_fatal_error) - DBUG_VOID_RETURN; - } - if (curr_join->group_list || curr_join->order) - { - DBUG_PRINT("info",("Sorting for send_result_set_metadata")); - THD_STAGE_INFO(thd, stage_sorting_result); - /* If we have already done the group, add HAVING to sorted table */ - if (curr_join->tmp_having && ! curr_join->group_list && - ! curr_join->sort_and_group) - { - JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables]; - table_map used_tables= (curr_join->const_table_map | - curr_table->table->map); - curr_join->tmp_having->update_used_tables(); - - Item* sort_table_cond= make_cond_for_table(thd, curr_join->tmp_having, - used_tables, - (table_map)0, -1, - FALSE, FALSE); - if (sort_table_cond) - { - if (!curr_table->select) - if (!(curr_table->select= new SQL_SELECT)) - DBUG_VOID_RETURN; - if (!curr_table->select->cond) - curr_table->select->cond= sort_table_cond; - else - { - if (!(curr_table->select->cond= - new (thd->mem_root) Item_cond_and(thd, curr_table->select->cond, - sort_table_cond))) - DBUG_VOID_RETURN; - } - if (curr_table->pre_idx_push_select_cond) - { - if (sort_table_cond->type() == Item::COND_ITEM) - sort_table_cond= sort_table_cond->copy_andor_structure(thd); - if (!(curr_table->pre_idx_push_select_cond= - new (thd->mem_root) Item_cond_and(thd, curr_table->pre_idx_push_select_cond, - sort_table_cond))) - DBUG_VOID_RETURN; - } - if (curr_table->select->cond && !curr_table->select->cond->fixed) - curr_table->select->cond->fix_fields(thd, 0); - if (curr_table->pre_idx_push_select_cond && - !curr_table->pre_idx_push_select_cond->fixed) - curr_table->pre_idx_push_select_cond->fix_fields(thd, 0); - - curr_table->select->pre_idx_push_select_cond= - curr_table->pre_idx_push_select_cond; - 
curr_table->set_select_cond(curr_table->select->cond, __LINE__); - curr_table->select_cond->top_level_item(); - DBUG_EXECUTE("where",print_where(curr_table->select->cond, - "select and having", - QT_ORDINARY);); - curr_join->tmp_having= make_cond_for_table(thd, curr_join->tmp_having, - ~ (table_map) 0, - ~used_tables, -1, - FALSE, FALSE); - DBUG_EXECUTE("where",print_where(curr_join->tmp_having, - "having after sort", - QT_ORDINARY);); - } - } - { - if (group) - curr_join->select_limit= HA_POS_ERROR; - else - { - /* - We can abort sorting after thd->select_limit rows if we there is no - WHERE clause for any tables after the sorted one. - */ - JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1]; - JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_join_tab_count]; - for (; curr_table < end_table ; curr_table++) - { - /* - table->keyuse is set in the case there was an original WHERE clause - on the table that was optimized away. - */ - if (curr_table->select_cond || - (curr_table->keyuse && !curr_table->first_inner)) - { - /* We have to sort all rows */ - curr_join->select_limit= HA_POS_ERROR; - break; - } - } - } - if (curr_join->join_tab == join_tab && save_join_tab()) - { - DBUG_VOID_RETURN; - } - /* - Here we sort rows for ORDER BY/GROUP BY clause, if the optimiser - chose FILESORT to be faster than INDEX SCAN or there is no - suitable index present. - Note, that create_sort_index calls test_if_skip_sort_order and may - finally replace sorting with index scan if there is a LIMIT clause in - the query. XXX: it's never shown in EXPLAIN! - OPTION_FOUND_ROWS supersedes LIMIT and is taken into account. - */ - DBUG_PRINT("info",("Sorting for order by/group by")); - ORDER *order_arg= - curr_join->group_list ? curr_join->group_list : curr_join->order; - /* - filesort_limit: Return only this many rows from filesort(). - We can use select_limit_cnt only if we have no group_by and 1 table. 
- This allows us to use Bounded_queue for queries like: - "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;" - select_limit == HA_POS_ERROR (we need a full table scan) - unit->select_limit_cnt == 1 (we only need one row in the result set) - */ - const ha_rows filesort_limit_arg= - (has_group_by || curr_join->table_count > 1) - ? curr_join->select_limit : unit->select_limit_cnt; - const ha_rows select_limit_arg= - select_options & OPTION_FOUND_ROWS - ? HA_POS_ERROR : unit->select_limit_cnt; - curr_join->filesort_found_rows= filesort_limit_arg != HA_POS_ERROR; - - DBUG_PRINT("info", ("has_group_by %d " - "curr_join->table_count %d " - "curr_join->m_select_limit %d " - "unit->select_limit_cnt %d", - has_group_by, - curr_join->table_count, - (int) curr_join->select_limit, - (int) unit->select_limit_cnt)); - if (create_sort_index(thd, - curr_join, - order_arg, - filesort_limit_arg, - select_limit_arg, - curr_join->group_list ? FALSE : TRUE)) - DBUG_VOID_RETURN; - sortorder= curr_join->sortorder; - if (curr_join->const_tables != curr_join->table_count && - !curr_join->join_tab[curr_join->const_tables].filesort) - { - /* - If no filesort for the first table then we are using an - INDEX SCAN. Thus we should not remove the sorted attribute - on the INDEX SCAN. - */ - skip_sort_order= 1; - } - } - } /* XXX: When can we have here thd->is_error() not zero? */ if (thd->is_error()) { error= thd->is_error(); DBUG_VOID_RETURN; } - curr_join->having= curr_join->tmp_having; - curr_join->fields= curr_fields_list; - curr_join->procedure= procedure; THD_STAGE_INFO(thd, stage_sending_data); DBUG_PRINT("info", ("%s", thd->proc_info)); - result->send_result_set_metadata((procedure ? curr_join->procedure_fields_list : - *curr_fields_list), - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); - error= do_select(curr_join, curr_fields_list, NULL, procedure); - if (curr_join->order && curr_join->sortorder && - curr_join->filesort_found_rows) - { - /* Use info provided by filesort. 
*/ - DBUG_ASSERT(curr_join->table_count > curr_join->const_tables); - JOIN_TAB *tab= curr_join->join_tab + curr_join->const_tables; - thd->limit_found_rows= tab->records; - } - + result->send_result_set_metadata( + procedure ? procedure_fields_list : *fields, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); + error= do_select(this, procedure); /* Accumulate the counts from all join iterations of all join parts. */ - thd->inc_examined_row_count(curr_join->join_examined_rows); + thd->inc_examined_row_count(join_examined_rows); DBUG_PRINT("counts", ("thd->examined_row_count: %lu", (ulong) thd->get_examined_row_count())); - /* - With EXPLAIN EXTENDED we have to restore original ref_array - for a derived table which is always materialized. - We also need to do this when we have temp table(s). - Otherwise we would not be able to print the query correctly. - */ - if (items0 && (thd->lex->describe & DESCRIBE_EXTENDED) && - (select_lex->linkage == DERIVED_TABLE_TYPE || - exec_tmp_table1 || exec_tmp_table2)) - set_items_ref_array(items0); - DBUG_VOID_RETURN; } @@ -3263,41 +3362,34 @@ JOIN::destroy() DBUG_ENTER("JOIN::destroy"); select_lex->join= 0; - if (tmp_join) + cond_equal= 0; + having_equal= 0; + + cleanup(1); + + if (join_tab) { - if (join_tab != tmp_join->join_tab) + DBUG_ASSERT(table_count+aggr_tables > 0); + for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS, + WITH_CONST_TABLES); + tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { - JOIN_TAB *tab; - for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); - tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) + if (tab->aggr) { - tab->cleanup(); + free_tmp_table(thd, tab->table); + delete tab->tmp_table_param; + tab->tmp_table_param= NULL; + tab->aggr= NULL; } + tab->table= NULL; } - tmp_join->tmp_join= 0; - /* - We need to clean up tmp_table_param for reusable JOINs (having non-zero - and different from self tmp_join) because it's not being cleaned up - anywhere else (as we need to 
keep the join is reusable). - */ - tmp_table_param.cleanup(); - tmp_join->tmp_table_param.copy_field= 0; - DBUG_RETURN(tmp_join->destroy()); } - cond_equal= 0; - having_equal= 0; - cleanup(1); - /* Cleanup items referencing temporary table columns */ + /* Cleanup items referencing temporary table columns */ cleanup_item_list(tmp_all_fields1); cleanup_item_list(tmp_all_fields3); - if (exec_tmp_table1) - free_tmp_table(thd, exec_tmp_table1); - if (exec_tmp_table2) - free_tmp_table(thd, exec_tmp_table2); - delete select; destroy_sj_tmp_tables(this); - delete_dynamic(&keyuse); + delete_dynamic(&keyuse); delete procedure; DBUG_RETURN(error); } @@ -3362,7 +3454,7 @@ void JOIN::cleanup_item_list(List<Item> &items) const */ bool -mysql_select(THD *thd, Item ***rref_pointer_array, +mysql_select(THD *thd, TABLE_LIST *tables, uint wild_num, List<Item> &fields, COND *conds, uint og_num, ORDER *order, ORDER *group, Item *having, ORDER *proc_param, ulonglong select_options, @@ -3397,7 +3489,7 @@ mysql_select(THD *thd, Item ***rref_pointer_array, } else { - if ((err= join->prepare(rref_pointer_array, tables, wild_num, + if ((err= join->prepare( tables, wild_num, conds, og_num, order, false, group, having, proc_param, select_lex, unit))) { @@ -3421,7 +3513,7 @@ mysql_select(THD *thd, Item ***rref_pointer_array, DBUG_RETURN(TRUE); THD_STAGE_INFO(thd, stage_init); thd->lex->used_tables=0; - if ((err= join->prepare(rref_pointer_array, tables, wild_num, + if ((err= join->prepare(tables, wild_num, conds, og_num, order, false, group, having, proc_param, select_lex, unit))) { @@ -4224,7 +4316,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, DEBUG_SYNC(join->thd, "inside_make_join_statistics"); /* Generate an execution plan from the found optimal join order. 
*/ - DBUG_RETURN(join->thd->check_killed() || get_best_combination(join)); + DBUG_RETURN(join->thd->check_killed() || join->get_best_combination()); error: /* @@ -4516,9 +4608,9 @@ add_key_field(JOIN *join, ((join->is_allowed_hash_join_access() && field->hash_join_is_possible() && !(field->table->pos_in_table_list->is_materialized_derived() && - field->table->created)) || + field->table->is_created())) || (field->table->pos_in_table_list->is_materialized_derived() && - !field->table->created && !(field->flags & BLOB_FLAG)))) + !field->table->is_created() && !(field->flags & BLOB_FLAG)))) { optimize= KEY_OPTIMIZE_EQ; } @@ -5725,7 +5817,8 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab) item->walk(&Item::collect_item_field_processor, 0, (uchar*) &indexed_fields); } - else if (is_indexed_agg_distinct(join, &indexed_fields)) + else if (join->tmp_table_param.sum_func_count && + is_indexed_agg_distinct(join, &indexed_fields)) { join->sort_and_group= 1; } @@ -7312,13 +7405,13 @@ double JOIN::get_examined_rows() { double examined_rows; double prev_fanout= 1; - JOIN_TAB *tab= first_breadth_first_optimization_tab(); + JOIN_TAB *tab= first_breadth_first_tab(); JOIN_TAB *prev_tab= tab; examined_rows= tab->get_examined_rows(); - while ((tab= next_breadth_first_tab(first_breadth_first_optimization_tab(), - top_table_access_tabs_count, tab))) + while ((tab= next_breadth_first_tab(first_breadth_first_tab(), + top_join_tab_count, tab))) { prev_fanout *= prev_tab->records_read; examined_rows+= tab->get_examined_rows() * prev_fanout; @@ -8215,6 +8308,7 @@ prev_record_reads(POSITION *positions, uint idx, table_map found_ref) static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab, uint n_top_tabs_count, JOIN_TAB *tab) { + n_top_tabs_count += tab->join->aggr_tables; if (!tab->bush_root_tab) { /* We're at top level. 
Get the next top-level tab */ @@ -8266,7 +8360,7 @@ static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab, JOIN_TAB *first_explain_order_tab(JOIN* join) { JOIN_TAB* tab; - tab= join->table_access_tabs; + tab= join->join_tab; return (tab->bush_children) ? tab->bush_children->start : tab; } @@ -8280,7 +8374,7 @@ JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab) /* Move to next tab in the array we're traversing */ tab++; - if (tab == join->table_access_tabs + join->top_join_tab_count) + if (tab == join->join_tab + join->top_join_tab_count) return NULL; /* Outside SJM nest and reached EOF */ if (tab->bush_children) @@ -8306,7 +8400,7 @@ JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab) { - tab= next_breadth_first_tab(join->first_breadth_first_execution_tab(), + tab= next_breadth_first_tab(join->first_breadth_first_tab(), join->top_join_tab_count, tab); if (tab && tab->bush_root_tab) tab= NULL; @@ -8384,7 +8478,7 @@ JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, } /* If no more JOIN_TAB's on the top level */ - if (++tab == join->join_tab + join->top_join_tab_count) + if (++tab == join->join_tab + join->top_join_tab_count + join->aggr_tables) return NULL; if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children) @@ -8478,37 +8572,58 @@ static Item * const null_ptr= NULL; TRUE Out of memory */ -bool -get_best_combination(JOIN *join) +bool JOIN::get_best_combination() { uint tablenr; table_map used_tables; - JOIN_TAB *join_tab,*j; + JOIN_TAB *j; KEYUSE *keyuse; - uint table_count; - THD *thd=join->thd; DBUG_ENTER("get_best_combination"); - table_count=join->table_count; - if (!(join->join_tab=join_tab= - (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count))) + /* + Additional plan nodes for postjoin tmp tables: + 1? + // For GROUP BY + 1? + // For DISTINCT + 1? + // For aggregation functions aggregated in outer query + // when used with distinct + 1? 
+ // For ORDER BY + 1? // buffer result + Up to 2 tmp tables are actually used, but it's hard to tell exact number + at this stage. + */ + uint aggr_tables= (group_list ? 1 : 0) + + (select_distinct ? + (tmp_table_param. using_outer_summary_function ? 2 : 1) : 0) + + (order ? 1 : 0) + + (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ; + + if (aggr_tables == 0) + aggr_tables= 1; /* For group by pushdown */ + + if (select_lex->window_specs.elements) + aggr_tables++; + + if (aggr_tables > 2) + aggr_tables= 2; + if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)* + (top_join_tab_count + aggr_tables)))) DBUG_RETURN(TRUE); - join->full_join=0; - join->hash_join= FALSE; + full_join=0; + hash_join= FALSE; used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read - fix_semijoin_strategies_for_picked_join_order(join); - + fix_semijoin_strategies_for_picked_join_order(this); + JOIN_TAB_RANGE *root_range; if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE)) DBUG_RETURN(TRUE); - root_range->start= join->join_tab; + root_range->start= join_tab; /* root_range->end will be set later */ - join->join_tab_ranges.empty(); + join_tab_ranges.empty(); - if (join->join_tab_ranges.push_back(root_range, thd->mem_root)) + if (join_tab_ranges.push_back(root_range, thd->mem_root)) DBUG_RETURN(TRUE); JOIN_TAB *sjm_nest_end= NULL; @@ -8517,7 +8632,7 @@ get_best_combination(JOIN *join) for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) { TABLE *form; - POSITION *cur_pos= &join->best_positions[tablenr]; + POSITION *cur_pos= &best_positions[tablenr]; if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) { @@ -8528,7 +8643,7 @@ get_best_combination(JOIN *join) in the temptable. */ bzero(j, sizeof(JOIN_TAB)); - j->join= join; + j->join= this; j->table= NULL; //temporary way to tell SJM tables from others. 
j->ref.key = -1; j->on_expr_ref= (Item**) &null_ptr; @@ -8544,12 +8659,12 @@ get_best_combination(JOIN *join) j->cond_selectivity= 1.0; JOIN_TAB *jt; JOIN_TAB_RANGE *jt_range; - if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) || + if (!(jt= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) || !(jt_range= new JOIN_TAB_RANGE)) DBUG_RETURN(TRUE); jt_range->start= jt; jt_range->end= jt + sjm->tables; - join->join_tab_ranges.push_back(jt_range, join->thd->mem_root); + join_tab_ranges.push_back(jt_range, thd->mem_root); j->bush_children= jt_range; sjm_nest_end= jt + sjm->tables; sjm_nest_root= j; @@ -8557,11 +8672,11 @@ get_best_combination(JOIN *join) j= jt; } - *j= *join->best_positions[tablenr].table; + *j= *best_positions[tablenr].table; j->bush_root_tab= sjm_nest_root; - form=join->table[tablenr]=j->table; + form= table[tablenr]= j->table; used_tables|= form->map; form->reginfo.join_tab=j; if (!*j->on_expr_ref) @@ -8577,36 +8692,36 @@ get_best_combination(JOIN *join) if (j->type == JT_SYSTEM) goto loop_end; - if ( !(keyuse= join->best_positions[tablenr].key)) + if ( !(keyuse= best_positions[tablenr].key)) { j->type=JT_ALL; - if (join->best_positions[tablenr].use_join_buffer && - tablenr != join->const_tables) - join->full_join= 1; + if (best_positions[tablenr].use_join_buffer && + tablenr != const_tables) + full_join= 1; } - /*if (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN) + /*if (best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN) { DBUG_ASSERT(!keyuse || keyuse->key == - join->best_positions[tablenr].loosescan_picker.loosescan_key); - j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key; + best_positions[tablenr].loosescan_picker.loosescan_key); + j->index= best_positions[tablenr].loosescan_picker.loosescan_key; }*/ - if (keyuse && create_ref_for_key(join, j, keyuse, TRUE, used_tables)) + if (keyuse && create_ref_for_key(this, j, keyuse, TRUE, used_tables)) DBUG_RETURN(TRUE); // Something went 
wrong if ((j->type == JT_REF || j->type == JT_EQ_REF) && is_hash_join_key_no(j->ref.key)) - join->hash_join= TRUE; + hash_join= TRUE; loop_end: /* Save records_read in JOIN_TAB so that select_describe()/etc don't have to access join->best_positions[]. */ - j->records_read= join->best_positions[tablenr].records_read; - j->cond_selectivity= join->best_positions[tablenr].cond_selectivity; - join->map2table[j->table->tablenr]= j; + j->records_read= best_positions[tablenr].records_read; + j->cond_selectivity= best_positions[tablenr].cond_selectivity; + map2table[j->table->tablenr]= j; /* If we've reached the end of sjm nest, switch back to main sequence */ if (j + 1 == sjm_nest_end) @@ -8619,16 +8734,10 @@ get_best_combination(JOIN *join) } root_range->end= j; - join->top_join_tab_count= join->join_tab_ranges.head()->end - - join->join_tab_ranges.head()->start; - /* - Save pointers to select join tabs for SHOW EXPLAIN - */ - join->table_access_tabs= join->join_tab; - join->top_table_access_tabs_count= join->top_join_tab_count; - + top_join_tab_count= join_tab_ranges.head()->end - + join_tab_ranges.head()->start; - update_depend_map(join); + update_depend_map(this); DBUG_RETURN(0); } @@ -9027,115 +9136,6 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables, keyuse->val, FALSE); } -/** - @details Initialize a JOIN as a query execution plan - that accesses a single table via a table scan. - - @param parent contains JOIN_TAB and TABLE object buffers for this join - @param tmp_table temporary table - - @retval FALSE success - @retval TRUE error occurred -*/ -bool -JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) -{ - DBUG_ENTER("JOIN::make_simple_join"); - - /* - Reuse TABLE * and JOIN_TAB if already allocated by a previous call - to this function through JOIN::exec (may happen for sub-queries). - - psergey-todo: here, save the pointer for original join_tabs. 
- */ - if (!(join_tab= parent->join_tab_reexec)) - { - if (!(join_tab= parent->join_tab_reexec= - (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - } - else - { - /* Free memory used by previous allocations */ - delete join_tab->filesort; - } - - table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table; - table_count= top_join_tab_count= 1; - - const_tables= 0; - const_table_map= 0; - eliminated_tables= 0; - tmp_table_param.field_count= tmp_table_param.sum_func_count= - tmp_table_param.func_count= 0; - /* - We need to destruct the copy_field (allocated in create_tmp_table()) - before setting it to 0 if the join is not "reusable". - */ - if (!tmp_join || tmp_join != this) - tmp_table_param.cleanup(); - tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; - first_record= sort_and_group=0; - send_records= (ha_rows) 0; - - if (group_optimized_away && !tmp_table_param.precomputed_group_by) - { - /* - If grouping has been optimized away, a temporary table is - normally not needed unless we're explicitly requested to create - one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT). - - In this case (grouping was optimized away), temp_table was - created without a grouping expression and JOIN::exec() will not - perform the necessary grouping (by the use of end_send_group() - or end_write_group()) if JOIN::group is set to false. - - There is one exception: if the loose index scan access method is - used to read into the temporary table, grouping and aggregate - functions are handled. - */ - // the temporary table was explicitly requested - DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT)); - // the temporary table does not have a grouping expression - DBUG_ASSERT(!temp_table->group); - } - else - group= false; - - row_limit= unit->select_limit_cnt; - do_send_rows= row_limit ? 
1 : 0; - - bzero(join_tab, sizeof(JOIN_TAB)); - join_tab->table=temp_table; - join_tab->set_select_cond(NULL, __LINE__); - join_tab->type= JT_ALL; /* Map through all records */ - join_tab->keys.init(); - join_tab->keys.set_all(); /* test everything in quick */ - join_tab->ref.key = -1; - join_tab->shortcut_for_distinct= false; - join_tab->read_first_record= join_init_read_record; - join_tab->join= this; - join_tab->ref.key_parts= 0; - - uint select_nr= select_lex->select_number; - if (select_nr == INT_MAX) - { - /* this is a fake_select_lex of a union */ - select_nr= select_lex->master_unit()->first_select()->select_number; - join_tab->tracker= thd->lex->explain->get_union(select_nr)-> - get_tmptable_read_tracker(); - } - else - { - join_tab->tracker= thd->lex->explain->get_select(select_nr)-> - get_using_temporary_read_tracker(); - } - bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record)); - temp_table->status=0; - temp_table->null_row=0; - DBUG_RETURN(FALSE); -} - inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2) { @@ -9551,6 +9551,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab= next_depth_first_tab(join, tab), i++) { bool is_hj; + /* first_inner is the X in queries like: SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X @@ -11181,7 +11182,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) join_read_system :join_read_const; if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) - table->enable_keyread(); + table->set_keyread(true); else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered()) push_index_cond(tab, tab->ref.key); break; @@ -11190,7 +11191,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) /* fall through */ if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) - table->enable_keyread(); + table->set_keyread(true); else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered()) push_index_cond(tab, tab->ref.key); break; @@ 
-11205,7 +11206,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) tab->quick=0; if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) - table->enable_keyread(); + table->set_keyread(true); else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered()) push_index_cond(tab, tab->ref.key); break; @@ -11268,7 +11269,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) if (tab->select && tab->select->quick && tab->select->quick->index != MAX_KEY && //not index_merge table->covering_keys.is_set(tab->select->quick->index)) - table->enable_keyread(); + table->set_keyread(true); else if (!table->covering_keys.is_clear_all() && !(tab->select && tab->select->quick)) { // Only read index tree @@ -11430,17 +11431,27 @@ void JOIN_TAB::cleanup() select= 0; delete quick; quick= 0; - delete filesort; - filesort= 0; if (cache) { cache->free(); cache= 0; } limit= 0; + // Free select that was created for filesort outside of create_sort_index + if (filesort && filesort->select && !filesort->own_select) + delete filesort->select; + delete filesort; + filesort= NULL; + /* Skip non-existing derived tables/views result tables */ + if (table && + (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created())) + { + table->set_keyread(FALSE); + table->file->ha_index_or_rnd_end(); + } if (table) { - table->disable_keyread(); + table->set_keyread(false); table->file->ha_index_or_rnd_end(); preread_init_done= FALSE; if (table->pos_in_table_list && @@ -11490,7 +11501,7 @@ void JOIN_TAB::cleanup() double JOIN_TAB::scan_time() { double res; - if (table->created) + if (table->is_created()) { if (table->is_filled_at_execution()) { @@ -11529,9 +11540,10 @@ double JOIN_TAB::scan_time() ha_rows JOIN_TAB::get_examined_rows() { double examined_rows; + SQL_SELECT *sel= filesort? 
filesort->select : this->select; - if (select && select->quick && use_quick != 2) - examined_rows= select->quick->records; + if (sel && sel->quick && use_quick != 2) + examined_rows= sel->quick->records; else if (type == JT_NEXT || type == JT_ALL || type == JT_HASH || type ==JT_HASH_NEXT) { @@ -11818,35 +11830,12 @@ void JOIN::cleanup(bool full) table_count= original_table_count; } - if (table) + if (join_tab) { JOIN_TAB *tab; - /* - Only a sorted table may be cached. This sorted table is always the - first non const table in join->table - */ - if (table_count > const_tables) // Test for not-const tables - { - JOIN_TAB *first_tab= first_top_level_tab(this, WITHOUT_CONST_TABLES); - if (first_tab->table) - { - delete first_tab->filesort; - first_tab->filesort= 0; - } - } + if (full) { - JOIN_TAB *sort_tab= first_linear_tab(this, WITH_BUSH_ROOTS, - WITHOUT_CONST_TABLES); - if (pre_sort_join_tab) - { - if (sort_tab && sort_tab->select == pre_sort_join_tab->select) - { - pre_sort_join_tab->select= NULL; - } - else - clean_pre_sort_join_tab(); - } /* Call cleanup() on join tabs used by the join optimization (join->join_tab may now be pointing to result of make_simple_join @@ -11858,49 +11847,64 @@ void JOIN::cleanup(bool full) */ if (table_count) { - for (tab= first_breadth_first_optimization_tab(); tab; - tab= next_breadth_first_tab(first_breadth_first_optimization_tab(), - top_table_access_tabs_count, tab)) - tab->cleanup(); - - /* We've walked optimization tabs, do execution ones too. 
*/ - if (first_breadth_first_execution_tab() != - first_breadth_first_optimization_tab()) + for (tab= first_breadth_first_tab(); tab; + tab= next_breadth_first_tab(first_breadth_first_tab(), + top_join_tab_count, tab)) { - for (tab= first_breadth_first_execution_tab(); tab; - tab= next_breadth_first_tab(first_breadth_first_execution_tab(), - top_join_tab_count, tab)) - tab->cleanup(); + tab->cleanup(); + delete tab->filesort_result; + tab->filesort_result= NULL; } } cleaned= true; + //psergey2: added (Q: why not in the above loop?) + { + JOIN_TAB *curr_tab= join_tab + top_join_tab_count; + for (uint i= 0; i < aggr_tables; i++, curr_tab++) + { + if (curr_tab->aggr) + { + free_tmp_table(thd, curr_tab->table); + delete curr_tab->tmp_table_param; + curr_tab->tmp_table_param= NULL; + curr_tab->aggr= NULL; + + delete curr_tab->filesort_result; + curr_tab->filesort_result= NULL; + } + } + aggr_tables= 0; // psergey3 + } } else { for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { - if (tab->table) + if (!tab->table) + continue; + DBUG_PRINT("info", ("close index: %s.%s alias: %s", + tab->table->s->db.str, + tab->table->s->table_name.str, + tab->table->alias.c_ptr())); + if (tab->table->is_created()) { - DBUG_PRINT("info", ("close index: %s.%s alias: %s", - tab->table->s->db.str, - tab->table->s->table_name.str, - tab->table->alias.c_ptr())); tab->table->file->ha_index_or_rnd_end(); + if (tab->aggr) + { + int tmp= 0; + if ((tmp= tab->table->file->extra(HA_EXTRA_NO_CACHE))) + tab->table->file->print_error(tmp, MYF(0)); + } } + delete tab->filesort_result; + tab->filesort_result= NULL; } } } if (full) { cleanup_empty_jtbm_semi_joins(this, join_list); - /* - Ensure that the following delete_elements() would not be called - twice for the same list. - */ - if (tmp_join && tmp_join != this && - tmp_join->group_fields == this->group_fields) - tmp_join->group_fields.empty(); // Run Cached_item DTORs! 
group_fields.delete_elements(); @@ -11916,14 +11920,6 @@ void JOIN::cleanup(bool full) pointer to tmp_table_param.copy_field from tmp_join, because it qill be removed in tmp_table_param.cleanup(). */ - if (tmp_join && - tmp_join != this && - tmp_join->tmp_table_param.copy_field == - tmp_table_param.copy_field) - { - tmp_join->tmp_table_param.copy_field= - tmp_join->tmp_table_param.save_copy_field= 0; - } tmp_table_param.cleanup(); delete pushdown_query; @@ -11945,6 +11941,12 @@ void JOIN::cleanup(bool full) } } } + /* Restore ref array to original state */ + if (current_ref_ptrs != items0) + { + set_items_ref_array(items0); + set_group_rpa= false; + } DBUG_VOID_RETURN; } @@ -12120,8 +12122,8 @@ static ORDER * remove_const(JOIN *join,ORDER *first_order, COND *cond, bool change_list, bool *simple_order) { - *simple_order= 1; - if (join->table_count == join->const_tables) + *simple_order= join->rollup.state == ROLLUP::STATE_NONE; + if (join->only_const_tables()) return change_list ? 0 : first_order; // No need to sort ORDER *order,**prev_ptr, *tmp_order; @@ -13282,16 +13284,16 @@ static int compare_fields_by_table_order(Item *field1, Item_field *f1= (Item_field *) (field1->real_item()); Item_field *f2= (Item_field *) (field2->real_item()); if (field1->const_item() || f1->const_item()) - return 1; - if (field2->const_item() || f2->const_item()) return -1; - if (f2->used_tables() & OUTER_REF_TABLE_BIT) - { + if (field2->const_item() || f2->const_item()) + return 1; + if (f1->used_tables() & OUTER_REF_TABLE_BIT) + { outer_ref= 1; cmp= -1; } - if (f1->used_tables() & OUTER_REF_TABLE_BIT) - { + if (f2->used_tables() & OUTER_REF_TABLE_BIT) + { outer_ref= 1; cmp++; } @@ -13315,10 +13317,12 @@ static int compare_fields_by_table_order(Item *field1, tab2= tab2->bush_root_tab; } - cmp= tab2 - tab1; + cmp= tab1 - tab2; if (!cmp) { + /* Fields f1, f2 belong to the same table */ + JOIN_TAB *tab= idx[f1->field->table->tablenr]; uint keyno= MAX_KEY; if (tab->ref.key_parts) @@ 
-13327,31 +13331,38 @@ static int compare_fields_by_table_order(Item *field1, keyno = tab->select->quick->index; if (keyno != MAX_KEY) { - if (f2->field->part_of_key.is_set(keyno)) - cmp= -1; if (f1->field->part_of_key.is_set(keyno)) + cmp= -1; + if (f2->field->part_of_key.is_set(keyno)) cmp++; + /* + Here: + if both f1, f2 are components of the key tab->ref.key then cmp==0, + if only f1 is a component of the key then cmp==-1 (f1 is better), + if only f2 is a component of the key then cmp==1, (f2 is better), + if none of f1,f1 is component of the key cmp==0. + */ if (!cmp) { KEY *key_info= tab->table->key_info + keyno; for (uint i= 0; i < key_info->user_defined_key_parts; i++) { Field *fld= key_info->key_part[i].field; - if (fld->eq(f2->field)) + if (fld->eq(f1->field)) { - cmp= -1; + cmp= -1; // f1 is better break; } - if (fld->eq(f1->field)) + if (fld->eq(f2->field)) { - cmp= 1; + cmp= 1; // f2 is better break; } } } } - else - cmp= f2->field->field_index-f1->field->field_index; + if (!cmp) + cmp= f1->field->field_index-f2->field->field_index; } return cmp < 0 ? -1 : (cmp ? 1 : 0); } @@ -15973,6 +15984,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, case Item::NULL_ITEM: case Item::VARBIN_ITEM: case Item::CACHE_ITEM: + case Item::WINDOW_FUNC_ITEM: // psergey-winfunc: case Item::EXPR_CACHE_ITEM: if (make_copy_field) { @@ -16249,7 +16261,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, reclength= string_total_length= 0; blob_count= string_count= null_count= hidden_null_count= group_null_items= 0; - param->using_indirect_summary_function=0; + param->using_outer_summary_function= 0; List_iterator_fast<Item> li(fields); Item *item; @@ -16271,7 +16283,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, function. We need to know this if someone is going to use DISTINCT on the result. 
*/ - param->using_indirect_summary_function=1; + param->using_outer_summary_function=1; continue; } } @@ -16885,13 +16897,8 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, if (!do_not_open) { - if (share->db_type() == TMP_ENGINE_HTON) - { - if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo, - ¶m->recinfo, select_options)) - goto err; - } - if (open_tmp_table(table)) + if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo, + ¶m->recinfo, select_options)) goto err; } @@ -17032,9 +17039,9 @@ bool open_tmp_table(TABLE *table) } table->db_stat= HA_OPEN_KEYFILE+HA_OPEN_RNDFILE; (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */ - if (!table->created) + if (!table->is_created()) { - table->created= TRUE; + table->set_created(); table->in_use->inc_status_created_tmp_tables(); } @@ -17240,7 +17247,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, table->in_use->inc_status_created_tmp_tables(); table->in_use->query_plan_flags|= QPLAN_TMP_DISK; share->db_record_offset= 1; - table->created= TRUE; + table->set_created(); DBUG_RETURN(0); err: DBUG_RETURN(1); @@ -17558,7 +17565,7 @@ free_tmp_table(THD *thd, TABLE *entry) save_proc_info=thd->proc_info; THD_STAGE_INFO(thd, stage_removing_tmp_table); - if (entry->file && entry->created) + if (entry->file && entry->is_created()) { entry->file->ha_index_or_rnd_end(); if (entry->db_stat) @@ -17586,81 +17593,101 @@ free_tmp_table(THD *thd, TABLE *entry) /** - @details - Rows produced by a join sweep may end up in a temporary table or be sent - to a client. Setup the function of the nested loop join algorithm which - handles final fully constructed and matched records. + @brief + Set write_func of AGGR_OP object - @param join join to setup the function for. + @param join_tab JOIN_TAB of the corresponding tmp table - @return - end_select function to use. This function can't fail. 
+ @details + Function sets up write_func according to how AGGR_OP object that + is attached to the given join_tab will be used in the query. */ -Next_select_func setup_end_select_func(JOIN *join) +void set_postjoin_aggr_write_func(JOIN_TAB *tab) { - TABLE *table= join->tmp_table; - TMP_TABLE_PARAM *tmp_tbl= &join->tmp_table_param; - Next_select_func end_select; + JOIN *join= tab->join; + TABLE *table= tab->table; + AGGR_OP *aggr= tab->aggr; + TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param; - /* Set up select_end */ - if (table) + DBUG_ASSERT(table && aggr); + + if (table->group && tmp_tbl->sum_func_count && + !tmp_tbl->precomputed_group_by) { - if (table->group && tmp_tbl->sum_func_count && - !tmp_tbl->precomputed_group_by) - { - if (table->s->keys) - { - DBUG_PRINT("info",("Using end_update")); - end_select=end_update; - } - else - { - DBUG_PRINT("info",("Using end_unique_update")); - end_select=end_unique_update; - } - } - else if (join->sort_and_group && !tmp_tbl->precomputed_group_by) + /* + Note for MyISAM tmp tables: if uniques is true keys won't be + created. + */ + if (table->s->keys && !table->s->uniques) { - DBUG_PRINT("info",("Using end_write_group")); - end_select=end_write_group; + DBUG_PRINT("info",("Using end_update")); + aggr->set_write_func(end_update); } else { - DBUG_PRINT("info",("Using end_write")); - end_select=end_write; - if (tmp_tbl->precomputed_group_by) - { - /* - A preceding call to create_tmp_table in the case when loose - index scan is used guarantees that - TMP_TABLE_PARAM::items_to_copy has enough space for the group - by functions. It is OK here to use memcpy since we copy - Item_sum pointers into an array of Item pointers. 
- */ - memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count, - join->sum_funcs, - sizeof(Item*)*tmp_tbl->sum_func_count); - tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0; - } + DBUG_PRINT("info",("Using end_unique_update")); + aggr->set_write_func(end_unique_update); } } + else if (join->sort_and_group && !tmp_tbl->precomputed_group_by && + !join->sort_and_group_aggr_tab) + { + DBUG_PRINT("info",("Using end_write_group")); + aggr->set_write_func(end_write_group); + join->sort_and_group_aggr_tab= tab; + } else { - /* - Choose method for presenting result to user. Use end_send_group - if the query requires grouping (has a GROUP BY clause and/or one or - more aggregate functions). Use end_send if the query should not - be grouped. - */ - if ((join->sort_and_group || - (join->procedure && join->procedure->flags & PROC_GROUP)) && - !tmp_tbl->precomputed_group_by) - end_select= end_send_group; - else - end_select= end_send; + DBUG_PRINT("info",("Using end_write")); + aggr->set_write_func(end_write); + if (tmp_tbl->precomputed_group_by) + { + /* + A preceding call to create_tmp_table in the case when loose + index scan is used guarantees that + TMP_TABLE_PARAM::items_to_copy has enough space for the group + by functions. It is OK here to use memcpy since we copy + Item_sum pointers into an array of Item pointers. + */ + memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count, + join->sum_funcs, + sizeof(Item*)*tmp_tbl->sum_func_count); + tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0; + } + } +} + + +/** + @details + Rows produced by a join sweep may end up in a temporary table or be sent + to a client. Set the function of the nested loop join algorithm which + handles final fully constructed and matched records. + + @param join join to setup the function for. + + @return + end_select function to use. This function can't fail. 
+*/ + +Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab) +{ + TMP_TABLE_PARAM *tmp_tbl= tab ? tab->tmp_table_param : &join->tmp_table_param; + + /* + Choose method for presenting result to user. Use end_send_group + if the query requires grouping (has a GROUP BY clause and/or one or + more aggregate functions). Use end_send if the query should not + be grouped. + */ + if (join->sort_and_group && !tmp_tbl->precomputed_group_by) + { + DBUG_PRINT("info",("Using end_send_group")); + return end_send_group; } - return end_select; + DBUG_PRINT("info",("Using end_send")); + return end_send; } @@ -17676,19 +17703,13 @@ Next_select_func setup_end_select_func(JOIN *join) */ static int -do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) +do_select(JOIN *join, Procedure *procedure) { int rc= 0; enum_nested_loop_state error= NESTED_LOOP_OK; - JOIN_TAB *UNINIT_VAR(join_tab); DBUG_ENTER("do_select"); - - join->procedure=procedure; - join->tmp_table= table; /* Save for easy recursion */ - join->fields= fields; - join->do_select_call_count++; - if (join->pushdown_query && join->do_select_call_count == 1) + if (join->pushdown_query) { /* Select fields are in the temporary table */ join->fields= &join->tmp_fields_list1; @@ -17696,34 +17717,33 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) join->set_items_ref_array(join->items1); /* The storage engine will take care of the group by query result */ int res= join->pushdown_query->execute(join); - DBUG_RETURN(res); - } - if (table) - { - (void) table->file->extra(HA_EXTRA_WRITE_CACHE); - empty_record(table); - if (table->group && join->tmp_table_param.sum_func_count && - table->s->keys && !table->file->inited) + if (res) + DBUG_RETURN(res); + + if (join->pushdown_query->store_data_in_temp_table) { - rc= table->file->ha_index_init(0, 0); - if (rc) - { - table->file->print_error(rc, MYF(0)); - DBUG_RETURN(-1); - } + JOIN_TAB *last_tab= join->join_tab + join->table_count; 
+ last_tab->next_select= end_send; + + enum_nested_loop_state state= last_tab->aggr->end_send(); + if (state >= NESTED_LOOP_OK) + state= sub_select(join, last_tab, true); + + if (state < NESTED_LOOP_OK) + res= 1; + + if (join->result->send_eof()) + res= 1; } + DBUG_RETURN(res); } - /* Set up select_end */ - Next_select_func end_select= setup_end_select_func(join); - if (join->table_count) - { - join->join_tab[join->top_join_tab_count - 1].next_select= end_select; - join_tab=join->join_tab+join->const_tables; - } + + join->procedure= procedure; join->send_records=0; - if (join->table_count == join->const_tables) + if (join->only_const_tables() && !join->need_tmp) { + Next_select_func end_select= setup_end_select_func(join, NULL); /* HAVING will be checked after processing aggregate functions, But WHERE should checked here (we alredy have read tables). @@ -17735,8 +17755,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) DBUG_ASSERT(join->outer_ref_cond == NULL); if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int()) { + // HAVING will be checked by end_select error= (*end_select)(join, 0, 0); - if (error == NESTED_LOOP_OK || error == NESTED_LOOP_QUERY_LIMIT) + if (error >= NESTED_LOOP_OK) error= (*end_select)(join, 0, 1); /* @@ -17752,7 +17773,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) if (!join->having || join->having->val_int()) { List<Item> *columns_list= (procedure ? 
&join->procedure_fields_list : - fields); + join->fields); rc= join->result->send_data(*columns_list) > 0; } } @@ -17766,8 +17787,6 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) } else { - DBUG_ASSERT(join->table_count); - DBUG_EXECUTE_IF("show_explain_probe_do_select", if (dbug_user_var_equals_int(join->thd, "show_explain_probe_select_id", @@ -17775,15 +17794,13 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) dbug_serve_apcs(join->thd, 1); ); + JOIN_TAB *join_tab= join->join_tab + join->const_tables; if (join->outer_ref_cond && !join->outer_ref_cond->val_int()) error= NESTED_LOOP_NO_MORE_ROWS; else - error= sub_select(join,join_tab,0); - if ((error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) && - join->thd->killed != ABORT_QUERY) - error= sub_select(join,join_tab,1); - if (error == NESTED_LOOP_QUERY_LIMIT) - error= NESTED_LOOP_OK; /* select_limit used */ + error= join->first_select(join,join_tab,0); + if (error >= NESTED_LOOP_OK && join->thd->killed != ABORT_QUERY) + error= join->first_select(join,join_tab,1); } join->thd->limit_found_rows= join->send_records; @@ -17791,23 +17808,37 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) if (error == NESTED_LOOP_NO_MORE_ROWS || join->thd->killed == ABORT_QUERY) error= NESTED_LOOP_OK; - if (table) + /* + For "order by with limit", we cannot rely on send_records, but need + to use the rowcount read originally into the join_tab applying the + filesort. There cannot be any post-filtering conditions, nor any + following join_tabs in this case, so this rowcount properly represents + the correct number of qualifying rows. 
+ */ + if (join->order) { - int tmp, new_errno= 0; - if ((tmp=table->file->extra(HA_EXTRA_NO_CACHE))) + // Save # of found records prior to cleanup + JOIN_TAB *sort_tab; + JOIN_TAB *join_tab= join->join_tab; + uint const_tables= join->const_tables; + + // Take record count from first non constant table or from last tmp table + if (join->aggr_tables > 0) + sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1; + else { - DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed")); - new_errno= tmp; + DBUG_ASSERT(!join->only_const_tables()); + sort_tab= join_tab + const_tables; } - if ((tmp=table->file->ha_index_or_rnd_end())) + if (sort_tab->filesort && + join->select_options & OPTION_FOUND_ROWS && + sort_tab->filesort->sortorder && + sort_tab->filesort->limit != HA_POS_ERROR) { - DBUG_PRINT("error",("ha_index_or_rnd_end() failed")); - new_errno= tmp; + join->thd->limit_found_rows= sort_tab->records; } - if (new_errno) - table->file->print_error(new_errno,MYF(0)); } - else + { /* The following will unlock all cursors if the command wasn't an @@ -17821,11 +17852,8 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) Sic: this branch works even if rc != 0, e.g. when send_data above returns an error. */ - if (!table) // If sending data to client - { - if (join->result->send_eof()) - rc= 1; // Don't send error - } + if (join->result->send_eof()) + rc= 1; // Don't send error DBUG_PRINT("info",("%ld records output", (long) join->send_records)); } else @@ -17836,7 +17864,8 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) DBUG_PRINT("error",("Error: do_select() failed")); } #endif - DBUG_RETURN(join->thd->is_error() ? -1 : rc); + rc= join->thd->is_error() ? 
-1 : rc; + DBUG_RETURN(rc); } @@ -17853,6 +17882,106 @@ int rr_sequential_and_unpack(READ_RECORD *info) } +/** + @brief + Instantiates temporary table + + @param table Table object that describes the table to be + instantiated + @param keyinfo Description of the index (there is always one index) + @param start_recinfo Column descriptions + @param recinfo INOUT End of column descriptions + @param options Option bits + + @details + Creates tmp table and opens it. + + @return + FALSE - OK + TRUE - Error +*/ + +static +bool instantiate_tmp_table(TABLE *table, KEY *keyinfo, + MARIA_COLUMNDEF *start_recinfo, + MARIA_COLUMNDEF **recinfo, + ulonglong options) +{ + if (table->s->db_type() == TMP_ENGINE_HTON) + { + if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo, + options)) + return TRUE; + // Make empty record so random data is not written to disk + empty_record(table); + } + if (open_tmp_table(table)) + return TRUE; + + return FALSE; +} + + +/** + @brief + Accumulate rows of the result of an aggregation operation in a tmp table + + @param join pointer to the structure providing all context info for the query + @param join_tab the JOIN_TAB object to which the operation is attached + @param end_records TRUE <=> all records were accumulated, send them further + + @details + This function accumulates records of the aggreagation operation for + the node join_tab from the execution plan in a tmp table. To add a new + record the function calls join_tab->aggr->put_records. + When there is no more records to save, in this + case the end_of_records argument == true, function tells the operation to + send records further by calling aggr->send_records(). + When all records are sent this function passes 'end_of_records' signal + further by calling sub_select() with end_of_records argument set to + true. After that aggr->end_send() is called to tell the operation that + it could end internal buffer scan. 
+ + @note + This function is not expected to be called when dynamic range scan is + used to scan join_tab because range scans aren't used for tmp tables. + + @return + return one of enum_nested_loop_state. +*/ + +enum_nested_loop_state +sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + enum_nested_loop_state rc; + AGGR_OP *aggr= join_tab->aggr; + + /* This function cannot be called if join_tab has no associated aggregation */ + DBUG_ASSERT(aggr != NULL); + + DBUG_ENTER("sub_select_aggr_tab"); + + if (join->thd->killed) + { + /* The user has aborted the execution of the query */ + join->thd->send_kill_message(); + DBUG_RETURN(NESTED_LOOP_KILLED); + } + + if (end_of_records) + { + rc= aggr->end_send(); + if (rc >= NESTED_LOOP_OK) + rc= sub_select(join, join_tab, end_of_records); + DBUG_RETURN(rc); + } + + rc= aggr->put_record(); + + DBUG_RETURN(rc); +} + + /* Fill the join buffer with partial records, retrieve all full matches for them @@ -17906,7 +18035,8 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) if (end_of_records) { rc= cache->join_records(FALSE); - if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS) + if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS || + rc == NESTED_LOOP_QUERY_LIMIT) rc= sub_select(join, join_tab, end_of_records); DBUG_RETURN(rc); } @@ -17933,7 +18063,8 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) without it. If it's not the case remove it. */ rc= cache->join_records(TRUE); - if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS) + if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS || + rc == NESTED_LOOP_QUERY_LIMIT) rc= sub_select(join, join_tab, end_of_records); DBUG_RETURN(rc); } @@ -18020,7 +18151,7 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) is the same as the value of the predicate, otherwise it's just returns true. 
To carry out a return to a nested loop level of join table t the pointer - to t is remembered in the field 'return_tab' of the join structure. + to t is remembered in the field 'return_rtab' of the join structure. Consider the following query: @code SELECT * FROM t1, @@ -18083,7 +18214,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) int error; enum_nested_loop_state rc= NESTED_LOOP_OK; READ_RECORD *info= &join_tab->read_record; - + + for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table; flush_dups_table; flush_dups_table= flush_dups_table->next_flush_table) @@ -18174,7 +18306,6 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) DBUG_RETURN(rc); } - /** @brief Process one row of the nested loop join. @@ -18236,6 +18367,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, condition is true => a match is found. */ join_tab->tracker->r_rows_after_where++; + bool found= 1; while (join_tab->first_unmatched && found) { @@ -18577,11 +18709,11 @@ join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos) !table->no_keyread && (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY) { - table->enable_keyread(); + table->set_keyread(true); tab->index= tab->ref.key; } error=join_read_const(tab); - table->disable_keyread(); + table->set_keyread(false); if (error) { tab->info= ET_UNIQUE_ROW_NOT_FOUND; @@ -19028,12 +19160,26 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab) int join_init_read_record(JOIN_TAB *tab) { - if (tab->select && tab->select->quick && tab->select->quick->reset()) + int error; + /* + Note: the query plan tree for the below operations is constructed in + save_agg_explain_data. + */ + if (tab->distinct && tab->remove_duplicates()) // Remove duplicates. + return 1; + if (tab->filesort && tab->sort_table()) // Sort table. 
return 1; + + if (tab->select && tab->select->quick && (error= tab->select->quick->reset())) + { + /* Ensures error status is propageted back to client */ + report_error(tab->table, error); + return 1; + } if (!tab->preread_init_done && tab->preread_init()) return 1; if (init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort, 1,1, FALSE)) + tab->select, tab->filesort_result, 1,1, FALSE)) return 1; return (*tab->read_record.read_record)(&tab->read_record); } @@ -19051,7 +19197,7 @@ join_read_record_no_init(JOIN_TAB *tab) save_copy_end= tab->read_record.copy_field_end; init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort, 1, 1, FALSE); + tab->select, tab->filesort_result, 1, 1, FALSE); tab->read_record.copy_field= save_copy; tab->read_record.copy_field_end= save_copy_end; @@ -19060,6 +19206,25 @@ join_read_record_no_init(JOIN_TAB *tab) return (*tab->read_record.read_record)(&tab->read_record); } + +/* + Helper function for sorting table with filesort. +*/ + +bool +JOIN_TAB::sort_table() +{ + int rc; + DBUG_PRINT("info",("Sorting for index")); + THD_STAGE_INFO(join->thd, stage_creating_sort_index); + DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ? 
+ JOIN::ordered_index_order_by : + JOIN::ordered_index_group_by)); + rc= create_sort_index(join->thd, join, this, NULL); + return (rc != 0); +} + + static int join_read_first(JOIN_TAB *tab) { @@ -19069,7 +19234,7 @@ join_read_first(JOIN_TAB *tab) if (table->covering_keys.is_set(tab->index) && !table->no_keyread && !table->key_read) - table->enable_keyread(); + table->set_keyread(true); tab->table->status=0; tab->read_record.read_record=join_read_next; tab->read_record.table=table; @@ -19109,7 +19274,7 @@ join_read_last(JOIN_TAB *tab) if (table->covering_keys.is_set(tab->index) && !table->no_keyread && !table->key_read) - table->enable_keyread(); + table->set_keyread(true); tab->table->status=0; tab->read_record.read_record=join_read_prev; tab->read_record.table=table; @@ -19233,16 +19398,21 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records) { DBUG_ENTER("end_send"); + /* + When all tables are const this function is called with jointab == NULL. + This function shouldn't be called for the first join_tab as it needs + to get fields from previous tab. + */ + DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab); + //TODO pass fields via argument + List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields; + if (!end_of_records) { +#if 0 +#endif if (join->table_count && - (join->join_tab->is_using_loose_index_scan() || - /* - When order by used a loose scan as its input, the quick select may - be attached to pre_sort_join_tab. - */ - (join->pre_sort_join_tab && - join->pre_sort_join_tab->is_using_loose_index_scan()))) + join->join_tab->is_using_loose_index_scan()) { /* Copy non-aggregated fields when loose index scan is used. 
*/ copy_fields(&join->tmp_table_param); @@ -19259,7 +19429,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { int error; /* result < 0 if row was not accepted and should not be counted */ - if ((error= join->result->send_data(*join->fields))) + if ((error= join->result->send_data(*fields))) DBUG_RETURN(error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR); } @@ -19268,13 +19438,15 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), !join->do_send_rows) { /* - If filesort is used for sorting, stop after select_limit_cnt+1 - records are read. Because of optimization in some cases it can - provide only select_limit_cnt+1 records. + If we have used Priority Queue for optimizing order by with limit, + then stop here, there are no more records to consume. + When this optimization is used, end_send is called on the next + join_tab. */ - if (join->order && join->sortorder && - join->filesort_found_rows && - join->select_options & OPTION_FOUND_ROWS) + if (join->order && + join->select_options & OPTION_FOUND_ROWS && + join_tab > join->join_tab && + (join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq) { DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT")); DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); @@ -19286,7 +19458,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (join->select_options & OPTION_FOUND_ROWS) { JOIN_TAB *jt=join->join_tab; - if ((join->table_count == 1) && !join->tmp_table && !join->sort_and_group + if ((join->table_count == 1) && !join->sort_and_group && !join->send_group_parts && !join->having && !jt->select_cond && !(jt->select && jt->select->quick) && (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && @@ -19295,10 +19467,9 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), /* Join over all rows in table; Return number of found rows */ TABLE *table=jt->table; - join->select_options ^= OPTION_FOUND_ROWS; - if (jt->filesort) // If filesort was used + if 
(jt->filesort_result) // If filesort was used { - join->send_records= jt->filesort->found_rows; + join->send_records= jt->filesort_result->found_rows; } else { @@ -19349,13 +19520,21 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { int idx= -1; enum_nested_loop_state ok_code= NESTED_LOOP_OK; + List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields; DBUG_ENTER("end_send_group"); + if (!join->items3.is_null() && !join->set_group_rpa) + { + join->set_group_rpa= true; + join->set_items_ref_array(join->items3); + } + if (!join->first_record || end_of_records || (idx=test_if_group_changed(join->group_fields)) >= 0) { - if (join->first_record || - (end_of_records && !join->group && !join->group_optimized_away)) + if (!join->group_sent && + (join->first_record || + (end_of_records && !join->group && !join->group_optimized_away))) { if (join->procedure) join->procedure->end_group(); @@ -19369,7 +19548,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), else { if (join->do_send_rows) - error=join->procedure->send_row(*join->fields) ? 1 : 0; + error=join->procedure->send_row(*fields) ? 
1 : 0; join->send_records++; } if (end_of_records && join->procedure->end_of_records()) @@ -19381,11 +19560,8 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { List_iterator_fast<Item> it(*join->fields); Item *item; - DBUG_PRINT("info", ("no matching rows")); - - /* No matching rows for group function */ - join->clear(); - join->no_rows_in_result_called= 1; + /* No matching rows for group function */ + join->clear(); while ((item= it++)) item->no_rows_in_result(); @@ -19396,7 +19572,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { if (join->do_send_rows) { - error= join->result->send_data(*join->fields); + error=join->result->send_data(*fields); if (error < 0) { /* Duplicate row, don't count */ @@ -19405,6 +19581,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } } join->send_records++; + join->group_sent= true; } if (join->rollup.state != ROLLUP::STATE_NONE && error <= 0) { @@ -19456,6 +19633,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(NESTED_LOOP_ERROR); if (join->procedure) join->procedure->add(); + join->group_sent= false; DBUG_RETURN(ok_code); } } @@ -19472,16 +19650,16 @@ static enum_nested_loop_state end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records) { - TABLE *table=join->tmp_table; + TABLE *const table= join_tab->table; DBUG_ENTER("end_write"); if (!end_of_records) { - copy_fields(&join->tmp_table_param); - if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd)) + copy_fields(join_tab->tmp_table_param); + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ - if (!join->having || join->having->val_int()) + if (!join_tab->having || join_tab->having->val_int()) { int error; join->found_records++; @@ -19491,15 +19669,16 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), goto end; bool is_duplicate; if 
(create_internal_tmp_table_from_heap(join->thd, table, - join->tmp_table_param.start_recinfo, - &join->tmp_table_param.recinfo, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, error, 1, &is_duplicate)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error if (is_duplicate) goto end; table->s->uniques=0; // To ensure rows are the same } - if (++join->send_records >= join->tmp_table_param.end_write_records && + if (++join_tab->send_records >= + join_tab->tmp_table_param->end_write_records && join->do_send_rows) { if (!(join->select_options & OPTION_FOUND_ROWS)) @@ -19534,7 +19713,7 @@ static enum_nested_loop_state end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records) { - TABLE *table=join->tmp_table; + TABLE *const table= join_tab->table; ORDER *group; int error; DBUG_ENTER("end_update"); @@ -19543,7 +19722,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(NESTED_LOOP_OK); join->found_records++; - copy_fields(&join->tmp_table_param); // Groups are copied twice. + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. 
/* Make a key of group index */ for (group=table->group ; group ; group=group->next) { @@ -19563,7 +19742,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), group->buff[-1]= (char) group->field->is_null(); } if (!table->file->ha_index_read_map(table->record[1], - join->tmp_table_param.group_buff, + join_tab->tmp_table_param->group_buff, HA_WHOLE_KEY, HA_READ_KEY_EXACT)) { /* Update old record */ @@ -19579,13 +19758,13 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } init_tmptable_sum_functions(join->sum_funcs); - if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd)) + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if ((error= table->file->ha_write_tmp_row(table->record[0]))) { if (create_internal_tmp_table_from_heap(join->thd, table, - join->tmp_table_param.start_recinfo, - &join->tmp_table_param.recinfo, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, error, 0, NULL)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error /* Change method to update rows */ @@ -19595,9 +19774,9 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(NESTED_LOOP_ERROR); } - join->join_tab[join->top_join_tab_count-1].next_select=end_unique_update; + join_tab->aggr->set_write_func(end_unique_update); } - join->send_records++; + join_tab->send_records++; end: if (join->thd->check_killed()) { @@ -19614,7 +19793,7 @@ static enum_nested_loop_state end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records) { - TABLE *table=join->tmp_table; + TABLE *table= join_tab->table; int error; DBUG_ENTER("end_unique_update"); @@ -19622,12 +19801,12 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(NESTED_LOOP_OK); init_tmptable_sum_functions(join->sum_funcs); - copy_fields(&join->tmp_table_param); // Groups are copied twice. 
- if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd)) + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. + if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if (!(error= table->file->ha_write_tmp_row(table->record[0]))) - join->send_records++; // New group + join_tab->send_records++; // New group else { if ((int) table->file->get_dup_key(error) < 0) @@ -19673,7 +19852,7 @@ enum_nested_loop_state end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records) { - TABLE *table=join->tmp_table; + TABLE *table= join_tab->table; int idx= -1; DBUG_ENTER("end_write_group"); @@ -19687,21 +19866,21 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), int send_group_parts= join->send_group_parts; if (idx < send_group_parts) { - if (!join->first_record) - { - /* No matching rows for group function */ - join->clear(); - } + if (!join->first_record) + { + /* No matching rows for group function */ + join->clear(); + } copy_sum_funcs(join->sum_funcs, join->sum_funcs_end[send_group_parts]); - if (!join->having || join->having->val_int()) + if (!join_tab->having || join_tab->having->val_int()) { int error= table->file->ha_write_tmp_row(table->record[0]); if (error && create_internal_tmp_table_from_heap(join->thd, table, - join->tmp_table_param.start_recinfo, - &join->tmp_table_param.recinfo, - error, 0, NULL)) + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 0, NULL)) DBUG_RETURN(NESTED_LOOP_ERROR); } if (join->rollup.state != ROLLUP::STATE_NONE) @@ -19722,8 +19901,8 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } if (idx < (int) join->send_group_parts) { - copy_fields(&join->tmp_table_param); - if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd)) + copy_fields(join_tab->tmp_table_param); + if 
(copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1])) DBUG_RETURN(NESTED_LOOP_ERROR); @@ -20281,9 +20460,11 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx, if (key_part->field != field || !field->part_of_sortkey.is_set(idx)) DBUG_RETURN(0); + const ORDER::enum_order keypart_order= + (key_part->key_part_flag & HA_REVERSE_SORT) ? + ORDER::ORDER_DESC : ORDER::ORDER_ASC; /* set flag to 1 if we can use read-next on key, else to -1 */ - flag= ((order->asc == !(key_part->key_part_flag & HA_REVERSE_SORT)) ? - 1 : -1); + flag= (order->direction == keypart_order) ? 1 : -1; if (reverse && flag != reverse) DBUG_RETURN(0); reverse=flag; // Remember if reverse @@ -20856,11 +21037,11 @@ check_reverse_order: and best_key doesn't, then revert the decision. */ if (!table->covering_keys.is_set(best_key)) - table->disable_keyread(); + table->set_keyread(false); else { if (!table->key_read) - table->enable_keyread(); + table->set_keyread(true); } if (!quick_created) @@ -20891,7 +21072,7 @@ check_reverse_order: tab->ref.key_parts= 0; if (select_limit < table->stat_records()) tab->limit= select_limit; - table->disable_keyread(); + table->set_keyread(false); } } else if (tab->type != JT_ALL || tab->select->quick) @@ -21015,14 +21196,9 @@ use_filesort: create_sort_index() thd Thread handler join Join with table to sort - order How table should be sorted - filesort_limit Max number of rows that needs to be sorted - select_limit Max number of rows in final output - Used to decide if we should use index or not - is_order_by true if we are sorting on ORDER BY, false if GROUP BY - Used to decide if we should use index or not - - + join_tab What table to sort + fsort Filesort object. NULL means "use tab->filesort". + IMPLEMENTATION - If there is an index that can be used, the first non-const join_tab in 'join' is modified to use this index. 
@@ -21036,150 +21212,72 @@ use_filesort: 1 No records */ -static int -create_sort_index(THD *thd, JOIN *join, ORDER *order, - ha_rows filesort_limit, ha_rows select_limit, - bool is_order_by) +int +create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort) { - uint length; TABLE *table; SQL_SELECT *select; - JOIN_TAB *tab; bool quick_created= FALSE; SORT_INFO *file_sort= 0; DBUG_ENTER("create_sort_index"); - if (join->table_count == join->const_tables) - DBUG_RETURN(0); // One row, no need to sort - tab= join->join_tab + join->const_tables; - table= tab->table; - select= tab->select; - - JOIN_TAB *save_pre_sort_join_tab= NULL; - if (join->pre_sort_join_tab) - { - /* - we've already been in this function, and stashed away the - original access method in join->pre_sort_join_tab, restore it - now. - */ - - /* First, restore state of the handler */ - if (join->pre_sort_index != MAX_KEY) - { - if (table->file->ha_index_or_rnd_end()) - goto err; - if (join->pre_sort_idx_pushed_cond) - { - table->file->idx_cond_push(join->pre_sort_index, - join->pre_sort_idx_pushed_cond); - } - } - else - { - if (table->file->ha_index_or_rnd_end() || - table->file->ha_rnd_init(TRUE)) - goto err; - } - - /* Second, restore access method parameters */ - tab->records= join->pre_sort_join_tab->records; - tab->select= join->pre_sort_join_tab->select; - tab->select_cond= join->pre_sort_join_tab->select_cond; - tab->type= join->pre_sort_join_tab->type; - tab->read_first_record= join->pre_sort_join_tab->read_first_record; - - save_pre_sort_join_tab= join->pre_sort_join_tab; - join->pre_sort_join_tab= NULL; - } - else - { - /* - Save index #, save index condition. 
Do it right now, because MRR may - */ - if (table->file->inited == handler::INDEX) - { - join->pre_sort_index= table->file->active_index; - join->pre_sort_idx_pushed_cond= table->file->pushed_idx_cond; - // no need to save key_read - } - else - join->pre_sort_index= MAX_KEY; - } + if (fsort == NULL) + fsort= tab->filesort; + table= tab->table; + select= fsort->select; + /* Currently ORDER BY ... LIMIT is not supported in subqueries. */ DBUG_ASSERT(join->group_list || !join->is_in_subquery()); - /* - When there is SQL_BIG_RESULT do not sort using index for GROUP BY, - and thus force sorting on disk unless a group min-max optimization - is going to be used as it is applied now only for one table queries - with covering indexes. - The expections is if we are already using the index for GROUP BY - (in which case sort would be free) or ORDER and GROUP BY are different. - */ - if ((order != join->group_list || - !(join->select_options & SELECT_BIG_RESULT) || - (select && select->quick && - select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) && - test_if_skip_sort_order(tab,order,select_limit,0, - is_order_by ? &table->keys_in_use_for_order_by : - &table->keys_in_use_for_group_by)) - { - tab->update_explain_data(join->const_tables); - DBUG_RETURN(0); - } - tab->update_explain_data(join->const_tables); - - /* - Calculate length of join->order as this may be longer than 'order', - which may come from 'group by'. This is needed as join->sortorder is - used both for grouping and ordering. 
- */ - length= 0; - for (ORDER *ord= join->order; ord; ord= ord->next) - length++; - - if (!(join->sortorder= - make_unireg_sortorder(thd, order, &length, join->sortorder))) - goto err; /* purecov: inspected */ - table->status=0; // May be wrong if quick_select if (!tab->preread_init_done && tab->preread_init()) goto err; // If table has a range, move it to select - if (select && !select->quick && tab->ref.key >= 0) + if (select && tab->ref.key >= 0) { - if (tab->quick) + if (!select->quick) { - select->quick=tab->quick; - tab->quick=0; + if (tab->quick) + { + select->quick= tab->quick; + tab->quick= NULL; /* We can only use 'Only index' if quick key is same as ref_key and in index_merge 'Only index' cannot be used */ if (((uint) tab->ref.key != select->quick->index)) - table->disable_keyread(); + table->set_keyread(FALSE); + } + else + { + /* + We have a ref on a const; Change this to a range that filesort + can use. + For impossible ranges (like when doing a lookup on NULL on a NOT NULL + field, quick will contain an empty record set. + */ + if (!(select->quick= (tab->type == JT_FT ? + get_ft_select(thd, table, tab->ref.key) : + get_quick_select_for_ref(thd, table, &tab->ref, + tab->found_records)))) + goto err; + quick_created= TRUE; + } + fsort->own_select= true; } else { - /* - We have a ref on a const; Change this to a range that filesort - can use. - For impossible ranges (like when doing a lookup on NULL on a NOT NULL - field, quick will contain an empty record set. - */ - if (!(select->quick= (tab->type == JT_FT ? 
- get_ft_select(thd, table, tab->ref.key) : - get_quick_select_for_ref(thd, table, &tab->ref, - tab->found_records)))) - goto err; - quick_created= TRUE; + DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF); + // Update ref value + if ((cp_buffer_from_ref(thd, table, &tab->ref) && thd->is_fatal_error)) + goto err; // out of memory } } + /* Fill schema tables with data before filesort if it's necessary */ if ((join->select_lex->options & OPTION_SCHEMA_TABLE) && get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX)) @@ -21187,11 +21285,9 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, if (table->s->tmp_table) table->file->info(HA_STATUS_VARIABLE); // Get record count - file_sort= filesort(thd, table, join->sortorder, length, - select, filesort_limit, 0, - join->explain->ops_tracker.report_sorting(thd)); - DBUG_ASSERT(tab->filesort == 0); - tab->filesort= file_sort; + file_sort= filesort(thd, table, fsort, fsort->tracker); + DBUG_ASSERT(tab->filesort_result == 0); + tab->filesort_result= file_sort; tab->records= 0; if (file_sort) { @@ -21205,42 +21301,18 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, /* This will delete the quick select. 
*/ select->cleanup(); } - - if (!join->pre_sort_join_tab) - { - if (save_pre_sort_join_tab) - join->pre_sort_join_tab= save_pre_sort_join_tab; - else if (!(join->pre_sort_join_tab= (JOIN_TAB*)thd->alloc(sizeof(JOIN_TAB)))) - goto err; - } - - *(join->pre_sort_join_tab)= *tab; - - tab->select=NULL; - tab->set_select_cond(NULL, __LINE__); - tab->type=JT_ALL; // Read with normal read_record - tab->read_first_record= join_init_read_record; - tab->table->file->ha_index_or_rnd_end(); + + table->set_keyread(FALSE); // Restore if we used indexes + if (tab->type == JT_FT) + table->file->ft_end(); + else + table->file->ha_index_or_rnd_end(); DBUG_RETURN(file_sort == 0); err: DBUG_RETURN(-1); } -void JOIN::clean_pre_sort_join_tab() -{ - //TABLE *table= pre_sort_join_tab->table; - /* - Note: we can come here for fake_select_lex object. That object will have - the table already deleted by st_select_lex_unit::cleanup(). - We rely on that fake_select_lex didn't have quick select. - */ - if (pre_sort_join_tab->select && pre_sort_join_tab->select->quick) - { - pre_sort_join_tab->select->cleanup(); - } -} - /** Compare fields from table->record[0] and table->record[1], @@ -21304,22 +21376,28 @@ static void free_blobs(Field **ptr) Rows that do not satisfy 'having' condition are also removed. 
*/ -static int -remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having) +bool +JOIN_TAB::remove_duplicates() + { - int error; + bool error; ulong keylength= 0; uint field_count; + List<Item> *fields= (this-1)->fields; THD *thd= join->thd; DBUG_ENTER("remove_duplicates"); - join->explain->ops_tracker.report_duplicate_removal(); + + DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE); + THD_STAGE_INFO(join->thd, stage_removing_duplicates); + + //join->explain->ops_tracker.report_duplicate_removal(); table->reginfo.lock_type=TL_WRITE; /* Calculate how many saved fields there is in list */ field_count=0; - List_iterator<Item> it(fields); + List_iterator<Item> it(*fields); Item *item; while ((item=it++)) { @@ -21330,7 +21408,7 @@ remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having) if (!field_count && !(join->select_options & OPTION_FOUND_ROWS) && !having) { // only const items with no OPTION_FOUND_ROWS join->unit->select_limit_cnt= 1; // Only send first row - DBUG_RETURN(0); + DBUG_RETURN(false); } Field **first_field=table->field+table->s->fields - field_count; @@ -21558,67 +21636,9 @@ err: } -SORT_FIELD *make_unireg_sortorder(THD *thd, ORDER *order, uint *length, - SORT_FIELD *sortorder) -{ - uint count; - SORT_FIELD *sort,*pos; - DBUG_ENTER("make_unireg_sortorder"); - - count=0; - for (ORDER *tmp = order; tmp; tmp=tmp->next) - count++; - if (!sortorder) - sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * - (MY_MAX(count, *length) + 1)); - pos= sort= sortorder; - - if (!pos) - DBUG_RETURN(0); - - for (;order;order=order->next,pos++) - { - Item *const item= order->item[0], *const real_item= item->real_item(); - pos->field= 0; pos->item= 0; - if (real_item->type() == Item::FIELD_ITEM) - { - // Could be a field, or Item_direct_view_ref wrapping a field - DBUG_ASSERT(item->type() == Item::FIELD_ITEM || - (item->type() == Item::REF_ITEM && - static_cast<Item_ref*>(item)->ref_type() == - 
Item_ref::VIEW_REF)); - pos->field= static_cast<Item_field*>(real_item)->field; - } - else if (real_item->type() == Item::SUM_FUNC_ITEM && - !real_item->const_item()) - { - // Aggregate, or Item_aggregate_ref - DBUG_ASSERT(item->type() == Item::SUM_FUNC_ITEM || - (item->type() == Item::REF_ITEM && - static_cast<Item_ref*>(item)->ref_type() == - Item_ref::AGGREGATE_REF)); - pos->field= item->get_tmp_table_field(); - } - else if (real_item->type() == Item::COPY_STR_ITEM) - { // Blob patch - pos->item= static_cast<Item_copy*>(real_item)->get_item(); - } - else - pos->item= item; - pos->reverse=! order->asc; - DBUG_ASSERT(pos->field != NULL || pos->item != NULL); - } - *length=count; - DBUG_RETURN(sort); -} - - /* eq_ref: Create the lookup key and check if it is the same as saved key - - - SYNOPSIS cmp_buffer_with_ref() tab Join tab of the accessed table @@ -21715,6 +21735,7 @@ cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref) @param[in,out] all_fields All select, group and order by fields @param[in] is_group_field True if order is a GROUP field, false if ORDER by field + @param[in] search_in_all_fields If true then search in all_fields @retval FALSE if OK @@ -21723,9 +21744,9 @@ cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref) */ static bool -find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, ORDER *order, List<Item> &fields, List<Item> &all_fields, - bool is_group_field) + bool is_group_field, bool search_in_all_fields) { Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. 
*/ Item::Type order_item_type; @@ -21751,7 +21772,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, order_item->full_name(), thd->where); return TRUE; } - thd->change_item_tree((Item**)&order->item, (Item*)(ref_pointer_array + count - 1)); + thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]); order->in_field_list= 1; order->counter= count; order->counter_used= 1; @@ -21811,7 +21832,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, 'shadowed' a table field with the same name, the table field will be chosen over the derived field. */ - order->item= ref_pointer_array + counter; + order->item= &ref_pointer_array[counter]; order->in_field_list=1; return FALSE; } @@ -21830,6 +21851,18 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, thd->where); } } + else if (search_in_all_fields) + { + Item **found_item= find_item_in_list(order_item, all_fields, &counter, + REPORT_EXCEPT_NOT_FOUND, &resolution, + all_fields.elements - fields.elements); + if (found_item != not_found_item) + { + order->item= &ref_pointer_array[all_fields.elements-1-counter]; + order->in_field_list= 0; + return FALSE; + } + } order->in_field_list=0; /* @@ -21850,8 +21883,6 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, return TRUE; /* Wrong field. */ uint el= all_fields.elements; - DBUG_ASSERT(all_fields.elements <= - thd->lex->current_select->ref_pointer_array_size); /* Add new field to field list. 
*/ all_fields.push_front(order_item, thd->mem_root); ref_pointer_array[el]= order_item; @@ -21866,7 +21897,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, if (order_item->type() == Item::SUM_FUNC_ITEM) ((Item_sum *)order_item)->ref_by= all_fields.head_ref(); - order->item= ref_pointer_array + el; + order->item= &ref_pointer_array[el]; return FALSE; } @@ -21878,15 +21909,22 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, the field list. */ -int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, - List<Item> &fields, List<Item> &all_fields, ORDER *order) -{ +int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List<Item> &fields, List<Item> &all_fields, ORDER *order, + bool search_in_all_fields) +{ + enum_parsing_place parsing_place= thd->lex->current_select->parsing_place; thd->where="order clause"; for (; order; order=order->next) { if (find_order_in_list(thd, ref_pointer_array, tables, order, fields, - all_fields, FALSE)) + all_fields, FALSE, search_in_all_fields)) + return 1; + if ((*order->item)->with_window_func && parsing_place != IN_ORDER_BY) + { + my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0)); return 1; + } } return 0; } @@ -21895,18 +21933,19 @@ int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, /** Intitialize the GROUP BY list. - @param thd Thread handler - @param ref_pointer_array We store references to all fields that was + @param thd Thread handler + @param ref_pointer_array We store references to all fields that was not in 'fields' here. - @param fields All fields in the select part. Any item in + @param fields All fields in the select part. Any item in 'order' that is part of these list is replaced by a pointer to this fields. - @param all_fields Total list of all unique fields used by the + @param all_fields Total list of all unique fields used by the select. 
All items in 'order' that was not part of fields will be added first to this list. - @param order The fields we should do GROUP BY on. - @param hidden_group_fields Pointer to flag that is set to 1 if we added + @param order The fields we should do GROUP/PARTITION BY on + @param hidden_group_fields Pointer to flag that is set to 1 if we added any fields to all_fields. + @param search_in_all_fields If true then search in all_fields @todo change ER_WRONG_FIELD_WITH_GROUP to more detailed @@ -21919,10 +21958,11 @@ int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, */ int -setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List<Item> &all_fields, ORDER *order, - bool *hidden_group_fields) + bool *hidden_group_fields, bool search_in_all_fields) { + enum_parsing_place parsing_place= thd->lex->current_select->parsing_place; *hidden_group_fields=0; ORDER *ord; @@ -21932,22 +21972,26 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, uint org_fields=all_fields.elements; thd->where="group statement"; - enum_parsing_place save_place= thd->lex->current_select->parsing_place; - thd->lex->current_select->parsing_place= IN_GROUP_BY; for (ord= order; ord; ord= ord->next) { if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields, - all_fields, TRUE)) + all_fields, TRUE, search_in_all_fields)) return 1; (*ord->item)->marker= UNDEF_POS; /* Mark found */ - if ((*ord->item)->with_sum_func) + if ((*ord->item)->with_sum_func && parsing_place == IN_GROUP_BY) { my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name()); return 1; } + if ((*ord->item)->with_window_func) + { + if (parsing_place == IN_GROUP_BY) + my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0)); + else + my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0)); + return 1; + } } - thd->lex->current_select->parsing_place= save_place; - if 
(thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) { /* @@ -22054,14 +22098,16 @@ setup_new_fields(THD *thd, List<Item> &fields, */ ORDER * -create_distinct_group(THD *thd, Item **ref_pointer_array, +create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array, ORDER *order_list, List<Item> &fields, List<Item> &all_fields, bool *all_order_by_fields_used) { List_iterator<Item> li(fields); - Item *item, **orig_ref_pointer_array= ref_pointer_array; + Item *item; + Ref_ptr_array orig_ref_pointer_array= ref_pointer_array; ORDER *order,*group,**prev; + uint idx= 0; *all_order_by_fields_used= 1; while ((item=li++)) @@ -22108,16 +22154,14 @@ create_distinct_group(THD *thd, Item **ref_pointer_array, Because HEAP tables can't index BIT fields we need to use an additional hidden field for grouping because later it will be converted to a LONG field. Original field will remain of the - BIT type and will be returned to a client. + BIT type and will be returned [el]client. */ Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item); int el= all_fields.elements; - DBUG_ASSERT(all_fields.elements <= - thd->lex->current_select->ref_pointer_array_size); orig_ref_pointer_array[el]= new_item; all_fields.push_front(new_item, thd->mem_root); - ord->item= orig_ref_pointer_array + el; - } + ord->item=&orig_ref_pointer_array[el]; + } else { /* @@ -22125,14 +22169,14 @@ create_distinct_group(THD *thd, Item **ref_pointer_array, simple indexing of ref_pointer_array (order in the array and in the list are same) */ - ord->item= ref_pointer_array; + ord->item= &ref_pointer_array[idx]; } - ord->asc=1; + ord->direction= ORDER::ORDER_ASC; *prev=ord; prev= &ord->next; } next_item: - ref_pointer_array++; + idx++; } *prev=0; return group; @@ -22204,7 +22248,7 @@ test_if_subpart(ORDER *a,ORDER *b) for (; a && b; a=a->next,b=b->next) { if ((*a->item)->eq(*b->item,1)) - a->asc=b->asc; + a->direction=b->direction; else return 0; } @@ -22382,9 +22426,9 @@ make_group_fields(JOIN 
*main_join, JOIN *curr_join) /** - Get a list of buffers for saveing last group. + Get a list of buffers for saving last group. - Groups are saved in reverse order for easyer check loop. + Groups are saved in reverse order for easier check loop. */ static bool @@ -22435,8 +22479,13 @@ int test_if_item_cache_changed(List<Cached_item> &list) } +/* + @return + -1 - Group not changed + value>=0 - Number of the component where the group changed +*/ -static int +int test_if_group_changed(List<Cached_item> &list) { DBUG_ENTER("test_if_group_changed"); @@ -22485,7 +22534,7 @@ test_if_group_changed(List<Cached_item> &list) bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, - Item **ref_pointer_array, + Ref_ptr_array ref_pointer_array, List<Item> &res_selected_fields, List<Item> &res_all_fields, uint elements, List<Item> &all_fields) { @@ -22714,7 +22763,8 @@ bool JOIN::alloc_func_list() 1 error */ -bool JOIN::make_sum_func_list(List<Item> &field_list, List<Item> &send_result_set_metadata, +bool JOIN::make_sum_func_list(List<Item> &field_list, + List<Item> &send_result_set_metadata, bool before_group_by, bool recompute) { List_iterator_fast<Item> it(field_list); @@ -22769,7 +22819,7 @@ bool JOIN::make_sum_func_list(List<Item> &field_list, List<Item> &send_result_se */ static bool -change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, +change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &res_selected_fields, List<Item> &res_all_fields, uint elements, List<Item> &all_fields) @@ -22806,14 +22856,6 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, Item_field *new_field= new (thd->mem_root) Item_temptable_field(thd, field); if (!suv || !new_field) DBUG_RETURN(true); // Fatal error - /* - We are replacing the argument of Item_func_set_user_var after - its value has been read. 
The argument's null_value should be - set by now, so we must set it explicitly for the replacement - argument since the null_value may be read without any - preceeding call to val_*(). - */ - new_field->update_null_value(); List<Item> list; list.push_back(new_field, thd->mem_root); suv->set_arguments(thd, list); @@ -22887,7 +22929,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, */ static bool -change_refs_to_tmp_fields(THD *thd, Item **ref_pointer_array, +change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &res_selected_fields, List<Item> &res_all_fields, uint elements, List<Item> &all_fields) @@ -23240,17 +23282,23 @@ bool JOIN::rollup_init() */ tmp_table_param.group_parts= send_group_parts; - if (!(rollup.null_items= (Item_null_result**) thd->alloc((sizeof(Item*) + - sizeof(Item**) + - sizeof(List<Item>) + - ref_pointer_array_size) - * send_group_parts ))) - return 1; - - rollup.fields= (List<Item>*) (rollup.null_items + send_group_parts); - rollup.ref_pointer_arrays= (Item***) (rollup.fields + send_group_parts); + Item_null_result **null_items= + static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts)); + + rollup.null_items= Item_null_array(null_items, send_group_parts); + rollup.ref_pointer_arrays= + static_cast<Ref_ptr_array*> + (thd->alloc((sizeof(Ref_ptr_array) + + all_fields.elements * sizeof(Item*)) * send_group_parts)); + rollup.fields= + static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts)); + + if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields) + return true; + ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts); + /* Prepare space for field list for the different levels These will be filled up in rollup_make_fields() @@ -23260,7 +23308,7 @@ bool JOIN::rollup_init() rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd); List<Item> *rollup_fields= &rollup.fields[i]; rollup_fields->empty(); - rollup.ref_pointer_arrays[i]= ref_array; 
+ rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements); ref_array+= all_fields.elements; } for (i= 0 ; i < send_group_parts; i++) @@ -23407,11 +23455,12 @@ bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields, bool real_fields= 0; Item *item; List_iterator<Item> new_it(rollup.fields[pos]); - Item **ref_array_start= rollup.ref_pointer_arrays[pos]; + Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos]; ORDER *start_group; /* Point to first hidden field */ - Item **ref_array= ref_array_start + fields_arg.elements-1; + uint ref_array_ix= fields_arg.elements-1; + /* Remember where the sum functions ends for the previous level */ sum_funcs_end[pos+1]= *func; @@ -23428,7 +23477,7 @@ bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields, if (item == first_field) { real_fields= 1; // End of hidden fields - ref_array= ref_array_start; + ref_array_ix= 0; } if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() && @@ -23472,15 +23521,15 @@ bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields, } } } - *ref_array= item; + ref_array_start[ref_array_ix]= item; if (real_fields) { (void) new_it++; // Point to next item new_it.replace(item); // Replace previous - ref_array++; + ref_array_ix++; } else - ref_array--; + ref_array_ix--; } } sum_funcs_end[0]= *func; // Point to last function @@ -23513,9 +23562,7 @@ int JOIN::rollup_send_data(uint idx) { int res= 0; /* Get reference pointers to sum functions in place */ - memcpy((char*) ref_pointer_array, - (char*) rollup.ref_pointer_arrays[i], - ref_pointer_array_size); + copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); if ((!having || having->val_int())) { if (send_records < unit->select_limit_cnt && do_send_rows && @@ -23526,7 +23573,7 @@ int JOIN::rollup_send_data(uint idx) } } /* Restore ref_pointer_array */ - set_items_ref_array(current_ref_pointer_array); + set_items_ref_array(current_ref_ptrs); return 0; } @@ 
-23556,9 +23603,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg) for (i= send_group_parts ; i-- > idx ; ) { /* Get reference pointers to sum functions in place */ - memcpy((char*) ref_pointer_array, - (char*) rollup.ref_pointer_arrays[i], - ref_pointer_array_size); + copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); if ((!having || having->val_int())) { int write_error; @@ -23581,7 +23626,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg) } } /* Restore ref_pointer_array */ - set_items_ref_array(current_ref_pointer_array); + set_items_ref_array(current_ref_ptrs); return 0; } @@ -23704,33 +23749,9 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, } -/* - TODO: this function is only applicable for the first non-const optimization - join tab. -*/ - -void JOIN_TAB::update_explain_data(uint idx) -{ - if (this == join->first_breadth_first_optimization_tab() + join->const_tables && - join->select_lex->select_number != INT_MAX && - join->select_lex->select_number != UINT_MAX) - { - Explain_table_access *eta= new (join->thd->mem_root) - Explain_table_access(join->thd->mem_root); - save_explain_data(eta, join->const_table_map, join->select_distinct, - join->first_breadth_first_optimization_tab()); - - Explain_select *sel= join->thd->lex->explain-> - get_select(join->select_lex->select_number); - idx -= my_count_bits(join->eliminated_tables); - sel->replace_table(idx, eta); - } -} - - void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tables, - bool distinct, JOIN_TAB *first_top_tab) + bool distinct_arg, JOIN_TAB *first_top_tab) { int quick_type; CHARSET_INFO *cs= system_charset_info; @@ -23746,6 +23767,21 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, explain_plan= eta; eta->key.clear(); eta->quick_info= NULL; + + SQL_SELECT *tab_select; + /* + We assume that if this table does pre-sorting, then it doesn't do filtering + with SQL_SELECT. 
+ */ + DBUG_ASSERT(!(select && filesort)); + tab_select= (filesort)? filesort->select : select; + + if (filesort) + { + eta->pre_join_sort= new Explain_aggr_filesort(thd->mem_root, + thd->lex->analyze_stmt, + filesort); + } tracker= &eta->tracker; jbuf_tracker= &eta->jbuf_tracker; @@ -23823,9 +23859,9 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, /* "type" column */ enum join_type tab_type= type; if ((type == JT_ALL || type == JT_HASH) && - select && select->quick && use_quick != 2) + tab_select && tab_select->quick && use_quick != 2) { - cur_quick= select->quick; + cur_quick= tab_select->quick; quick_type= cur_quick->get_type(); if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) || @@ -23860,9 +23896,9 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type that still have quick selects. */ - if (select && select->quick && tab_type != JT_CONST) + if (tab_select && tab_select->quick && tab_type != JT_CONST) { - eta->quick_info= select->quick->get_explain(thd->mem_root); + eta->quick_info= tab_select->quick->get_explain(thd->mem_root); } if (key_info) /* 'index' or 'ref' access */ @@ -23986,7 +24022,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, uint keyno= MAX_KEY; if (ref.key_parts) keyno= ref.key; - else if (select && cur_quick) + else if (tab_select && cur_quick) keyno = cur_quick->index; if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno && @@ -24008,7 +24044,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, { eta->push_extra(ET_USING); } - if (select) + if (tab_select) { if (use_quick == 2) { @@ -24018,7 +24054,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, eta->range_checked_fer-> append_possible_keys_stat(thd->mem_root, table, keys); } - else if (select->cond || + else if (tab_select->cond || (cache_select && cache_select->cond)) { const 
COND *pushed_cond= table->file->pushed_cond; @@ -24031,7 +24067,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, } else { - eta->where_cond= select->cond; + eta->where_cond= tab_select->cond; eta->cache_cond= cache_select? cache_select->cond : NULL; eta->push_extra(ET_USING_WHERE); } @@ -24063,7 +24099,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) { QUICK_GROUP_MIN_MAX_SELECT *qgs= - (QUICK_GROUP_MIN_MAX_SELECT *) select->quick; + (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick; eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY); eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning(); } @@ -24075,14 +24111,15 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) { - explain_append_mrr_info((QUICK_RANGE_SELECT*)(select->quick), + explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick), &eta->mrr_type); if (eta->mrr_type.length() > 0) eta->push_extra(ET_USING_MRR); } - if (distinct & test_all_bits(prefix_tables, join->select_list_used_tables)) + if (shortcut_for_distinct) eta->push_extra(ET_DISTINCT); + if (loosescan_match_tab) { eta->push_extra(ET_LOOSESCAN); @@ -24150,14 +24187,69 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, /* + Walk through join->aggr_tables and save aggregation/grouping query plan into + an Explain_select object +*/ + +void save_agg_explain_data(JOIN *join, Explain_select *xpl_sel) +{ + JOIN_TAB *join_tab=join->join_tab + join->top_join_tab_count; + Explain_aggr_node *prev_node; + Explain_aggr_node *node= xpl_sel->aggr_tree; + bool is_analyze= join->thd->lex->analyze_stmt; + THD *thd= join->thd; + + for (uint i= 0; i < join->aggr_tables; i++, join_tab++) + { + // Each aggregate means a temp.table + prev_node= node; + node= new Explain_aggr_tmp_table; + node->child= prev_node; + + if (join_tab->window_funcs_step) + { + Explain_aggr_node *new_node= + 
join_tab->window_funcs_step->save_explain_plan(thd->mem_root, + is_analyze); + if (new_node) + { + prev_node=node; + node= new_node; + node->child= prev_node; + } + } + + /* The below matches execution in join_init_read_record() */ + if (join_tab->distinct) + { + prev_node= node; + node= new Explain_aggr_remove_dups; + node->child= prev_node; + } + + if (join_tab->filesort) + { + Explain_aggr_filesort *eaf = + new Explain_aggr_filesort(thd->mem_root, is_analyze, join_tab->filesort); + prev_node= node; + node= eaf; + node->child= prev_node; + } + } + xpl_sel->aggr_tree= node; +} + + +/* Save Query Plan Footprint @note Currently, this function may be called multiple times */ -int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, - bool need_order, bool distinct, +int JOIN::save_explain_data_intern(Explain_query *output, + bool need_tmp_table_arg, + bool need_order_arg, bool distinct_arg, const char *message) { JOIN *join= this; /* Legacy: this code used to be a non-member function */ @@ -24186,7 +24278,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, explain->select_id= join->select_lex->select_number; explain->select_type= join->select_lex->type; explain->using_temporary= need_tmp; - explain->using_filesort= need_order; + explain->using_filesort= need_order_arg; /* Setting explain->message means that all other members are invalid */ explain->message= message; @@ -24203,7 +24295,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, explain->select_id= select_lex->select_number; explain->select_type= select_lex->type; explain->using_temporary= need_tmp; - explain->using_filesort= need_order; + explain->using_filesort= need_order_arg; explain->message= "Storage engine handles GROUP BY"; if (select_lex->master_unit()->derived) @@ -24223,12 +24315,8 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, xpl_sel->select_type= join->select_lex->type; if 
(select_lex->master_unit()->derived) xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED; - - if (need_tmp_table) - xpl_sel->using_temporary= true; - - if (need_order) - xpl_sel->using_filesort= true; + + save_agg_explain_data(this, xpl_sel); xpl_sel->exec_const_cond= exec_const_cond; xpl_sel->outer_ref_cond= outer_ref_cond; @@ -24238,7 +24326,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, xpl_sel->having= having; xpl_sel->having_value= having_value; - JOIN_TAB* const first_top_tab= join->first_breadth_first_optimization_tab(); + JOIN_TAB* const first_top_tab= join->first_breadth_first_tab(); JOIN_TAB* prev_bush_root_tab= NULL; Explain_basic_join *cur_parent= xpl_sel; @@ -24257,13 +24345,6 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, } - if (join->table_access_tabs == join->join_tab && - tab == (first_top_tab + join->const_tables) && pre_sort_join_tab) - { - saved_join_tab= tab; - tab= pre_sort_join_tab; - } - Explain_table_access *eta= (new (output->mem_root) Explain_table_access(output->mem_root)); @@ -24294,7 +24375,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table, prev_bush_root_tab= tab->bush_root_tab; cur_parent->add_table(eta, output); - tab->save_explain_data(eta, used_tables, distinct, first_top_tab); + tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab); if (saved_join_tab) tab= saved_join_tab; @@ -24354,14 +24435,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, DBUG_ENTER("select_describe"); /* Update the QPF with latest values of using_temporary, using_filesort */ - Explain_select *explain_sel; - uint select_nr= join->select_lex->select_number; - if ((explain_sel= thd->lex->explain->get_select(select_nr))) - { - explain_sel->using_temporary= need_tmp_table; - explain_sel->using_filesort= need_order; - } - for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit(); unit; unit= 
unit->next_unit()) @@ -24425,18 +24498,17 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result) { thd->lex->current_select= first; unit->set_limit(unit->global_parameters()); - res= mysql_select(thd, &first->ref_pointer_array, - first->table_list.first, - first->with_wild, first->item_list, - first->where, - first->order_list.elements + - first->group_list.elements, - first->order_list.first, - first->group_list.first, - first->having, - thd->lex->proc_list.first, - first->options | thd->variables.option_bits | SELECT_DESCRIBE, - result, unit, first); + res= mysql_select(thd, + first->table_list.first, + first->with_wild, first->item_list, + first->where, + first->order_list.elements + first->group_list.elements, + first->order_list.first, + first->group_list.first, + first->having, + thd->lex->proc_list.first, + first->options | thd->variables.option_bits | SELECT_DESCRIBE, + result, unit, first); } DBUG_RETURN(res || thd->is_error()); } @@ -24480,6 +24552,7 @@ static void print_table_array(THD *thd, str->append(STRING_WITH_LEN(" semi join ")); else str->append(STRING_WITH_LEN(" join ")); + curr->print(thd, eliminated_tables, str, query_type); if (curr->on_expr) { @@ -25808,6 +25881,153 @@ err: DBUG_RETURN(0); } +/**************************************************************************** + AGGR_OP implementation +****************************************************************************/ + +/** + @brief Instantiate tmp table for aggregation and start index scan if needed + @todo Tmp table always would be created, even for empty result. Extend + executor to avoid tmp table creation when no rows were written + into tmp table. 
+ @return + true error + false ok +*/ + +bool +AGGR_OP::prepare_tmp_table() +{ + TABLE *table= join_tab->table; + JOIN *join= join_tab->join; + int rc= 0; + + if (!join_tab->table->is_created()) + { + if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + join->select_options)) + return true; + (void) table->file->extra(HA_EXTRA_WRITE_CACHE); + empty_record(table); + } + /* If it wasn't already, start index scan for grouping using table index. */ + if (!table->file->inited && table->group && + join_tab->tmp_table_param->sum_func_count && table->s->keys) + rc= table->file->ha_index_init(0, 0); + else + { + /* Start index scan in scanning mode */ + rc= table->file->ha_rnd_init(true); + } + if (rc) + { + table->file->print_error(rc, MYF(0)); + return true; + } + return false; +} + + +/** + @brief Prepare table if necessary and call write_func to save record + + @param end_of_records the end_of_record signal to pass to the writer + + @return return one of enum_nested_loop_state. +*/ + +enum_nested_loop_state +AGGR_OP::put_record(bool end_of_records) +{ + // Lasy tmp table creation/initialization + if (!join_tab->table->file->inited) + prepare_tmp_table(); + enum_nested_loop_state rc= (*write_func)(join_tab->join, join_tab, + end_of_records); + return rc; +} + + +/** + @brief Finish rnd/index scan after accumulating records, switch ref_array, + and send accumulated records further. + @return return one of enum_nested_loop_state. 
+*/ + +enum_nested_loop_state +AGGR_OP::end_send() +{ + enum_nested_loop_state rc= NESTED_LOOP_OK; + TABLE *table= join_tab->table; + JOIN *join= join_tab->join; + + // All records were stored, send them further + int tmp, new_errno= 0; + + if ((rc= put_record(true)) < NESTED_LOOP_OK) + return rc; + + if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE))) + { + DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed")); + new_errno= tmp; + } + if ((tmp= table->file->ha_index_or_rnd_end())) + { + DBUG_PRINT("error",("ha_index_or_rnd_end() failed")); + new_errno= tmp; + } + if (new_errno) + { + table->file->print_error(new_errno,MYF(0)); + return NESTED_LOOP_ERROR; + } + + // Update ref array + join_tab->join->set_items_ref_array(*join_tab->ref_array); + if (join_tab->window_funcs_step) + { + if (join_tab->window_funcs_step->exec(join)) + return NESTED_LOOP_ERROR; + } + + table->reginfo.lock_type= TL_UNLOCK; + + bool in_first_read= true; + while (rc == NESTED_LOOP_OK) + { + int error; + if (in_first_read) + { + in_first_read= false; + error= join_init_read_record(join_tab); + } + else + error= join_tab->read_record.read_record(&join_tab->read_record); + + if (error > 0 || (join->thd->is_error())) // Fatal error + rc= NESTED_LOOP_ERROR; + else if (error < 0) + break; + else if (join->thd->killed) // Aborted by user + { + join->thd->send_kill_message(); + rc= NESTED_LOOP_KILLED; + } + else + rc= evaluate_join_record(join, join_tab, 0); + } + + // Finish rnd scn after sending records + if (join_tab->table->file->inited) + join_tab->table->file->ha_rnd_end(); + + return rc; +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 86c5ef87d89..c143d58c2e8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -34,6 +34,7 @@ #include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ #include "filesort.h" +typedef struct st_join_table JOIN_TAB; /* Values in optimize */ #define KEY_OPTIMIZE_EXISTS 1 #define KEY_OPTIMIZE_REF_OR_NULL 2 @@ 
-184,7 +185,7 @@ enum sj_strategy_enum typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); -Next_select_func setup_end_select_func(JOIN *join); +Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); int rr_sequential_and_unpack(READ_RECORD *info); @@ -198,9 +199,11 @@ int rr_sequential_and_unpack(READ_RECORD *info); class JOIN_CACHE; class SJ_TMP_TABLE; class JOIN_TAB_RANGE; +class AGGR_OP; +class Filesort; typedef struct st_join_table { - st_join_table() {} /* Remove gcc warning */ + st_join_table() {} TABLE *table; TABLE_LIST *tab_list; KEYUSE *keyuse; /**< pointer to first used key */ @@ -238,7 +241,6 @@ typedef struct st_join_table { For join tabs that are inside an SJM bush: root of the bush */ st_join_table *bush_root_tab; - SORT_INFO *filesort; /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ bool last_leaf_in_bush; @@ -262,6 +264,7 @@ typedef struct st_join_table { */ uint packed_info; + // READ_RECORD::Setup_func materialize_table; READ_RECORD::Setup_func read_first_record; Next_select_func next_select; READ_RECORD read_record; @@ -348,6 +351,7 @@ typedef struct st_join_table { */ Item *cache_idx_cond; SQL_SELECT *cache_select; + AGGR_OP *aggr; JOIN *join; /* Embedding SJ-nest (may be not the direct parent), or NULL if none. @@ -414,6 +418,46 @@ typedef struct st_join_table { /* NestedOuterJoins: Bitmap of nested joins this table is part of */ nested_join_map embedding_map; + /* Tmp table info */ + TMP_TABLE_PARAM *tmp_table_param; + + /* Sorting related info */ + Filesort *filesort; + SORT_INFO *filesort_result; + + /* + Non-NULL value means this join_tab must do window function computation + before reading. + */ + Window_funcs_computation* window_funcs_step; + + /** + List of topmost expressions in the select list. The *next* JOIN TAB + in the plan should use it to obtain correct values. Same applicable to + all_fields. 
These lists are needed because after tmp tables functions + will be turned to fields. These variables are pointing to + tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp + table in the query plan. + @see JOIN::make_tmp_tables_info() + */ + List<Item> *fields; + /** List of all expressions in the select list */ + List<Item> *all_fields; + /* + Pointer to the ref array slice which to switch to before sending + records. Valid only for tmp tables. + */ + Ref_ptr_array *ref_array; + + /** Number of records saved in tmp table */ + ha_rows send_records; + + /** HAVING condition for checking prior saving a record into tmp table*/ + Item *having; + + /** TRUE <=> remove duplicates on this table. */ + bool distinct; + /* Semi-join strategy to be used for this join table. This is a copy of POSITION::sj_strategy field. This field is set up by the @@ -428,9 +472,9 @@ typedef struct st_join_table { void cleanup(); inline bool is_using_loose_index_scan() { - return (select && select->quick && - (select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); + const SQL_SELECT *sel= filesort ? filesort->select : select; + return (sel && sel->quick && + (sel->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); } bool is_using_agg_loose_index_scan () { @@ -565,16 +609,22 @@ typedef struct st_join_table { void save_explain_data(Explain_table_access *eta, table_map prefix_tables, bool distinct, struct st_join_table *first_top_tab); - void update_explain_data(uint idx); + bool use_order() const; ///< Use ordering provided by chosen index? 
+ bool sort_table(); + bool remove_duplicates(); + } JOIN_TAB; #include "sql_join_cache.h" -enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool - end_of_records); -enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool - end_of_records); +enum_nested_loop_state +sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); @@ -869,12 +919,14 @@ typedef struct st_position Sj_materialization_picker sjmat_picker; } POSITION; +typedef Bounds_checked_array<Item_null_result*> Item_null_array; + typedef struct st_rollup { enum State { STATE_NONE, STATE_INITED, STATE_READY }; State state; - Item_null_result **null_items; - Item ***ref_pointer_arrays; + Item_null_array null_items; + Ref_ptr_array *ref_pointer_arrays; List<Item> *fields; } ROLLUP; @@ -888,6 +940,56 @@ public: class Pushdown_query; +/** + @brief + Class to perform postjoin aggregation operations + + @details + The result records are obtained on the put_record() call. + The aggrgation process is determined by the write_func, it could be: + end_write Simply store all records in tmp table. + end_write_group Perform grouping using join->group_fields, + records are expected to be sorted. + end_update Perform grouping using the key generated on tmp + table. Input records aren't expected to be sorted. + Tmp table uses the heap engine + end_update_unique Same as above, but the engine is myisam. + + Lazy table initialization is used - the table will be instantiated and + rnd/index scan started on the first put_record() call. 
+ +*/ + +class AGGR_OP :public Sql_alloc +{ +public: + JOIN_TAB *join_tab; + + AGGR_OP(JOIN_TAB *tab) : join_tab(tab), write_func(NULL) + {}; + + enum_nested_loop_state put_record() { return put_record(false); }; + /* + Send the result of operation further (to a next operation/client) + This function is called after all records were put into tmp table. + + @return return one of enum_nested_loop_state values. + */ + enum_nested_loop_state end_send(); + /** write_func setter */ + void set_write_func(Next_select_func new_write_func) + { + write_func= new_write_func; + } + +private: + /** Write function that would be used for saving records in tmp table. */ + Next_select_func write_func; + enum_nested_loop_state put_record(bool end_of_records); + bool prepare_tmp_table(); +}; + + class JOIN :public Sql_alloc { private: @@ -956,33 +1058,11 @@ protected: public: JOIN_TAB *join_tab, **best_ref; - - /* - Saved join_tab for pre_sorting. create_sort_index() will save here.. - */ - JOIN_TAB *pre_sort_join_tab; - uint pre_sort_index; - Item *pre_sort_idx_pushed_cond; - void clean_pre_sort_join_tab(); /* List of fields that aren't under an aggregate function */ List<Item_field> non_agg_fields; - /* - For "Using temporary+Using filesort" queries, JOIN::join_tab can point to - either: - 1. array of join tabs describing how to run the select, or - 2. array of single join tab describing read from the temporary table. - - SHOW EXPLAIN code needs to read/show #1. This is why two next members are - there for saving it. - */ - JOIN_TAB *table_access_tabs; - uint top_table_access_tabs_count; - JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs - JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution - List<JOIN_TAB_RANGE> join_tab_ranges; /* @@ -1013,14 +1093,9 @@ public: We keep it here so that it is saved/restored with JOIN::restore_tmp. 
*/ uint top_join_tab_count; + uint aggr_tables; ///< Number of post-join tmp tables uint send_group_parts; /* - This counts how many times do_select() was invoked for this JOIN. - It's used to restrict Pushdown_query::execute() only to the first - do_select() invocation. - */ - uint do_select_call_count; - /* True if the query has GROUP BY. (that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it will set this, too. It is not clear why we need a separate var from @@ -1125,6 +1200,7 @@ public: */ table_map complex_firstmatch_tables; + Next_select_func first_select; /* The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into @@ -1140,9 +1216,6 @@ public: double join_record_count; List<Item> *fields; List<Cached_item> group_fields, group_fields_cache; - TABLE *tmp_table; - /// used to store 2 possible tmp table of SELECT - TABLE *exec_tmp_table1, *exec_tmp_table2; THD *thd; Item_sum **sum_funcs, ***sum_funcs_end; /** second copy of sumfuncs (for queries with 2 temporary tables */ @@ -1151,6 +1224,8 @@ public: Item *having; Item *tmp_having; ///< To store having when processed temporary table Item *having_history; ///< Store having for explain + ORDER *group_list_for_estimates; + bool having_is_correlated; ulonglong select_options; /* Bitmap of allowed types of the join caches that @@ -1189,26 +1264,6 @@ public: */ bool filesort_found_rows; - /** - Copy of this JOIN to be used with temporary tables. - - tmp_join is used when the JOIN needs to be "reusable" (e.g. in a - subquery that gets re-executed several times) and we know will use - temporary tables for materialization. The materialization to a - temporary table overwrites the JOIN structure to point to the - temporary table after the materialization is done. 
This is where - tmp_join is used : it's a copy of the JOIN before the - materialization and is used in restoring before re-execution by - overwriting the current JOIN structure with the saved copy. - Because of this we should pay extra care of not freeing up helper - structures that are referenced by the original contents of the - JOIN. We can check for this by making sure the "current" join is - not the temporary copy, e.g. !tmp_join || tmp_join != join - - We should free these sub-structures at JOIN::destroy() if the - "current" join has a copy is not that copy. - */ - JOIN *tmp_join; ROLLUP rollup; ///< Used with rollup bool mixed_implicit_grouping; @@ -1230,6 +1285,19 @@ public: GROUP/ORDER BY. */ bool simple_order, simple_group; + + /* + ordered_index_usage is set if an ordered index access + should be used instead of a filesort when computing + ORDER/GROUP BY. + */ + enum + { + ordered_index_void, // No ordered index avail. + ordered_index_group_by, // Use index for GROUP BY + ordered_index_order_by // Use index for ORDER BY + } ordered_index_usage; + /** Is set only in case if we have a GROUP BY clause and no ORDER BY after constant elimination of 'order'. @@ -1282,10 +1350,19 @@ public: List<Item> exec_const_order_group_cond; SQL_SELECT *select; ///<created in optimisation phase JOIN_TAB *return_tab; ///<used only for outer joins - Item **ref_pointer_array; ///<used pointer reference for this select - // Copy of above to be used with different lists - Item **items0, **items1, **items2, **items3, **current_ref_pointer_array; - uint ref_pointer_array_size; ///< size of above in bytes + + /* + Used pointer reference for this select. 
+ select_lex->ref_pointer_array contains five "slices" of the same length: + |========|========|========|========|========| + ref_ptrs items0 items1 items2 items3 + */ + Ref_ptr_array ref_ptrs; + // Copy of the initial slice above, to be used with different lists + Ref_ptr_array items0, items1, items2, items3; + // Used by rollup, to restore ref_ptrs after overwriting it. + Ref_ptr_array current_ref_ptrs; + const char *zero_result_cause; ///< not 0 if exec must return zero result bool union_part; ///< this subselect is part of union @@ -1312,20 +1389,12 @@ public: /* SJM nests that are executed with SJ-Materialization strategy */ List<SJ_MATERIALIZATION_INFO> sjm_info_list; - /* - storage for caching buffers allocated during query execution. - These buffers allocations need to be cached as the thread memory pool is - cleared only at the end of the execution of the whole query and not caching - allocations that occur in repetition at execution time will result in - excessive memory usage. - Note: make_simple_join always creates an execution plan that accesses - a single table, thus it is sufficient to have a one-element array for - table_reexec. - */ - SORT_FIELD *sortorder; // make_unireg_sortorder() - TABLE *table_reexec[1]; // make_simple_join() - JOIN_TAB *join_tab_reexec; // make_simple_join() - /* end of allocation caching storage */ + /** TRUE <=> ref_pointer_array is set to items3. 
*/ + bool set_group_rpa; + /** Exec time only: TRUE <=> current group has been sent */ + bool group_sent; + + JOIN_TAB *sort_and_group_aggr_tab; JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) @@ -1337,12 +1406,13 @@ public: void init(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) { - join_tab= join_tab_save= 0; + join_tab= 0; table= 0; table_count= 0; top_join_tab_count= 0; const_tables= 0; const_table_map= 0; + aggr_tables= 0; eliminated_tables= 0; join_list= 0; implicit_grouping= FALSE; @@ -1352,25 +1422,21 @@ public: send_records= 0; found_records= 0; fetch_limit= HA_POS_ERROR; - join_examined_rows= 0; - exec_tmp_table1= 0; - exec_tmp_table2= 0; - sortorder= 0; - table_reexec[0]= 0; - join_tab_reexec= 0; thd= thd_arg; sum_funcs= sum_funcs2= 0; procedure= 0; having= tmp_having= having_history= 0; + having_is_correlated= false; + group_list_for_estimates= 0; select_options= select_options_arg; result= result_arg; lock= thd_arg->lock; select_lex= 0; //for safety - tmp_join= 0; select_distinct= MY_TEST(select_options & SELECT_DISTINCT); no_order= 0; simple_order= 0; simple_group= 0; + ordered_index_usage= ordered_index_void; need_distinct= 0; skip_sort_order= 0; need_tmp= 0; @@ -1378,8 +1444,11 @@ public: error= 0; select= 0; return_tab= 0; - ref_pointer_array= items0= items1= items2= items3= 0; - ref_pointer_array_size= 0; + ref_ptrs.reset(); + items0.reset(); + items1.reset(); + items2.reset(); + items3.reset(); zero_result_cause= 0; optimized= 0; have_query_plan= QEP_NOT_PRESENT_YET; @@ -1393,8 +1462,6 @@ public: positions= best_positions= 0; pushdown_query= 0; original_join_tab= 0; - do_select_call_count= 0; - explain= NULL; all_fields= fields_arg; @@ -1407,22 +1474,21 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; + first_select= sub_select; + set_group_rpa= false; + group_sent= 0; + outer_ref_cond= pseudo_bits_cond= NULL; 
in_to_exists_where= NULL; in_to_exists_having= NULL; - pre_sort_join_tab= NULL; emb_sjm_nest= NULL; sjm_lookup_tables= 0; - - /* - The following is needed because JOIN::cleanup(true) may be called for - joins for which JOIN::optimize was aborted with an error before a proper - query plan was produced - */ - table_access_tabs= NULL; } - int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, + /* True if the plan guarantees that it will be returned zero or one row */ + bool only_const_tables() { return const_tables == table_count; } + + int prepare(TABLE_LIST *tables, uint wind_num, COND *conds, uint og_num, ORDER *order, bool skip_order_by, ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select, SELECT_LEX_UNIT *unit); @@ -1432,7 +1498,9 @@ public: int reinit(); int init_execution(); void exec(); + void exec_inner(); + bool prepare_result(List<Item> **columns_list); int destroy(); void restore_tmp(); bool alloc_func_list(); @@ -1442,16 +1510,42 @@ public: bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields, bool before_group_by, bool recompute= FALSE); - inline void set_items_ref_array(Item **ptr) + /// Initialzes a slice, see comments for ref_ptrs above. + Ref_ptr_array ref_ptr_array_slice(size_t slice_num) { - memcpy((char*) ref_pointer_array, (char*) ptr, ref_pointer_array_size); - current_ref_pointer_array= ptr; + size_t slice_sz= select_lex->ref_pointer_array.size() / 5U; + DBUG_ASSERT(select_lex->ref_pointer_array.size() % 5 == 0); + DBUG_ASSERT(slice_num < 5U); + return Ref_ptr_array(&select_lex->ref_pointer_array[slice_num * slice_sz], + slice_sz); } - inline void init_items_ref_array() + + /** + Overwrites one slice with the contents of another slice. + In the normal case, dst and src have the same size(). + However: the rollup slices may have smaller size than slice_sz. 
+ */ + void copy_ref_ptr_array(Ref_ptr_array dst_arr, Ref_ptr_array src_arr) + { + DBUG_ASSERT(dst_arr.size() >= src_arr.size()); + void *dest= dst_arr.array(); + const void *src= src_arr.array(); + memcpy(dest, src, src_arr.size() * src_arr.element_size()); + } + + /// Overwrites 'ref_ptrs' and remembers the the source as 'current'. + void set_items_ref_array(Ref_ptr_array src_arr) { - items0= ref_pointer_array + all_fields.elements; - memcpy(items0, ref_pointer_array, ref_pointer_array_size); - current_ref_pointer_array= items0; + copy_ref_ptr_array(ref_ptrs, src_arr); + current_ref_ptrs= src_arr; + } + + /// Initializes 'items0' and remembers that it is 'current'. + void init_items_ref_array() + { + items0= ref_ptr_array_slice(1); + copy_ref_ptr_array(items0, ref_ptrs); + current_ref_ptrs= items0; } bool rollup_init(); @@ -1460,18 +1554,10 @@ public: Item_sum ***func); int rollup_send_data(uint idx); int rollup_write_data(uint idx, TABLE *table); - /** - Release memory and, if possible, the open tables held by this execution - plan (and nested plans). It's used to release some tables before - the end of execution in order to increase concurrency and reduce - memory consumption. 
- */ void join_free(); /** Cleanup this JOIN, possibly for reuse */ void cleanup(bool full); void clear(); - bool save_join_tab(); - bool init_save_join_tab(); bool send_row_on_empty_set() { return (do_send_rows && implicit_grouping && !group_optimized_away && @@ -1490,6 +1576,8 @@ public: return (table_map(1) << table_count) - 1; } void drop_unused_derived_keys(); + bool get_best_combination(); + bool add_sorting_to_table(JOIN_TAB *tab, ORDER *order); inline void eval_select_list_used_tables(); /* Return the table for which an index scan can be used to satisfy @@ -1551,16 +1639,44 @@ public: int save_explain_data_intern(Explain_query *output, bool need_tmp_table, bool need_order, bool distinct, const char *message); - JOIN_TAB *first_breadth_first_optimization_tab() { return table_access_tabs; } - JOIN_TAB *first_breadth_first_execution_tab() { return join_tab; } + JOIN_TAB *first_breadth_first_tab() { return join_tab; } private: /** + Create a temporary table to be used for processing DISTINCT/ORDER + BY/GROUP BY. + + @note Will modify JOIN object wrt sort/group attributes + + @param tab the JOIN_TAB object to attach created table to + @param tmp_table_fields List of items that will be used to define + column types of the table. + @param tmp_table_group Group key to use for temporary table, NULL if none. + @param save_sum_fields If true, do not replace Item_sum items in + @c tmp_fields list with Item_field items referring + to fields in temporary table. + + @returns false on success, true on failure + */ + bool create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *tmp_table_fields, + ORDER *tmp_table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_ordermake); + /** + Optimize distinct when used on a subset of the tables. 
+ + E.g.,: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b + In this case we can stop scanning t2 when we have found one t1.a + */ + void optimize_distinct(); + + /** TRUE if the query contains an aggregate function but has no GROUP BY clause. */ bool implicit_grouping; - bool make_simple_join(JOIN *join, TABLE *tmp_table); void cleanup_item_list(List<Item> &items) const; + bool make_aggr_tables_info(); }; enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; @@ -1585,7 +1701,7 @@ extern const char *join_type_str[]; void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, List<Item> &fields, bool reset_with_sum_func); bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, - Item **ref_pointer_array, + Ref_ptr_array ref_pointer_array, List<Item> &new_list1, List<Item> &new_list2, uint elements, List<Item> &fields); void copy_fields(TMP_TABLE_PARAM *param); @@ -1826,19 +1942,20 @@ int safe_index_read(JOIN_TAB *tab); int get_quick_record(SQL_SELECT *select); SORT_FIELD * make_unireg_sortorder(THD *thd, ORDER *order, uint *length, SORT_FIELD *sortorder); -int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, - List<Item> &fields, List <Item> &all_fields, ORDER *order); -int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List<Item> &fields, List <Item> &all_fields, ORDER *order, + bool search_in_all_fields= true); +int setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List<Item> &all_fields, ORDER *order, - bool *hidden_group_fields); + bool *hidden_group_fields, bool search_in_all_fields= true); bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array); + Ref_ptr_array ref_pointer_array); int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table, struct st_table_ref *table_ref); bool handle_select(THD *thd, LEX *lex, 
select_result *result, ulong setup_tables_done_option); -bool mysql_select(THD *thd, Item ***rref_pointer_array, +bool mysql_select(THD *thd, TABLE_LIST *tables, uint wild_num, List<Item> &list, COND *conds, uint og_num, ORDER *order, ORDER *group, Item *having, ORDER *proc_param, ulonglong select_type, @@ -2154,4 +2271,7 @@ public: int execute(JOIN *join); }; +bool test_if_order_compatible(SQL_I_List<ORDER> &a, SQL_I_List<ORDER> &b); +int test_if_group_changed(List<Cached_item> &list); +int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_show.cc b/sql/sql_show.cc index aa2b47fa4b7..f41fb394b47 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -2960,8 +2960,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) thread in this thread. However it's better that we notice it eventually than hide it. */ - table->field[12]->store((longlong) (tmp->status_var.local_memory_used + - sizeof(THD)), + table->field[12]->store((longlong) tmp->status_var.local_memory_used, FALSE); table->field[12]->set_notnull(); table->field[13]->store((longlong) tmp->get_examined_row_count(), TRUE); @@ -3044,7 +3043,7 @@ int add_status_vars(SHOW_VAR *list) if (status_vars_inited) mysql_mutex_lock(&LOCK_show_status); if (!all_status_vars.buffer && // array is not allocated yet - do it now - my_init_dynamic_array(&all_status_vars, sizeof(SHOW_VAR), 200, 20, MYF(0))) + my_init_dynamic_array(&all_status_vars, sizeof(SHOW_VAR), 250, 50, MYF(0))) { res= 1; goto err; @@ -3257,7 +3256,8 @@ static bool show_status_array(THD *thd, const char *wild, */ for (var=variables; var->type == SHOW_FUNC || var->type == SHOW_SIMPLE_FUNC; var= &tmp) - ((mysql_show_var_func)(var->value))(thd, &tmp, buff, scope); + ((mysql_show_var_func)(var->value))(thd, &tmp, buff, + status_var, scope); SHOW_TYPE show_type=var->type; if (show_type == SHOW_ARRAY) @@ -3389,10 +3389,14 @@ end: DBUG_RETURN(res); } -/* collect status for 
all running threads */ +/* + collect status for all running threads + Return number of threads used +*/ -void calc_sum_of_all_status(STATUS_VAR *to) +uint calc_sum_of_all_status(STATUS_VAR *to) { + uint count= 0; DBUG_ENTER("calc_sum_of_all_status"); /* Ensure that thread id not killed during loop */ @@ -3403,16 +3407,21 @@ void calc_sum_of_all_status(STATUS_VAR *to) /* Get global values as base */ *to= global_status_var; + to->local_memory_used= 0; /* Add to this status from existing threads */ while ((tmp= it++)) { + count++; if (!tmp->status_in_global) + { add_to_status(to, &tmp->status_var); + to->local_memory_used+= tmp->status_var.local_memory_used; + } } mysql_mutex_unlock(&LOCK_thread_count); - DBUG_VOID_RETURN; + DBUG_RETURN(count); } diff --git a/sql/sql_show.h b/sql/sql_show.h index 9dae78e7f0e..dbae2a42b39 100644 --- a/sql/sql_show.h +++ b/sql/sql_show.h @@ -102,7 +102,7 @@ bool mysqld_show_authors(THD *thd); bool mysqld_show_contributors(THD *thd); bool mysqld_show_privileges(THD *thd); char *make_backup_log_name(char *buff, const char *name, const char* log_ext); -void calc_sum_of_all_status(STATUS_VAR *to); +uint calc_sum_of_all_status(STATUS_VAR *to); void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user, const LEX_STRING *definer_host); int add_status_vars(SHOW_VAR *list); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index dad51139af3..758757ea7dd 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -9351,7 +9351,6 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, int error= 1; Copy_field *copy= NULL, *copy_end; ha_rows found_count= 0, delete_count= 0; - SORT_FIELD *sortorder; SORT_INFO *file_sort= 0; READ_RECORD info; TABLE_LIST tables; @@ -9441,7 +9440,6 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, } else { - uint length= 0; bzero((char *) &tables, sizeof(tables)); tables.table= from; tables.alias= tables.table_name= from->s->table_name.str; @@ -9449,14 +9447,14 @@ 
copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, THD_STAGE_INFO(thd, stage_sorting); Filesort_tracker dummy_tracker(false); + Filesort fsort(order, HA_POS_ERROR, true, NULL); + if (thd->lex->select_lex.setup_ref_array(thd, order_num) || setup_order(thd, thd->lex->select_lex.ref_pointer_array, - &tables, fields, all_fields, order) || - !(sortorder= make_unireg_sortorder(thd, order, &length, NULL)) || - !(file_sort= filesort(thd, from, sortorder, length, - NULL, HA_POS_ERROR, - true, - &dummy_tracker))) + &tables, fields, all_fields, order)) + goto err; + + if (!(file_sort= filesort(thd, from, &fsort, &dummy_tracker))) goto err; } thd_progress_next_stage(thd); @@ -9670,6 +9668,18 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, bool table_copy) } +static void flush_checksum(ha_checksum *row_crc, uchar **checksum_start, + size_t *checksum_length) +{ + if (*checksum_start) + { + *row_crc= my_checksum(*row_crc, *checksum_start, *checksum_length); + *checksum_start= NULL; + *checksum_length= 0; + } +} + + bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt) { @@ -9746,23 +9756,23 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, if (!(check_opt->flags & T_EXTEND) && (((t->file->ha_table_flags() & HA_HAS_OLD_CHECKSUM) && thd->variables.old_mode) || ((t->file->ha_table_flags() & HA_HAS_NEW_CHECKSUM) && !thd->variables.old_mode))) - protocol->store((ulonglong)t->file->checksum()); + protocol->store((ulonglong)t->file->checksum()); else if (check_opt->flags & T_QUICK) - protocol->store_null(); + protocol->store_null(); else { - /* calculating table's checksum */ - ha_checksum crc= 0; + /* calculating table's checksum */ + ha_checksum crc= 0; uchar null_mask=256 - (1 << t->s->last_null_bit_pos); t->use_all_columns(); - if (t->file->ha_rnd_init(1)) - protocol->store_null(); - else - { - for (;;) - { + if (t->file->ha_rnd_init(1)) + protocol->store_null(); + else + { + for (;;) + { if (thd->killed) { /* @@ -9773,7 
+9783,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, thd->protocol->remove_last_row(); goto err; } - ha_checksum row_crc= 0; + ha_checksum row_crc= 0; int error= t->file->ha_rnd_next(t->record[0]); if (unlikely(error)) { @@ -9781,22 +9791,27 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, continue; break; } - if (t->s->null_bytes) + if (t->s->null_bytes) { /* fix undefined null bits */ t->record[0][t->s->null_bytes-1] |= null_mask; if (!(t->s->db_create_options & HA_OPTION_PACK_RECORD)) t->record[0][0] |= 1; - row_crc= my_checksum(row_crc, t->record[0], t->s->null_bytes); + row_crc= my_checksum(row_crc, t->record[0], t->s->null_bytes); } - for (uint i= 0; i < t->s->fields; i++ ) - { - Field *f= t->field[i]; + uchar *checksum_start= NULL; + size_t checksum_length= 0; + for (uint i= 0; i < t->s->fields; i++ ) + { + Field *f= t->field[i]; if (! thd->variables.old_mode && f->is_real_null(0)) + { + flush_checksum(&row_crc, &checksum_start, &checksum_length); continue; + } /* BLOB and VARCHAR have pointers in their field, we must convert to string; GEOMETRY is implemented on top of BLOB. 
@@ -9808,6 +9823,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, case MYSQL_TYPE_GEOMETRY: case MYSQL_TYPE_BIT: { + flush_checksum(&row_crc, &checksum_start, &checksum_length); String tmp; f->val_str(&tmp); row_crc= my_checksum(row_crc, (uchar*) tmp.ptr(), @@ -9815,16 +9831,20 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, break; } default: - row_crc= my_checksum(row_crc, f->ptr, f->pack_length()); + if (!checksum_start) + checksum_start= f->ptr; + DBUG_ASSERT(checksum_start + checksum_length == f->ptr); + checksum_length+= f->pack_length(); break; - } - } + } + } + flush_checksum(&row_crc, &checksum_start, &checksum_length); - crc+= row_crc; - } - protocol->store((ulonglong)crc); + crc+= row_crc; + } + protocol->store((ulonglong)crc); t->file->ha_rnd_end(); - } + } } trans_rollback_stmt(thd); close_thread_tables(thd); diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 642cf208908..50d51dcc8cc 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -559,16 +559,19 @@ C_MODE_END void mysql_print_status() { char current_dir[FN_REFLEN]; + char llbuff[10][22]; STATUS_VAR tmp; + uint count; - calc_sum_of_all_status(&tmp); + count= calc_sum_of_all_status(&tmp); printf("\nStatus information:\n\n"); (void) my_getwd(current_dir, sizeof(current_dir),MYF(0)); printf("Current dir: %s\n", current_dir); - printf("Running threads: %d Stack size: %ld\n", thread_count, + printf("Running threads: %d Cached threads: %lu Stack size: %ld\n", + count, cached_thread_count, (long) my_thread_stack_size); +#ifdef EXTRA_DEBUG thr_print_locks(); // Write some debug info -#ifndef DBUG_OFF print_cached_tables(); #endif /* Print key cache status */ @@ -614,28 +617,33 @@ Next alarm time: %lu\n", #ifdef HAVE_MALLINFO struct mallinfo info= mallinfo(); printf("\nMemory status:\n\ -Non-mmapped space allocated from system: %d\n\ -Number of free chunks: %d\n\ -Number of fastbin blocks: %d\n\ -Number of mmapped regions: %d\n\ -Space in mmapped regions: %d\n\ -Maximum total 
allocated space: %d\n\ -Space available in freed fastbin blocks: %d\n\ -Total allocated space: %d\n\ -Total free space: %d\n\ -Top-most, releasable space: %d\n\ -Estimated memory (with thread stack): %ld\n", - (int) info.arena , - (int) info.ordblks, - (int) info.smblks, - (int) info.hblks, - (int) info.hblkhd, - (int) info.usmblks, - (int) info.fsmblks, - (int) info.uordblks, - (int) info.fordblks, - (int) info.keepcost, - (long) (thread_count * my_thread_stack_size + info.hblkhd + info.arena)); +Non-mmapped space allocated from system: %s\n\ +Number of free chunks: %lu\n\ +Number of fastbin blocks: %lu\n\ +Number of mmapped regions: %lu\n\ +Space in mmapped regions: %s\n\ +Maximum total allocated space: %s\n\ +Space available in freed fastbin blocks: %s\n\ +Total allocated space: %s\n\ +Total free space: %s\n\ +Top-most, releasable space: %s\n\ +Estimated memory (with thread stack): %s\n\ +Global memory allocated by server: %s\n\ +Memory allocated by threads: %s\n", + llstr(info.arena, llbuff[0]), + (ulong) info.ordblks, + (ulong) info.smblks, + (ulong) info.hblks, + llstr(info.hblkhd, llbuff[1]), + llstr(info.usmblks, llbuff[2]), + llstr(info.fsmblks, llbuff[3]), + llstr(info.uordblks, llbuff[4]), + llstr(info.fordblks, llbuff[5]), + llstr(info.keepcost, llbuff[6]), + llstr((count + cached_thread_count)* my_thread_stack_size + info.hblkhd + info.arena, llbuff[7]), + llstr(tmp.global_memory_used, llbuff[8]), + llstr(tmp.local_memory_used, llbuff[9])); + #endif #ifdef HAVE_EVENT_SCHEDULER diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 7a61279fc9c..63093620805 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -1581,7 +1581,6 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db, DBUG_RETURN(0); err_with_lex_cleanup: - // QQ: anything else ? 
lex_end(&lex); thd->lex= old_lex; thd->spcont= save_spcont; diff --git a/sql/sql_union.cc b/sql/sql_union.cc index 5685c90850a..87b836f40d9 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -436,8 +436,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result, can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit); - saved_error= join->prepare(&sl->ref_pointer_array, - sl->table_list.first, + saved_error= join->prepare(sl->table_list.first, sl->with_wild, sl->where, (can_skip_order_by ? 0 : @@ -646,8 +645,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result, fake_select_lex->n_child_sum_items+= global_parameters()->n_sum_items; saved_error= fake_select_lex->join-> - prepare(&fake_select_lex->ref_pointer_array, - fake_select_lex->table_list.first, + prepare(fake_select_lex->table_list.first, 0, 0, global_parameters()->order_list.elements, // og_num global_parameters()->order_list.first, // order @@ -702,7 +700,7 @@ bool st_select_lex_unit::optimize() { item->assigned(0); // We will reinit & rexecute unit item->reset(); - if (table->created) + if (table->is_created()) { table->file->ha_delete_all_rows(); table->file->info(HA_STATUS_VARIABLE); @@ -946,13 +944,13 @@ bool st_select_lex_unit::exec() Don't add more sum_items if we have already done JOIN::prepare for this (with a different join object) */ - if (!fake_select_lex->ref_pointer_array) + if (fake_select_lex->ref_pointer_array.is_null()) fake_select_lex->n_child_sum_items+= global_parameters()->n_sum_items; if (!was_executed) save_union_explain_part2(thd->lex->explain); - saved_error= mysql_select(thd, &fake_select_lex->ref_pointer_array, + saved_error= mysql_select(thd, &result_table_list, 0, item_list, NULL, global_parameters()->order_list.elements, @@ -975,7 +973,7 @@ bool st_select_lex_unit::exec() to reset them back, we re-do all of the actions (yes it is ugly): */ // psergey-todo: is the above really necessary anymore?? 
join->init(thd, item_list, fake_select_lex->options, result); - saved_error= mysql_select(thd, &fake_select_lex->ref_pointer_array, + saved_error= mysql_select(thd, &result_table_list, 0, item_list, NULL, global_parameters()->order_list.elements, @@ -1022,27 +1020,11 @@ bool st_select_lex_unit::cleanup() } cleaned= 1; - if (union_result) - { - delete union_result; - union_result=0; // Safety - if (table) - free_tmp_table(thd, table); - table= 0; // Safety - } - for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) error|= sl->cleanup(); if (fake_select_lex) { - JOIN *join; - if ((join= fake_select_lex->join)) - { - join->tables_list= 0; - join->table_count= 0; - join->top_join_tab_count= 0; - } error|= fake_select_lex->cleanup(); /* There are two cases when we should clean order items: @@ -1064,6 +1046,15 @@ bool st_select_lex_unit::cleanup() } } + if (union_result) + { + delete union_result; + union_result=0; // Safety + if (table) + free_tmp_table(thd, table); + table= 0; // Safety + } + DBUG_RETURN(error); } diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 61c16a905fe..6c60350844e 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -341,7 +341,8 @@ int mysql_update(THD *thd, if (table_list->is_view()) unfix_fields(fields); - if (setup_fields_with_no_wrap(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0)) + if (setup_fields_with_no_wrap(thd, Ref_ptr_array(), + fields, MARK_COLUMNS_WRITE, 0, 0)) DBUG_RETURN(1); /* purecov: inspected */ if (table_list->view && check_fields(thd, fields)) { @@ -360,7 +361,7 @@ int mysql_update(THD *thd, table_list->grant.want_privilege= table->grant.want_privilege= (SELECT_ACL & ~table->grant.privilege); #endif - if (setup_fields(thd, 0, values, MARK_COLUMNS_READ, 0, 0)) + if (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_READ, 0, 0)) { free_underlaid_joins(thd, select_lex); DBUG_RETURN(1); /* purecov: inspected */ @@ -557,17 +558,12 @@ int mysql_update(THD *thd, to update NOTE: filesort will call 
table->prepare_for_position() */ - uint length= 0; - SORT_FIELD *sortorder; + Filesort fsort(order, limit, true, select); Filesort_tracker *fs_tracker= thd->lex->explain->get_upd_del_plan()->filesort_tracker; - if (!(sortorder=make_unireg_sortorder(thd, order, &length, NULL)) || - !(file_sort= filesort(thd, table, sortorder, length, - select, limit, - true, - fs_tracker))) + if (!(file_sort= filesort(thd, table, &fsort, fs_tracker))) goto err; thd->inc_examined_row_count(file_sort->examined_rows); @@ -696,7 +692,7 @@ int mysql_update(THD *thd, if (error >= 0) goto err; } - table->disable_keyread(); + table->set_keyread(false); table->column_bitmaps_set(save_read_set, save_write_set); } @@ -1050,7 +1046,7 @@ err: delete select; delete file_sort; free_underlaid_joins(thd, select_lex); - table->disable_keyread(); + table->set_keyread(false); thd->abort_on_warning= 0; DBUG_RETURN(1); @@ -1424,7 +1420,8 @@ int mysql_multi_update_prepare(THD *thd) if (lex->select_lex.handle_derived(thd->lex, DT_MERGE)) DBUG_RETURN(TRUE); - if (setup_fields_with_no_wrap(thd, 0, *fields, MARK_COLUMNS_WRITE, 0, 0)) + if (setup_fields_with_no_wrap(thd, Ref_ptr_array(), + *fields, MARK_COLUMNS_WRITE, 0, 0)) DBUG_RETURN(TRUE); for (tl= table_list; tl ; tl= tl->next_local) @@ -1611,7 +1608,7 @@ bool mysql_multi_update(THD *thd, thd->abort_on_warning= thd->is_strict_mode(); List<Item> total_list; - res= mysql_select(thd, &select_lex->ref_pointer_array, + res= mysql_select(thd, table_list, select_lex->with_wild, total_list, conds, 0, (ORDER *) NULL, (ORDER *)NULL, (Item *) NULL, @@ -1707,7 +1704,8 @@ int multi_update::prepare(List<Item> ¬_used_values, reference tables */ - int error= setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0); + int error= setup_fields(thd, Ref_ptr_array(), + *values, MARK_COLUMNS_READ, 0, 0); ti.rewind(); while ((table_ref= ti++)) @@ -2034,7 +2032,7 @@ loop_end: /* Make an unique key over the first field to avoid duplicated updates */ bzero((char*) &group, 
sizeof(group)); - group.asc= 1; + group.direction= ORDER::ORDER_ASC; group.item= (Item**) temp_fields.head_ref(); tmp_param->quick_group=1; diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 41fd5b78f04..b66f678adfc 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -1612,6 +1612,8 @@ bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table, sl->context.error_processor_data= (void *)table; } + table->select_lex->master_unit()->is_view= true; + /* check MERGE algorithm ability - algorithm is not explicit TEMPORARY TABLE diff --git a/sql/sql_window.cc b/sql/sql_window.cc new file mode 100644 index 00000000000..e3e42fc4f75 --- /dev/null +++ b/sql/sql_window.cc @@ -0,0 +1,2129 @@ +#include "sql_select.h" +#include "sql_list.h" +#include "item_windowfunc.h" +#include "filesort.h" +#include "sql_base.h" +#include "sql_window.h" + + +bool +Window_spec::check_window_names(List_iterator_fast<Window_spec> &it) +{ + if (window_names_are_checked) + return false; + char *name= this->name(); + char *ref_name= window_reference(); + it.rewind(); + Window_spec *win_spec; + while((win_spec= it++) && win_spec != this) + { + char *win_spec_name= win_spec->name(); + if (!win_spec_name) + break; + if (name && my_strcasecmp(system_charset_info, name, win_spec_name) == 0) + { + my_error(ER_DUP_WINDOW_NAME, MYF(0), name); + return true; + } + if (ref_name && + my_strcasecmp(system_charset_info, ref_name, win_spec_name) == 0) + { + if (partition_list->elements) + { + my_error(ER_PARTITION_LIST_IN_REFERENCING_WINDOW_SPEC, MYF(0), + ref_name); + return true; + } + if (win_spec->order_list->elements && order_list->elements) + { + my_error(ER_ORDER_LIST_IN_REFERENCING_WINDOW_SPEC, MYF(0), ref_name); + return true; + } + if (win_spec->window_frame) + { + my_error(ER_WINDOW_FRAME_IN_REFERENCED_WINDOW_SPEC, MYF(0), ref_name); + return true; + } + referenced_win_spec= win_spec; + if (partition_list->elements == 0) + partition_list= win_spec->partition_list; + if 
(order_list->elements == 0) + order_list= win_spec->order_list; + } + } + if (ref_name && !referenced_win_spec) + { + my_error(ER_WRONG_WINDOW_SPEC_NAME, MYF(0), ref_name); + return true; + } + window_names_are_checked= true; + return false; +} + +bool +Window_frame::check_frame_bounds() +{ + if ((top_bound->is_unbounded() && + top_bound->precedence_type == Window_frame_bound::FOLLOWING) || + (bottom_bound->is_unbounded() && + bottom_bound->precedence_type == Window_frame_bound::PRECEDING) || + (top_bound->precedence_type == Window_frame_bound::CURRENT && + bottom_bound->precedence_type == Window_frame_bound::PRECEDING) || + (bottom_bound->precedence_type == Window_frame_bound::CURRENT && + top_bound->precedence_type == Window_frame_bound::FOLLOWING)) + { + my_error(ER_BAD_COMBINATION_OF_WINDOW_FRAME_BOUND_SPECS, MYF(0)); + return true; + } + + return false; +} + + +/* + Setup window functions in a select +*/ + +int +setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, + List<Item> &fields, List<Item> &all_fields, + List<Window_spec> &win_specs, List<Item_window_func> &win_funcs) +{ + Window_spec *win_spec; + DBUG_ENTER("setup_windows"); + List_iterator<Window_spec> it(win_specs); + + /* + Move all unnamed specifications after the named ones. + We could have avoided it if we had built two separate lists for + named and unnamed specifications. 
+ */ + Query_arena *arena, backup; + arena= thd->activate_stmt_arena_if_needed(&backup); + uint i = 0; + uint elems= win_specs.elements; + while ((win_spec= it++) && i++ < elems) + { + if (win_spec->name() == NULL) + { + it.remove(); + win_specs.push_back(win_spec); + } + } + if (arena) + thd->restore_active_arena(arena, &backup); + + it.rewind(); + + List_iterator_fast<Window_spec> itp(win_specs); + + while ((win_spec= it++)) + { + bool hidden_group_fields; + if (win_spec->check_window_names(itp) || + setup_group(thd, ref_pointer_array, tables, fields, all_fields, + win_spec->partition_list->first, &hidden_group_fields, + true) || + setup_order(thd, ref_pointer_array, tables, fields, all_fields, + win_spec->order_list->first, true) || + (win_spec->window_frame && + win_spec->window_frame->check_frame_bounds())) + { + DBUG_RETURN(1); + } + + if (win_spec->window_frame && + win_spec->window_frame->exclusion != Window_frame::EXCL_NONE) + { + my_error(ER_FRAME_EXCLUSION_NOT_SUPPORTED, MYF(0)); + DBUG_RETURN(1); + } + /* + For "win_func() OVER (ORDER BY order_list RANGE BETWEEN ...)", + - ORDER BY order_list must not be ommitted + - the list must have a single element. + */ + if (win_spec->window_frame && + win_spec->window_frame->units == Window_frame::UNITS_RANGE) + { + if (win_spec->order_list->elements != 1) + { + my_error(ER_RANGE_FRAME_NEEDS_SIMPLE_ORDERBY, MYF(0)); + DBUG_RETURN(1); + } + + /* + "The declared type of SK shall be numeric, datetime, or interval" + we don't support datetime or interval, yet. 
+ */ + Item_result rtype= win_spec->order_list->first->item[0]->result_type(); + if (rtype != REAL_RESULT && rtype != INT_RESULT && + rtype != DECIMAL_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0)); + DBUG_RETURN(1); + } + + /* + "The declared type of UVS shall be numeric if the declared type of SK + is numeric; otherwise, it shall be an interval type that may be added + to or subtracted from the declared type of SK" + */ + Window_frame_bound *bounds[]= {win_spec->window_frame->top_bound, + win_spec->window_frame->bottom_bound, + NULL}; + for (Window_frame_bound **pbound= &bounds[0]; *pbound; pbound++) + { + if (!(*pbound)->is_unbounded() && + ((*pbound)->precedence_type == Window_frame_bound::FOLLOWING || + (*pbound)->precedence_type == Window_frame_bound::PRECEDING)) + { + Item_result rtype= (*pbound)->offset->result_type(); + if (rtype != REAL_RESULT && rtype != INT_RESULT && + rtype != DECIMAL_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0)); + DBUG_RETURN(1); + } + } + } + } + + /* "ROWS PRECEDING|FOLLOWING $n" must have a numeric $n */ + if (win_spec->window_frame && + win_spec->window_frame->units == Window_frame::UNITS_ROWS) + { + Window_frame_bound *bounds[]= {win_spec->window_frame->top_bound, + win_spec->window_frame->bottom_bound, + NULL}; + for (Window_frame_bound **pbound= &bounds[0]; *pbound; pbound++) + { + if (!(*pbound)->is_unbounded() && + ((*pbound)->precedence_type == Window_frame_bound::FOLLOWING || + (*pbound)->precedence_type == Window_frame_bound::PRECEDING)) + { + Item *offset= (*pbound)->offset; + if (offset->result_type() != INT_RESULT) + { + my_error(ER_WRONG_TYPE_FOR_ROWS_FRAME, MYF(0)); + DBUG_RETURN(1); + } + } + } + } + } + + List_iterator_fast<Item_window_func> li(win_funcs); + Item_window_func *win_func_item; + while ((win_func_item= li++)) + { + win_func_item->update_used_tables(); + } + + DBUG_RETURN(0); +} + +///////////////////////////////////////////////////////////////////////////// +// Sorting 
window functions to minimize the number of table scans +// performed during the computation of these functions +///////////////////////////////////////////////////////////////////////////// + +#define CMP_LT -2 // Less than +#define CMP_LT_C -1 // Less than and compatible +#define CMP_EQ 0 // Equal to +#define CMP_GT_C 1 // Greater than and compatible +#define CMP_GT 2 // Greater then + +static +int compare_order_elements(ORDER *ord1, ORDER *ord2) +{ + if (*ord1->item == *ord2->item) + return CMP_EQ; + Item *item1= (*ord1->item)->real_item(); + Item *item2= (*ord2->item)->real_item(); + DBUG_ASSERT(item1->type() == Item::FIELD_ITEM && + item2->type() == Item::FIELD_ITEM); + int cmp= ((Item_field *) item1)->field - ((Item_field *) item2)->field; + if (cmp == 0) + { + if (ord1->direction == ord2->direction) + return CMP_EQ; + return ord1->direction > ord2->direction ? CMP_GT : CMP_LT; + } + else + return cmp > 0 ? CMP_GT : CMP_LT; +} + +static +int compare_order_lists(SQL_I_List<ORDER> *part_list1, + SQL_I_List<ORDER> *part_list2) +{ + if (part_list1 == part_list2) + return CMP_EQ; + ORDER *elem1= part_list1->first; + ORDER *elem2= part_list2->first; + for ( ; elem1 && elem2; elem1= elem1->next, elem2= elem2->next) + { + int cmp; + if ((cmp= compare_order_elements(elem1, elem2))) + return cmp; + } + if (elem1) + return CMP_GT_C; + if (elem2) + return CMP_LT_C; + return CMP_EQ; +} + + +static +int compare_window_frame_bounds(Window_frame_bound *win_frame_bound1, + Window_frame_bound *win_frame_bound2, + bool is_bottom_bound) +{ + int res; + if (win_frame_bound1->precedence_type != win_frame_bound2->precedence_type) + { + res= win_frame_bound1->precedence_type > win_frame_bound2->precedence_type ? 
+ CMP_GT : CMP_LT; + if (is_bottom_bound) + res= -res; + return res; + } + + if (win_frame_bound1->is_unbounded() && win_frame_bound2->is_unbounded()) + return CMP_EQ; + + if (!win_frame_bound1->is_unbounded() && !win_frame_bound2->is_unbounded()) + { + if (win_frame_bound1->offset->eq(win_frame_bound2->offset, true)) + return CMP_EQ; + else + { + res= strcmp(win_frame_bound1->offset->name, + win_frame_bound2->offset->name); + res= res > 0 ? CMP_GT : CMP_LT; + if (is_bottom_bound) + res= -res; + return res; + } + } + + /* + Here we have: + win_frame_bound1->is_unbounded() != win_frame_bound1->is_unbounded() + */ + return is_bottom_bound != win_frame_bound1->is_unbounded() ? CMP_LT : CMP_GT; +} + + +static +int compare_window_frames(Window_frame *win_frame1, + Window_frame *win_frame2) +{ + int cmp; + + if (win_frame1 == win_frame2) + return CMP_EQ; + + if (!win_frame1) + return CMP_LT; + + if (!win_frame2) + return CMP_GT; + + if (win_frame1->units != win_frame2->units) + return win_frame1->units > win_frame2->units ? CMP_GT : CMP_LT; + + cmp= compare_window_frame_bounds(win_frame1->top_bound, + win_frame2->top_bound, + false); + if (cmp) + return cmp; + + cmp= compare_window_frame_bounds(win_frame1->bottom_bound, + win_frame2->bottom_bound, + true); + if (cmp) + return cmp; + + if (win_frame1->exclusion != win_frame2->exclusion) + return win_frame1->exclusion > win_frame2->exclusion ? 
CMP_GT_C : CMP_LT_C; + + return CMP_EQ; +} + +static +int compare_window_spec_joined_lists(Window_spec *win_spec1, + Window_spec *win_spec2) +{ + win_spec1->join_partition_and_order_lists(); + win_spec2->join_partition_and_order_lists(); + int cmp= compare_order_lists(win_spec1->partition_list, + win_spec2->partition_list); + win_spec1->disjoin_partition_and_order_lists(); + win_spec2->disjoin_partition_and_order_lists(); + return cmp; +} + + +static +int compare_window_funcs_by_window_specs(Item_window_func *win_func1, + Item_window_func *win_func2, + void *arg) +{ + int cmp; + Window_spec *win_spec1= win_func1->window_spec; + Window_spec *win_spec2= win_func2->window_spec; + if (win_spec1 == win_spec2) + return CMP_EQ; + cmp= compare_order_lists(win_spec1->partition_list, + win_spec2->partition_list); + if (cmp == CMP_EQ) + { + /* + Partition lists contain the same elements. + Let's use only one of the lists. + */ + if (!win_spec1->name() && win_spec2->name()) + win_spec1->partition_list= win_spec2->partition_list; + else + win_spec2->partition_list= win_spec1->partition_list; + + cmp= compare_order_lists(win_spec1->order_list, + win_spec2->order_list); + + if (cmp != CMP_EQ) + return cmp; + + /* + Order lists contain the same elements. + Let's use only one of the lists. + */ + if (!win_spec1->name() && win_spec2->name()) + win_spec1->order_list= win_spec2->order_list; + else + win_spec2->order_list= win_spec1->order_list; + + cmp= compare_window_frames(win_spec1->window_frame, + win_spec2->window_frame); + + if (cmp != CMP_EQ) + return cmp; + + /* Window frames are equal. Let's use only one of them. 
*/ + if (!win_spec1->name() && win_spec2->name()) + win_spec1->window_frame= win_spec2->window_frame; + else + win_spec2->window_frame= win_spec1->window_frame; + + return CMP_EQ; + } + + if (cmp == CMP_GT || cmp == CMP_LT) + return cmp; + + /* one of the partitions lists is the proper beginning of the another */ + cmp= compare_window_spec_joined_lists(win_spec1, win_spec2); + + if (CMP_LT_C <= cmp && cmp <= CMP_GT_C) + cmp= win_spec1->partition_list->elements < + win_spec2->partition_list->elements ? CMP_GT_C : CMP_LT_C; + + return cmp; +} + + +#define SORTORDER_CHANGE_FLAG 1 +#define PARTITION_CHANGE_FLAG 2 +#define FRAME_CHANGE_FLAG 4 + +typedef int (*Item_window_func_cmp)(Item_window_func *f1, + Item_window_func *f2, + void *arg); +/* + @brief + Sort window functions so that those that can be computed together are + adjacent. + + @detail + Sort window functions by their + - required sorting order, + - partition list, + - window frame compatibility. + + The changes between the groups are marked by setting item_window_func->marker. 
+*/ + +static +void order_window_funcs_by_window_specs(List<Item_window_func> *win_func_list) +{ + if (win_func_list->elements == 0) + return; + + bubble_sort<Item_window_func>(win_func_list, + compare_window_funcs_by_window_specs, + NULL); + + List_iterator_fast<Item_window_func> it(*win_func_list); + Item_window_func *prev= it++; + prev->marker= SORTORDER_CHANGE_FLAG | + PARTITION_CHANGE_FLAG | + FRAME_CHANGE_FLAG; + Item_window_func *curr; + while ((curr= it++)) + { + Window_spec *win_spec_prev= prev->window_spec; + Window_spec *win_spec_curr= curr->window_spec; + curr->marker= 0; + if (!(win_spec_prev->partition_list == win_spec_curr->partition_list && + win_spec_prev->order_list == win_spec_curr->order_list)) + { + int cmp; + if (win_spec_prev->partition_list == win_spec_curr->partition_list) + cmp= compare_order_lists(win_spec_prev->order_list, + win_spec_curr->order_list); + else + cmp= compare_window_spec_joined_lists(win_spec_prev, win_spec_curr); + if (!(CMP_LT_C <= cmp && cmp <= CMP_GT_C)) + { + curr->marker= SORTORDER_CHANGE_FLAG | + PARTITION_CHANGE_FLAG | + FRAME_CHANGE_FLAG; + } + else if (win_spec_prev->partition_list != win_spec_curr->partition_list) + { + curr->marker|= PARTITION_CHANGE_FLAG | FRAME_CHANGE_FLAG; + } + } + else if (win_spec_prev->window_frame != win_spec_curr->window_frame) + curr->marker|= FRAME_CHANGE_FLAG; + + prev= curr; + } +} + + +///////////////////////////////////////////////////////////////////////////// + + +/* + Do a pass over sorted table and compute window function values. + + This function is for handling window functions that can be computed on the + fly. Examples are RANK() and ROW_NUMBER(). 
+*/ +bool compute_window_func_values(Item_window_func *item_win, + TABLE *tbl, READ_RECORD *info) +{ + int err; + while (!(err=info->read_record(info))) + { + store_record(tbl,record[1]); + + /* + This will cause window function to compute its value for the + current row : + */ + item_win->advance_window(); + + /* + Put the new value into temptable's field + TODO: Should this use item_win->update_field() call? + Regular aggegate function implementations seem to implement it. + */ + item_win->save_in_field(item_win->result_field, true); + err= tbl->file->ha_update_row(tbl->record[1], tbl->record[0]); + if (err && err != HA_ERR_RECORD_IS_THE_SAME) + return true; + } + return false; +} + +///////////////////////////////////////////////////////////////////////////// +// Window Frames support +///////////////////////////////////////////////////////////////////////////// + +// note: make rr_from_pointers static again when not need it here anymore +int rr_from_pointers(READ_RECORD *info); + +/* + A temporary way to clone READ_RECORD structures until Monty provides the real + one. +*/ +bool clone_read_record(const READ_RECORD *src, READ_RECORD *dst) +{ + //DBUG_ASSERT(src->table->sort.record_pointers); + DBUG_ASSERT(src->read_record == rr_from_pointers); + memcpy(dst, src, sizeof(READ_RECORD)); + return false; +} + +///////////////////////////////////////////////////////////////////////////// + + +/* + A cursor over a sequence of rowids. One can + - Move to next rowid + - jump to given number in the sequence + - Know the number of the current rowid (i.e. 
how many rowids have been read) +*/ + +class Rowid_seq_cursor +{ + uchar *cache_start; + uchar *cache_pos; + uchar *cache_end; + uint ref_length; + +public: + virtual ~Rowid_seq_cursor() {} + + void init(READ_RECORD *info) + { + cache_start= info->cache_pos; + cache_pos= info->cache_pos; + cache_end= info->cache_end; + ref_length= info->ref_length; + } + + virtual int get_next() + { + /* Allow multiple get_next() calls in EOF state*/ + if (cache_pos == cache_end) + return -1; + cache_pos+= ref_length; + return 0; + } + + ha_rows get_rownum() + { + return (cache_pos - cache_start) / ref_length; + } + + // will be called by ROWS n FOLLOWING to catch up. + void move_to(ha_rows row_number) + { + cache_pos= cache_start + row_number * ref_length; + } +protected: + bool at_eof() { return (cache_pos == cache_end); } + + uchar *get_last_rowid() + { + if (cache_pos == cache_start) + return NULL; + else + return cache_pos - ref_length; + } + + uchar *get_curr_rowid() { return cache_pos; } +}; + + +/* + Cursor which reads from rowid sequence and also retrieves table rows. +*/ + +class Table_read_cursor : public Rowid_seq_cursor +{ + /* + Note: we don't own *read_record, somebody else is using it. + We only look at the constant part of it, e.g. table, record buffer, etc. + */ + READ_RECORD *read_record; +public: + virtual ~Table_read_cursor() {} + + void init(READ_RECORD *info) + { + Rowid_seq_cursor::init(info); + read_record= info; + } + + virtual int get_next() + { + if (at_eof()) + return -1; + + uchar* curr_rowid= get_curr_rowid(); + int res= Rowid_seq_cursor::get_next(); + if (!res) + { + res= read_record->table->file->ha_rnd_pos(read_record->record, + curr_rowid); + } + return res; + } + + bool restore_last_row() + { + uchar *p; + if ((p= get_last_rowid())) + { + int rc= read_record->table->file->ha_rnd_pos(read_record->record, p); + if (!rc) + return true; // restored ok + } + return false; // didn't restore + } + + // todo: should move_to() also read row here? 
+}; + + +/* + A cursor which only moves within a partition. The scan stops at the partition + end, and it needs an explicit command to move to the next partition. +*/ + +class Partition_read_cursor +{ + Table_read_cursor tbl_cursor; + Group_bound_tracker bound_tracker; + bool end_of_partition; +public: + void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list) + { + tbl_cursor.init(info); + bound_tracker.init(thd, partition_list); + end_of_partition= false; + } + + /* + Informs the cursor that we need to move into the next partition. + The next partition is provided in two ways: + - in table->record[0].. + - rownum parameter has the row number. + */ + void on_next_partition(int rownum) + { + /* Remember the sort key value from the new partition */ + bound_tracker.check_if_next_group(); + end_of_partition= false; + } + + /* + Moves to a new row. The row is assumed to be within the current partition + */ + void move_to(int rownum) { tbl_cursor.move_to(rownum); } + + /* + This returns -1 when end of partition was reached. + */ + int get_next() + { + int res; + if (end_of_partition) + return -1; + if ((res= tbl_cursor.get_next())) + return res; + + if (bound_tracker.compare_with_cache()) + { + end_of_partition= true; + return -1; + } + return 0; + } + + bool restore_last_row() + { + return tbl_cursor.restore_last_row(); + } +}; + +///////////////////////////////////////////////////////////////////////////// + + +/* + Window frame bound cursor. Abstract interface. + + @detail + The cursor moves within the partition that the current row is in. + It may be ahead or behind the current row. + + The cursor also assumes that the current row moves forward through the + partition and will move to the next adjacent partition after this one. 
+ + List of all cursor classes: + Frame_cursor + Frame_range_n_top + Frame_range_n_bottom + + Frame_range_current_row_top + Frame_range_current_row_bottom + + Frame_n_rows_preceding + Frame_n_rows_following + + Frame_rows_current_row_top = Frame_n_rows_preceding(0) + Frame_rows_current_row_bottom + + // These handle both RANGE and ROWS-type bounds + Frame_unbounded_preceding + Frame_unbounded_following + + // This is not used as a frame bound, it counts rows in the partition: + Frame_unbounded_following_set_count : public Frame_unbounded_following + + @todo + - if we want to allocate this on the MEM_ROOT we should make sure + it is not re-allocated for every subquery execution. +*/ + +class Frame_cursor : public Sql_alloc +{ +public: + virtual void init(THD *thd, READ_RECORD *info, + SQL_I_List<ORDER> *partition_list, + SQL_I_List<ORDER> *order_list) + {} + + /* + Current row has moved to the next partition and is positioned on the first + row there. Position the frame bound accordingly. + + @param first - TRUE means this is the first partition + @param item - Put or remove rows from there. + + @detail + - if first==false, the caller guarantees that tbl->record[0] points at the + first row in the new partition. + - if first==true, we are just starting in the first partition and no such + guarantee is provided. + + - The callee may move tbl->file and tbl->record[0] to point to some other + row. + */ + virtual void pre_next_partition(longlong rownum, Item_sum* item){}; + virtual void next_partition(longlong rownum, Item_sum* item)=0; + + /* + The current row has moved one row forward. + Move this frame bound accordingly, and update the value of aggregate + function as necessary. 
  */
  virtual void pre_next_row(Item_sum* item){};
  virtual void next_row(Item_sum* item)=0;

  virtual ~Frame_cursor(){}
};

//////////////////////////////////////////////////////////////////////////////
// RANGE-type frames
//////////////////////////////////////////////////////////////////////////////

/*
  Frame_range_n_top handles the top end of RANGE-type frame.

  That is, it handles:
    RANGE BETWEEN n PRECEDING AND ...
    RANGE BETWEEN n FOLLOWING AND ...

  Top of the frame doesn't need to check for partition end, since bottom will
  reach it before.
*/

class Frame_range_n_top : public Frame_cursor
{
  Table_read_cursor cursor;

  // Cached value of "ORDER BY expr +/- n" for the current row; used to
  // compare candidate rows against the frame boundary via cmp_read_only().
  Cached_item_item *range_expr;

  // The "n" of "n PRECEDING"/"n FOLLOWING"
  Item *n_val;
  // Computes "ORDER BY expr - n" or "ORDER BY expr + n" (built in init())
  Item *item_add;

  const bool is_preceding;
  /*
     1 when order_list uses ASC ordering
    -1 when order_list uses DESC ordering
  */
  int order_direction;
public:
  Frame_range_n_top(bool is_preceding_arg, Item *n_val_arg) :
    n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg)
  {}

  /*
    Build the boundary expression "src_expr +/- n".
    RANGE frames with an offset require exactly one ORDER BY element
    (enforced by the assert below).
  */
  void init(THD *thd, READ_RECORD *info,
            SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    cursor.init(info);

    DBUG_ASSERT(order_list->elements == 1);
    Item *src_expr= order_list->first->item[0];
    if (order_list->first->direction == ORDER::ORDER_ASC)
      order_direction= 1;
    else
      order_direction= -1;

    range_expr= (Cached_item_item*) new_Cached_item(thd, src_expr, FALSE);

    // PRECEDING subtracts the offset for ASC order; DESC flips the sign.
    bool use_minus= is_preceding;
    if (order_direction == -1)
      use_minus= !use_minus;

    if (use_minus)
      item_add= new (thd->mem_root) Item_func_minus(thd, src_expr, n_val);
    else
      item_add= new (thd->mem_root) Item_func_plus(thd, src_expr, n_val);

    // NOTE(review): fix_fields() return value is ignored here — TODO confirm
    // it cannot fail for this internally-generated arithmetic item.
    item_add->fix_fields(thd, &item_add);
  }

  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    // Save the value of FUNC(current_row)
    range_expr->fetch_value_from(item_add);
  }

  void next_partition(longlong rownum, Item_sum* item)
  {
    cursor.move_to(rownum);
    walk_till_non_peer(item);
  }

  void pre_next_row(Item_sum* item)
  {
    // Re-cache the boundary value for the new current row.
    range_expr->fetch_value_from(item_add);
  }

  void next_row(Item_sum* item)
  {
    /*
      Ok, our cursor is at the first row R where
      (prev_row + n) >= R
      We need to check about the current row.
    */
    if (cursor.restore_last_row())
    {
      if (order_direction * range_expr->cmp_read_only() <= 0)
        return;
      item->remove();
    }
    walk_till_non_peer(item);
  }

private:
  // Advance the cursor, removing rows that have fallen out of the frame,
  // until a row inside the frame boundary is found (or table end).
  void walk_till_non_peer(Item_sum* item)
  {
    while (!cursor.get_next())
    {
      if (order_direction * range_expr->cmp_read_only() <= 0)
        break;
      item->remove();
    }
  }
};


/*
  Frame_range_n_bottom handles bottom end of RANGE-type frame.

  That is, it handles frame bounds in form:
    RANGE BETWEEN ... AND n PRECEDING
    RANGE BETWEEN ... AND n FOLLOWING

  Bottom end moves first so it needs to check for partition end
  (todo: unless it's PRECEDING and in that case it doesnt)
  (todo: factor out common parts with Frame_range_n_top into
   a common ancestor)
*/

class Frame_range_n_bottom: public Frame_cursor
{
  Partition_read_cursor cursor;

  // Cached boundary value "ORDER BY expr +/- n" of the current row.
  Cached_item_item *range_expr;

  Item *n_val;
  Item *item_add;

  const bool is_preceding;

  // Set once the cursor has walked past the last row of the partition;
  // all subsequent moves become no-ops until the next partition.
  bool end_of_partition;

  /*
     1 when order_list uses ASC ordering
    -1 when order_list uses DESC ordering
  */
  int order_direction;
public:
  Frame_range_n_bottom(bool is_preceding_arg, Item *n_val_arg) :
    n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg)
  {}

  void init(THD *thd, READ_RECORD *info,
            SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    cursor.init(thd, info, partition_list);

    DBUG_ASSERT(order_list->elements == 1);
    Item *src_expr= order_list->first->item[0];

    if (order_list->first->direction == ORDER::ORDER_ASC)
      order_direction= 1;
    else
      order_direction= -1;

    range_expr= (Cached_item_item*) new_Cached_item(thd, src_expr, FALSE);

    bool use_minus= is_preceding;
    if (order_direction == -1)
      use_minus= !use_minus;

    if (use_minus)
      item_add= new (thd->mem_root) Item_func_minus(thd, src_expr, n_val);
    else
      item_add= new (thd->mem_root) Item_func_plus(thd, src_expr, n_val);

    // NOTE(review): fix_fields() return value is ignored — same caveat as
    // in Frame_range_n_top::init().
    item_add->fix_fields(thd, &item_add);
  }

  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    // Save the value of FUNC(current_row)
    range_expr->fetch_value_from(item_add);

    cursor.on_next_partition(rownum);
    end_of_partition= false;
  }

  void next_partition(longlong rownum, Item_sum* item)
  {
    cursor.move_to(rownum);
    walk_till_non_peer(item);
  }

  void pre_next_row(Item_sum* item)
  {
    if (end_of_partition)
      return;
    range_expr->fetch_value_from(item_add);
  }

  void next_row(Item_sum* item)
  {
    if (end_of_partition)
      return;
    /*
      Ok, our cursor is at the first row R where
      (prev_row + n) >= R
      We need to check about the current row.
    */
    if (cursor.restore_last_row())
    {
      // NOTE(review): this bound uses strict '<' while Frame_range_n_top
      // uses '<=', and add() instead of remove() — presumably intentional
      // because bottom bounds are inclusive; confirm against the standard.
      if (order_direction * range_expr->cmp_read_only() < 0)
        return;
      item->add();
    }
    walk_till_non_peer(item);
  }

private:
  // Advance the cursor, adding rows that enter the frame, until a row
  // beyond the boundary is found; remember if the partition ran out.
  void walk_till_non_peer(Item_sum* item)
  {
    int res;
    while (!(res= cursor.get_next()))
    {
      if (order_direction * range_expr->cmp_read_only() < 0)
        break;
      item->add();
    }
    if (res)
      end_of_partition= true;
  }
};


/*
  RANGE BETWEEN ... AND CURRENT ROW, bottom frame bound for CURRENT ROW
     ...
   | peer1
   | peer2  <----- current_row
   | peer3
   +-peer4  <----- the cursor points here. peer4 itself is included.
     nonpeer1
     nonpeer2

  This bound moves in front of the current_row. It should be at the first row
  that is still a peer of the current row.
*/

class Frame_range_current_row_bottom: public Frame_cursor
{
  Partition_read_cursor cursor;

  // Tracks the ORDER BY values of the current row so that peers (rows with
  // equal ORDER BY values) can be recognized.
  Group_bound_tracker peer_tracker;

  // True when the current row is still a peer of the previous one, i.e.
  // this bound does not need to advance on this step.
  bool dont_move;
public:
  void init(THD *thd, READ_RECORD *info,
            SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    cursor.init(thd, info, partition_list);
    peer_tracker.init(thd, order_list);
  }

  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    // Save the value of the current_row
    peer_tracker.check_if_next_group();
    cursor.on_next_partition(rownum);
    if (rownum != 0)
    {
      // Add the current row now because our cursor has already seen it
      item->add();
    }
  }

  void next_partition(longlong rownum, Item_sum* item)
  {
    walk_till_non_peer(item);
  }

  void pre_next_row(Item_sum* item)
  {
    dont_move= !peer_tracker.check_if_next_group();
    if (!dont_move)
      item->add();
  }

  void next_row(Item_sum* item)
  {
    // Check if our cursor is pointing at a peer of the current row.
    // If not, move forward until that becomes true
    if (dont_move)
    {
      /*
        Our current is not a peer of the current row.
        No need to move the bound.
      */
      return;
    }
    walk_till_non_peer(item);
  }

private:
  void walk_till_non_peer(Item_sum* item)
  {
    /*
      Walk forward until we've met first row that's not a peer of the current
      row
    */
    while (!cursor.get_next())
    {
      if (peer_tracker.compare_with_cache())
        break;
      item->add();
    }
  }
};


/*
  RANGE BETWEEN CURRENT ROW AND .... Top CURRENT ROW, RANGE-type frame bound

      nonpeer1
      nonpeer2
    +-peer1   <----- the cursor points here. peer1 itself is included.
    | peer2
    | peer3   <----- current_row
    | peer4
      ...

  It moves behind the current_row. It is located right after the first peer of
  the current_row.
*/

class Frame_range_current_row_top : public Frame_cursor
{
  // NOTE(review): bound_tracker is initialized in init() but never read in
  // this class — confirm whether it can be removed.
  Group_bound_tracker bound_tracker;

  Table_read_cursor cursor;
  Group_bound_tracker peer_tracker;

  // True when the current row started a new peer group, so this bound must
  // advance and remove the previous group's rows from the aggregate.
  bool move;
public:
  void init(THD *thd, READ_RECORD *info,
            SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    bound_tracker.init(thd, partition_list);

    cursor.init(info);
    peer_tracker.init(thd, order_list);
  }

  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    // Fetch the value from the first row
    peer_tracker.check_if_next_group();
    cursor.move_to(rownum+1);
  }

  void next_partition(longlong rownum, Item_sum* item) {}

  void pre_next_row(Item_sum* item)
  {
    // Check if the new current_row is a peer of the row that our cursor is
    // pointing to.
    move= peer_tracker.check_if_next_group();
  }

  void next_row(Item_sum* item)
  {
    if (move)
    {
      /*
        Our cursor is pointing at the first row that was a peer of the previous
        current row. Or, it was the first row in the partition.
      */
      if (cursor.restore_last_row())
      {
        // todo: need the following check ?
        if (!peer_tracker.compare_with_cache())
          return;
        item->remove();
      }

      // Remove all remaining rows of the previous peer group from the
      // aggregate, stopping at the first peer of the new current row.
      do
      {
        if (cursor.get_next())
          return;
        if (!peer_tracker.compare_with_cache())
          return;
        item->remove();
      }
      while (1);
    }
  }
};


/////////////////////////////////////////////////////////////////////////////
// UNBOUNDED frame bounds (shared between RANGE and ROWS)
/////////////////////////////////////////////////////////////////////////////

/*
  UNBOUNDED PRECEDING frame bound
*/
class Frame_unbounded_preceding : public Frame_cursor
{
public:
  void next_partition(longlong rownum, Item_sum* item)
  {
    /*
      UNBOUNDED PRECEDING frame end just stays on the first row.
      We are top of the frame, so we don't need to update the sum function.
    */
  }

  void next_row(Item_sum* item)
  {
    /* Do nothing, UNBOUNDED PRECEDING frame end doesn't move.
    */
  }
};


/*
  UNBOUNDED FOLLOWING frame bound
*/

class Frame_unbounded_following : public Frame_cursor
{
protected:
  // protected so that Frame_unbounded_following_set_count can reuse it
  Partition_read_cursor cursor;

public:
  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    cursor.init(thd, info, partition_list);
  }

  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    cursor.on_next_partition(rownum);
  }

  void next_partition(longlong rownum, Item_sum* item)
  {
    if (!rownum)
    {
      /* Read the first row */
      if (cursor.get_next())
        return;
    }
    item->add();

    /* Walk to the end of the partition, updating the SUM function */
    while (!cursor.get_next())
    {
      item->add();
    }
  }

  void next_row(Item_sum* item)
  {
    /* Do nothing, UNBOUNDED FOLLOWING frame end doesn't move */
  }
};


/*
  A variant of UNBOUNDED FOLLOWING that does not feed the aggregate; instead
  it counts the partition's rows and passes the count to window functions
  that need it (e.g. NTILE, CUME_DIST).
*/
class Frame_unbounded_following_set_count : public Frame_unbounded_following
{
public:
  // pre_next_partition is inherited

  void next_partition(longlong rownum, Item_sum* item)
  {
    ulonglong num_rows_in_partition= 0;
    if (!rownum)
    {
      /* Read the first row */
      if (cursor.get_next())
        return;
    }
    num_rows_in_partition++;

    /* Walk to the end of the partition, find how many rows there are. */
    while (!cursor.get_next())
    {
      num_rows_in_partition++;
    }

    // NOTE(review): unchecked downcast — assumes requires_partition_size()
    // guarantees 'item' is an Item_sum_window_with_row_count; confirm.
    Item_sum_window_with_row_count* item_with_row_count =
      static_cast<Item_sum_window_with_row_count *>(item);
    item_with_row_count->set_row_count(num_rows_in_partition);
  }
};

/////////////////////////////////////////////////////////////////////////////
// ROWS-type frame bounds
/////////////////////////////////////////////////////////////////////////////
/*
  ROWS $n PRECEDING frame bound

*/
class Frame_n_rows_preceding : public Frame_cursor
{
  /* Whether this is top of the frame or bottom */
  const bool is_top_bound;
  const ha_rows n_rows;

  /* Number of rows that we need to skip before our cursor starts moving */
  ha_rows n_rows_to_skip;

  Table_read_cursor cursor;
public:
  Frame_n_rows_preceding(bool is_top_bound_arg, ha_rows n_rows_arg) :
    is_top_bound(is_top_bound_arg), n_rows(n_rows_arg)
  {}

  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    cursor.init(info);
  }

  void next_partition(longlong rownum, Item_sum* item)
  {
    /*
      Position our cursor to point at the first row in the new partition
      (for rownum=0, it is already there, otherwise, it lags behind)
    */
    if (rownum != 0)
      cursor.move_to(rownum);

    /*
      Suppose the bound is ROWS 2 PRECEDING, and current row is row#n:
        ...
        n-3
        n-2 --- bound row
        n-1
         n  --- current_row
        ...
      The bound should point at row #(n-2). Bounds are inclusive, so
       - bottom bound should add row #(n-2) into the window function
       - top bound should remove row (#n-3) from the window function.
    */
    n_rows_to_skip= n_rows + (is_top_bound? 1:0) - 1;

    /* Bottom bound "ROWS 0 PRECEDING" is a special case: */
    if (n_rows_to_skip == ha_rows(-1))
    {
      cursor.get_next();
      item->add();
      n_rows_to_skip= 0;
    }
  }

  void next_row(Item_sum* item)
  {
    if (n_rows_to_skip)
    {
      n_rows_to_skip--;
      return;
    }

    if (cursor.get_next())
      return; // this is not expected to happen.

    if (is_top_bound) // this is frame start endpoint
      item->remove();
    else
      item->add();
  }
};


/*
  ROWS ... CURRENT ROW, Bottom bound.

  This case is moved to separate class because here we don't need to maintain
  our own cursor, or check for partition bound.
*/

class Frame_rows_current_row_bottom : public Frame_cursor
{
public:
  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    item->add();
  }
  void next_partition(longlong rownum, Item_sum* item) {}
  void pre_next_row(Item_sum* item)
  {
    /* Temp table's current row is current_row. Add it to the window func */
    item->add();
  }
  void next_row(Item_sum* item) {};
};


/*
  ROWS-type CURRENT ROW, top bound.

  This serves for processing "ROWS BETWEEN CURRENT ROW AND ..." frames.

      n-1
       n  --+  --- current_row, and top frame bound
      n+1   |
      ...   |

  when the current_row moves to row #n, this frame bound should remove the
  row #(n-1) from the window function.

  In other words, we need what "ROWS PRECEDING 0" provides.
*/
class Frame_rows_current_row_top: public Frame_n_rows_preceding

{
public:
  Frame_rows_current_row_top() :
    Frame_n_rows_preceding(true /*top*/, 0 /* n_rows */)
  {}
};


/*
  ROWS $n FOLLOWING frame bound.
*/

class Frame_n_rows_following : public Frame_cursor
{
  /* Whether this is top of the frame or bottom */
  const bool is_top_bound;
  const ha_rows n_rows;

  Partition_read_cursor cursor;
  // Set when the cursor has moved past the last row of the partition.
  bool at_partition_end;
public:
  Frame_n_rows_following(bool is_top_bound_arg, ha_rows n_rows_arg) :
    is_top_bound(is_top_bound_arg), n_rows(n_rows_arg)
  {
    DBUG_ASSERT(n_rows > 0);
  }

  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
  {
    cursor.init(thd, info, partition_list);
    at_partition_end= false;
  }

  void pre_next_partition(longlong rownum, Item_sum* item)
  {
    at_partition_end= false;

    cursor.on_next_partition(rownum);

    if (rownum != 0)
    {
      // This is only needed for "FOLLOWING 1". It is one row behind
      cursor.move_to(rownum+1);

      // Current row points at the first row in the partition
      if (is_top_bound) // this is frame top endpoint
        item->remove();
      else
        item->add();
    }
  }

  /* Move our cursor to be n_rows ahead.  */
  void next_partition(longlong rownum, Item_sum* item)
  {
    longlong i_end= n_rows + ((rownum==0)?1:0)- is_top_bound;
    for (longlong i= 0; i < i_end; i++)
    {
      if (next_row_intern(item))
        break;
    }
  }

  void next_row(Item_sum* item)
  {
    if (at_partition_end)
      return;
    next_row_intern(item);
  }

private:
  // Advance by one row; returns true (and latches at_partition_end) when
  // the partition has run out.
  bool next_row_intern(Item_sum *item)
  {
    if (!cursor.get_next())
    {
      if (is_top_bound) // this is frame start endpoint
        item->remove();
      else
        item->add();
    }
    else
      at_partition_end= true;
    return at_partition_end;
  }
};


/*
  Get a Frame_cursor for a frame bound. This is a "factory function".

  Returned cursors are heap-allocated; the caller owns them (they are freed
  with List<Frame_cursor>::delete_elements() in
  compute_window_func_with_frames()).
*/
Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound)
{
  if (!frame)
  {
    /*
      The docs say this about the lack of frame clause:

        Let WD be a window structure descriptor.
        ...
        If WD has no window framing clause, then
        Case:
        i) If the window ordering clause of WD is not present, then WF is the
           window partition of R.
        ii) Otherwise, WF consists of all rows of the partition of R that
           precede R or are peers of R in the window ordering of the window
           partition defined by the window ordering clause.

      For case #ii, the frame bounds essentially are "RANGE BETWEEN UNBOUNDED
      PRECEDING AND CURRENT ROW".
      For the case #i, without ordering clause all rows are considered peers,
      so again the same frame bounds can be used.
    */
    if (is_top_bound)
      return new Frame_unbounded_preceding;
    else
      return new Frame_range_current_row_bottom;
  }

  Window_frame_bound *bound= is_top_bound? frame->top_bound :
                                           frame->bottom_bound;

  if (bound->precedence_type == Window_frame_bound::PRECEDING ||
      bound->precedence_type == Window_frame_bound::FOLLOWING)
  {
    bool is_preceding= (bound->precedence_type ==
                        Window_frame_bound::PRECEDING);

    if (bound->offset == NULL) /* this is UNBOUNDED */
    {
      /* The following serve both RANGE and ROWS: */
      if (is_preceding)
        return new Frame_unbounded_preceding;
      else
        return new Frame_unbounded_following;
    }

    if (frame->units == Window_frame::UNITS_ROWS)
    {
      longlong n_rows= bound->offset->val_int();
      /* These should be handled in the parser */
      DBUG_ASSERT(!bound->offset->null_value);
      DBUG_ASSERT(n_rows >= 0);
      if (is_preceding)
        return new Frame_n_rows_preceding(is_top_bound, n_rows);
      else
        return new Frame_n_rows_following(is_top_bound, n_rows);
    }
    else
    {
      if (is_top_bound)
        return new Frame_range_n_top(is_preceding, bound->offset);
      else
        return new Frame_range_n_bottom(is_preceding, bound->offset);
    }
  }

  if (bound->precedence_type == Window_frame_bound::CURRENT)
  {
    if (frame->units == Window_frame::UNITS_ROWS)
    {
      if (is_top_bound)
        return new Frame_rows_current_row_top;
      else
        return new Frame_rows_current_row_bottom;
    }
    else
    {
      if (is_top_bound)
        return new Frame_range_current_row_top;
      else
        return new Frame_range_current_row_bottom;
    }
  }
  // Unreachable for well-formed frames; caller does not check for NULL.
  return NULL;
}

/*
  Append the implicit frame cursors required by window functions for which
  an explicit frame clause is prohibited (e.g. CUME_DIST).
*/
void add_extra_frame_cursors(List<Frame_cursor> *cursors,
                             const Item_sum *window_func)
{
  switch (window_func->sum_func())
  {
    case Item_sum::CUME_DIST_FUNC:
      cursors->push_back(new Frame_unbounded_preceding);
      cursors->push_back(new Frame_range_current_row_bottom);
      break;
    default:
      cursors->push_back(new Frame_unbounded_preceding);
      cursors->push_back(new Frame_rows_current_row_bottom);
  }
}

/*
  Collect into 'result' all frame cursors that computing 'item_win' needs:
  an optional row-counting cursor, then either the implicit cursors (for
  frame-prohibited functions) or the top/bottom cursors of the user frame.
*/
void get_window_func_required_cursors(
  List<Frame_cursor> *result, const Item_window_func* item_win)
{
  if (item_win->requires_partition_size())
    result->push_back(new Frame_unbounded_following_set_count);

  /*
    If it is not a regular window function that follows frame specifications,
    specific cursors are required.
  */
  if (item_win->is_frame_prohibited())
  {
    add_extra_frame_cursors(result, item_win->window_func());
    return;
  }

  /* A regular window function follows the frame specification. */
  result->push_back(get_frame_cursor(item_win->window_spec->window_frame,
                                     false));
  result->push_back(get_frame_cursor(item_win->window_spec->window_frame,
                                     true));
}

/*
  Streamed window function computation with window frames.

  We make a single pass over the ordered temp.table, but we're using three
  cursors:
   - current row - the row that we're computing window func value for)
   - start_bound - the start of the frame
   - bottom_bound - the end of the frame

  All three cursors move together.

  @todo
  Provided bounds have their 'cursors'... is it better to re-clone their
  cursors or re-position them onto the current row?
+ + @detail + ROWS BETWEEN 3 PRECEDING -- frame start + AND 3 FOLLOWING -- frame end + + /------ frame end (aka BOTTOM) + Dataset start | + --------====*=======[*]========*========-------->> dataset end + | \ + | +-------- current row + | + \-------- frame start ("TOP") + + - frame_end moves forward and adds rows into the aggregate function. + - frame_start follows behind and removes rows from the aggregate function. + - current_row is the row where the value of aggregate function is stored. + + @TODO: Only the first cursor needs to check for run-out-of-partition + condition (Others can catch up by counting rows?) + +*/ + +bool compute_window_func_with_frames(Item_window_func *item_win, + TABLE *tbl, READ_RECORD *info) +{ + THD *thd= current_thd; + int err= 0; + + Item_sum *sum_func= item_win->window_func(); + /* This algorithm doesn't support DISTINCT aggregator */ + sum_func->set_aggregator(Aggregator::SIMPLE_AGGREGATOR); + + List<Frame_cursor> cursors; + get_window_func_required_cursors(&cursors, item_win); + + List_iterator_fast<Frame_cursor> it(cursors); + Frame_cursor *c; + while((c= it++)) + { + c->init(thd, info, item_win->window_spec->partition_list, + item_win->window_spec->order_list); + } + + bool is_error= false; + longlong rownum= 0; + uchar *rowid_buf= (uchar*) my_malloc(tbl->file->ref_length, MYF(0)); + + while (true) + { + /* Move the current_row */ + if ((err=info->read_record(info))) + { + break; /* End of file */ + } + bool partition_changed= item_win->check_if_partition_changed(); + + tbl->file->position(tbl->record[0]); + memcpy(rowid_buf, tbl->file->ref, tbl->file->ref_length); + + if (partition_changed || (rownum == 0)) + { + sum_func->clear(); + /* + pre_XXX functions assume that tbl->record[0] contains current_row, and + they may not change it. 
+ */ + it.rewind(); + while ((c= it++)) + c->pre_next_partition(rownum, sum_func); + /* + We move bottom_bound first, because we want rows to be added into the + aggregate before top_bound attempts to remove them. + */ + it.rewind(); + while ((c= it++)) + c->next_partition(rownum, sum_func); + } + else + { + /* Again, both pre_XXX function can find current_row in tbl->record[0] */ + it.rewind(); + while ((c= it++)) + c->pre_next_row(sum_func); + + /* These make no assumptions about tbl->record[0] and may change it */ + it.rewind(); + while ((c= it++)) + c->next_row(sum_func); + } + rownum++; + + /* + Frame cursors may have made tbl->record[0] to point to some record other + than current_row. This applies to tbl->file's internal state, too. + Fix this by reading the current row again. + */ + tbl->file->ha_rnd_pos(tbl->record[0], rowid_buf); + store_record(tbl,record[1]); + item_win->save_in_field(item_win->result_field, true); + err= tbl->file->ha_update_row(tbl->record[1], tbl->record[0]); + if (err && err != HA_ERR_RECORD_IS_THE_SAME) + { + is_error= true; + break; + } + } + + my_free(rowid_buf); + cursors.delete_elements(); + return is_error? true: false; +} + + +/* Make a list that is a concation of two lists of ORDER elements */ + +static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2) +{ + if (!list1) + { + list1= list2; + list2= NULL; + } + + ORDER *res= NULL; // first element in the new list + ORDER *prev= NULL; // last element in the new list + ORDER *cur_list= list1; // this goes through list1, list2 + while (cur_list) + { + for (ORDER *cur= cur_list; cur; cur= cur->next) + { + ORDER *copy= (ORDER*)alloc_root(mem_root, sizeof(ORDER)); + memcpy(copy, cur, sizeof(ORDER)); + if (prev) + prev->next= copy; + prev= copy; + if (!res) + res= copy; + } + + cur_list= (cur_list == list1)? 
list2: NULL; + } + + if (prev) + prev->next= NULL; + + return res; +} + + +bool Window_func_runner::setup(THD *thd) +{ + win_func->setup_partition_border_check(thd); + + Item_sum::Sumfunctype type= win_func->window_func()->sum_func(); + switch (type) + { + case Item_sum::ROW_NUMBER_FUNC: + case Item_sum::RANK_FUNC: + case Item_sum::DENSE_RANK_FUNC: + { + /* + One-pass window function computation, walk through the rows and + assign values. + */ + compute_func= compute_window_func_values; + break; + } + case Item_sum::COUNT_FUNC: + case Item_sum::SUM_BIT_FUNC: + case Item_sum::SUM_FUNC: + case Item_sum::AVG_FUNC: + case Item_sum::PERCENT_RANK_FUNC: + case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: + { + /* + Frame-aware window function computation. It does one pass, but + uses three cursors -frame_start, current_row, and frame_end. + */ + compute_func= compute_window_func_with_frames; + break; + } + default: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "This aggregate as window function"); + return true; + } + + return false; +} + + +/* + Compute the value of window function for all rows. 
+*/ +bool Window_func_runner::exec(TABLE *tbl, SORT_INFO *filesort_result) +{ + THD *thd= current_thd; + win_func->set_phase_to_computation(); + + /* Go through the sorted array and compute the window function */ + READ_RECORD info; + + if (init_read_record(&info, thd, tbl, NULL/*select*/, filesort_result, + 0, 1, FALSE)) + return true; + + bool is_error= compute_func(win_func, tbl, &info); + + win_func->set_phase_to_retrieval(); + + end_read_record(&info); + + return is_error; +} + + +bool Window_funcs_sort::exec(JOIN *join) +{ + THD *thd= join->thd; + JOIN_TAB *join_tab= &join->join_tab[join->top_join_tab_count]; + + if (create_sort_index(thd, join, join_tab, filesort)) + return true; + + TABLE *tbl= join_tab->table; + SORT_INFO *filesort_result= join_tab->filesort_result; + + bool is_error= false; + List_iterator<Window_func_runner> it(runners); + Window_func_runner *runner; + + while ((runner= it++)) + { + if ((is_error= runner->exec(tbl, filesort_result))) + break; + } + + delete join_tab->filesort_result; + join_tab->filesort_result= NULL; + return is_error; +} + + +bool Window_funcs_sort::setup(THD *thd, SQL_SELECT *sel, + List_iterator<Item_window_func> &it) +{ + Item_window_func *win_func= it.peek(); + Item_window_func *prev_win_func; + + do + { + Window_func_runner *runner; + if (!(runner= new Window_func_runner(win_func)) || + runner->setup(thd)) + { + return true; + } + runners.push_back(runner); + it++; + prev_win_func= win_func; + } while ((win_func= it.peek()) && !(win_func->marker & SORTORDER_CHANGE_FLAG)); + + /* + The sort criteria must be taken from the last win_func in the group of + adjacent win_funcs that do not have SORTORDER_CHANGE_FLAG. 
+ */ + Window_spec *spec = prev_win_func->window_spec; + + ORDER* sort_order= concat_order_lists(thd->mem_root, + spec->partition_list->first, + spec->order_list->first); + filesort= new (thd->mem_root) Filesort(sort_order, HA_POS_ERROR, true, NULL); + + /* Apply the same condition that the subsequent sort has. */ + filesort->select= sel; + + return false; +} + + +bool Window_funcs_computation::setup(THD *thd, + List<Item_window_func> *window_funcs, + JOIN_TAB *tab) +{ + order_window_funcs_by_window_specs(window_funcs); + + SQL_SELECT *sel= NULL; + if (tab->filesort && tab->filesort->select) + { + sel= tab->filesort->select; + DBUG_ASSERT(!sel->quick); + } + + Window_funcs_sort *srt; + List_iterator<Item_window_func> iter(*window_funcs); + while (iter.peek()) + { + if (!(srt= new Window_funcs_sort()) || + srt->setup(thd, sel, iter)) + { + return true; + } + win_func_sorts.push_back(srt, thd->mem_root); + } + return false; +} + + +bool Window_funcs_computation::exec(JOIN *join) +{ + List_iterator<Window_funcs_sort> it(win_func_sorts); + Window_funcs_sort *srt; + /* Execute each sort */ + while ((srt = it++)) + { + if (srt->exec(join)) + return true; + } + return false; +} + + +void Window_funcs_computation::cleanup() +{ + List_iterator<Window_funcs_sort> it(win_func_sorts); + Window_funcs_sort *srt; + while ((srt = it++)) + { + srt->cleanup(); + delete srt; + } +} + + +Explain_aggr_window_funcs* +Window_funcs_computation::save_explain_plan(MEM_ROOT *mem_root, + bool is_analyze) +{ + Explain_aggr_window_funcs *xpl= new Explain_aggr_window_funcs; + List_iterator<Window_funcs_sort> it(win_func_sorts); + Window_funcs_sort *srt; + while ((srt = it++)) + { + Explain_aggr_filesort *eaf= + new Explain_aggr_filesort(mem_root, is_analyze, srt->filesort); + xpl->sorts.push_back(eaf, mem_root); + } + return xpl; +} + +///////////////////////////////////////////////////////////////////////////// +// Unneeded comments (will be removed when we develop a replacement for +// the 
feature that was attempted here +///////////////////////////////////////////////////////////////////////////// + /* + TODO Get this code to set can_compute_window_function during preparation, + not during execution. + + The reason for this is the following: + Our single scan optimization for window functions without tmp table, + is valid, if and only if, we only need to perform one sorting operation, + via filesort. The cases where we need to perform one sorting operation only: + + * A select with only one window function. + * A select with multiple window functions, but they must have their + partition and order by clauses compatible. This means that one ordering + is acceptable for both window functions. + + For example: + partition by a, b, c; order by d, e results in sorting by a b c d e. + partition by a; order by d results in sorting by a d. + + This kind of sorting is compatible. The less specific partition does + not care for the order of b and c columns so it is valid if we sort + by those in case of equality over a. + + partition by a, b; order by d, e results in sorting by a b d e + partition by a; order by e results in sorting by a e + + This sorting is incompatible due to the order by clause. The partition by + clause is compatible, (partition by a) is a prefix for (partition by a, b) + However, order by e is not a prefix for order by d, e, thus it is not + compatible. + + The rule for having compatible sorting is thus: + Each partition order must contain the other window functions partitions + prefixes, or be a prefix itself. This must hold true for all partitions. + Analog for the order by clause. + */ +#if 0 + List<Item_window_func> window_functions; + SQL_I_List<ORDER> largest_partition; + SQL_I_List<ORDER> largest_order_by; + bool can_compute_window_live = !need_tmp; + // Construct the window_functions item list and check if they can be + // computed using only one sorting. 
+ // + // TODO: Perhaps group functions into compatible sorting bins + // to minimize the number of sorting passes required to compute all of them. + while ((item= it++)) + { + if (item->type() == Item::WINDOW_FUNC_ITEM) + { + Item_window_func *item_win = (Item_window_func *) item; + window_functions.push_back(item_win); + if (!can_compute_window_live) + continue; // No point checking since we have to perform multiple sorts. + Window_spec *spec = item_win->window_spec; + // Having an empty partition list on one window function and a + // not empty list on a separate window function causes the sorting + // to be incompatible. + // + // Example: + // over (partition by a, order by x) && over (order by x). + // + // The first function requires an ordering by a first and then by x, + // while the seond function requires an ordering by x first. + // The same restriction is not required for the order by clause. + if (largest_partition.elements && !spec->partition_list.elements) + { + can_compute_window_live= FALSE; + continue; + } + can_compute_window_live= test_if_order_compatible(largest_partition, + spec->partition_list); + if (!can_compute_window_live) + continue; + + can_compute_window_live= test_if_order_compatible(largest_order_by, + spec->order_list); + if (!can_compute_window_live) + continue; + + if (largest_partition.elements < spec->partition_list.elements) + largest_partition = spec->partition_list; + if (largest_order_by.elements < spec->order_list.elements) + largest_order_by = spec->order_list; + } + } + if (can_compute_window_live && window_functions.elements && table_count == 1) + { + ha_rows examined_rows = 0; + ha_rows found_rows = 0; + ha_rows filesort_retval; + SORT_FIELD *s_order= (SORT_FIELD *) my_malloc(sizeof(SORT_FIELD) * + (largest_partition.elements + largest_order_by.elements) + 1, + MYF(MY_WME | MY_ZEROFILL | MY_THREAD_SPECIFIC)); + + size_t pos= 0; + for (ORDER* curr = largest_partition.first; curr; curr=curr->next, pos++) + 
s_order[pos].item = *curr->item; + + for (ORDER* curr = largest_order_by.first; curr; curr=curr->next, pos++) + s_order[pos].item = *curr->item; + + table[0]->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), + MYF(MY_WME | MY_ZEROFILL| + MY_THREAD_SPECIFIC)); + + + filesort_retval= filesort(thd, table[0], s_order, + (largest_partition.elements + largest_order_by.elements), + this->select, HA_POS_ERROR, FALSE, + &examined_rows, &found_rows, + this->explain->ops_tracker.report_sorting(thd)); + table[0]->sort.found_records= filesort_retval; + + join_tab->read_first_record = join_init_read_record; + join_tab->records= found_rows; + + my_free(s_order); + } + else +#endif + + diff --git a/sql/sql_window.h b/sql/sql_window.h new file mode 100644 index 00000000000..54e39d827fe --- /dev/null +++ b/sql/sql_window.h @@ -0,0 +1,230 @@ + +#ifndef SQL_WINDOW_INCLUDED +#define SQL_WINDOW_INCLUDED + +#include "my_global.h" +#include "item.h" +#include "filesort.h" +#include "records.h" + +class Item_window_func; + +/* + Window functions module. + + Each instance of window function has its own element in SELECT_LEX::window_specs. +*/ + + +class Window_frame_bound : public Sql_alloc +{ + +public: + + enum Bound_precedence_type + { + PRECEDING, + CURRENT, // Used for CURRENT ROW window frame bounds + FOLLOWING + }; + + Bound_precedence_type precedence_type; + + + /* + For UNBOUNDED PRECEDING / UNBOUNDED FOLLOWING window frame bounds + precedence type is seto to PRECEDING / FOLLOWING and + offset is set to NULL. 
    The offset is not meaningful with precedence type CURRENT
  */
  Item *offset;

  Window_frame_bound(Bound_precedence_type prec_type,
                     Item *offset_val)
    : precedence_type(prec_type), offset(offset_val) {}

  bool is_unbounded() { return offset == NULL; }

};


/*
  A window frame clause: ROWS/RANGE BETWEEN <top_bound> AND <bottom_bound>,
  with an optional EXCLUDE part.
*/
class Window_frame : public Sql_alloc
{

public:

  enum Frame_units
  {
    UNITS_ROWS,
    UNITS_RANGE
  };

  enum Frame_exclusion
  {
    EXCL_NONE,
    EXCL_CURRENT_ROW,
    EXCL_GROUP,
    EXCL_TIES
  };

  Frame_units units;

  Window_frame_bound *top_bound;

  Window_frame_bound *bottom_bound;

  Frame_exclusion exclusion;

  Window_frame(Frame_units win_frame_units,
               Window_frame_bound *win_frame_top_bound,
               Window_frame_bound *win_frame_bottom_bound,
               Frame_exclusion win_frame_exclusion)
    : units(win_frame_units), top_bound(win_frame_top_bound),
      bottom_bound(win_frame_bottom_bound), exclusion(win_frame_exclusion) {}

  bool check_frame_bounds();

};

/*
  A window specification: (PARTITION BY ... ORDER BY ... <frame>), possibly
  referencing a named window (window_ref).
*/
class Window_spec : public Sql_alloc
{
  bool window_names_are_checked;
 public:
  virtual ~Window_spec() {}

  LEX_STRING *window_ref;

  SQL_I_List<ORDER> *partition_list;

  SQL_I_List<ORDER> *order_list;

  Window_frame *window_frame;

  // Resolved named window this spec refers to, if any
  Window_spec *referenced_win_spec;

  Window_spec(LEX_STRING *win_ref,
              SQL_I_List<ORDER> *part_list,
              SQL_I_List<ORDER> *ord_list,
              Window_frame *win_frame)
    : window_names_are_checked(false), window_ref(win_ref),
      partition_list(part_list), order_list(ord_list),
      window_frame(win_frame), referenced_win_spec(NULL) {}

  virtual char *name() { return NULL; }

  bool check_window_names(List_iterator_fast<Window_spec> &it);

  char *window_reference() { return window_ref ? window_ref->str : NULL; }

  // Temporarily link the ORDER BY list onto the end of the PARTITION BY
  // list (so both can be processed as one ordering); undone by the
  // function below.
  void join_partition_and_order_lists()
  {
    *(partition_list->next)= order_list->first;
  }

  void disjoin_partition_and_order_lists()
  {
    *(partition_list->next)= NULL;
  }
};

/* A named window definition: "name AS (window-spec)". */
class Window_def : public Window_spec
{
 public:

  LEX_STRING *window_name;

  Window_def(LEX_STRING *win_name,
             LEX_STRING *win_ref,
             SQL_I_List<ORDER> *part_list,
             SQL_I_List<ORDER> *ord_list,
             Window_frame *win_frame)
    : Window_spec(win_ref, part_list, ord_list, win_frame),
      window_name(win_name) {}

  char *name() { return window_name->str; }

};

int setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
                  List<Item> &fields, List<Item> &all_fields,
                  List<Window_spec> &win_specs, List<Item_window_func> &win_funcs);


//////////////////////////////////////////////////////////////////////////////
// Classes that make window functions computation a part of SELECT's query plan
//////////////////////////////////////////////////////////////////////////////

typedef bool (*window_compute_func_t)(Item_window_func *item_win,
                                      TABLE *tbl, READ_RECORD *info);

/*
  This handles computation of one window function.

  Currently, we make a separate filesort() call for each window function.
*/

class Window_func_runner : public Sql_alloc
{
  Item_window_func *win_func;

  /* The function to use for computation*/
  window_compute_func_t compute_func;

public:
  Window_func_runner(Item_window_func *win_func_arg) :
    win_func(win_func_arg)
  {}

  // Set things up. Create filesort structures, etc
  bool setup(THD *thd);

  // This sorts and runs the window function.
  bool exec(TABLE *tbl, SORT_INFO *filesort_result);
};


/*
  Represents a group of window functions that require the same sorting of
  rows and so share the filesort() call.

*/

class Window_funcs_sort : public Sql_alloc
{
  List<Window_func_runner> runners;

  /* Window functions can be computed over this sorting */
  Filesort *filesort;
public:
  bool setup(THD *thd, SQL_SELECT *sel, List_iterator<Item_window_func> &it);
  bool exec(JOIN *join);
  void cleanup() { delete filesort; }

  friend class Window_funcs_computation;
};


struct st_join_table;
class Explain_aggr_window_funcs;
/*
  This is a "window function computation phase": a single object of this class
  takes care of computing all window functions in a SELECT.

  - JOIN optimizer is expected to call setup() during query optimization.
  - JOIN::exec() should call exec() once it has collected join output in a
    temporary table.
*/

class Window_funcs_computation : public Sql_alloc
{
  List<Window_funcs_sort> win_func_sorts;
public:
  bool setup(THD *thd, List<Item_window_func> *window_funcs, st_join_table *tab);
  bool exec(JOIN *join);

  Explain_aggr_window_funcs *save_explain_plan(MEM_ROOT *mem_root, bool is_analyze);
  void cleanup();
};


#endif /* SQL_WINDOW_INCLUDED */
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 1870b3f719f..04c1ba7e99a 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -55,6 +55,8 @@
 #include "sql_signal.h"
 #include "sql_get_diagnostics.h" // Sql_cmd_get_diagnostics
 #include "sql_cte.h"
+#include "sql_window.h"
+#include "item_windowfunc.h"
 #include "event_parse_data.h"
 #include "create_options.h"
 #include <myisam.h>
@@ -994,6 +996,8 @@ bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin)
   handlerton *db_type;
   st_select_lex *select_lex;
   struct p_elem_val *p_elem_value;
+  class Window_frame *window_frame;
+  class Window_frame_bound *window_frame_bound;
   udf_func *udf;
   /* enums */
@@ -1019,6 +1023,9 @@ bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin)
   enum sp_variable::enum_mode spvar_mode;
   enum thr_lock_type lock_type;
   enum enum_mysql_timestamp_type date_time_type;
+  enum Window_frame_bound::Bound_precedence_type
bound_precedence_type; + enum Window_frame::Frame_units frame_units; + enum Window_frame::Frame_exclusion frame_exclusion; DDL_options_st object_ddl_options; } @@ -1030,10 +1037,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %parse-param { THD *thd } %lex-param { THD *thd } /* - Currently there are 121 shift/reduce conflicts. + Currently there are 123 shift/reduce conflicts. We should not introduce new conflicts any more. */ -%expect 121 +%expect 123 /* Comments for TOKENS. @@ -1156,6 +1163,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token CREATE /* SQL-2003-R */ %token CROSS /* SQL-2003-R */ %token CUBE_SYM /* SQL-2003-R */ +%token CUME_DIST_SYM %token CURDATE /* MYSQL-FUNC */ %token CURRENT_SYM /* SQL-2003-R */ %token CURRENT_USER /* SQL-2003-R */ @@ -1186,6 +1194,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token DELAYED_SYM %token DELAY_KEY_WRITE_SYM %token DELETE_SYM /* SQL-2003-R */ +%token DENSE_RANK_SYM %token DESC /* SQL-2003-N */ %token DESCRIBE /* SQL-2003-R */ %token DES_KEY_FILE @@ -1227,6 +1236,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token EVERY_SYM /* SQL-2003-N */ %token EXCHANGE_SYM %token EXAMINED_SYM +%token EXCLUDE_SYM %token EXECUTE_SYM /* SQL-2003-R */ %token EXISTS /* SQL-2003-R */ %token EXIT_SYM @@ -1245,6 +1255,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FLOAT_NUM %token FLOAT_SYM /* SQL-2003-R */ %token FLUSH_SYM +%token FOLLOWING_SYM %token FORCE_SYM %token FOREIGN /* SQL-2003-R */ %token FOR_SYM /* SQL-2003-R */ @@ -1421,6 +1432,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NO_SYM /* SQL-2003-R */ %token NO_WAIT_SYM %token NO_WRITE_TO_BINLOG +%token NTILE_SYM %token NULL_SYM /* SQL-2003-R */ %token NUM %token NUMBER_SYM /* SQL-2003-N */ @@ -1441,9 +1453,11 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ORDER_SYM /* SQL-2003-R */ %token OR_OR_SYM 
/* OPERATOR */ %token OR_SYM /* SQL-2003-R */ +%token OTHERS_SYM %token OUTER %token OUTFILE %token OUT_SYM /* SQL-2003-R */ +%token OVER_SYM %token OWNER_SYM %token PACK_KEYS_SYM %token PAGE_SYM @@ -1456,6 +1470,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PARTITIONS_SYM %token PARTITIONING_SYM %token PASSWORD_SYM +%token PERCENT_RANK_SYM %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -1464,6 +1479,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token POLYGON %token PORT_SYM %token POSITION_SYM /* SQL-2003-N */ +%token PRECEDING_SYM %token PRECISION /* SQL-2003-R */ %token PREPARE_SYM /* SQL-2003-R */ %token PRESERVE_SYM @@ -1481,6 +1497,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token QUERY_SYM %token QUICK %token RANGE_SYM /* SQL-2003-R */ +%token RANK_SYM %token READS_SYM /* SQL-2003-R */ %token READ_ONLY_SYM %token READ_SYM /* SQL-2003-N */ @@ -1531,6 +1548,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ROW_FORMAT_SYM %token ROW_SYM /* SQL-2003-R */ %token ROW_COUNT_SYM /* SQL-2003-N */ +%token ROW_NUMBER_SYM %token RTREE_SYM %token SAVEPOINT_SYM /* SQL-2003-R */ %token SCHEDULE_SYM @@ -1621,6 +1639,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TEXT_SYM %token THAN_SYM %token THEN_SYM /* SQL-2003-R */ +%token TIES_SYM %token TIMESTAMP /* SQL-2003-R */ %token TIMESTAMP_ADD %token TIMESTAMP_DIFF @@ -1641,6 +1660,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TYPE_SYM /* SQL-2003-N */ %token UDF_RETURNS_SYM %token ULONGLONG_NUM +%token UNBOUNDED_SYM %token UNCOMMITTED_SYM /* SQL-2003-N */ %token UNDEFINED_SYM %token UNDERSCORE_CHARSET @@ -1682,6 +1702,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WEIGHT_STRING_SYM %token WHEN_SYM /* SQL-2003-R */ %token WHERE /* SQL-2003-R */ +%token WINDOW_SYM %token WHILE_SYM %token WITH /* SQL-2003-R */ %token 
WITH_CUBE_SYM /* INTERNAL */ @@ -1821,6 +1842,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); simple_ident_nospvar simple_ident_q field_or_var limit_option part_func_expr + window_func_expr + window_func + simple_window_func function_call_keyword function_call_nonkeyword function_call_generic @@ -2010,6 +2034,15 @@ END_OF_INPUT %type <cond_info_item_name> condition_information_item_name; %type <cond_info_list> condition_information; +%type <NONE> opt_window_clause window_def_list window_def window_spec +%type <lex_str_ptr> window_name +%type <NONE> opt_window_ref opt_window_frame_clause +%type <frame_units> window_frame_units; +%type <NONE> window_frame_extent; +%type <frame_exclusion> opt_window_frame_exclusion; +%type <window_frame_bound> window_frame_start window_frame_bound; + + %type <NONE> '-' '+' '*' '/' '%' '(' ')' ',' '!' '{' '}' '&' '|' AND_SYM OR_SYM OR_OR_SYM BETWEEN_SYM CASE_SYM @@ -4952,7 +4985,7 @@ opt_create_partitioning: /* This part of the parser is about handling of the partition information. - It's first version was written by Mikael Ronström with lots of answers to + It's first version was written by Mikael Ronstrm with lots of answers to questions provided by Antony Curtis. The partition grammar can be called from three places. @@ -5662,7 +5695,19 @@ create_select: { Select->parsing_place= NO_MATTER; } - table_expression + /* + TODO: + The following sequence repeats a few times: + opt_table_expression + opt_order_clause + opt_limit_clause + opt_select_lock_type + Perhaps they can be grouped into a dedicated rule. + */ + opt_table_expression + opt_order_clause + opt_limit_clause + opt_select_lock_type { /* The following work only with the local list, the global list @@ -7415,7 +7460,7 @@ alter_commands: | remove_partitioning | partitioning /* - This part was added for release 5.1 by Mikael Ronström. + This part was added for release 5.1 by Mikael Ronstrm. 
From here we insert a number of commands to manage the partitions of a partitioned table such as adding partitions, dropping partitions, reorganising partitions in various manners. In future releases the list @@ -8502,7 +8547,10 @@ select_paren_derived: Lex->current_select->set_braces(true); } SELECT_SYM select_part2_derived - table_expression + opt_table_expression + opt_order_clause + opt_limit_clause + opt_select_lock_type { if (setup_select_in_parentheses(Lex)) MYSQL_YYABORT; @@ -8533,23 +8581,20 @@ select_part2: | select_options_and_item_list into opt_select_lock_type | select_options_and_item_list opt_into - from_clause - opt_where_clause - opt_group_clause - opt_having_clause + table_expression opt_order_clause opt_limit_clause opt_procedure_clause opt_into opt_select_lock_type { - if ($2 && $10) + if ($2 && $7) { /* double "INTO" clause */ my_error(ER_WRONG_USAGE, MYF(0), "INTO", "INTO"); MYSQL_YYABORT; } - if ($9 && ($2 || $10)) + if ($6 && ($2 || $7)) { /* "INTO" with "PROCEDURE ANALYSE" */ my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "INTO"); @@ -8572,24 +8617,25 @@ select_options_and_item_list: } ; + +/** + <table expression>, as in the SQL standard. 
+*/ table_expression: - opt_from_clause + from_clause opt_where_clause opt_group_clause opt_having_clause - opt_order_clause - opt_limit_clause - opt_procedure_clause - opt_select_lock_type + opt_window_clause ; -from_clause: - FROM table_reference_list +opt_table_expression: + /* Empty */ + | table_expression ; -opt_from_clause: - /* empty */ - | from_clause +from_clause: + FROM table_reference_list ; table_reference_list: @@ -9301,6 +9347,7 @@ simple_expr: | param_marker { $$= $1; } | variable | sum_expr + | window_func_expr | simple_expr OR_OR_SYM simple_expr { $$= new (thd->mem_root) Item_func_concat(thd, $1, $3); @@ -10436,6 +10483,92 @@ sum_expr: } ; +window_func_expr: + window_func OVER_SYM window_name + { + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, $3); + if ($$ == NULL) + MYSQL_YYABORT; + if (Select->add_window_func((Item_window_func *) $$)) + MYSQL_YYABORT; + } + | + window_func OVER_SYM window_spec + { + LEX *lex= Lex; + if (Select->add_window_spec(thd, lex->win_ref, + Select->group_list, + Select->order_list, + lex->win_frame)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, + thd->lex->win_spec); + if ($$ == NULL) + MYSQL_YYABORT; + if (Select->add_window_func((Item_window_func *) $$)) + MYSQL_YYABORT; + } + ; + +window_func: + simple_window_func + | + sum_expr + ; + +simple_window_func: + ROW_NUMBER_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_row_number(thd); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + RANK_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_rank(thd); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + DENSE_RANK_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_dense_rank(thd); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + PERCENT_RANK_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_percent_rank(thd); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + CUME_DIST_SYM '(' ')' + { + $$= new (thd->mem_root) Item_sum_cume_dist(thd); + if ($$ == NULL) + MYSQL_YYABORT; + } + | + NTILE_SYM 
'(' expr ')' + { + $$= new (thd->mem_root) Item_sum_ntile(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + +window_name: + ident + { + $$= (LEX_STRING *) thd->memdup(&$1, sizeof(LEX_STRING)); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + variable: '@' { @@ -11086,7 +11219,10 @@ select_derived2: { Select->parsing_place= NO_MATTER; } - table_expression + opt_table_expression + opt_order_clause + opt_limit_clause + opt_select_lock_type ; get_select_lex: @@ -11371,6 +11507,155 @@ olap_opt: ; /* + optional window clause in select +*/ + +opt_window_clause: + /* empty */ + {} + | WINDOW_SYM + window_def_list + {} + ; + +window_def_list: + window_def_list ',' window_def + | window_def + ; + +window_def: + window_name AS window_spec + { + LEX *lex= Lex; + if (Select->add_window_def(thd, $1, lex->win_ref, + Select->group_list, + Select->order_list, + lex->win_frame )) + MYSQL_YYABORT; + } + ; + +window_spec: + '(' + { Select->prepare_add_window_spec(thd); } + opt_window_ref opt_window_partition_clause + opt_window_order_clause opt_window_frame_clause + ')' + ; + +opt_window_ref: + /* empty */ {} + | ident + { + thd->lex->win_ref= (LEX_STRING *) thd->memdup(&$1, sizeof(LEX_STRING)); + if (thd->lex->win_ref == NULL) + MYSQL_YYABORT; + } + +opt_window_partition_clause: + /* empty */ { } + | PARTITION_SYM BY group_list + ; + +opt_window_order_clause: + /* empty */ { } + | ORDER_SYM BY order_list + ; + +opt_window_frame_clause: + /* empty */ {} + | window_frame_units window_frame_extent opt_window_frame_exclusion + { + LEX *lex= Lex; + lex->win_frame= + new (thd->mem_root) Window_frame($1, + lex->frame_top_bound, + lex->frame_bottom_bound, + $3); + if (lex->win_frame == NULL) + MYSQL_YYABORT; + } + ; + +window_frame_units: + ROWS_SYM { $$= Window_frame::UNITS_ROWS; } + | RANGE_SYM { $$= Window_frame::UNITS_RANGE; } + ; + +window_frame_extent: + window_frame_start + { + LEX *lex= Lex; + lex->frame_top_bound= $1; + lex->frame_bottom_bound= + new (thd->mem_root) + 
Window_frame_bound(Window_frame_bound::CURRENT, NULL); + if (lex->frame_bottom_bound == NULL) + MYSQL_YYABORT; + } + | BETWEEN_SYM window_frame_bound AND_SYM window_frame_bound + { + LEX *lex= Lex; + lex->frame_top_bound= $2; + lex->frame_bottom_bound= $4; + } + ; + +window_frame_start: + UNBOUNDED_SYM PRECEDING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::PRECEDING, NULL); + if ($$ == NULL) + MYSQL_YYABORT; + } + | CURRENT_SYM ROW_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::CURRENT, NULL); + if ($$ == NULL) + MYSQL_YYABORT; + } + | literal PRECEDING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::PRECEDING, $1); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + +window_frame_bound: + window_frame_start { $$= $1; } + | UNBOUNDED_SYM FOLLOWING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::FOLLOWING, NULL); + if ($$ == NULL) + MYSQL_YYABORT; + } + | literal FOLLOWING_SYM + { + $$= new (thd->mem_root) + Window_frame_bound(Window_frame_bound::FOLLOWING, $1); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + +opt_window_frame_exclusion: + /* empty */ { $$= Window_frame::EXCL_NONE; } + | EXCLUDE_SYM CURRENT_SYM ROW_SYM + { $$= Window_frame::EXCL_CURRENT_ROW; } + | EXCLUDE_SYM GROUP_SYM + { $$= Window_frame::EXCL_GROUP; } + | EXCLUDE_SYM TIES_SYM + { $$= Window_frame::EXCL_TIES; } + | EXCLUDE_SYM NO_SYM OTHERS_SYM + { $$= Window_frame::EXCL_NONE; } + ; + +/* Order by statement in ALTER TABLE */ @@ -11675,9 +11960,11 @@ opt_procedure_clause: if (&lex->select_lex != lex->current_select) { + // SELECT * FROM t1 UNION SELECT * FROM t2 PROCEDURE ANALYSE(); my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "subquery"); MYSQL_YYABORT; } + lex->proc_list.elements=0; lex->proc_list.first=0; lex->proc_list.next= &lex->proc_list.first; @@ -16088,7 +16375,10 @@ union_option: query_specification: SELECT_SYM select_init2_derived - table_expression + opt_table_expression + 
opt_order_clause + opt_limit_clause + opt_select_lock_type { $$= Lex->current_select->master_unit()->first_select(); } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 9d871703bfe..4bf202813f3 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -4629,8 +4629,7 @@ static bool check_locale(sys_var *self, THD *thd, set_var *var) mysql_mutex_lock(&LOCK_error_messages); res= (!locale->errmsgs->errmsgs && read_texts(ERRMSG_FILE, locale->errmsgs->language, - &locale->errmsgs->errmsgs, - ER_ERROR_LAST - ER_ERROR_FIRST + 1)); + &locale->errmsgs->errmsgs)); mysql_mutex_unlock(&LOCK_error_messages); if (res) { diff --git a/sql/table.cc b/sql/table.cc index 07e2876f5ba..dc1730b5b6f 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5748,7 +5748,7 @@ void TABLE::mark_columns_used_by_index(uint index) MY_BITMAP *bitmap= &tmp_set; DBUG_ENTER("TABLE::mark_columns_used_by_index"); - enable_keyread(); + set_keyread(true); bitmap_clear_all(bitmap); mark_columns_used_by_index_no_reset(index, bitmap); column_bitmaps_set(bitmap, bitmap); @@ -5769,7 +5769,7 @@ void TABLE::add_read_columns_used_by_index(uint index) MY_BITMAP *bitmap= &tmp_set; DBUG_ENTER("TABLE::add_read_columns_used_by_index"); - enable_keyread(); + set_keyread(true); bitmap_copy(bitmap, read_set); mark_columns_used_by_index_no_reset(index, bitmap); column_bitmaps_set(bitmap, write_set); @@ -5792,7 +5792,7 @@ void TABLE::restore_column_maps_after_mark_index() { DBUG_ENTER("TABLE::restore_column_maps_after_mark_index"); - disable_keyread(); + set_keyread(false); default_column_bitmaps(); file->column_bitmaps_signal(); DBUG_VOID_RETURN; diff --git a/sql/table.h b/sql/table.h index 1c461d96097..a105df31e93 100644 --- a/sql/table.h +++ b/sql/table.h @@ -213,8 +213,13 @@ typedef struct st_order { Field *fast_field_copier_setup; int counter; /* position in SELECT list, correct only if counter_used is true*/ - bool asc; /* true if ascending */ - bool free_me; /* true if item isn't shared */ + enum enum_order { + 
ORDER_NOT_RELEVANT, + ORDER_ASC, + ORDER_DESC + }; + + enum_order direction; /* Requested direction of ordering */ bool in_field_list; /* true if in select field list */ bool counter_used; /* parameter was counter of columns */ Field *field; /* If tmp-table group */ @@ -1239,7 +1244,9 @@ public: bool alias_name_used; /* true if table_name is alias */ bool get_fields_in_item_tree; /* Signal to fix_field */ bool m_needs_reopen; +private: bool created; /* For tmp tables. TRUE <=> tmp table was actually created.*/ +public: #ifdef HAVE_REPLICATION /* used in RBR Triggers */ bool master_had_triggers; @@ -1351,30 +1358,46 @@ public: map= map_arg; tablenr= tablenr_arg; } - inline void enable_keyread() + + void set_keyread(bool flag) { - DBUG_ENTER("enable_keyread"); - DBUG_ASSERT(key_read == 0); - key_read= 1; - file->extra(HA_EXTRA_KEYREAD); - DBUG_VOID_RETURN; + DBUG_ASSERT(file); + if (flag && !key_read) + { + key_read= 1; + if (is_created()) + file->extra(HA_EXTRA_KEYREAD); + } + else if (!flag && key_read) + { + key_read= 0; + if (is_created()) + file->extra(HA_EXTRA_NO_KEYREAD); + } } + + /// Return true if table is instantiated, and false otherwise. + bool is_created() const { return created; } + + /** + Set the table as "created", and enable flags in storage engine + that could not be enabled without an instantiated table. 
+ */ + void set_created() + { + if (created) + return; + if (key_read) + file->extra(HA_EXTRA_KEYREAD); + created= true; + } + /* Returns TRUE if the table is filled at execution phase (and so, the optimizer must not do anything that depends on the contents of the table, like range analysis or constant table detection) */ bool is_filled_at_execution(); - inline void disable_keyread() - { - DBUG_ENTER("disable_keyread"); - if (key_read) - { - key_read= 0; - file->extra(HA_EXTRA_NO_KEYREAD); - } - DBUG_VOID_RETURN; - } bool update_const_key_parts(COND *conds); @@ -1941,6 +1964,7 @@ struct TABLE_LIST bool updating; /* for replicate-do/ignore table */ bool force_index; /* prefer index over table scan */ bool ignore_leaves; /* preload only non-leaf nodes */ + bool crashed; /* Table was found crashed */ table_map dep_tables; /* tables the table depends on */ table_map on_expr_dep_tables; /* tables on expression depends on */ struct st_nested_join *nested_join; /* if the element is a nested join */ @@ -2049,6 +2073,11 @@ struct TABLE_LIST /* TRUE <=> this table is a const one and was optimized away. */ bool optimized_away; + /** + TRUE <=> already materialized. Valid only for materialized derived + tables/views. + */ + bool materialized; /* I_S: Flags to open_table (e.g. OPEN_TABLE_ONLY or OPEN_VIEW_ONLY) */ uint i_s_requested_object; diff --git a/sql/unireg.h b/sql/unireg.h index 10751b6ec93..251597c1884 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -43,15 +43,16 @@ #define PLUGINDIR "lib/plugin" #endif -#define CURRENT_THD_ERRMSGS current_thd->variables.errmsgs -#define DEFAULT_ERRMSGS my_default_lc_messages->errmsgs->errmsgs - -#define ER(X) CURRENT_THD_ERRMSGS[(X) - ER_ERROR_FIRST] -#define ER_DEFAULT(X) DEFAULT_ERRMSGS[(X) - ER_ERROR_FIRST] -#define ER_SAFE(X) (((X) >= ER_ERROR_FIRST && (X) <= ER_ERROR_LAST) ? ER(X) : "Invalid error code") -#define ER_SAFE_THD(T,X) (((X) >= ER_ERROR_FIRST && (X) <= ER_ERROR_LAST) ? 
ER_THD(T,X) : "Invalid error code") -#define ER_THD(thd,X) ((thd)->variables.errmsgs[(X) - ER_ERROR_FIRST]) -#define ER_THD_OR_DEFAULT(thd,X) ((thd) ? ER_THD(thd, X) : ER_DEFAULT(X)) +#define MAX_ERROR_RANGES 4 /* 1000-2000, 2000-3000, 3000-4000, 4000-5000 */ +#define ERRORS_PER_RANGE 1000 + +#define DEFAULT_ERRMSGS my_default_lc_messages->errmsgs->errmsgs +#define CURRENT_THD_ERRMSGS (current_thd)->variables.errmsgs + +#define ER_DEFAULT(X) DEFAULT_ERRMSGS[((X)-ER_ERROR_FIRST) / ERRORS_PER_RANGE][(X)% ERRORS_PER_RANGE] +#define ER_THD(thd,X) ((thd)->variables.errmsgs[((X)-ER_ERROR_FIRST) / ERRORS_PER_RANGE][(X) % ERRORS_PER_RANGE]) +#define ER(X) ER_THD(current_thd, (X)) +#define ER_THD_OR_DEFAULT(thd,X) ((thd) ? ER_THD(thd, (X)) : ER_DEFAULT(X)) #define ME_INFO (ME_HOLDTANG+ME_OLDWIN+ME_NOREFRESH) #define ME_ERROR (ME_BELL+ME_OLDWIN+ME_NOREFRESH) diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index f84ebe4dbb9..3cdf95e2bf2 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -202,7 +202,7 @@ wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; long wsrep_protocol_version = 3; -wsp::Config_state wsrep_config_state; +wsp::Config_state *wsrep_config_state; // Boolean denoting if server is in initial startup phase. 
This is needed // to make sure that main thread waiting in wsrep_sst_wait() is signaled @@ -281,7 +281,7 @@ wsrep_view_handler_cb (void* app_ctx, *sst_req = NULL; *sst_req_len = 0; - wsrep_member_status_t memb_status= wsrep_config_state.get_status(); + wsrep_member_status_t memb_status= wsrep_config_state->get_status(); if (memcmp(&cluster_uuid, &view->state_id.uuid, sizeof(wsrep_uuid_t))) { @@ -442,7 +442,7 @@ wsrep_view_handler_cb (void* app_ctx, out: if (view->status == WSREP_VIEW_PRIMARY) wsrep_startup= FALSE; - wsrep_config_state.set(memb_status, view); + wsrep_config_state->set(memb_status, view); return WSREP_CB_SUCCESS; } @@ -484,7 +484,7 @@ static void wsrep_synced_cb(void* app_ctx) signal_main= true; } - wsrep_config_state.set(WSREP_MEMBER_SYNCED); + wsrep_config_state->set(WSREP_MEMBER_SYNCED); mysql_mutex_unlock (&LOCK_wsrep_ready); if (signal_main) @@ -753,6 +753,8 @@ done: /* Initialize wsrep thread LOCKs and CONDs */ void wsrep_thr_init() { + DBUG_ENTER("wsrep_thr_init"); + wsrep_config_state = new wsp::Config_state; #ifdef HAVE_PSI_INTERFACE mysql_mutex_register("sql", wsrep_mutexes, array_elements(wsrep_mutexes)); mysql_cond_register("sql", wsrep_conds, array_elements(wsrep_conds)); @@ -772,6 +774,7 @@ void wsrep_thr_init() mysql_mutex_init(key_LOCK_wsrep_slave_threads, &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_wsrep_desync, &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_wsrep_config_state, &LOCK_wsrep_config_state, MY_MUTEX_INIT_FAST); + DBUG_VOID_RETURN; } @@ -819,6 +822,8 @@ void wsrep_deinit(bool free_options) /* Destroy wsrep thread LOCKs and CONDs */ void wsrep_thr_deinit() { + if (!wsrep_config_state) + return; // Never initialized mysql_mutex_destroy(&LOCK_wsrep_ready); mysql_cond_destroy(&COND_wsrep_ready); mysql_mutex_destroy(&LOCK_wsrep_sst); @@ -832,6 +837,8 @@ void wsrep_thr_deinit() mysql_mutex_destroy(&LOCK_wsrep_slave_threads); mysql_mutex_destroy(&LOCK_wsrep_desync); 
mysql_mutex_destroy(&LOCK_wsrep_config_state); + delete wsrep_config_state; + wsrep_config_state= 0; // Safety } void wsrep_recover() diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 6d04527cbcb..562bc7effb4 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1256,7 +1256,7 @@ wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, /* This will be reset when sync callback is called. * Should we set wsrep_ready to FALSE here too? */ - wsrep_config_state.set(WSREP_MEMBER_DONOR); + wsrep_config_state->set(WSREP_MEMBER_DONOR); const char* method = (char*)msg; size_t method_len = strlen (method); diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h index ed699eabec9..54235cf5273 100644 --- a/sql/wsrep_utils.h +++ b/sql/wsrep_utils.h @@ -233,7 +233,7 @@ private: } /* namespace wsp */ -extern wsp::Config_state wsrep_config_state; +extern wsp::Config_state *wsrep_config_state; namespace wsp { /* a class to manage env vars array */ |