diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2023-02-06 17:55:01 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2023-02-06 17:55:01 +0200 |
commit | ff12a5b8977439da0675ecb9e8be1d215549d4a3 (patch) | |
tree | 2f570fc5db131e05170f35bcf39e5a164c5f05cf /sql | |
parent | f6da6b249e551cb606005e7a353883d252cba84b (diff) | |
parent | f8a85af8ca1c937b8d4f847477bd282f80251cde (diff) | |
download | mariadb-git-ff12a5b8977439da0675ecb9e8be1d215549d4a3.tar.gz |
Merge mariadb-10.5.19 into 10.5
Diffstat (limited to 'sql')
42 files changed, 606 insertions, 255 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 18054252584..8bed77aed8d 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -228,6 +228,15 @@ FOREACH(se aria partition perfschema sql_sequence wsrep) ENDIF() ENDFOREACH() +IF(VISIBILITY_HIDDEN_FLAG AND TARGET partition AND WITH_UBSAN) + # the spider plugin needs some partition symbols from inside mysqld + # when built with ubsan, in which case we need to remove + # -fvisibility=hidden from partition + GET_TARGET_PROPERTY(f partition COMPILE_FLAGS) + STRING(REPLACE "${VISIBILITY_HIDDEN_FLAG}" "" f ${f}) + SET_TARGET_PROPERTIES(partition PROPERTIES COMPILE_FLAGS "${f}") +ENDIF() + IF(WIN32) SET(MYSQLD_SOURCE main.cc message.rc) ELSE() diff --git a/sql/discover.h b/sql/discover.h index 1775f5d6551..750c2944ede 100644 --- a/sql/discover.h +++ b/sql/discover.h @@ -28,7 +28,7 @@ int writefile(const char *path, const char *db, const char *table, inline void deletefrm(const char *path) { char frm_name[FN_REFLEN]; - strxmov(frm_name, path, reg_ext, NullS); + strxnmov(frm_name, sizeof(frm_name)-1, path, reg_ext, NullS); mysql_file_delete(key_file_frm, frm_name, MYF(0)); } diff --git a/sql/field.cc b/sql/field.cc index 8afbb6fc421..a6bbd8c9c16 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1869,17 +1869,11 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, } -void Field::hash(ulong *nr, ulong *nr2) +void Field::hash_not_null(Hasher *hasher) { - if (is_null()) - { - *nr^= (*nr << 1) | 1; - } - else - { - uint len= pack_length(); - sort_charset()->hash_sort(ptr, len, nr, nr2); - } + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + hasher->add(sort_charset(), ptr, pack_length()); } size_t @@ -8268,17 +8262,12 @@ bool Field_varstring::is_equal(const Column_definition &new_field) const } -void Field_varstring::hash(ulong *nr, ulong *nr2) +void Field_varstring::hash_not_null(Hasher *hasher) { - if (is_null()) - { - *nr^= (*nr << 1) | 1; - } - else - { - uint len= length_bytes == 1 
? (uint) *ptr : uint2korr(ptr); - charset()->hash_sort(ptr + length_bytes, len, nr, nr2); - } + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + uint len= length_bytes == 1 ? (uint) *ptr : uint2korr(ptr); + hasher->add(charset(), ptr + length_bytes, len); } @@ -8653,6 +8642,17 @@ oom_error: } +void Field_blob::hash_not_null(Hasher *hasher) +{ + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + char *blob; + memcpy(&blob, ptr + packlength, sizeof(char*)); + if (blob) + hasher->add(Field_blob::charset(), blob, get_length(ptr)); +} + + double Field_blob::val_real(void) { DBUG_ASSERT(marked_for_read()); @@ -9726,20 +9726,27 @@ const DTCollation & Field_bit::dtcollation() const } -void Field_bit::hash(ulong *nr, ulong *nr2) +/* + This method always calculates hash over 8 bytes. + This is different from how the HEAP engine calculate hash: + HEAP takes into account the actual octet size, so say for BIT(18) + it calculates hash over three bytes only: + - the incomplete byte with bits 16..17 + - the two full bytes with bits 0..15 + See hp_rec_hashnr(), hp_hashnr() for details. + + The HEAP way is more efficient, especially for short lengths. + Let's consider fixing Field_bit eventually to do it in the HEAP way, + with proper measures to upgrade partitioned tables easy. 
+*/ +void Field_bit::hash_not_null(Hasher *hasher) { - if (is_null()) - { - *nr^= (*nr << 1) | 1; - } - else - { - CHARSET_INFO *cs= &my_charset_bin; - longlong value= Field_bit::val_int(); - uchar tmp[8]; - mi_int8store(tmp,value); - cs->hash_sort(tmp, 8, nr, nr2); - } + DBUG_ASSERT(marked_for_read()); + DBUG_ASSERT(!is_null()); + longlong value= Field_bit::val_int(); + uchar tmp[8]; + mi_int8store(tmp,value); + hasher->add(&my_charset_bin, tmp, 8); } diff --git a/sql/field.h b/sql/field.h index 43bcfe5590a..fafe0557ddc 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1827,7 +1827,14 @@ public: key_map get_possible_keys(); /* Hash value */ - virtual void hash(ulong *nr, ulong *nr2); + void hash(Hasher *hasher) + { + if (is_null()) + hasher->add_null(); + else + hash_not_null(hasher); + } + virtual void hash_not_null(Hasher *hasher); /** Get the upper limit of the MySQL integral and floating-point type. @@ -4196,7 +4203,7 @@ public: uchar *new_ptr, uint32 length, uchar *new_null_ptr, uint new_null_bit) override; bool is_equal(const Column_definition &new_field) const override; - void hash(ulong *nr, ulong *nr2) override; + void hash_not_null(Hasher *hasher) override; uint length_size() const override { return length_bytes; } void print_key_value(String *out, uint32 length) override; Binlog_type_info binlog_type_info() const override; @@ -4456,6 +4463,7 @@ public: bool make_empty_rec_store_default_value(THD *thd, Item *item) override; int store(const char *to, size_t length, CHARSET_INFO *charset) override; using Field_str::store; + void hash_not_null(Hasher *hasher) override; double val_real() override; longlong val_int() override; String *val_str(String *, String *) override; @@ -5032,7 +5040,7 @@ public: if (bit_ptr) bit_ptr= ADD_TO_PTR(bit_ptr, ptr_diff, uchar*); } - void hash(ulong *nr, ulong *nr2) override; + void hash_not_null(Hasher *hasher) override; SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part, const Item_bool_func *cond, diff --git 
a/sql/ha_partition.cc b/sql/ha_partition.cc index 7298dcd15c6..4df4a4c7c01 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -9980,8 +9980,7 @@ uint8 ha_partition::table_cache_type() uint32 ha_partition::calculate_key_hash_value(Field **field_array) { - ulong nr1= 1; - ulong nr2= 4; + Hasher hasher; bool use_51_hash; use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm == partition_info::KEY_ALGORITHM_51); @@ -10008,12 +10007,12 @@ uint32 ha_partition::calculate_key_hash_value(Field **field_array) { if (field->is_null()) { - nr1^= (nr1 << 1) | 1; + hasher.add_null(); continue; } /* Force this to my_hash_sort_bin, which was used in 5.1! */ uint len= field->pack_length(); - my_charset_bin.hash_sort(field->ptr, len, &nr1, &nr2); + hasher.add(&my_charset_bin, field->ptr, len); /* Done with this field, continue with next one. */ continue; } @@ -10031,12 +10030,12 @@ uint32 ha_partition::calculate_key_hash_value(Field **field_array) { if (field->is_null()) { - nr1^= (nr1 << 1) | 1; + hasher.add_null(); continue; } /* Force this to my_hash_sort_bin, which was used in 5.1! */ uint len= field->pack_length(); - my_charset_latin1.hash_sort(field->ptr, len, &nr1, &nr2); + hasher.add(&my_charset_latin1, field->ptr, len); continue; } /* New types in mysql-5.6. */ @@ -10063,9 +10062,9 @@ uint32 ha_partition::calculate_key_hash_value(Field **field_array) } /* fall through, use collation based hashing. 
*/ } - field->hash(&nr1, &nr2); + field->hash(&hasher); } while (*(++field_array)); - return (uint32) nr1; + return (uint32) hasher.finalize(); } diff --git a/sql/handler.cc b/sql/handler.cc index d969c8c6ab8..7c42b3bbb6f 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -4356,6 +4356,35 @@ int handler::check_collation_compatibility() } +int handler::check_long_hash_compatibility() const +{ + if (!table->s->old_long_hash_function()) + return 0; + KEY *key= table->key_info; + KEY *key_end= key + table->s->keys; + for ( ; key < key_end; key++) + { + if (key->algorithm == HA_KEY_ALG_LONG_HASH) + { + /* + The old (pre-MDEV-27653) hash function was wrong. + So the long hash unique constraint can have some + duplicate records. REPAIR TABLE can't fix this, + it will fail on a duplicate key error. + Only "ALTER IGNORE TABLE .. FORCE" can fix this. + So we need to return HA_ADMIN_NEEDS_ALTER here, + (not HA_ADMIN_NEEDS_UPGRADE which is used elsewhere), + to properly send the error message text corresponding + to ER_TABLE_NEEDS_REBUILD (rather than to ER_TABLE_NEEDS_UPGRADE) + to the user. 
+ */ + return HA_ADMIN_NEEDS_ALTER; + } + } + return 0; +} + + int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt) { int error; @@ -4393,6 +4422,9 @@ int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt) if (unlikely((error= check_collation_compatibility()))) return error; + + if (unlikely((error= check_long_hash_compatibility()))) + return error; return check_for_upgrade(check_opt); } diff --git a/sql/handler.h b/sql/handler.h index 3191c408e56..60cc026359b 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -3416,6 +3416,7 @@ public: } int check_collation_compatibility(); + int check_long_hash_compatibility() const; int ha_check_for_upgrade(HA_CHECK_OPT *check_opt); /** to be actually called to get 'check()' functionality*/ int ha_check(THD *thd, HA_CHECK_OPT *check_opt); diff --git a/sql/item.cc b/sql/item.cc index ac5082dbdb2..19c0b341982 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -10755,7 +10755,7 @@ table_map Item_direct_view_ref::used_tables() const table_map used= (*ref)->used_tables(); return (used ? used : - ((null_ref_table != NO_NULL_TABLE) ? + (null_ref_table != NO_NULL_TABLE && !null_ref_table->const_table ? null_ref_table->map : (table_map)0 )); } diff --git a/sql/item.h b/sql/item.h index 01898709131..bf111c649ae 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1295,6 +1295,12 @@ public: */ inline ulonglong val_uint() { return (ulonglong) val_int(); } + virtual bool hash_not_null(Hasher *hasher) + { + DBUG_ASSERT(0); + return true; + } + /* Return string representation of this item object. 
@@ -3506,6 +3512,13 @@ public: { return Sql_mode_dependency(0, field->value_depends_on_sql_mode()); } + bool hash_not_null(Hasher *hasher) + { + if (field->is_null()) + return true; + field->hash_not_null(hasher); + return false; + } longlong val_int_endpoint(bool left_endp, bool *incl_endp) override; bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; bool get_date_result(THD *thd, MYSQL_TIME *ltime,date_mode_t fuzzydate) diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 63cf9c70e88..40181c3384c 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4870,38 +4870,18 @@ Item_cond::fix_fields(THD *thd, Item **ref) if (check_stack_overrun(thd, STACK_MIN_SIZE, buff)) return TRUE; // Fatal error flag is set! - /* - The following optimization reduces the depth of an AND-OR tree. - E.g. a WHERE clause like - F1 AND (F2 AND (F2 AND F4)) - is parsed into a tree with the same nested structure as defined - by braces. This optimization will transform such tree into - AND (F1, F2, F3, F4). - Trees of OR items are flattened as well: - ((F1 OR F2) OR (F3 OR F4)) => OR (F1, F2, F3, F4) - Items for removed AND/OR levels will dangle until the death of the - entire statement. - The optimization is currently prepared statements and stored procedures - friendly as it doesn't allocate any memory and its effects are durable - (i.e. do not depend on PS/SP arguments). 
- */ - while ((item=li++)) + + while (li++) { - while (item->type() == Item::COND_ITEM && - ((Item_cond*) item)->functype() == functype() && - !((Item_cond*) item)->list.is_empty()) - { // Identical function - li.replace(((Item_cond*) item)->list); - ((Item_cond*) item)->list.empty(); - item= *li.ref(); // new current item - } + merge_sub_condition(li); + item= *li.ref(); if (abort_on_null) item->top_level_item(); /* replace degraded condition: was: <field> - become: <field> = 1 + become: <field> != 0 */ Item::Type type= item->type(); if (type == Item::FIELD_ITEM || type == Item::REF_ITEM) @@ -4917,7 +4897,9 @@ Item_cond::fix_fields(THD *thd, Item **ref) if (item->fix_fields_if_needed_for_bool(thd, li.ref())) return TRUE; /* purecov: inspected */ - item= *li.ref(); // item can be substituted in fix_fields + merge_sub_condition(li); + item= *li.ref(); // may be substituted in fix_fields/merge_item_if_possible + used_tables_cache|= item->used_tables(); if (item->const_item() && !item->with_param && !item->is_expensive() && !cond_has_datetime_is_null(item)) @@ -4969,6 +4951,55 @@ Item_cond::fix_fields(THD *thd, Item **ref) return FALSE; } +/** + @brief + Merge a lower-level condition pointed by the iterator into this Item_cond + if possible + + @param li list iterator pointing to condition that must be + examined and merged if possible. + + @details + If an item pointed by the iterator is an instance of Item_cond with the + same functype() as this Item_cond (i.e. both are Item_cond_and or both are + Item_cond_or) then the arguments of that lower-level item can be merged + into the list of arguments of this upper-level Item_cond. + + This optimization reduces the depth of an AND-OR tree. + E.g. a WHERE clause like + F1 AND (F2 AND (F2 AND F4)) + is parsed into a tree with the same nested structure as defined + by braces. This optimization will transform such tree into + AND (F1, F2, F3, F4). 
+ Trees of OR items are flattened as well: + ((F1 OR F2) OR (F3 OR F4)) => OR (F1, F2, F3, F4) + Items for removed AND/OR levels will dangle until the death of the + entire statement. + + The optimization is currently prepared statements and stored procedures + friendly as it doesn't allocate any memory and its effects are durable + (i.e. do not depend on PS/SP arguments). +*/ +void Item_cond::merge_sub_condition(List_iterator<Item>& li) +{ + Item *item= *li.ref(); + + /* + The check for list.is_empty() is to catch empty Item_cond_and() items. + We may encounter Item_cond_and with an empty list, because optimizer code + strips multiple equalities, combines items, then adds multiple equalities + back + */ + while (item->type() == Item::COND_ITEM && + ((Item_cond*) item)->functype() == functype() && + !((Item_cond*) item)->list.is_empty()) + { + li.replace(((Item_cond*) item)->list); + ((Item_cond*) item)->list.empty(); + item= *li.ref(); + } +} + bool Item_cond::eval_not_null_tables(void *opt_arg) diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 25553f8d565..439e5d5a708 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -3049,6 +3049,9 @@ public: Item *build_clone(THD *thd); bool excl_dep_on_table(table_map tab_map); bool excl_dep_on_grouping_fields(st_select_lex *sel); + +private: + void merge_sub_condition(List_iterator<Item>& li); }; template <template<class> class LI, class T> class Item_equal_iterator; diff --git a/sql/item_func.cc b/sql/item_func.cc index 81a53a88dc6..dc9afbd5c93 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -1768,7 +1768,7 @@ static void calc_hash_for_unique(ulong &nr1, ulong &nr2, String *str) cs->hash_sort((uchar *)str->ptr(), str->length(), &nr1, &nr2); } -longlong Item_func_hash::val_int() +longlong Item_func_hash_mariadb_100403::val_int() { DBUG_EXECUTE_IF("same_long_unique_hash", return 9;); unsigned_flag= true; @@ -1789,6 +1789,24 @@ longlong Item_func_hash::val_int() } +longlong Item_func_hash::val_int() 
+{ + DBUG_EXECUTE_IF("same_long_unique_hash", return 9;); + unsigned_flag= true; + Hasher hasher; + for(uint i= 0;i<arg_count;i++) + { + if (args[i]->hash_not_null(&hasher)) + { + null_value= 1; + return 0; + } + } + null_value= 0; + return (longlong) hasher.finalize(); +} + + bool Item_func_hash::fix_length_and_dec() { decimals= 0; diff --git a/sql/item_func.h b/sql/item_func.h index de515100146..523f672eba6 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -1213,6 +1213,18 @@ public: const char *func_name() const { return "<hash>"; } }; +class Item_func_hash_mariadb_100403: public Item_func_hash +{ +public: + Item_func_hash_mariadb_100403(THD *thd, List<Item> &item) + :Item_func_hash(thd, item) + {} + longlong val_int(); + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_hash_mariadb_100403>(thd, this); } + const char *func_name() const { return "<hash_mariadb_100403>"; } +}; + class Item_longlong_func: public Item_int_func { public: diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index cffeeb7256e..000d198eb0c 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1612,6 +1612,18 @@ bool Item_func_ucase::fix_length_and_dec() } +bool Item_func_left::hash_not_null(Hasher *hasher) +{ + StringBuffer<STRING_BUFFER_USUAL_SIZE> buf; + String *str= val_str(&buf); + DBUG_ASSERT((str == NULL) == null_value); + if (!str) + return true; + hasher->add(collation.collation, str->ptr(), str->length()); + return false; +} + + String *Item_func_left::val_str(String *str) { DBUG_ASSERT(fixed == 1); diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 0c4967a5247..4366d65eba0 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -459,6 +459,7 @@ class Item_func_left :public Item_str_func String tmp_value; public: Item_func_left(THD *thd, Item *a, Item *b): Item_str_func(thd, a, b) {} + bool hash_not_null(Hasher *hasher); String *val_str(String *); bool fix_length_and_dec(); const char *func_name() const { return "left"; } diff --git 
a/sql/item_sum.h b/sql/item_sum.h index 96604e5cf1d..02b9431b83e 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -367,7 +367,14 @@ public: int8 aggr_level; /* nesting level of the aggregating subquery */ int8 max_arg_level; /* max level of unbound column references */ int8 max_sum_func_level;/* max level of aggregation for embedded functions */ - bool quick_group; /* If incremental update of fields */ + + /* + true (the default value) means this aggregate function can be computed + with TemporaryTableWithPartialSums algorithm (see end_update()). + false means this aggregate function needs OrderedGroupBy algorithm (see + end_write_group()). + */ + bool quick_group; /* This list is used by the check for mixing non aggregated fields and sum functions in the ONLY_FULL_GROUP_BY_MODE. We save all outer fields diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc index 35e24a521e4..8879d7daf5d 100644 --- a/sql/mysql_install_db.cc +++ b/sql/mysql_install_db.cc @@ -263,7 +263,7 @@ static char *get_plugindir() { static char plugin_dir[2*MAX_PATH]; get_basedir(plugin_dir, sizeof(plugin_dir), mysqld_path); - strcat(plugin_dir, "/" STR(INSTALL_PLUGINDIR)); + safe_strcat(plugin_dir, sizeof(plugin_dir), "/" STR(INSTALL_PLUGINDIR)); if (access(plugin_dir, 0) == 0) return plugin_dir; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index b65cd6d81bb..ebefaf00282 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4852,12 +4852,11 @@ static int init_server_components() else // full wsrep initialization { // add basedir/bin to PATH to resolve wsrep script names - char* const tmp_path= (char*)my_alloca(strlen(mysql_home) + - strlen("/bin") + 1); + size_t tmp_path_size= strlen(mysql_home) + 5; /* including "/bin" */ + char* const tmp_path= (char*)my_alloca(tmp_path_size); if (tmp_path) { - strcpy(tmp_path, mysql_home); - strcat(tmp_path, "/bin"); + snprintf(tmp_path, tmp_path_size, "%s/bin", mysql_home); wsrep_prepend_PATH(tmp_path); } else @@ -5668,8 +5667,9 @@ int 
mysqld_main(int argc, char **argv) char real_server_version[2 * SERVER_VERSION_LENGTH + 10]; set_server_version(real_server_version, sizeof(real_server_version)); - strcat(real_server_version, "' as '"); - strcat(real_server_version, server_version); + safe_strcat(real_server_version, sizeof(real_server_version), "' as '"); + safe_strcat(real_server_version, sizeof(real_server_version), + server_version); sql_print_information(ER_DEFAULT(ER_STARTUP), my_progname, real_server_version, @@ -7916,7 +7916,8 @@ static int mysql_init_variables(void) } else my_path(prg_dev, my_progname, "mysql/bin"); - strcat(prg_dev,"/../"); // Remove 'bin' to get base dir + // Remove 'bin' to get base dir + safe_strcat(prg_dev, sizeof(prg_dev), "/../"); cleanup_dirname(mysql_home,prg_dev); } #else diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 8409a9cd89c..6332e02453d 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -56,8 +56,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, rgi->event_relay_log_pos= qev->event_relay_log_pos; rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos; strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name); - if (!(ev->is_artificial_event() || ev->is_relay_log_event() || - (ev->when == 0))) + if (event_can_update_last_master_timestamp(ev)) rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time; err= apply_event_and_update_pos_for_parallel(ev, thd, rgi); diff --git a/sql/slave.cc b/sql/slave.cc index 715fa8cd69e..ae273da0456 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4192,10 +4192,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, the user might be surprised to see a claim that the slave is up to date long before those queued events are actually executed. 
*/ - if (!rli->mi->using_parallel() && - !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0))) + if ((!rli->mi->using_parallel()) && event_can_update_last_master_timestamp(ev)) { rli->last_master_timestamp= ev->when + (time_t) ev->exec_time; + rli->sql_thread_caught_up= false; DBUG_ASSERT(rli->last_master_timestamp >= 0); } @@ -4247,6 +4247,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, if (rli->mi->using_parallel()) { + if (unlikely((rli->last_master_timestamp == 0 || + rli->sql_thread_caught_up) && + event_can_update_last_master_timestamp(ev))) + { + if (rli->last_master_timestamp < ev->when) + { + rli->last_master_timestamp= ev->when; + rli->sql_thread_caught_up= false; + } + } + int res= rli->parallel.do_event(serial_rgi, ev, event_size); /* In parallel replication, we need to update the relay log position @@ -4267,7 +4278,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, This is the case for pre-10.0 events without GTID, and for handling slave_skip_counter. */ - if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0))) + if (event_can_update_last_master_timestamp(ev)) { /* Ignore FD's timestamp as it does not reflect the slave execution @@ -4275,7 +4286,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, data modification event execution last long all this time Seconds_Behind_Master is zero. 
*/ - if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT && + rli->last_master_timestamp < ev->when) rli->last_master_timestamp= ev->when + (time_t) ev->exec_time; DBUG_ASSERT(rli->last_master_timestamp >= 0); @@ -7615,7 +7627,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) if (hot_log) mysql_mutex_unlock(log_lock); - rli->sql_thread_caught_up= false; DBUG_RETURN(ev); } if (opt_reckless_slave) // For mysql-test @@ -7779,7 +7790,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd); // re-acquire data lock since we released it earlier mysql_mutex_lock(&rli->data_lock); - rli->sql_thread_caught_up= false; continue; } /* @@ -7970,12 +7980,19 @@ event(errno: %d cur_log->error: %d)", { sql_print_information("Error reading relay log event: %s", "slave SQL thread was killed"); - DBUG_RETURN(0); + goto end; } err: if (errmsg) sql_print_error("Error reading relay log event: %s", errmsg); + +end: + /* + Set that we are not caught up so if there is a hang/problem on restart, + Seconds_Behind_Master will still grow. + */ + rli->sql_thread_caught_up= false; DBUG_RETURN(0); } #ifdef WITH_WSREP diff --git a/sql/slave.h b/sql/slave.h index 5ca6054a178..e2bd5cec1b9 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -49,6 +49,7 @@ #include "rpl_filter.h" #include "rpl_tblmap.h" #include "rpl_gtid.h" +#include "log_event.h" #define SLAVE_NET_TIMEOUT 60 @@ -293,6 +294,17 @@ extern char *report_host, *report_password; extern I_List<THD> threads; +/* + Check that a binlog event (read from the relay log) is valid to update + last_master_timestamp. That is, a valid event is one with a consistent + timestamp which originated from a primary server. 
+*/ +static inline bool event_can_update_last_master_timestamp(Log_event *ev) +{ + return ev && !(ev->is_artificial_event() || ev->is_relay_log_event() || + (ev->when == 0)); +} + #else #define close_active_mi() /* no-op */ #endif /* HAVE_REPLICATION */ diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 060dcc059b5..2967339d404 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -37,7 +37,8 @@ const LEX_CSTRING msg_status= {STRING_WITH_LEN("status")}; /* Prepare, run and cleanup for mysql_recreate_table() */ -static bool admin_recreate_table(THD *thd, TABLE_LIST *table_list) +static bool admin_recreate_table(THD *thd, TABLE_LIST *table_list, + Recreate_info *recreate_info) { bool result_code; DBUG_ENTER("admin_recreate_table"); @@ -58,7 +59,7 @@ static bool admin_recreate_table(THD *thd, TABLE_LIST *table_list) DEBUG_SYNC(thd, "ha_admin_try_alter"); tmp_disable_binlog(thd); // binlogging is done by caller if wanted result_code= (thd->open_temporary_tables(table_list) || - mysql_recreate_table(thd, table_list, false)); + mysql_recreate_table(thd, table_list, recreate_info, false)); reenable_binlog(thd); /* mysql_recreate_table() can push OK or ERROR. @@ -528,6 +529,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, bool open_error; bool collect_eis= FALSE; bool open_for_modify= org_open_for_modify; + Recreate_info recreate_info; DBUG_PRINT("admin", ("table: '%s'.'%s'", db, table->table_name.str)); DEBUG_SYNC(thd, "admin_command_kill_before_modify"); @@ -787,7 +789,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, { /* We use extra_open_options to be able to open crashed tables */ thd->open_options|= extra_open_options; - result_code= admin_recreate_table(thd, table); + result_code= admin_recreate_table(thd, table, &recreate_info) ? 
+ HA_ADMIN_FAILED : HA_ADMIN_OK; thd->open_options&= ~extra_open_options; goto send_result; } @@ -968,12 +971,31 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, repair was not implemented and we need to upgrade the table to a new version so we recreate the table with ALTER TABLE */ - result_code= admin_recreate_table(thd, table); + result_code= admin_recreate_table(thd, table, &recreate_info); } send_result: lex->cleanup_after_one_table_open(); thd->clear_error(); // these errors shouldn't get client + + if (recreate_info.records_duplicate()) + { + protocol->prepare_for_resend(); + protocol->store(&table_name, system_charset_info); + protocol->store((char*) operator_name, system_charset_info); + protocol->store(warning_level_names[Sql_condition::WARN_LEVEL_WARN].str, + warning_level_names[Sql_condition::WARN_LEVEL_WARN].length, + system_charset_info); + char buf[80]; + size_t length= my_snprintf(buf, sizeof(buf), + "Number of rows changed from %u to %u", + (uint) recreate_info.records_processed(), + (uint) recreate_info.records_copied()); + protocol->store(buf, length, system_charset_info); + if (protocol->write()) + goto err; + } + { Diagnostics_area::Sql_condition_iterator it= thd->get_stmt_da()->sql_conditions(); @@ -1083,7 +1105,7 @@ send_result_message: *save_next_global= table->next_global; table->next_local= table->next_global= 0; - result_code= admin_recreate_table(thd, table); + result_code= admin_recreate_table(thd, table, &recreate_info); trans_commit_stmt(thd); trans_commit(thd); close_thread_tables(thd); @@ -1278,6 +1300,8 @@ send_result_message: goto err; DEBUG_SYNC(thd, "admin_command_kill_after_modify"); } + thd->resume_subsequent_commits(suspended_wfc); + DBUG_EXECUTE_IF("inject_analyze_table_sleep", my_sleep(500000);); if (is_table_modified && is_cmd_replicated && (!opt_readonly || thd->slave_thread) && !thd->lex->no_write_to_binlog) { @@ -1287,10 +1311,8 @@ send_result_message: if (res) goto err; } - my_eof(thd); - 
thd->resume_subsequent_commits(suspended_wfc); - DBUG_EXECUTE_IF("inject_analyze_table_sleep", my_sleep(500000);); + DBUG_RETURN(FALSE); err: @@ -1438,6 +1460,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd) LEX *m_lex= thd->lex; TABLE_LIST *first_table= m_lex->first_select_lex()->table_list.first; bool res= TRUE; + Recreate_info recreate_info; DBUG_ENTER("Sql_cmd_optimize_table::execute"); if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table, @@ -1446,7 +1469,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd) WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table); res= (specialflag & SPECIAL_NO_NEW_FUNC) ? - mysql_recreate_table(thd, first_table, true) : + mysql_recreate_table(thd, first_table, &recreate_info, true) : mysql_admin_table(thd, first_table, &m_lex->check_opt, "optimize", TL_WRITE, 1, 0, 0, 0, &handler::ha_optimize, 0, true); diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index dee5ea2fe4b..a4b0c24cc6b 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -550,9 +550,11 @@ bool Sql_cmd_alter_table::execute(THD *thd) thd->work_part_info= 0; #endif + Recreate_info recreate_info; result= mysql_alter_table(thd, &select_lex->db, &lex->name, &create_info, first_table, + &recreate_info, &alter_info, select_lex->order_list.elements, select_lex->order_list.first, diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d29f5832434..66c9e3da58d 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -8276,6 +8276,20 @@ bool THD::timestamp_to_TIME(MYSQL_TIME *ltime, my_time_t ts, return 0; } + +void THD::my_ok_with_recreate_info(const Recreate_info &info, + ulong warn_count) +{ + char buf[80]; + my_snprintf(buf, sizeof(buf), + ER_THD(this, ER_INSERT_INFO), + (ulong) info.records_processed(), + (ulong) info.records_duplicate(), + warn_count); + my_ok(this, info.records_processed(), 0L, buf); +} + + THD_list_iterator *THD_list_iterator::iterator() { return &server_threads; diff --git a/sql/sql_class.h b/sql/sql_class.h index 
1dced52c133..ef4fc58f4e2 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -243,6 +243,29 @@ public: }; +class Recreate_info +{ + ha_rows m_records_copied; + ha_rows m_records_duplicate; +public: + Recreate_info() + :m_records_copied(0), + m_records_duplicate(0) + { } + Recreate_info(ha_rows records_copied, + ha_rows records_duplicate) + :m_records_copied(records_copied), + m_records_duplicate(records_duplicate) + { } + ha_rows records_copied() const { return m_records_copied; } + ha_rows records_duplicate() const { return m_records_duplicate; } + ha_rows records_processed() const + { + return m_records_copied + m_records_duplicate; + } +}; + + #define TC_HEURISTIC_RECOVER_COMMIT 1 #define TC_HEURISTIC_RECOVER_ROLLBACK 2 extern ulong tc_heuristic_recover; @@ -4102,6 +4125,8 @@ public: inline bool vio_ok() const { return TRUE; } inline bool is_connected() { return TRUE; } #endif + + void my_ok_with_recreate_info(const Recreate_info &info, ulong warn_count); /** Mark the current error as fatal. Warning: this does not set any error, it sets a property of the error, so must be @@ -5963,6 +5988,12 @@ public: uint sum_func_count; uint hidden_field_count; uint group_parts,group_length,group_null_parts; + + /* + If we're doing a GROUP BY operation, shows which one is used: + true TemporaryTableWithPartialSums algorithm (see end_update()). + false OrderedGroupBy algorithm (see end_write_group()). + */ uint quick_group; /** Enabled when we have atleast one outer_sum_func. Needed when used diff --git a/sql/sql_cte.cc b/sql/sql_cte.cc index 91300e3a326..e22aa1ebd6f 100644 --- a/sql/sql_cte.cc +++ b/sql/sql_cte.cc @@ -102,49 +102,6 @@ bool LEX::check_dependencies_in_with_clauses() /** @brief - Resolve references to CTE in specification of hanging CTE - - @details - A CTE to which there are no references in the query is called hanging CTE. - Although such CTE is not used for execution its specification must be - subject to context analysis. 
All errors concerning references to - non-existing tables or fields occurred in the specification must be - reported as well as all other errors caught at the prepare stage. - The specification of a hanging CTE might contain references to other - CTE outside of the specification and within it if the specification - contains a with clause. This function resolves all such references for - all hanging CTEs encountered in the processed query. - - @retval - false on success - true on failure -*/ - -bool -LEX::resolve_references_to_cte_in_hanging_cte() -{ - for (With_clause *with_clause= with_clauses_list; - with_clause; with_clause= with_clause->next_with_clause) - { - for (With_element *with_elem= with_clause->with_list.first; - with_elem; with_elem= with_elem->next) - { - if (!with_elem->is_referenced()) - { - TABLE_LIST *first_tbl= - with_elem->spec->first_select()->table_list.first; - TABLE_LIST **with_elem_end_pos= with_elem->head->tables_pos.end_pos; - if (first_tbl && resolve_references_to_cte(first_tbl, with_elem_end_pos)) - return true; - } - } - } - return false; -} - - -/** - @brief Resolve table references to CTE from a sub-chain of table references @param tables Points to the beginning of the sub-chain @@ -289,8 +246,6 @@ LEX::check_cte_dependencies_and_resolve_references() return false; if (resolve_references_to_cte(query_tables, query_tables_last)) return true; - if (resolve_references_to_cte_in_hanging_cte()) - return true; return false; } @@ -489,47 +444,33 @@ With_element *find_table_def_in_with_clauses(TABLE_LIST *tbl, st_unit_ctxt_elem *ctxt) { With_element *found= 0; + st_select_lex_unit *top_unit= 0; for (st_unit_ctxt_elem *unit_ctxt_elem= ctxt; unit_ctxt_elem; unit_ctxt_elem= unit_ctxt_elem->prev) { st_select_lex_unit *unit= unit_ctxt_elem->unit; With_clause *with_clause= unit->with_clause; - /* - First look for the table definition in the with clause attached to 'unit' - if there is any such clause. 
- */ if (with_clause) { - found= with_clause->find_table_def(tbl, NULL); + /* + If the reference to tbl that has to be resolved belongs to + the FROM clause of a descendant of top_unit->with_element + and this with element belongs to with_clause then this + element must be used as the barrier for the search in the + list of CTEs from with_clause unless the clause contains + RECURSIVE. + */ + With_element *barrier= 0; + if (top_unit && !with_clause->with_recursive && + top_unit->with_element && + top_unit->with_element->get_owner() == with_clause) + barrier= top_unit->with_element; + found= with_clause->find_table_def(tbl, barrier); if (found) break; } - /* - If 'unit' is the unit that defines a with element then reset 'unit' - to the unit whose attached with clause contains this with element. - */ - With_element *with_elem= unit->with_element; - if (with_elem) - { - if (!(unit_ctxt_elem= unit_ctxt_elem->prev)) - break; - unit= unit_ctxt_elem->unit; - } - with_clause= unit->with_clause; - /* - Now look for the table definition in this with clause. If the with clause - contains RECURSIVE the search is performed through all CTE definitions in - clause, otherwise up to the definition of 'with_elem' unless it is NULL. - */ - if (with_clause) - { - found= with_clause->find_table_def(tbl, - with_clause->with_recursive ? 
- NULL : with_elem); - if (found) - break; - } + top_unit= unit; } return found; } diff --git a/sql/sql_cte.h b/sql/sql_cte.h index e0fbd79803b..6a1f67d3258 100644 --- a/sql/sql_cte.h +++ b/sql/sql_cte.h @@ -326,8 +326,6 @@ public: friend bool LEX::resolve_references_to_cte(TABLE_LIST *tables, TABLE_LIST **tables_last); - friend - bool LEX::resolve_references_to_cte_in_hanging_cte(); }; const uint max_number_of_elements_in_with_clause= sizeof(table_map)*8; @@ -441,9 +439,6 @@ public: friend bool LEX::check_dependencies_in_with_clauses(); - - friend - bool LEX::resolve_references_to_cte_in_hanging_cte(); }; inline diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index d9796fb4380..132934773f9 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -3962,7 +3962,8 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u) lex->current_select->join->select_options|= OPTION_BUFFER_RESULT; } else if (!(lex->current_select->options & OPTION_BUFFER_RESULT) && - thd->locked_tables_mode <= LTM_LOCK_TABLES) + thd->locked_tables_mode <= LTM_LOCK_TABLES && + !table->s->long_unique_table) { /* We must not yet prepare the result table if it is the same as one of the diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 413cd0f18e7..fad9aabdc90 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1308,8 +1308,6 @@ void LEX::start(THD *thd_arg) stmt_var_list.empty(); proc_list.elements=0; - save_group_list.empty(); - save_order_list.empty(); win_ref= NULL; win_frame= NULL; frame_top_bound= NULL; diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 731ddbaefac..374c923a6b9 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1116,6 +1116,7 @@ public: group_list_ptrs, and re-establish the original list before each execution. 
*/ SQL_I_List<ORDER> group_list; + SQL_I_List<ORDER> save_group_list; Group_list_ptrs *group_list_ptrs; List<Item> item_list; /* list of fields & expressions */ @@ -1181,6 +1182,7 @@ public: const char *type; /* type of select for EXPLAIN */ SQL_I_List<ORDER> order_list; /* ORDER clause */ + SQL_I_List<ORDER> save_order_list; SQL_I_List<ORDER> gorder_list; Item *select_limit, *offset_limit; /* LIMIT clause parameters */ bool is_set_query_expr_tail; @@ -3537,8 +3539,6 @@ public: } - SQL_I_List<ORDER> save_group_list; - SQL_I_List<ORDER> save_order_list; LEX_CSTRING *win_ref; Window_frame *win_frame; Window_frame_bound *frame_top_bound; @@ -4778,12 +4778,11 @@ public: const LEX_CSTRING *constraint_name, Table_ident *ref_table_name, DDL_options ddl_options); + bool check_dependencies_in_with_clauses(); - bool resolve_references_to_cte_in_hanging_cte(); bool check_cte_dependencies_and_resolve_references(); bool resolve_references_to_cte(TABLE_LIST *tables, TABLE_LIST **tables_last); - }; diff --git a/sql/sql_list.h b/sql/sql_list.h index 9d159071ff3..a9ab5415d5a 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -54,7 +54,7 @@ public: { elements= tmp.elements; first= tmp.first; - next= tmp.next; + next= elements ? 
tmp.next : &first;; return *this; } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index b56e5742845..75381c9a894 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -4270,8 +4270,10 @@ mysql_execute_command(THD *thd) WSREP_TO_ISOLATION_BEGIN(first_table->db.str, first_table->table_name.str, NULL); + Recreate_info recreate_info; res= mysql_alter_table(thd, &first_table->db, &first_table->table_name, - &create_info, first_table, &alter_info, + &create_info, first_table, + &recreate_info, &alter_info, 0, (ORDER*) 0, 0, lex->if_exists()); break; } @@ -8894,8 +8896,8 @@ TABLE_LIST *st_select_lex::convert_right_join() void st_select_lex::prepare_add_window_spec(THD *thd) { LEX *lex= thd->lex; - lex->save_group_list= group_list; - lex->save_order_list= order_list; + save_group_list= group_list; + save_order_list= order_list; lex->win_ref= NULL; lex->win_frame= NULL; lex->frame_top_bound= NULL; @@ -8922,8 +8924,8 @@ bool st_select_lex::add_window_def(THD *thd, win_part_list_ptr, win_order_list_ptr, win_frame); - group_list= thd->lex->save_group_list; - order_list= thd->lex->save_order_list; + group_list= save_group_list; + order_list= save_order_list; if (parsing_place != SELECT_LIST) { fields_in_window_functions+= win_part_list_ptr->elements + @@ -8949,8 +8951,8 @@ bool st_select_lex::add_window_spec(THD *thd, win_part_list_ptr, win_order_list_ptr, win_frame); - group_list= thd->lex->save_group_list; - order_list= thd->lex->save_order_list; + group_list= save_group_list; + order_list= save_order_list; if (parsing_place != SELECT_LIST) { fields_in_window_functions+= win_part_list_ptr->elements + diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 8c2dbc91ec2..c50a59a62cb 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -342,7 +342,7 @@ static bool register_builtin(struct st_maria_plugin *, struct st_plugin_int *, struct st_plugin_int **); static void unlock_variables(THD *thd, struct system_variables *vars); static void cleanup_variables(struct 
system_variables *vars); -static void plugin_vars_free_values(sys_var *vars); +static void plugin_vars_free_values(st_mysql_sys_var **vars); static void restore_ptr_backup(uint n, st_ptr_backup *backup); static void intern_plugin_unlock(LEX *lex, plugin_ref plugin); static void reap_plugins(void); @@ -1290,7 +1290,7 @@ static void plugin_del(struct st_plugin_int *plugin) DBUG_ENTER("plugin_del"); mysql_mutex_assert_owner(&LOCK_plugin); /* Free allocated strings before deleting the plugin. */ - plugin_vars_free_values(plugin->system_vars); + plugin_vars_free_values(plugin->plugin->system_vars); restore_ptr_backup(plugin->nbackups, plugin->ptr_backup); if (plugin->plugin_dl) { @@ -2941,6 +2941,7 @@ sys_var *find_sys_var(THD *thd, const char *str, size_t length, /* called by register_var, construct_options and test_plugin_options. Returns the 'bookmark' for the named variable. + returns null for non thd-local variables. LOCK_system_variables_hash should be at least read locked */ static st_bookmark *find_bookmark(const char *plugin, const char *name, @@ -2997,7 +2998,6 @@ static size_t var_storage_size(int flags) /* returns a bookmark for thd-local variables, creating if neccessary. - returns null for non thd-local variables. Requires that a write lock is obtained on LOCK_system_variables_hash */ static st_bookmark *register_var(const char *plugin, const char *name, @@ -3351,27 +3351,35 @@ void plugin_thdvar_cleanup(THD *thd) variables are no longer accessible and the value space is lost. Note that only string values with PLUGIN_VAR_MEMALLOC are allocated and must be freed. 
- - @param[in] vars Chain of system variables of a plugin */ -static void plugin_vars_free_values(sys_var *vars) +static void plugin_vars_free_values(st_mysql_sys_var **vars) { DBUG_ENTER("plugin_vars_free_values"); - for (sys_var *var= vars; var; var= var->next) + if (!vars) + DBUG_VOID_RETURN; + + while(st_mysql_sys_var *var= *vars++) { - sys_var_pluginvar *piv= var->cast_pluginvar(); - if (piv && - ((piv->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) && - (piv->plugin_var->flags & PLUGIN_VAR_MEMALLOC)) + if ((var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + var->flags & PLUGIN_VAR_MEMALLOC) { - /* Free the string from global_system_variables. */ - char **valptr= (char**) piv->real_value_ptr(NULL, OPT_GLOBAL); + char **val; + if (var->flags & PLUGIN_VAR_THDLOCAL) + { + st_bookmark *v= find_bookmark(0, var->name, var->flags); + if (!v) + continue; + val= (char**)(global_system_variables.dynamic_variables_ptr + v->offset); + } + else + val= *(char***) (var + 1); + DBUG_PRINT("plugin", ("freeing value for: '%s' addr: %p", - var->name.str, valptr)); - my_free(*valptr); - *valptr= NULL; + var->name, val)); + my_free(*val); + *val= NULL; } } DBUG_VOID_RETURN; @@ -4031,7 +4039,7 @@ static my_option *construct_help_options(MEM_ROOT *mem_root, bzero(opts, sizeof(my_option) * count); /** - some plugin variables (those that don't have PLUGIN_VAR_NOSYSVAR flag) + some plugin variables have their names prefixed with the plugin name. Restore the names here to get the correct (not double-prefixed) help text. We won't need @@sysvars anymore and don't care about their proper names. 
@@ -4143,9 +4151,6 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, char *varname; sys_var *v; - if (o->flags & PLUGIN_VAR_NOSYSVAR) - continue; - tmp_backup[tmp->nbackups++].save(&o->name); if ((var= find_bookmark(tmp->name.str, o->name, o->flags))) { @@ -4161,6 +4166,12 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, my_casedn_str(&my_charset_latin1, varname); convert_dash_to_underscore(varname, len-1); } + if (o->flags & PLUGIN_VAR_NOSYSVAR) + { + o->name= varname; + continue; + } + const char *s= o->flags & PLUGIN_VAR_DEPRECATED ? "" : NULL; v= new (mem_root) sys_var_pluginvar(&chain, varname, tmp, o, s); v->test_load= (var ? &var->loaded : &static_unload); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index a721899a8be..42711270f60 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -3539,15 +3539,26 @@ bool JOIN::make_aggr_tables_info() /* If we have different sort & group then we must sort the data by group - and copy it to another tmp table + and copy it to another tmp table. + This code is also used if we are using distinct something we haven't been able to store in the temporary table yet like SEC_TO_TIME(SUM(...)). + + 3. Also, this is used when + - the query has Window functions, + - the GROUP BY operation is done with OrderedGroupBy algorithm. + In this case, the first temptable will contain pre-GROUP-BY data. Force + the creation of the second temporary table. Post-GROUP-BY dataset will be + written there, and then Window Function processing code will be able to + process it. 
*/ if ((group_list && (!test_if_subpart(group_list, order) || select_distinct)) || - (select_distinct && tmp_table_param.using_outer_summary_function)) - { /* Must copy to another table */ + (select_distinct && tmp_table_param.using_outer_summary_function) || + (group_list && !tmp_table_param.quick_group && // (3) + select_lex->have_window_funcs())) // (3) + { /* Must copy to another table */ DBUG_PRINT("info",("Creating group table")); calc_group_buffer(this, group_list); @@ -7646,6 +7657,7 @@ best_access_path(JOIN *join, rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables Json_writer_object trace_access_idx(thd); + double eq_ref_rows= 0.0, eq_ref_cost= 0.0; /* full text keys require special treatment */ @@ -7690,7 +7702,10 @@ best_access_path(JOIN *join, tmp= adjust_quick_cost(table->opt_range[key].cost, 1); else tmp= table->file->avg_io_cost(); - tmp*= prev_record_reads(join_positions, idx, found_ref); + eq_ref_rows= prev_record_reads(join_positions, idx, + found_ref); + tmp*= eq_ref_rows; + eq_ref_cost= tmp; records=1.0; } else @@ -7992,7 +8007,27 @@ best_access_path(JOIN *join, (table->file->index_flags(start_key->key,0,1) & HA_DO_RANGE_FILTER_PUSHDOWN)) { - double rows= record_count * records; + double rows; + if (type == JT_EQ_REF) + { + /* + Treat EQ_REF access in a special way: + 1. We have no cost for index-only read. Assume its cost is 50% of + the cost of the full read. + + 2. A regular ref access will do #record_count lookups, but eq_ref + has "lookup cache" which reduces the number of lookups made. + The estimation code uses prev_record_reads() call to estimate: + + tmp = prev_record_reads(join_positions, idx, found_ref); + + Set the effective number of rows from "tmp" here. 
+ */ + keyread_tmp= COST_ADD(eq_ref_cost / 2, s->startup_cost); + rows= eq_ref_rows; + } + else + rows= record_count * records; /* If we use filter F with selectivity s then the cost of fetching data @@ -8035,10 +8070,6 @@ best_access_path(JOIN *join, we cannot use filters as the cost calculation below would cause tmp to become negative. The future resolution is to not limit cost with worst_seek. - - We cannot use filter with JT_EQ_REF as in this case 'tmp' is - number of rows from prev_record_read() and keyread_tmp is 0. These - numbers are not usable with rowid filter code. */ double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0); if (!(records < s->worst_seeks && @@ -8046,7 +8077,7 @@ best_access_path(JOIN *join, trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping"); else if (access_cost_factor <= 0.0) trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0"); - else if (type != JT_EQ_REF) + else { filter= table->best_range_rowid_filter_for_partial_join(start_key->key, @@ -22380,11 +22411,17 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), /* @brief - Perform a GROUP BY operation over a stream of rows ordered by their group. - The result is sent into join->result. + Perform OrderedGroupBy operation and write the output into join->result. @detail - Also applies HAVING, etc. + The input stream is ordered by the GROUP BY expression, so groups come + one after another. We only need to accumulate the aggregate value, when + a GROUP BY group ends, check the HAVING and send the group. + + Note that the output comes in the GROUP BY order, which is required by + the MySQL's GROUP BY semantics. No further sorting is needed. + + @seealso end_write_group() also implements OrderedGroupBy */ enum_nested_loop_state @@ -22574,13 +22611,26 @@ end: /* @brief - Perform a GROUP BY operation over rows coming in arbitrary order. - - This is done by looking up the group in a temp.table and updating group - values. 
+ Perform GROUP BY operation over rows coming in arbitrary order: use + TemporaryTableWithPartialSums algorithm. + + @detail + The TemporaryTableWithPartialSums algorithm is: + + CREATE TEMPORARY TABLE tmp ( + group_by_columns PRIMARY KEY, + partial_sum + ); + + for each row R in join output { + INSERT INTO tmp (R.group_by_columns, R.sum_value) + ON DUPLICATE KEY UPDATE partial_sum=partial_sum + R.sum_value; + } @detail Also applies HAVING, etc. + + @seealso end_unique_update() */ static enum_nested_loop_state @@ -22730,13 +22780,15 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), /* @brief - Perform a GROUP BY operation over a stream of rows ordered by their group. - Write the result into a temporary table. + Perform OrderedGroupBy operation and write the output into the temporary + table (join_tab->table). @detail - Also applies HAVING, etc. + The input stream is ordered by the GROUP BY expression, so groups come + one after another. We only need to accumulate the aggregate value, when + a GROUP BY group ends, check the HAVING and write the group. - The rows are written into temptable so e.g. filesort can read them. + @seealso end_send_group() also implements OrderedGroupBy */ enum_nested_loop_state diff --git a/sql/sql_select.h b/sql/sql_select.h index 807b4115fec..fb36fd90a0d 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1434,12 +1434,30 @@ public: (set in make_join_statistics()) */ bool impossible_where; - List<Item> all_fields; ///< to store all fields that used in query + + /* + All fields used in the query processing. + + Initially this is a list of fields from the query's SQL text. + + Then, ORDER/GROUP BY and Window Function code add columns that need to + be saved to be available in the post-group-by context. These extra columns + are added to the front, because this->all_fields points to the suffix of + this list. 
+ */ + List<Item> all_fields; ///Above list changed to use temporary table List<Item> tmp_all_fields1, tmp_all_fields2, tmp_all_fields3; ///Part, shared with list above, emulate following list List<Item> tmp_fields_list1, tmp_fields_list2, tmp_fields_list3; - List<Item> &fields_list; ///< hold field list passed to mysql_select + + /* + The original field list as it was passed to mysql_select(). This refers + to select_lex->item_list. + CAUTION: this list is a suffix of this->all_fields list, that is, it shares + elements with that list! + */ + List<Item> &fields_list; List<Item> procedure_fields_list; int error; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 24ce892fb12..15be1c66f2a 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -10018,6 +10018,7 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, const LEX_CSTRING *new_name, HA_CREATE_INFO *create_info, TABLE_LIST *table_list, + Recreate_info *recreate_info, Alter_info *alter_info, uint order_num, ORDER *order, bool ignore, bool if_exists) @@ -11302,11 +11303,10 @@ end_inplace: end_temporary: thd->variables.option_bits&= ~OPTION_BIN_COMMIT_OFF; - my_snprintf(alter_ctx.tmp_buff, sizeof(alter_ctx.tmp_buff), - ER_THD(thd, ER_INSERT_INFO), - (ulong) (copied + deleted), (ulong) deleted, - (ulong) thd->get_stmt_da()->current_statement_warn_count()); - my_ok(thd, copied + deleted, 0L, alter_ctx.tmp_buff); + *recreate_info= Recreate_info(copied, deleted); + thd->my_ok_with_recreate_info(*recreate_info, + (ulong) thd->get_stmt_da()-> + current_statement_warn_count()); DEBUG_SYNC(thd, "alter_table_inplace_trans_commit"); DBUG_RETURN(false); @@ -11816,7 +11816,8 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, Like mysql_alter_table(). 
*/ -bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, bool table_copy) +bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, + Recreate_info *recreate_info, bool table_copy) { HA_CREATE_INFO create_info; Alter_info alter_info; @@ -11842,8 +11843,11 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, bool table_copy) Alter_info::ALTER_TABLE_ALGORITHM_COPY); bool res= mysql_alter_table(thd, &null_clex_str, &null_clex_str, &create_info, - table_list, &alter_info, 0, - (ORDER *) 0, 0, 0); + table_list, recreate_info, &alter_info, 0, + (ORDER *) 0, + // Ignore duplicate records on REPAIR + thd->lex->sql_command == SQLCOM_REPAIR, + 0); table_list->next_global= next_table; DBUG_RETURN(res); } diff --git a/sql/sql_table.h b/sql/sql_table.h index 53741d934cc..f5c783f6f3d 100644 --- a/sql/sql_table.h +++ b/sql/sql_table.h @@ -224,6 +224,7 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, const LEX_CSTRING *new_name, HA_CREATE_INFO *create_info, TABLE_LIST *table_list, + class Recreate_info *recreate_info, Alter_info *alter_info, uint order_num, ORDER *order, bool ignore, bool if_exists); @@ -231,7 +232,8 @@ bool mysql_compare_tables(TABLE *table, Alter_info *alter_info, HA_CREATE_INFO *create_info, bool *metadata_equal); -bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, bool table_copy); +bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, + class Recreate_info *recreate_info, bool table_copy); bool mysql_create_like_table(THD *thd, TABLE_LIST *table, TABLE_LIST *src_table, Table_specification_st *create_info); diff --git a/sql/sql_type.h b/sql/sql_type.h index 8a3a3776b52..5d2f08cb1e3 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -124,6 +124,32 @@ enum scalar_comparison_op }; +class Hasher +{ + ulong m_nr1; + ulong m_nr2; +public: + Hasher(): m_nr1(1), m_nr2(4) + { } + void add_null() + { + m_nr1^= (m_nr1 << 1) | 1; + } + void add(CHARSET_INFO *cs, const uchar *str, size_t length) + { + 
cs->coll->hash_sort(cs, str, length, &m_nr1, &m_nr2); + } + void add(CHARSET_INFO *cs, const char *str, size_t length) + { + add(cs, (const uchar *) str, length); + } + uint32 finalize() const + { + return (uint32) m_nr1; + } +}; + + enum partition_value_print_mode_t { PARTITION_VALUE_PRINT_MODE_SHOW= 0, diff --git a/sql/table.cc b/sql/table.cc index 7334f0143e6..98a9665ba27 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -1097,6 +1097,18 @@ static void mysql57_calculate_null_position(TABLE_SHARE *share, } } + +Item_func_hash *TABLE_SHARE::make_long_hash_func(THD *thd, + MEM_ROOT *mem_root, + List<Item> *field_list) + const +{ + if (old_long_hash_function()) + return new (mem_root) Item_func_hash_mariadb_100403(thd, *field_list); + return new (mem_root) Item_func_hash(thd, *field_list); +} + + /** Parse TABLE_SHARE::vcol_defs unpack_vcol_info_from_frm @@ -1308,7 +1320,10 @@ bool parse_vcol_defs(THD *thd, MEM_ROOT *mem_root, TABLE *table, list_item= new (mem_root) Item_field(thd, keypart->field); field_list->push_back(list_item, mem_root); } - Item_func_hash *hash_item= new(mem_root)Item_func_hash(thd, *field_list); + + Item_func_hash *hash_item= table->s->make_long_hash_func(thd, mem_root, + field_list); + Virtual_column_info *v= new (mem_root) Virtual_column_info(); field->vcol_info= v; field->vcol_info->expr= hash_item; diff --git a/sql/table.h b/sql/table.h index d704f3ce05e..0b4faba1b0b 100644 --- a/sql/table.h +++ b/sql/table.h @@ -55,6 +55,7 @@ class Item; /* Needed by ORDER */ typedef Item (*Item_ptr); class Item_subselect; class Item_field; +class Item_func_hash; class GRANT_TABLE; class st_select_lex_unit; class st_select_lex; @@ -1165,6 +1166,21 @@ struct TABLE_SHARE void free_frm_image(const uchar *frm); void set_overlapped_keys(); + + bool old_long_hash_function() const + { + return mysql_version < 100428 || + (mysql_version >= 100500 && mysql_version < 100519) || + (mysql_version >= 100600 && mysql_version < 100612) || + (mysql_version >= 100700 && 
mysql_version < 100708) || + (mysql_version >= 100800 && mysql_version < 100807) || + (mysql_version >= 100900 && mysql_version < 100905) || + (mysql_version >= 101000 && mysql_version < 101003) || + (mysql_version >= 101100 && mysql_version < 101102); + } + Item_func_hash *make_long_hash_func(THD *thd, + MEM_ROOT *mem_root, + List<Item> *field_list) const; }; /* not NULL, but cannot be dereferenced */ diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 219bf9fc9c7..db4a2a2e7b9 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -995,13 +995,19 @@ void wsrep_init_startup (bool sst_first) With mysqldump SST (!sst_first) wait until the server reaches joiner state and proceed to accepting connections. */ + int err= 0; if (sst_first) { - server_state.wait_until_state(Wsrep_server_state::s_initializing); + err= server_state.wait_until_state(Wsrep_server_state::s_initializing); } else { - server_state.wait_until_state(Wsrep_server_state::s_joiner); + err= server_state.wait_until_state(Wsrep_server_state::s_joiner); + } + if (err) + { + WSREP_ERROR("Wsrep startup was interrupted"); + unireg_abort(1); } } @@ -1107,7 +1113,11 @@ void wsrep_stop_replication(THD *thd) { WSREP_DEBUG("Disconnect provider"); Wsrep_server_state::instance().disconnect(); - Wsrep_server_state::instance().wait_until_state(Wsrep_server_state::s_disconnected); + if (Wsrep_server_state::instance().wait_until_state( + Wsrep_server_state::s_disconnected)) + { + WSREP_WARN("Wsrep interrupted while waiting for disconnected state"); + } } /* my connection, should not terminate with wsrep_close_client_connection(), @@ -1129,7 +1139,11 @@ void wsrep_shutdown_replication() { WSREP_DEBUG("Disconnect provider"); Wsrep_server_state::instance().disconnect(); - Wsrep_server_state::instance().wait_until_state(Wsrep_server_state::s_disconnected); + if (Wsrep_server_state::instance().wait_until_state( + Wsrep_server_state::s_disconnected)) + { + WSREP_WARN("Wsrep interrupted while waiting for 
disconnected state"); + } } wsrep_close_client_connections(TRUE); diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 09211a96e1e..d4035fcf363 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -336,9 +336,14 @@ static bool wsrep_sst_complete (THD* thd, if ((state == Wsrep_server_state::s_joiner || state == Wsrep_server_state::s_initialized)) { - Wsrep_server_state::instance().sst_received(client_service, - rcode); - WSREP_INFO("SST succeeded for position %s", start_pos_buf); + if (Wsrep_server_state::instance().sst_received(client_service, rcode)) + { + failed= true; + } + else + { + WSREP_INFO("SST succeeded for position %s", start_pos_buf); + } } else { |