From 2f4a0c5be2c5d5153c4253a49ba8820ab333a9a0 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 7 Oct 2018 18:59:52 +0200 Subject: Fix accumulation of old rows in mysql.gtid_slave_pos This would happen especially in optimistic parallel replication, where there is a good chance that a transaction will be rolled back (due to conflicts) after it has executed record_gtid(). If the transaction did any deletions of old rows as part of record_gtid(), those deletions will be undone as well. And the code did not properly ensure that the deletions would be re-tried. This patch makes record_gtid() remember the list of deletions done as part of a transaction. Then in rpl_slave_state::update() when the changes have been committed, we discard the list. However, in case of error and rollback, in cleanup_context() we will instead put the list back into rpl_global_gtid_slave_state so that the deletions will be re-tried later. Probably fixes part of the cause of MDEV-12147 as well. Signed-off-by: Kristian Nielsen --- sql/rpl_gtid.cc | 64 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 20 deletions(-) (limited to 'sql/rpl_gtid.cc') diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 7b1acf17ef5..94944b5b3e5 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -77,7 +77,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) rgi->gtid_pending= false; if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE) { - if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) + if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false)) DBUG_RETURN(1); update_state_hash(sub_id, &rgi->current_gtid, rgi); } @@ -328,6 +328,8 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, } } rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; + + rgi->pending_gtid_deletes_clear(); } if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME)))) @@ -377,15 +379,24 @@ int rpl_slave_state::put_back_list(uint32 domain_id, list_element *list) { element *e; + int err= 0; + + mysql_mutex_lock(&LOCK_slave_state); if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0))) - return 1; + { + err= 1; + goto end; + } while (list) { list_element *next= list->next; e->add(list); list= next; } - return 0; + +end: + mysql_mutex_unlock(&LOCK_slave_state); + return err; } @@ -468,12 +479,12 @@ gtid_check_rpl_slave_state_table(TABLE *table) /* Write a gtid to the replication slave state table. - Do it as part of the transaction, to get slave crash safety, or as a separate - transaction if !in_transaction (eg. MyISAM or DDL). - gtid The global transaction id for this event group. sub_id Value allocated within the sub_id when the event group was read (sub_id must be consistent with commit order in master binlog). + rgi rpl_group_info context, if we are recording the gtid transactionally + as part of replicating a transactional event. NULL if called from + outside of a replicated transaction. Note that caller must later ensure that the new gtid and sub_id is inserted into the appropriate HASH element with rpl_slave_state.add(), so that it can @@ -481,13 +492,13 @@ gtid_check_rpl_slave_state_table(TABLE *table) */ int rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - bool in_transaction, bool in_statement) + rpl_group_info *rgi, bool in_statement) { TABLE_LIST tlist; int err= 0; bool table_opened= false; TABLE *table; - list_element *elist= 0, *next; + list_element *elist= 0, *cur, *next; element *elem; ulonglong thd_saved_option= thd->variables.option_bits; Query_tables_list lex_backup; @@ -558,7 +569,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, thd->wsrep_ignore_table= true; #endif - if (!in_transaction) + if (!rgi) { DBUG_PRINT("info", ("resetting OPTION_BEGIN")); thd->variables.option_bits&= @@ -601,9 +612,9 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, if ((elist= elem->grab_list()) != NULL) { /* Delete any old stuff, but keep around the most recent one. */ - list_element *cur= elist; - uint64 best_sub_id= cur->sub_id; + uint64 best_sub_id= elist->sub_id; list_element **best_ptr_ptr= &elist; + cur= elist; while ((next= cur->next)) { if (next->sub_id > best_sub_id) @@ -636,7 +647,8 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, table->file->print_error(err, MYF(0)); goto end; } - while (elist) + cur = elist; + while (cur) { uchar key_buffer[4+8]; @@ -646,9 +658,9 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, /* `break' does not work inside DBUG_EXECUTE_IF */ goto dbug_break; }); - next= elist->next; + next= cur->next; - table->field[1]->store(elist->sub_id, true); + table->field[1]->store(cur->sub_id, true); /* domain_id is already set in table->record[0] from write_row() above. */ key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false); if (table->file->ha_index_read_map(table->record[1], key_buffer, @@ -662,8 +674,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, not want to endlessly error on the same element in case of table corruption or such. */ - my_free(elist); - elist= next; + cur= next; if (err) break; } @@ -686,18 +697,31 @@ end: */ if (elist) { - mysql_mutex_lock(&LOCK_slave_state); put_back_list(gtid->domain_id, elist); - mysql_mutex_unlock(&LOCK_slave_state); + elist = 0; } ha_rollback_trans(thd, FALSE); } close_thread_tables(thd); - if (in_transaction) + if (rgi) + { thd->mdl_context.release_statement_locks(); + /* + Save the list of old gtid entries we deleted. If this transaction + fails later for some reason and is rolled back, the deletion of those + entries will be rolled back as well, and we will need to put them back + on the to-be-deleted list so we can re-do the deletion. Otherwise + redundant rows in mysql.gtid_slave_pos may accumulate if transactions + are rolled back and retried after record_gtid(). + */ + rgi->pending_gtid_deletes_save(gtid->domain_id, elist); + } else + { thd->mdl_context.release_transactional_locks(); + rpl_group_info::pending_gtid_deletes_free(elist); + } } thd->lex->restore_backup_query_tables_list(&lex_backup); thd->variables.option_bits= thd_saved_option; @@ -1080,7 +1104,7 @@ rpl_slave_state::load(THD *thd, char *state_from_master, size_t len, if (gtid_parser_helper(&state_from_master, end, >id) || !(sub_id= next_sub_id(gtid.domain_id)) || - record_gtid(thd, >id, sub_id, false, in_statement) || + record_gtid(thd, >id, sub_id, NULL, in_statement) || update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, NULL)) return 1; if (state_from_master == end) -- cgit v1.2.1