diff options
author | Nirbhay Choubey <nirbhay@skysql.com> | 2014-08-11 23:55:41 -0400 |
---|---|---|
committer | Nirbhay Choubey <nirbhay@skysql.com> | 2014-08-11 23:55:41 -0400 |
commit | 8358dd53b7406deaa9f50ad09b16a86b7e367632 (patch) | |
tree | ef8995ad0e400cb6a1842649c3c886c7b3474835 /sql/log_event.cc | |
parent | e06e12f5b8dfe0ab2e5976eec1b27b25d318441b (diff) | |
parent | 4105cbf4a230c82ea7dee31d4d2262b798fad9f4 (diff) | |
download | mariadb-git-8358dd53b7406deaa9f50ad09b16a86b7e367632.tar.gz |
bzr merge -r4346 maria/10.0 (maria-10.0.13)
Diffstat (limited to 'sql/log_event.cc')
-rw-r--r-- | sql/log_event.cc | 198 |
1 files changed, 134 insertions, 64 deletions
diff --git a/sql/log_event.cc b/sql/log_event.cc index 4edd41fdb2e..77353d33bf1 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -193,6 +193,28 @@ static const char *HA_ERR(int i) return "No Error!"; } + +/* + Return true if an error caught during event execution is a temporary error + that will cause automatic retry of the event group during parallel + replication, false otherwise. + + In parallel replication, conflicting transactions can occasionally cause + deadlocks; such errors are handled automatically by rolling back re-trying + the transactions, so should not pollute the error log. +*/ +static bool +is_parallel_retry_error(rpl_group_info *rgi, int err) +{ + if (!rgi->is_parallel_exec) + return false; + if (rgi->killed_for_retry && + (err == ER_QUERY_INTERRUPTED || err == ER_CONNECTION_KILLED)) + return true; + return has_temporary_error(rgi->thd); +} + + /** Error reporting facility for Rows_log_event::do_apply_event @@ -207,7 +229,7 @@ static const char *HA_ERR(int i) */ static void inline slave_rows_error_report(enum loglevel level, int ha_error, - Relay_log_info const *rli, THD *thd, + rpl_group_info *rgi, THD *thd, TABLE *table, const char * type, const char *log_name, ulong pos) { @@ -217,8 +239,19 @@ static void inline slave_rows_error_report(enum loglevel level, int ha_error, uint len; Diagnostics_area::Sql_condition_iterator it= thd->get_stmt_da()->sql_conditions(); + Relay_log_info const *rli= rgi->rli; const Sql_condition *err; buff[0]= 0; + int errcode= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0; + + /* + In parallel replication, deadlocks or other temporary errors can happen + occasionally in normal operation, they will be handled correctly and + automatically by re-trying the transactions. So do not pollute the error + log with messages about them. + */ + if (is_parallel_retry_error(rgi, errcode)) + return; for (err= it++, slider= buff; err && slider < buff_end - 1; slider += len, err= it++) @@ -229,7 +262,7 @@ static void inline slave_rows_error_report(enum loglevel level, int ha_error, } if (ha_error != 0) - rli->report(level, thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0, + rli->report(level, errcode, rgi->gtid_info(), "Could not execute %s event on table %s.%s;" "%s handler error %s; " "the event's master log %s, end_log_pos %lu", @@ -237,7 +270,7 @@ static void inline slave_rows_error_report(enum loglevel level, int ha_error, buff, handler_error == NULL ? "<unknown>" : handler_error, log_name, pos); else - rli->report(level, thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0, + rli->report(level, errcode, rgi->gtid_info(), "Could not execute %s event on table %s.%s;" "%s the event's master log %s, end_log_pos %lu", type, table->s->db.str, table->s->table_name.str, @@ -4098,9 +4131,10 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, */ int error; char llbuff[22]; - if ((error= rows_event_stmt_cleanup(rgi, thd))) + if ((error= rows_event_stmt_cleanup(rgi, thd)) && + !is_parallel_retry_error(rgi, error)) { - const_cast<Relay_log_info*>(rli)->report(ERROR_LEVEL, error, + rli->report(ERROR_LEVEL, error, rgi->gtid_info(), "Error in cleaning up after an event preceding the commit; " "the group log file/position: %s %s", const_cast<Relay_log_info*>(rli)->group_master_log_name, @@ -4245,21 +4279,24 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, Record any GTID in the same transaction, so slave state is transactionally consistent. */ - if (current_stmt_is_commit && (sub_id= rgi->gtid_sub_id)) + if (current_stmt_is_commit && rgi->gtid_pending) { - /* Clear the GTID from the RLI so we don't accidentally reuse it. */ - rgi->gtid_sub_id= 0; + sub_id= rgi->gtid_sub_id; + rgi->gtid_pending= false; gtid= rgi->current_gtid; thd->variables.option_bits&= ~OPTION_GTID_BEGIN; if (rpl_global_gtid_slave_state.record_gtid(thd, >id, sub_id, true, false)) { - rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, - "Error during COMMIT: failed to update GTID state in " - "%s.%s: %d: %s", - "mysql", rpl_gtid_slave_state_table_name.str, - thd->get_stmt_da()->sql_errno(), - thd->get_stmt_da()->message()); + int errcode= thd->get_stmt_da()->sql_errno(); + if (!is_parallel_retry_error(rgi, errcode)) + rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, + rgi->gtid_info(), + "Error during COMMIT: failed to update GTID state in " + "%s.%s: %d: %s", + "mysql", rpl_gtid_slave_state_table_name.str, + errcode, + thd->get_stmt_da()->message()); trans_rollback(thd); sub_id= 0; thd->is_slave_error= 1; @@ -4327,7 +4364,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, clear_all_errors(thd, const_cast<Relay_log_info*>(rli)); /* Can ignore query */ else { - rli->report(ERROR_LEVEL, expected_error, + rli->report(ERROR_LEVEL, expected_error, rgi->gtid_info(), "\ Query partially completed on the master (error on master: %d) \ and was aborted. There is a chance that your master is inconsistent at this \ @@ -4383,7 +4420,7 @@ compare_errors: !ignored_error_code(actual_error) && !ignored_error_code(expected_error)) { - rli->report(ERROR_LEVEL, 0, + rli->report(ERROR_LEVEL, 0, rgi->gtid_info(), "\ Query caused different errors on master and slave. \ Error on master: message (format)='%s' error code=%d ; \ @@ -4406,18 +4443,21 @@ Default database: '%s'. Query: '%s'", { DBUG_PRINT("info",("error ignored")); clear_all_errors(thd, const_cast<Relay_log_info*>(rli)); - thd->reset_killed(); + if (actual_error == ER_QUERY_INTERRUPTED || + actual_error == ER_CONNECTION_KILLED) + thd->reset_killed(); } /* Other cases: mostly we expected no error and get one. */ else if (thd->is_slave_error || thd->is_fatal_error) { - rli->report(ERROR_LEVEL, actual_error, - "Error '%s' on query. Default database: '%s'. Query: '%s'", - (actual_error ? thd->get_stmt_da()->message() : - "unexpected success or fatal error"), - print_slave_db_safe(thd->db), query_arg); + if (!is_parallel_retry_error(rgi, actual_error)) + rli->report(ERROR_LEVEL, actual_error, rgi->gtid_info(), + "Error '%s' on query. Default database: '%s'. Query: '%s'", + (actual_error ? thd->get_stmt_da()->message() : + "unexpected success or fatal error"), + print_slave_db_safe(thd->db), query_arg); thd->is_slave_error= 1; } @@ -5084,7 +5124,7 @@ int Format_description_log_event::do_apply_event(rpl_group_info *rgi) if (!is_artificial_event() && created && thd->transaction.all.ha_list) { /* This is not an error (XA is safe), just an information */ - rli->report(INFORMATION_LEVEL, 0, + rli->report(INFORMATION_LEVEL, 0, NULL, "Rolling back unfinished transaction (no COMMIT " "or ROLLBACK in relay log). A probable cause is that " "the master died while writing the transaction to " @@ -6025,7 +6065,7 @@ error: sql_errno=ER_UNKNOWN_ERROR; err=ER(sql_errno); } - rli->report(ERROR_LEVEL, sql_errno,"\ + rli->report(ERROR_LEVEL, sql_errno, rgi->gtid_info(), "\ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", err, (char*)table_name, print_slave_db_safe(remember_db)); free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); @@ -6042,7 +6082,7 @@ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", (char*)table_name, print_slave_db_safe(remember_db)); - rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(), ER(ER_SLAVE_FATAL_ERROR), buf); DBUG_RETURN(1); } @@ -6532,12 +6572,10 @@ Gtid_log_event::do_apply_event(rpl_group_info *rgi) thd->variables.server_id= this->server_id; thd->variables.gtid_domain_id= this->domain_id; thd->variables.gtid_seq_no= this->seq_no; + mysql_reset_thd_for_next_command(thd); if (opt_gtid_strict_mode && opt_bin_log && opt_log_slave_updates) { - /* Need to reset prior "ok" status to give an error. */ - thd->clear_error(); - thd->get_stmt_da()->reset_diagnostics_area(); if (mysql_bin_log.check_strict_gtid_sequence(this->domain_id, this->server_id, this->seq_no)) return 1; @@ -7315,28 +7353,41 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) bool res; int err; rpl_gtid gtid; - uint64 sub_id; + uint64 sub_id= 0; Relay_log_info const *rli= rgi->rli; /* + XID_EVENT works like a COMMIT statement. And it also updates the + mysql.gtid_slave_pos table with the GTID of the current transaction. + + Therefore, it acts much like a normal SQL statement, so we need to do + mysql_reset_thd_for_next_command() as if starting a new statement. + */ + mysql_reset_thd_for_next_command(thd); + /* Record any GTID in the same transaction, so slave state is transactionally consistent. */ - if ((sub_id= rgi->gtid_sub_id)) + if (rgi->gtid_pending) { - /* Clear the GTID from the RLI so we don't accidentally reuse it. */ - rgi->gtid_sub_id= 0; + sub_id= rgi->gtid_sub_id; + rgi->gtid_pending= false; gtid= rgi->current_gtid; err= rpl_global_gtid_slave_state.record_gtid(thd, >id, sub_id, true, false); if (err) { - rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, - "Error during XID COMMIT: failed to update GTID state in " - "%s.%s: %d: %s", - "mysql", rpl_gtid_slave_state_table_name.str, - thd->get_stmt_da()->sql_errno(), - thd->get_stmt_da()->message()); + int ec= thd->get_stmt_da()->sql_errno(); + /* + Do not report an error if this is really a kill due to a deadlock. + In this case, the transaction will be re-tried instead. + */ + if (!is_parallel_retry_error(rgi, ec)) + rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, rgi->gtid_info(), + "Error during XID COMMIT: failed to update GTID state in " + "%s.%s: %d: %s", + "mysql", rpl_gtid_slave_state_table_name.str, ec, + thd->get_stmt_da()->message()); trans_rollback(thd); thd->is_slave_error= 1; return err; @@ -8366,7 +8417,7 @@ int Create_file_log_event::do_apply_event(rpl_group_info *rgi) init_io_cache(&file, fd, IO_SIZE, WRITE_CACHE, (my_off_t)0, 0, MYF(MY_WME|MY_NABP))) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in Create_file event: could not open file '%s'", fname_buf); goto err; @@ -8378,7 +8429,7 @@ int Create_file_log_event::do_apply_event(rpl_group_info *rgi) if (write_base(&file)) { strmov(ext, ".info"); // to have it right in the error message - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in Create_file event: could not write to file '%s'", fname_buf); goto err; @@ -8394,14 +8445,14 @@ int Create_file_log_event::do_apply_event(rpl_group_info *rgi) O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, MYF(MY_WME))) < 0) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in Create_file event: could not open file '%s'", fname_buf); goto err; } if (mysql_file_write(fd, (uchar*) block, block_len, MYF(MY_WME+MY_NABP))) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in Create_file event: write to '%s' failed", fname_buf); goto err; @@ -8558,7 +8609,7 @@ int Append_block_log_event::do_apply_event(rpl_group_info *rgi) O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, MYF(MY_WME))) < 0) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in %s event: could not create file '%s'", get_type_str(), fname); goto err; @@ -8569,7 +8620,7 @@ int Append_block_log_event::do_apply_event(rpl_group_info *rgi) O_WRONLY | O_APPEND | O_BINARY | O_NOFOLLOW, MYF(MY_WME))) < 0) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in %s event: could not open file '%s'", get_type_str(), fname); goto err; @@ -8582,7 +8633,7 @@ int Append_block_log_event::do_apply_event(rpl_group_info *rgi) if (mysql_file_write(fd, (uchar*) block, block_len, MYF(MY_WME+MY_NABP))) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in %s event: write to '%s' failed", get_type_str(), fname); goto err; @@ -8807,7 +8858,7 @@ int Execute_load_log_event::do_apply_event(rpl_group_info *rgi) init_io_cache(&file, fd, IO_SIZE, READ_CACHE, (my_off_t)0, 0, MYF(MY_WME|MY_NABP))) { - rli->report(ERROR_LEVEL, my_errno, + rli->report(ERROR_LEVEL, my_errno, rgi->gtid_info(), "Error in Exec_load event: could not open file '%s'", fname); goto err; @@ -8819,7 +8870,7 @@ int Execute_load_log_event::do_apply_event(rpl_group_info *rgi) opt_slave_sql_verify_checksum)) || lev->get_type_code() != NEW_LOAD_EVENT) { - rli->report(ERROR_LEVEL, 0, "Error in Exec_load event: " + rli->report(ERROR_LEVEL, 0, rgi->gtid_info(), "Error in Exec_load event: " "file '%s' appears corrupted", fname); goto err; } @@ -8845,7 +8896,7 @@ int Execute_load_log_event::do_apply_event(rpl_group_info *rgi) char *tmp= my_strdup(rli->last_error().message, MYF(MY_WME)); if (tmp) { - rli->report(ERROR_LEVEL, rli->last_error().number, + rli->report(ERROR_LEVEL, rli->last_error().number, rgi->gtid_info(), "%s. Failed executing load from '%s'", tmp, fname); my_free(tmp); } @@ -9017,9 +9068,9 @@ void Execute_load_query_log_event::print(FILE* file, if (local_fname) { my_b_write(&cache, (uchar*) query, fn_pos_start); - my_b_write_string(&cache, " LOCAL INFILE \'"); - my_b_printf(&cache, "%s", local_fname); - my_b_write_string(&cache, "\'"); + my_b_write_string(&cache, " LOCAL INFILE "); + pretty_print_str(&cache, local_fname, strlen(local_fname)); + if (dup_handling == LOAD_DUP_REPLACE) my_b_write_string(&cache, " REPLACE"); my_b_write_string(&cache, " INTO"); @@ -9078,7 +9129,7 @@ Execute_load_query_log_event::do_apply_event(rpl_group_info *rgi) /* Replace filename and LOCAL keyword in query before executing it */ if (buf == NULL) { - rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(), ER(ER_SLAVE_FATAL_ERROR), "Not enough memory"); return 1; } @@ -9686,6 +9737,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0)) { uint actual_error= thd->get_stmt_da()->sql_errno(); + #ifdef WITH_WSREP if (WSREP(thd)) { @@ -9696,9 +9748,11 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) thd->wsrep_exec_mode, thd->wsrep_conflict_state, (long long)wsrep_thd_trx_seqno(thd)); - } + } #endif - if (thd->is_slave_error || thd->is_fatal_error) + + if ((thd->is_slave_error || thd->is_fatal_error) && + !is_parallel_retry_error(rgi, actual_error)) { /* Error reporting borrowed from Query_log_event with many excessive @@ -9706,7 +9760,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) We should not honour --slave-skip-errors at this point as we are having severe errors which should not be skiped. */ - rli->report(ERROR_LEVEL, actual_error, + rli->report(ERROR_LEVEL, actual_error, rgi->gtid_info(), "Error executing row event: '%s'", (actual_error ? thd->get_stmt_da()->message() : "unexpected success or fatal error")); @@ -9747,8 +9801,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) { DBUG_ASSERT(ptr->m_tabledef_valid); TABLE *conv_table; - if (!ptr->m_tabledef.compatible_with(thd, const_cast<Relay_log_info*>(rli), - ptr->table, &conv_table)) + if (!ptr->m_tabledef.compatible_with(thd, rgi, ptr->table, &conv_table)) { DBUG_PRINT("debug", ("Table: %s.%s is not compatible with master", ptr->table->s->db.str, @@ -9904,7 +9957,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (idempotent_error || ignored_error) { if (global_system_variables.log_warnings) - slave_rows_error_report(WARNING_LEVEL, error, rli, thd, table, + slave_rows_error_report(WARNING_LEVEL, error, rgi, thd, table, get_type_str(), RPL_LOG_NAME, (ulong) log_pos); clear_all_errors(thd, const_cast<Relay_log_info*>(rli)); @@ -9960,7 +10013,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) { if (global_system_variables.log_warnings) - slave_rows_error_report(WARNING_LEVEL, error, rli, thd, table, + slave_rows_error_report(WARNING_LEVEL, error, rgi, thd, table, get_type_str(), RPL_LOG_NAME, (ulong) log_pos); clear_all_errors(thd, const_cast<Relay_log_info*>(rli)); @@ -9971,7 +10024,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (error) { - slave_rows_error_report(ERROR_LEVEL, error, rli, thd, table, + slave_rows_error_report(ERROR_LEVEL, error, rgi, thd, table, get_type_str(), RPL_LOG_NAME, (ulong) log_pos); /* @@ -9993,7 +10046,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rgi, thd))) slave_rows_error_report(ERROR_LEVEL, thd->is_error() ? 0 : error, - rli, thd, table, + rgi, thd, table, get_type_str(), RPL_LOG_NAME, (ulong) log_pos); DBUG_RETURN(error); @@ -10981,7 +11034,7 @@ int Table_map_log_event::do_apply_event(rpl_group_info *rgi) table_list->table_id); if (thd->slave_thread) - rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(), ER(ER_SLAVE_FATAL_ERROR), buf); else /* @@ -12605,6 +12658,8 @@ Incident_log_event::Incident_log_event(const char *buf, uint event_len, DBUG_PRINT("info",("event_len: %u; common_header_len: %d; post_header_len: %d", event_len, common_header_len, post_header_len)); + m_message.str= NULL; + m_message.length= 0; int incident_number= uint2korr(buf + common_header_len); if (incident_number >= INCIDENT_COUNT || incident_number <= INCIDENT_NONE) @@ -12621,7 +12676,13 @@ Incident_log_event::Incident_log_event(const char *buf, uint event_len, uint8 len= 0; // Assignment to keep compiler happy const char *str= NULL; // Assignment to keep compiler happy read_str(&ptr, str_end, &str, &len); - m_message.str= const_cast<char*>(str); + if (!(m_message.str= (char*) my_malloc(len+1, MYF(MY_WME)))) + { + /* Mark this event invalid */ + m_incident= INCIDENT_NONE; + DBUG_VOID_RETURN; + } + strmake(m_message.str, str, len); m_message.length= len; DBUG_PRINT("info", ("m_incident: %d", m_incident)); DBUG_VOID_RETURN; @@ -12630,6 +12691,8 @@ Incident_log_event::Incident_log_event(const char *buf, uint event_len, Incident_log_event::~Incident_log_event() { + if (m_message.str) + my_free(m_message.str); } @@ -12724,7 +12787,14 @@ Incident_log_event::do_apply_event(rpl_group_info *rgi) { Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Incident_log_event::do_apply_event"); - rli->report(ERROR_LEVEL, ER_SLAVE_INCIDENT, + + if (ignored_error_code(ER_SLAVE_INCIDENT)) + { + DBUG_PRINT("info", ("Ignoring Incident")); + DBUG_RETURN(0); + } + + rli->report(ERROR_LEVEL, ER_SLAVE_INCIDENT, NULL, ER(ER_SLAVE_INCIDENT), description(), m_message.length > 0 ? m_message.str : "<none>"); |