diff options
author | Alfranio Correia <alfranio.correia@sun.com> | 2009-06-18 14:52:46 +0100 |
---|---|---|
committer | Alfranio Correia <alfranio.correia@sun.com> | 2009-06-18 14:52:46 +0100 |
commit | 3cf052b76cb62a3efbb29d4086a893f7f0d37d94 (patch) | |
tree | 49a520e10f3b7ebb207ed083d24093a52a9d4dc6 /sql/log.cc | |
parent | 48d911e70834af190a0650e77584f644525af538 (diff) | |
download | mariadb-git-3cf052b76cb62a3efbb29d4086a893f7f0d37d94.tar.gz |
BUG#43929 binlog corruption when max_binlog_cache_size is exceeded
Large transactions and statements may corrupt the binary log if the size of the
cache, which is set by max_binlog_cache_size, is not enough to store the
changes.
In a nutshell, to fix the bug, we save the position of the next character in the
cache before starting processing a statement. If there is a problem, we simply
restore the position thus removing any effect of the statement from the cache.
Unfortunately, to avoid corrupting the binary log, we may end up losing changes
on non-transactional tables if they do not fit in the cache. In such cases, we
store an Incident_log_event in order to stop the slave and alert users that some
changes were not logged.
Precisely, for every non-transactional change that does not fit into the cache,
we do the following:
a) the statement is *not* logged
b) an incident event is logged after committing/rolling back the transaction,
if any. Note that if a failure happens before writing the incident event to
the binary log, the slave will not stop and the master will not have reported
any error.
c) its respective statement gives an error
For transactional changes that do not fit into the cache, we do the following:
a) the statement is *not* logged
b) its respective statement gives an error
To work properly, this patch requires two additional things. Firstly, callers to
MYSQL_BIN_LOG::write and THD::binlog_query must handle any error returned and
take the appropriate actions such as undoing the effects of a statement. We
already changed some calls in the sql_insert.cc and sql_update.cc
modules, but the remaining calls spread all over the code should be handled in
BUG#37148. Secondly, statements must be either classified as DDL or DML because
DDLs that do not get into the cache must generate an incident event since they
cannot be rolled back.
Diffstat (limited to 'sql/log.cc')
-rw-r--r-- | sql/log.cc | 141 |
1 files changed, 118 insertions, 23 deletions
diff --git a/sql/log.cc b/sql/log.cc index ee7ee48b42c..1ed3b35a8a9 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -153,7 +153,8 @@ private: class binlog_trx_data { public: binlog_trx_data() - : at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF) + : at_least_one_stmt(0), incident(FALSE), m_pending(0), + before_stmt_pos(MY_OFF_T_UNDEF) { trans_log.end_of_file= max_binlog_cache_size; } @@ -184,6 +185,7 @@ public: delete pending(); set_pending(0); reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0); + trans_log.end_of_file= max_binlog_cache_size; if (pos < before_stmt_pos) before_stmt_pos= MY_OFF_T_UNDEF; @@ -206,6 +208,7 @@ public: if (!empty()) truncate(0); before_stmt_pos= MY_OFF_T_UNDEF; + incident= FALSE; trans_log.end_of_file= max_binlog_cache_size; DBUG_ASSERT(empty()); } @@ -222,11 +225,22 @@ public: IO_CACHE trans_log; // The transaction cache + void set_incident(void) + { + incident= TRUE; + } + + bool has_incident(void) + { + return(incident); + } + /** Boolean that is true if there is at least one statement in the transaction cache. */ bool at_least_one_stmt; + bool incident; private: /* @@ -1391,7 +1405,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data, */ if (end_ev != NULL) { - thd->binlog_flush_pending_rows_event(TRUE); + if (thd->binlog_flush_pending_rows_event(TRUE)) + DBUG_RETURN(1); /* Doing a commit or a rollback including non-transactional tables, i.e., ending a transaction where we might write the transaction @@ -1402,7 +1417,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data, were, we would have to ensure that we're not ending a statement inside a stored function. 
*/ - error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev); + error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev, + trx_data->has_incident()); trx_data->reset(); /* @@ -1428,7 +1444,11 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data, */ thd->binlog_remove_pending_rows_event(TRUE); if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT))) + { + if (trx_data->has_incident()) + mysql_bin_log.write_incident(thd, TRUE); trx_data->reset(); + } else // ...statement trx_data->truncate(trx_data->before_stmt_pos); @@ -1544,9 +1564,11 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) YESNO(all), YESNO(thd->transaction.all.modified_non_trans_table), YESNO(thd->transaction.stmt.modified_non_trans_table))); - if (all && thd->transaction.all.modified_non_trans_table || - !all && thd->transaction.stmt.modified_non_trans_table || - (thd->options & OPTION_KEEP_LOG)) + if ((all && thd->transaction.all.modified_non_trans_table) || + (!all && thd->transaction.stmt.modified_non_trans_table && + !mysql_bin_log.check_write_error(thd)) || + ((thd->options & OPTION_KEEP_LOG) && + !mysql_bin_log.check_write_error(thd))) { /* We write the transaction cache with a rollback last if we have @@ -1559,14 +1581,22 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0); error= binlog_end_trans(thd, trx_data, &qev, all); } - else if (all && !thd->transaction.all.modified_non_trans_table || - !all && !thd->transaction.stmt.modified_non_trans_table) + else { /* - If we have modified only transactional tables, we can truncate - the transaction cache without writing anything to the binary - log. + We reach this point if either only transactional tables were modified or + the effect of a statement that did not get into the binlog needs to be + rolled back. 
In the latter case, if a statement changed non-transactional + tables or had the OPTION_KEEP_LOG associated, we write an incident event + to the binlog in order to stop slaves and notify users that some changes + on the master did not get into the binlog and slaves will be inconsistent. + On the other hand, if a statement is transactional, we just safely roll it + back. */ + if ((thd->transaction.stmt.modified_non_trans_table || + (thd->options & OPTION_KEEP_LOG)) && + mysql_bin_log.check_write_error(thd)) + trx_data->set_incident(); error= binlog_end_trans(thd, trx_data, 0, all); } if (!all) @@ -1574,6 +1604,44 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) DBUG_RETURN(error); } +void MYSQL_BIN_LOG::set_write_error(THD *thd) +{ + DBUG_ENTER("MYSQL_BIN_LOG::set_write_error"); + + write_error= 1; + + if (check_write_error(thd)) + DBUG_VOID_RETURN; + + if (my_errno == EFBIG) + my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME)); + else + my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno); + + DBUG_VOID_RETURN; +} + +bool MYSQL_BIN_LOG::check_write_error(THD *thd) +{ + DBUG_ENTER("MYSQL_BIN_LOG::check_write_error"); + + bool checked= FALSE; + + if (!thd->is_error()) + DBUG_RETURN(checked); + + switch (thd->main_da.sql_errno()) + { + case ER_TRANS_CACHE_FULL: + case ER_ERROR_ON_WRITE: + case ER_BINLOG_LOGGING_IMPOSSIBLE: + checked= TRUE; + break; + } + + DBUG_RETURN(checked); +} + /** @note How do we handle this (unlikely but legal) case: @@ -3854,6 +3922,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, if (pending->write(file)) { pthread_mutex_unlock(&LOCK_log); + set_write_error(thd); DBUG_RETURN(1); } @@ -3928,7 +3997,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) */ bool const end_stmt= thd->prelocked_mode && thd->lex->requires_prelocking(); - thd->binlog_flush_pending_rows_event(end_stmt); + if (thd->binlog_flush_pending_rows_event(end_stmt)) + DBUG_RETURN(error); pthread_mutex_lock(&LOCK_log); @@ 
-3979,8 +4049,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu", event_info->get_cache_stmt(), (ulong) trans_log_pos)); - if (trans_log_pos == 0) - thd->binlog_start_trans_and_stmt(); + thd->binlog_start_trans_and_stmt(); file= trans_log; } /* @@ -4058,7 +4127,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) Write the SQL command */ - if (event_info->write(file)) + if (event_info->write(file) || + DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0)) goto err; if (file == &log_file) // we are writing to the real log (disk) @@ -4072,13 +4142,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) err: if (error) - { - if (my_errno == EFBIG) - my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0)); - else - my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno); - write_error=1; - } + set_write_error(thd); } if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F) @@ -4359,6 +4423,29 @@ int query_error_code(THD *thd, bool not_killed) return error; } +bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock) +{ + uint error= 0; + DBUG_ENTER("MYSQL_BIN_LOG::write_incident"); + LEX_STRING const write_error_msg= + { C_STRING_WITH_LEN("error writing to the binary log") }; + Incident incident= INCIDENT_LOST_EVENTS; + Incident_log_event ev(thd, incident, write_error_msg); + if (lock) + pthread_mutex_lock(&LOCK_log); + ev.write(&log_file); + if (lock) + { + if (!error && !(error= flush_and_sync())) + { + signal_update(); + rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); + } + pthread_mutex_unlock(&LOCK_log); + } + DBUG_RETURN(error); +} + /** Write a cached log entry to the binary log. - To support transaction over replication, we wrap the transaction @@ -4371,6 +4458,9 @@ int query_error_code(THD *thd, bool not_killed) @param cache The cache to copy to the binlog @param commit_event The commit event to print after writing the contents of the cache. 
+ @param incident Defines if an incident event should be created to + notify that some non-transactional changes did + not get into the binlog. @note We only come here if there is something in the cache. @@ -4380,7 +4470,8 @@ int query_error_code(THD *thd, bool not_killed) 'cache' needs to be reinitialized after this functions returns. */ -bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event) +bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event, + bool incident) { DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)"); VOID(pthread_mutex_lock(&LOCK_log)); @@ -4429,6 +4520,10 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event) if (commit_event && commit_event->write(&log_file)) goto err; + + if (incident && write_incident(thd, FALSE)) + goto err; + if (flush_and_sync()) goto err; DBUG_EXECUTE_IF("half_binlogged_transaction", abort();); |