summaryrefslogtreecommitdiff
path: root/sql/log.cc
diff options
context:
space:
mode:
authorAlfranio Correia <alfranio.correia@sun.com>2009-06-18 14:52:46 +0100
committerAlfranio Correia <alfranio.correia@sun.com>2009-06-18 14:52:46 +0100
commit3cf052b76cb62a3efbb29d4086a893f7f0d37d94 (patch)
tree49a520e10f3b7ebb207ed083d24093a52a9d4dc6 /sql/log.cc
parent48d911e70834af190a0650e77584f644525af538 (diff)
downloadmariadb-git-3cf052b76cb62a3efbb29d4086a893f7f0d37d94.tar.gz
BUG#43929 binlog corruption when max_binlog_cache_size is exceeded
Large transactions and statements may corrupt the binary log if the size of the cache, which is set by the max_binlog_cache_size, is not enough to store the the changes. In a nutshell, to fix the bug, we save the position of the next character in the cache before starting processing a statement. If there is a problem, we simply restore the position thus removing any effect of the statement from the cache. Unfortunately, to avoid corrupting the binary log, we may end up loosing changes on non-transactional tables if they do not fit in the cache. In such cases, we store an Incident_log_event in order to stop the slave and alert users that some changes were not logged. Precisely, for every non-transactional changes that do not fit into the cache, we do the following: a) the statement is *not* logged b) an incident event is logged after committing/rolling back the transaction, if any. Note that if a failure happens before writing the incident event to the binary log, the slave will not stop and the master will not have reported any error. c) its respective statement gives an error For transactional changes that do not fit into the cache, we do the following: a) the statement is *not* logged b) its respective statement gives an error To work properly, this patch requires two additional things. Firstly, callers to MYSQL_BIN_LOG::write and THD::binlog_query must handle any error returned and take the appropriate actions such as undoing the effects of a statement. We already changed some calls in the sql_insert.cc, sql_update.cc and sql_insert.cc modules but the remaining calls spread all over the code should be handled in BUG#37148. Secondly, statements must be either classified as DDL or DML because DDLs that do not get into the cache must generate an incident event since they cannot be rolled back.
Diffstat (limited to 'sql/log.cc')
-rw-r--r--sql/log.cc141
1 files changed, 118 insertions, 23 deletions
diff --git a/sql/log.cc b/sql/log.cc
index ee7ee48b42c..1ed3b35a8a9 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -153,7 +153,8 @@ private:
class binlog_trx_data {
public:
binlog_trx_data()
- : at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
+ : at_least_one_stmt(0), incident(FALSE), m_pending(0),
+ before_stmt_pos(MY_OFF_T_UNDEF)
{
trans_log.end_of_file= max_binlog_cache_size;
}
@@ -184,6 +185,7 @@ public:
delete pending();
set_pending(0);
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
+ trans_log.end_of_file= max_binlog_cache_size;
if (pos < before_stmt_pos)
before_stmt_pos= MY_OFF_T_UNDEF;
@@ -206,6 +208,7 @@ public:
if (!empty())
truncate(0);
before_stmt_pos= MY_OFF_T_UNDEF;
+ incident= FALSE;
trans_log.end_of_file= max_binlog_cache_size;
DBUG_ASSERT(empty());
}
@@ -222,11 +225,22 @@ public:
IO_CACHE trans_log; // The transaction cache
+ void set_incident(void)
+ {
+ incident= TRUE;
+ }
+
+ bool has_incident(void)
+ {
+ return(incident);
+ }
+
/**
Boolean that is true if there is at least one statement in the
transaction cache.
*/
bool at_least_one_stmt;
+ bool incident;
private:
/*
@@ -1391,7 +1405,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
*/
if (end_ev != NULL)
{
- thd->binlog_flush_pending_rows_event(TRUE);
+ if (thd->binlog_flush_pending_rows_event(TRUE))
+ DBUG_RETURN(1);
/*
Doing a commit or a rollback including non-transactional tables,
i.e., ending a transaction where we might write the transaction
@@ -1402,7 +1417,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
were, we would have to ensure that we're not ending a statement
inside a stored function.
*/
- error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
+ error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
+ trx_data->has_incident());
trx_data->reset();
/*
@@ -1428,7 +1444,11 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
*/
thd->binlog_remove_pending_rows_event(TRUE);
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ {
+ if (trx_data->has_incident())
+ mysql_bin_log.write_incident(thd, TRUE);
trx_data->reset();
+ }
else // ...statement
trx_data->truncate(trx_data->before_stmt_pos);
@@ -1544,9 +1564,11 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
YESNO(all),
YESNO(thd->transaction.all.modified_non_trans_table),
YESNO(thd->transaction.stmt.modified_non_trans_table)));
- if (all && thd->transaction.all.modified_non_trans_table ||
- !all && thd->transaction.stmt.modified_non_trans_table ||
- (thd->options & OPTION_KEEP_LOG))
+ if ((all && thd->transaction.all.modified_non_trans_table) ||
+ (!all && thd->transaction.stmt.modified_non_trans_table &&
+ !mysql_bin_log.check_write_error(thd)) ||
+ ((thd->options & OPTION_KEEP_LOG) &&
+ !mysql_bin_log.check_write_error(thd)))
{
/*
We write the transaction cache with a rollback last if we have
@@ -1559,14 +1581,22 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
error= binlog_end_trans(thd, trx_data, &qev, all);
}
- else if (all && !thd->transaction.all.modified_non_trans_table ||
- !all && !thd->transaction.stmt.modified_non_trans_table)
+ else
{
/*
- If we have modified only transactional tables, we can truncate
- the transaction cache without writing anything to the binary
- log.
+ We reach this point if either only transactional tables were modified or
+ the effect of a statement that did not get into the binlog needs to be
+ rolled back. In the latter case, if a statement changed non-transactional
+ tables or had the OPTION_KEEP_LOG associated, we write an incident event
+ to the binlog in order to stop slaves and notify users that some changes
+ on the master did not get into the binlog and slaves will be inconsistent.
+ On the other hand, if a statement is transactional, we just safely roll it
+ back.
*/
+ if ((thd->transaction.stmt.modified_non_trans_table ||
+ (thd->options & OPTION_KEEP_LOG)) &&
+ mysql_bin_log.check_write_error(thd))
+ trx_data->set_incident();
error= binlog_end_trans(thd, trx_data, 0, all);
}
if (!all)
@@ -1574,6 +1604,44 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
DBUG_RETURN(error);
}
+void MYSQL_BIN_LOG::set_write_error(THD *thd)
+{
+ DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
+
+ write_error= 1;
+
+ if (check_write_error(thd))
+ DBUG_VOID_RETURN;
+
+ if (my_errno == EFBIG)
+ my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
+ else
+ my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno);
+
+ DBUG_VOID_RETURN;
+}
+
+bool MYSQL_BIN_LOG::check_write_error(THD *thd)
+{
+ DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
+
+ bool checked= FALSE;
+
+ if (!thd->is_error())
+ DBUG_RETURN(checked);
+
+ switch (thd->main_da.sql_errno())
+ {
+ case ER_TRANS_CACHE_FULL:
+ case ER_ERROR_ON_WRITE:
+ case ER_BINLOG_LOGGING_IMPOSSIBLE:
+ checked= TRUE;
+ break;
+ }
+
+ DBUG_RETURN(checked);
+}
+
/**
@note
How do we handle this (unlikely but legal) case:
@@ -3854,6 +3922,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
if (pending->write(file))
{
pthread_mutex_unlock(&LOCK_log);
+ set_write_error(thd);
DBUG_RETURN(1);
}
@@ -3928,7 +3997,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
*/
bool const end_stmt=
thd->prelocked_mode && thd->lex->requires_prelocking();
- thd->binlog_flush_pending_rows_event(end_stmt);
+ if (thd->binlog_flush_pending_rows_event(end_stmt))
+ DBUG_RETURN(error);
pthread_mutex_lock(&LOCK_log);
@@ -3979,8 +4049,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu",
event_info->get_cache_stmt(),
(ulong) trans_log_pos));
- if (trans_log_pos == 0)
- thd->binlog_start_trans_and_stmt();
+ thd->binlog_start_trans_and_stmt();
file= trans_log;
}
/*
@@ -4058,7 +4127,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
Write the SQL command
*/
- if (event_info->write(file))
+ if (event_info->write(file) ||
+ DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
goto err;
if (file == &log_file) // we are writing to the real log (disk)
@@ -4072,13 +4142,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
err:
if (error)
- {
- if (my_errno == EFBIG)
- my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
- else
- my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
- write_error=1;
- }
+ set_write_error(thd);
}
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
@@ -4359,6 +4423,29 @@ int query_error_code(THD *thd, bool not_killed)
return error;
}
+bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+{
+ uint error= 0;
+ DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
+ LEX_STRING const write_error_msg=
+ { C_STRING_WITH_LEN("error writing to the binary log") };
+ Incident incident= INCIDENT_LOST_EVENTS;
+ Incident_log_event ev(thd, incident, write_error_msg);
+ if (lock)
+ pthread_mutex_lock(&LOCK_log);
+ ev.write(&log_file);
+ if (lock)
+ {
+ if (!error && !(error= flush_and_sync()))
+ {
+ signal_update();
+ rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
+ }
+ pthread_mutex_unlock(&LOCK_log);
+ }
+ DBUG_RETURN(error);
+}
+
/**
Write a cached log entry to the binary log.
- To support transaction over replication, we wrap the transaction
@@ -4371,6 +4458,9 @@ int query_error_code(THD *thd, bool not_killed)
@param cache The cache to copy to the binlog
@param commit_event The commit event to print after writing the
contents of the cache.
+ @param incident Defines if an incident event should be created to
+ notify that some non-transactional changes did
+ not get into the binlog.
@note
We only come here if there is something in the cache.
@@ -4380,7 +4470,8 @@ int query_error_code(THD *thd, bool not_killed)
'cache' needs to be reinitialized after this functions returns.
*/
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
+bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
+ bool incident)
{
DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
VOID(pthread_mutex_lock(&LOCK_log));
@@ -4429,6 +4520,10 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
if (commit_event && commit_event->write(&log_file))
goto err;
+
+ if (incident && write_incident(thd, FALSE))
+ goto err;
+
if (flush_and_sync())
goto err;
DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););