diff options
author | unknown <mats@romeo.(none)> | 2006-12-21 09:29:02 +0100 |
---|---|---|
committer | unknown <mats@romeo.(none)> | 2006-12-21 09:29:02 +0100 |
commit | 3d68c135d0daa8030665a0a3b9b85e18c9cb76d9 (patch) | |
tree | e6a777ceb86f6f798e1174f3f83eed70f75b1bdb /sql/log.cc | |
parent | 27bbf36f87138ee434f629756a543fa7caf9f789 (diff) | |
download | mariadb-git-3d68c135d0daa8030665a0a3b9b85e18c9cb76d9.tar.gz |
BUG#22864 (Rollback following CREATE... SELECT discards 'CREATE TABLE'
from log):
When row-based logging is used, the CREATE-SELECT is written as two
parts: as a CREATE TABLE statement and as the rows for the table. For
both transactional and non-transactional tables, the CREATE TABLE
statement was written to the transaction cache, as were the rows, and
on statement end, the entire transaction cache was written to the binary
log if the table was non-transactional. For transactional tables, the
events were kept in the transaction cache until end of transaction (or
statement that were not part of a transaction).
For the case when AUTOCOMMIT=0 and we are creating a transactional table
using a create select, we would then keep the CREATE TABLE statement and
the rows for the CREATE-SELECT, while executing the following statements.
On a rollback, the transaction cache would then be cleared, which would
also remove the CREATE TABLE statement. Hence no table would be created
on the slave, while there is an empty table on the master.
This relates to BUG#22865 where the table being created exists on the
master, but not on the slave during insertion of rows into the newly
created table. This occurs since the CREATE TABLE statement were still
in the transaction cache until the statement finished executing, and
possibly longer if the table was transactional.
This patch changes the behaviour of the CREATE-SELECT statement by
adding an implicit commit at the end of the statement when creating
non-temporary tables. Hence, non-temporary tables will be written to the
binary log on completion, and in the even of AUTOCOMMIT=0, a new
transaction will be started. Temporary tables do not commit an ongoing
transaction: neither as a pre- not a post-commit.
The events for both transactional and non-transactional tables are
saved in the transaction cache, and written to the binary log at end
of the statement.
mysql-test/r/rpl_row_create_table.result:
Result change
mysql-test/t/rpl_row_create_table.test:
Requring InnoDB for slave as well.
Adding test CREATE-SELECT that is rolled back explicitly.
Changing binlog positions.
sql/log.cc:
Adding helper class to handle lock/unlock of mutexes using RAII.
Factoring out code into write_cache() function to transaction cache
to binary log.
Adding function THD::binlog_flush_transaction_cache() to flush the
transaction cache to the binary log file.
Factoring out code into binlog_set_stmt_begin() to set the beginning
of statement savepoint.
Clearing before statement point when transaction cache is truncated
so that these points are out of range.
sql/log.h:
Adding method MYSQL_BIN_LOG::write_cache()
sql/log_event.h:
Replicating OPTION_NOT_AUTOCOMMIT flag (see changeset comment)
sql/mysql_priv.h:
Although left-shifting signed integer values is well-defined,
it has potential for strange errors. Using unsigned long long
instead of signed long long since this is the type of the options
flags.
sql/slave.cc:
Adding printout of transaction-critical thread flags.
sql/sql_class.h:
Adding function THD::binlog_flush_transaction_cache()
Adding function THD::binlog_set_stmt_begin()
sql/sql_insert.cc:
Adding code to cache events for a CREATE-SELECT statement.
Disabling binlog for SBR (but not RBR) when sending error for select part
of CREATE-SELECT statement.
Adding implicit commit at end of statement for non-temporary tables.
mysql-test/t/rpl_row_create_table-slave.opt:
New BitKeeper file ``mysql-test/t/rpl_row_create_table-slave.opt''
Diffstat (limited to 'sql/log.cc')
-rw-r--r-- | sql/log.cc | 158 |
1 files changed, 129 insertions, 29 deletions
diff --git a/sql/log.cc b/sql/log.cc index a1ed9bd6df3..1bb1b9a33e6 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -82,6 +82,41 @@ char *make_default_log_name(char *buff,const char* log_ext) } /* + Helper class to hold a mutex for the duration of the + block. + + Eliminates the need for explicit unlocking of mutexes on, e.g., + error returns. On passing a null pointer, the sentry will not do + anything. + */ +class Mutex_sentry +{ +public: + Mutex_sentry(pthread_mutex_t *mutex) + : m_mutex(mutex) + { + if (m_mutex) + pthread_mutex_lock(mutex); + } + + ~Mutex_sentry() + { + if (m_mutex) + pthread_mutex_unlock(m_mutex); +#ifndef DBUG_OFF + m_mutex= 0; +#endif + } + +private: + pthread_mutex_t *m_mutex; + + // It's not allowed to copy this object in any way + Mutex_sentry(Mutex_sentry const&); + void operator=(Mutex_sentry const&); +}; + +/* Helper class to store binary log transaction data. */ class binlog_trx_data { @@ -121,11 +156,17 @@ public: */ void truncate(my_off_t pos) { + DBUG_PRINT("info", ("truncating to position %lu", pos)); + DBUG_PRINT("info", ("before_stmt_pos=%lu", pos)); #ifdef HAVE_ROW_BASED_REPLICATION delete pending(); set_pending(0); #endif reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0); +#ifdef HAVE_ROW_BASED_REPLICATION + if (pos < before_stmt_pos) + before_stmt_pos= MY_OFF_T_UNDEF; +#endif } /* @@ -1416,12 +1457,11 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data, If rolling back a statement in a transaction, we truncate the transaction cache to remove the statement. - */ if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT))) trx_data->reset(); - else - trx_data->truncate(trx_data->before_stmt_pos); // ...statement + else // ...statement + trx_data->truncate(trx_data->before_stmt_pos); /* We need to step the table map version on a rollback to ensure @@ -2010,7 +2050,7 @@ bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, goto err; /* command_type, thread_id */ - length= my_snprintf(buff, 32, "%5ld ", thread_id); + length= my_snprintf(buff, 32, "%5ld ", static_cast<long>(thread_id)); if (my_b_write(&log_file, (byte*) buff, length)) goto err; @@ -3338,18 +3378,7 @@ THD::binlog_start_trans_and_stmt() if (trx_data == NULL || trx_data->before_stmt_pos == MY_OFF_T_UNDEF) { - /* - The call to binlog_trans_log_savepos() might create the trx_data - structure, if it didn't exist before, so we save the position - into an auto variable and then write it into the transaction - data for the binary log (i.e., trx_data). - */ - my_off_t pos= 0; - binlog_trans_log_savepos(this, &pos); - trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot]; - - trx_data->before_stmt_pos= pos; - + this->binlog_set_stmt_begin(); if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) trans_register_ha(this, TRUE, binlog_hton); trans_register_ha(this, FALSE, binlog_hton); @@ -3357,6 +3386,51 @@ THD::binlog_start_trans_and_stmt() DBUG_VOID_RETURN; } +void THD::binlog_set_stmt_begin() { + binlog_trx_data *trx_data= + (binlog_trx_data*) ha_data[binlog_hton->slot]; + + /* + The call to binlog_trans_log_savepos() might create the trx_data + structure, if it didn't exist before, so we save the position + into an auto variable and then write it into the transaction + data for the binary log (i.e., trx_data). + */ + my_off_t pos= 0; + binlog_trans_log_savepos(this, &pos); + trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot]; + trx_data->before_stmt_pos= pos; +} + +int THD::binlog_flush_transaction_cache() +{ + DBUG_ENTER("binlog_flush_transaction_cache"); + binlog_trx_data *trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot]; + DBUG_PRINT("enter", ("trx_data=0x%lu", trx_data)); + if (trx_data) + DBUG_PRINT("enter", ("trx_data->before_stmt_pos=%u", + trx_data->before_stmt_pos)); + + /* + Write the transaction cache to the binary log. We don't flush and + sync the log file since we don't know if more will be written to + it. If the caller want the log file sync:ed, the caller has to do + it. + + The transaction data is only reset upon a successful write of the + cache to the binary log. + */ + + if (trx_data && likely(mysql_bin_log.is_open())) { + if (int error= mysql_bin_log.write_cache(&trx_data->trans_log, true, true)) + DBUG_RETURN(error); + trx_data->reset(); + } + + DBUG_RETURN(0); +} + + /* Write a table map to the binary log. */ @@ -3768,12 +3842,48 @@ uint MYSQL_BIN_LOG::next_file_id() /* + Write the contents of a cache to the binary log. + + SYNOPSIS + write_cache() + cache Cache to write to the binary log + lock_log True if the LOCK_log mutex should be aquired, false otherwise + sync_log True if the log should be flushed and sync:ed + + DESCRIPTION + Write the contents of the cache to the binary log. The cache will + be reset as a READ_CACHE to be able to read the contents from it. + */ + +int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log) +{ + Mutex_sentry sentry(lock_log ? &LOCK_log : NULL); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + return ER_ERROR_ON_WRITE; + uint bytes= my_b_bytes_in_cache(cache); + do + { + if (my_b_write(&log_file, cache->read_pos, bytes)) + return ER_ERROR_ON_WRITE; + cache->read_pos= cache->read_end; + } while ((bytes= my_b_fill(cache))); + + if (sync_log) + flush_and_sync(); + + return 0; // All OK +} + +/* Write a cached log entry to the binary log SYNOPSIS write() thd cache The cache to copy to the binlog + commit_event The commit event to print after writing the + contents of the cache. NOTE - We only come here if there is something in the cache. @@ -3833,20 +3943,10 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event) if (qinfo.write(&log_file)) goto err; } - /* Read from the file used to cache the queries .*/ - if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) - goto err; - length=my_b_bytes_in_cache(cache); - DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;); - do - { - /* Write data to the binary log file */ - if (my_b_write(&log_file, cache->read_pos, length)) - goto err; - cache->read_pos=cache->read_end; // Mark buffer used up - DBUG_EXECUTE_IF("half_binlogged_transaction", goto DBUG_skip_commit;); - } while ((length=my_b_fill(cache))); + if ((write_error= write_cache(cache, false, false))) + goto err; + if (commit_event && commit_event->write(&log_file)) goto err; #ifndef DBUG_OFF |