diff options
author | Jonas Oreland <jonaso@google.com> | 2014-12-23 13:38:00 +0100 |
---|---|---|
committer | Kristian Nielsen <knielsen@knielsen-hq.org> | 2014-12-23 14:16:32 +0100 |
commit | 0b87de124d716cee7c1aa56f30c7f80c2c2bfcce (patch) | |
tree | 5557407d03402bdf28daf1b64e438362358f6188 /sql/log.cc | |
parent | 4d8b346e079a27960dbe49e4d0ec4364bed8d30e (diff) | |
download | mariadb-git-0b87de124d716cee7c1aa56f30c7f80c2c2bfcce.tar.gz |
MDEV-162 Enhanced semisync replication
Implement --semi-sync-master-wait-point=AFTER_SYNC|AFTER_COMMIT.
With AFTER_SYNC, the semi-sync wait is done earlier: before the storage
engine commit rather than after it. This means that a transaction will not be
visible on the master until at least one slave has received it.
Diffstat (limited to 'sql/log.cc')
-rw-r--r-- | sql/log.cc | 118 |
1 file changed, 98 insertions, 20 deletions
diff --git a/sql/log.cc b/sql/log.cc index edb4c07c8cc..66e142668a9 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -93,6 +93,7 @@ ulong opt_binlog_dbug_fsync_sleep= 0; mysql_mutex_t LOCK_prepare_ordered; mysql_cond_t COND_prepare_ordered; +mysql_mutex_t LOCK_after_binlog_sync; mysql_mutex_t LOCK_commit_ordered; static ulonglong binlog_status_var_num_commits; @@ -3938,7 +3939,8 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool create_new_log, Without binlog, we cannot XA recover prepared-but-not-committed transactions in engines. So force a commit checkpoint first. - Note that we take and immediately release LOCK_commit_ordered. This has + Note that we take and immediately + release LOCK_after_binlog_sync/LOCK_commit_ordered. This has the effect to ensure that any on-going group commit (in trx_group_commit_leader()) has completed before we request the checkpoint, due to the chaining of LOCK_log and LOCK_commit_ordered in that function. @@ -3949,7 +3951,10 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool create_new_log, commit_ordered() in the engine of some transaction, and then a crash later would leave such transaction not recoverable. 
*/ + + mysql_mutex_lock(&LOCK_after_binlog_sync); mysql_mutex_lock(&LOCK_commit_ordered); + mysql_mutex_unlock(&LOCK_after_binlog_sync); mysql_mutex_unlock(&LOCK_commit_ordered); mark_xids_active(current_binlog_id, 1); @@ -6035,11 +6040,6 @@ err: if ((error= flush_and_sync(&synced))) { } - else if ((error= RUN_HOOK(binlog_storage, after_flush, - (thd, log_file_name, file->pos_in_file, synced)))) - { - sql_print_error("Failed to run 'after_flush' hooks"); - } else { /* update binlog_end_pos so it can be read by dump thread @@ -6050,23 +6050,58 @@ err: */ update_binlog_end_pos(offset); - signal_update(); - if ((error= rotate(false, &check_purge))) - check_purge= false; + /* documentation of which mutexes are (not) owned */ + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + bool first= true; + bool last= true; + if ((error= RUN_HOOK(binlog_storage, after_flush, + (thd, log_file_name, file->pos_in_file, + synced, first, last)))) + { + sql_print_error("Failed to run 'after_flush' hooks"); + error= 1; + } + else + { + signal_update(); + if ((error= rotate(false, &check_purge))) + check_purge= false; + } } } status_var_add(thd->status_var.binlog_bytes_written, offset - my_org_b_tell); + mysql_mutex_lock(&LOCK_after_binlog_sync); + mysql_mutex_unlock(&LOCK_log); + + /* documentation of which mutexes are (not) owned */ + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(&LOCK_log); + mysql_mutex_assert_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + bool first= true; + bool last= true; + if (RUN_HOOK(binlog_storage, after_sync, + (thd, log_file_name, file->pos_in_file, + first, last))) + { + error=1; + /* error is already printed inside hook */ + } + /* Take mutex to protect against a reader seeing partial writes of 64-bit offset on 32-bit CPUs. 
*/ mysql_mutex_lock(&LOCK_commit_ordered); + mysql_mutex_unlock(&LOCK_after_binlog_sync); last_commit_pos_offset= offset; mysql_mutex_unlock(&LOCK_commit_ordered); - mysql_mutex_unlock(&LOCK_log); if (check_purge) checkpoint_and_purge(prev_binlog_id); @@ -7374,13 +7409,22 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) { bool any_error= false; bool all_error= true; + + /* documentation of which mutexes are (not) owned */ + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + bool first= true, last; for (current= queue; current != NULL; current= current->next) { + last= current->next == NULL; if (!current->error && RUN_HOOK(binlog_storage, after_flush, (current->thd, current->cache_mngr->last_commit_pos_file, - current->cache_mngr->last_commit_pos_offset, synced))) + current->cache_mngr->last_commit_pos_offset, synced, + first, last))) { current->error= ER_ERROR_ON_WRITE; current->commit_errno= -1; @@ -7389,6 +7433,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) } else all_error= false; + first= false; } /* update binlog_end_pos so it can be read by dump thread @@ -7437,22 +7482,55 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) } } - DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); - mysql_mutex_lock(&LOCK_commit_ordered); - /** - * TODO(jonaso): Check with Kristian, - * if we rotate:d above, this offset is "wrong" - */ - last_commit_pos_offset= commit_offset; + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync"); + mysql_mutex_lock(&LOCK_after_binlog_sync); /* - We cannot unlock LOCK_log until we have locked LOCK_commit_ordered; + We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync; otherwise scheduling could allow the next group commit to run ahead of us, messing up the order of commit_ordered() 
calls. But as soon as - LOCK_commit_ordered is obtained, we can let the next group commit start. + LOCK_after_binlog_sync is obtained, we can let the next group commit start. */ mysql_mutex_unlock(&LOCK_log); DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log"); + + /* + Loop through threads and run the binlog_sync hook + */ + { + /* documentation of which mutexes are (not) owned */ + mysql_mutex_assert_not_owner(&LOCK_prepare_ordered); + mysql_mutex_assert_not_owner(&LOCK_log); + mysql_mutex_assert_owner(&LOCK_after_binlog_sync); + mysql_mutex_assert_not_owner(&LOCK_commit_ordered); + + bool first= true, last; + for (current= queue; current != NULL; current= current->next) + { + last= current->next == NULL; + if (!current->error && + RUN_HOOK(binlog_storage, after_sync, + (current->thd, log_file_name, + current->cache_mngr->last_commit_pos_offset, + first, last))) + { + /* error is already printed inside hook */ + } + first= false; + } + } + + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); + mysql_mutex_lock(&LOCK_commit_ordered); + last_commit_pos_offset= commit_offset; + + /* + Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been + acquired so that groups can not reorder for the different stages of + the group commit procedure. + */ + mysql_mutex_unlock(&LOCK_after_binlog_sync); + DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync"); ++num_group_commits; if (!opt_optimize_thread_scheduling) |