diff options
author | unknown <knielsen@knielsen-hq.org> | 2012-12-14 15:38:07 +0100 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2012-12-14 15:38:07 +0100 |
commit | 40bbf697aad7d923fc1bd995bc5f547e45461cbe (patch) | |
tree | 1563a1c99589bb5853d35faee4ae49e22b3fdd0a /storage/xtradb | |
parent | e97d6232f366c474f6eba4013bfbd6dacc01d544 (diff) | |
download | mariadb-git-40bbf697aad7d923fc1bd995bc5f547e45461cbe.tar.gz |
MDEV-532: Async InnoDB commit checkpoint.
Make the commit checkpoint inside InnoDB be asynchroneous.
Implement a background thread in binlog to do the writing and flushing of
binlog checkpoint events to disk.
Diffstat (limited to 'storage/xtradb')
-rw-r--r-- | storage/xtradb/handler/ha_innodb.cc | 143 | ||||
-rw-r--r-- | storage/xtradb/include/ha_prototypes.h | 11 | ||||
-rw-r--r-- | storage/xtradb/include/log0log.h | 7 | ||||
-rw-r--r-- | storage/xtradb/include/log0log.ic | 19 | ||||
-rw-r--r-- | storage/xtradb/log/log0log.c | 7 |
5 files changed, 183 insertions, 4 deletions
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 653607c9381..a5873bc05d3 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -121,6 +121,7 @@ static ulong commit_threads = 0; static mysql_mutex_t commit_threads_m; static mysql_cond_t commit_cond; static mysql_mutex_t commit_cond_m; +static mysql_mutex_t pending_checkpoint_mutex; static bool innodb_inited = 0; @@ -254,11 +255,13 @@ static mysql_pfs_key_t innobase_share_mutex_key; static mysql_pfs_key_t commit_threads_m_key; static mysql_pfs_key_t commit_cond_mutex_key; static mysql_pfs_key_t commit_cond_key; +static mysql_pfs_key_t pending_checkpoint_mutex_key; static PSI_mutex_info all_pthread_mutexes[] = { {&commit_threads_m_key, "commit_threads_m", 0}, {&commit_cond_mutex_key, "commit_cond_mutex", 0}, - {&innobase_share_mutex_key, "innobase_share_mutex", 0} + {&innobase_share_mutex_key, "innobase_share_mutex", 0}, + {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0} }; static PSI_cond_info all_innodb_conds[] = { @@ -3088,6 +3091,9 @@ skip_overwrite: mysql_mutex_init(commit_cond_mutex_key, &commit_cond_m, MY_MUTEX_INIT_FAST); mysql_cond_init(commit_cond_key, &commit_cond, NULL); + mysql_mutex_init(pending_checkpoint_mutex_key, + &pending_checkpoint_mutex, + MY_MUTEX_INIT_FAST); innodb_inited= 1; #ifdef MYSQL_DYNAMIC_PLUGIN if (innobase_hton != p) { @@ -3135,6 +3141,7 @@ innobase_end( mysql_mutex_destroy(&commit_threads_m); mysql_mutex_destroy(&commit_cond_m); mysql_cond_destroy(&commit_cond); + mysql_mutex_destroy(&pending_checkpoint_mutex); } DBUG_RETURN(err); @@ -3530,17 +3537,145 @@ innobase_rollback_trx( DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); } + +struct pending_checkpoint { + struct pending_checkpoint *next; + handlerton *hton; + void *cookie; + ib_uint64_t lsn; +}; +static struct pending_checkpoint *pending_checkpoint_list; +static struct pending_checkpoint *pending_checkpoint_list_end; + /*****************************************************************//** Handle a commit checkpoint request from server layer. -We simply flush the redo log immediately and do the notify call.*/ +We put the request in a queue, so that we can notify upper layer about +checkpoint complete when we have flushed the redo log. +If we have already flushed all relevant redo log, we notify immediately.*/ static void innobase_checkpoint_request( handlerton *hton, void *cookie) { - log_buffer_flush_to_disk(); - commit_checkpoint_notify_ha(hton, cookie); + ib_uint64_t lsn; + ib_uint64_t flush_lsn; + struct pending_checkpoint * entry; + + /* Do the allocation outside of lock to reduce contention. The normal + case is that not everything is flushed, so we will need to enqueue. */ + entry = static_cast<struct pending_checkpoint *> + (my_malloc(sizeof(*entry), MYF(MY_WME))); + if (!entry) { + sql_print_error("Failed to allocate %u bytes." + " Commit checkpoint will be skipped.", + static_cast<unsigned>(sizeof(*entry))); + return; + } + + entry->next = NULL; + entry->hton = hton; + entry->cookie = cookie; + + mysql_mutex_lock(&pending_checkpoint_mutex); + lsn = log_get_lsn(); + flush_lsn = log_get_flush_lsn(); + if (lsn > flush_lsn) { + /* Put the request in queue. + When the log gets flushed past the lsn, we will remove the + entry from the queue and notify the upper layer. */ + entry->lsn = lsn; + if (pending_checkpoint_list_end) { + pending_checkpoint_list_end->next = entry; + /* There is no need to order the entries in the list + by lsn. The upper layer can accept notifications in + any order, and short delays in notifications do not + significantly impact performance. */ + } else { + pending_checkpoint_list = entry; + } + pending_checkpoint_list_end = entry; + entry = NULL; + } + mysql_mutex_unlock(&pending_checkpoint_mutex); + + if (entry) { + /* We are already flushed. Notify the checkpoint immediately. */ + commit_checkpoint_notify_ha(entry->hton, entry->cookie); + my_free(entry); + } +} + +/*****************************************************************//** +Log code calls this whenever log has been written and/or flushed up +to a new position. We use this to notify upper layer of a new commit +checkpoint when necessary.*/ +extern "C" UNIV_INTERN +void +innobase_mysql_log_notify( +/*===============*/ + ib_uint64_t write_lsn, /*!< in: LSN written to log file */ + ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */ +{ + struct pending_checkpoint * pending; + struct pending_checkpoint * entry; + struct pending_checkpoint * last_ready; + + /* It is safe to do a quick check for NULL first without lock. + Even if we should race, we will at most skip one checkpoint and + take the next one, which is harmless. */ + if (!pending_checkpoint_list) + return; + + mysql_mutex_lock(&pending_checkpoint_mutex); + pending = pending_checkpoint_list; + if (!pending) + { + mysql_mutex_unlock(&pending_checkpoint_mutex); + return; + } + + last_ready = NULL; + for (entry = pending; entry != NULL; entry = entry -> next) + { + /* Notify checkpoints up until the first entry that has not + been fully flushed to the redo log. Since we do not maintain + the list ordered, in principle there could be more entries + later than were also flushed. But there is no harm in + delaying notifications for those a bit. And in practise, the + list is unlikely to have more than one element anyway, as we + flush the redo log at least once every second. */ + if (entry->lsn > flush_lsn) + break; + last_ready = entry; + } + + if (last_ready) + { + /* We found some pending checkpoints that are now flushed to + disk. So remove them from the list. */ + pending_checkpoint_list = entry; + if (!entry) + pending_checkpoint_list_end = NULL; + } + + mysql_mutex_unlock(&pending_checkpoint_mutex); + + if (!last_ready) + return; + + /* Now that we have released the lock, notify upper layer about all + commit checkpoints that have now completed. */ + for (;;) { + entry = pending; + pending = pending->next; + + commit_checkpoint_notify_ha(entry->hton, entry->cookie); + + my_free(entry); + if (entry == last_ready) + break; + } } /*****************************************************************//** diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 2907365a32a..890bf33ac02 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -136,6 +136,17 @@ innobase_mysql_print_thd( uint max_query_len); /*!< in: max query length to print, or 0 to use the default max length */ +/*****************************************************************//** +Log code calls this whenever log has been written and/or flushed up +to a new position. We use this to notify upper layer of a new commit +checkpoint when necessary.*/ +UNIV_INTERN +void +innobase_mysql_log_notify( +/*===============*/ + ib_uint64_t write_lsn, /*!< in: LSN written to log file */ + ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */ + /**************************************************************//** Converts a MySQL type to an InnoDB type. Note that this function returns the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 857ec0946c2..8a6430fb105 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -151,6 +151,13 @@ UNIV_INLINE ib_uint64_t log_get_lsn(void); /*=============*/ +/************************************************************//** +Gets the last lsn that is fully flushed to disk. +@return last flushed lsn */ +UNIV_INLINE +ib_uint64_t +log_get_flush_lsn(void); +/*=============*/ /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 67db6695cab..b54697637b0 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -411,6 +411,25 @@ log_get_lsn(void) return(lsn); } +/************************************************************//** +Gets the last lsn that is fully flushed to disk. +@return last flushed lsn */ +UNIV_INLINE +ib_uint64_t +log_get_flush_lsn(void) +/*=============*/ +{ + ib_uint64_t lsn; + + mutex_enter(&(log_sys->mutex)); + + lsn = log_sys->flushed_to_disk_lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index dcaf951a0ed..4f8133b3036 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -1390,6 +1390,8 @@ log_write_up_to( ulint loop_count = 0; #endif /* UNIV_DEBUG */ ulint unlock; + ib_uint64_t write_lsn; + ib_uint64_t flush_lsn; if (recv_no_ibuf_operations) { /* Recovery is running and no operations on the log files are @@ -1568,8 +1570,13 @@ loop: log_flush_do_unlocks(unlock); + write_lsn = log_sys->write_lsn; + flush_lsn = log_sys->flushed_to_disk_lsn; + mutex_exit(&(log_sys->mutex)); + innobase_mysql_log_notify(write_lsn, flush_lsn); + return; do_waits: |