From 7e5dc4f074b7d1cee4721e6fa49d6e5628ef793f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 26 Jun 2013 12:10:35 +0200 Subject: MDEV-4506: Parallel replication. Intermediate commit. Implement facility for the commit in one thread to wait for the commit of another to complete first. The wait is done in a way that does not prevent a waiter and a waitee from group committing together with a single fsync() in both binlog and InnoDB. The wait is done efficiently with respect to locking. The patch was originally made to support TaoBao parallel replication with in-order commit; now it will be adapted to also be used for parallel replication of group-committed transactions. A waiter THD registers itself with a prior waitee THD. The waiter will then complete its commit at the earliest in the same group commit of the waitee (when using binlog). The wait can also be done explicitly by the waitee. --- include/mysql/plugin.h | 35 +++++++++++++++++++++++++++++++++++ include/mysql/plugin_audit.h.pp | 1 + include/mysql/plugin_auth.h.pp | 1 + include/mysql/plugin_ftparser.h.pp | 1 + 4 files changed, 38 insertions(+) (limited to 'include/mysql') diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 38573180232..ab72a9d106b 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -683,6 +683,41 @@ void *thd_get_ha_data(const MYSQL_THD thd, const struct handlerton *hton); */ void thd_set_ha_data(MYSQL_THD thd, const struct handlerton *hton, const void *ha_data); + + +/** + Signal that the first part of handler commit is finished, and that the + committed transaction is now visible and has fixed commit ordering with + respect to other transactions. The commit need _not_ be durable yet, and + typically will not be when this call makes sense. + + This call is optional; if the storage engine does not call it, the upper + layer will do so after the handler commit() method is done. 
However, the storage + engine may choose to call it itself to increase the possibility for group + commit. + + In-order parallel replication uses this to apply different transactions in + parallel, but delay the commits of later transactions until earlier + transactions have committed first, thus achieving increased performance on + multi-core systems while still preserving full transaction consistency. + + The storage engine can call this from within the commit() method, typically + after the commit record has been written to the transaction log, but before + the log has been fsync()'ed. This will allow the next replicated transaction + to proceed to commit before the first one has done fsync() or similar. Thus, + it becomes possible for multiple sequential replicated transactions to share + a single fsync() inside the engine in group commit. + + Note that this method should _not_ be called from within the commit_ordered() + method, or any other place in the storage engine. When commit_ordered() is + used (typically when binlog is enabled), the transaction coordinator takes + care of this and makes group commit in the storage engine possible without + any other action needed on the part of the storage engine. This function + thd_wakeup_subsequent_commits() is only needed when no transaction + coordinator is used, meaning a single storage engine and no binary log. 
+*/ +void thd_wakeup_subsequent_commits(MYSQL_THD thd); + #ifdef __cplusplus } #endif diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp index d630359f5fe..564dd6272f5 100644 --- a/include/mysql/plugin_audit.h.pp +++ b/include/mysql/plugin_audit.h.pp @@ -236,6 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); +void thd_wakeup_subsequent_commits(void* thd); struct mysql_event_general { unsigned int event_subclass; diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp index 6a877980c25..edfd7095203 100644 --- a/include/mysql/plugin_auth.h.pp +++ b/include/mysql/plugin_auth.h.pp @@ -236,6 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); +void thd_wakeup_subsequent_commits(void* thd); #include <mysql/plugin_auth_common.h> typedef struct st_plugin_vio_info { diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp index ab15c9d176d..0cc51e259dc 100644 --- a/include/mysql/plugin_ftparser.h.pp +++ b/include/mysql/plugin_ftparser.h.pp @@ -189,6 +189,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); +void thd_wakeup_subsequent_commits(void* thd); enum enum_ftparser_mode { MYSQL_FTPARSER_SIMPLE_MODE= 0, -- cgit v1.2.1 From 2842f6b5dc254c82aa3dc976cd5bd3645dc82a60 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 14 Oct 2013 15:28:16 +0200 Subject: MDEV-4506: Parallel replication: error handling. Add an error code to the wait_for_commit facility. 
Now, when a transaction fails, it can signal the error to any subsequent transaction that is waiting for it to commit. The waiting transactions then receive the error code back from wait_for_prior_commit() and can handle the error appropriately. Also fix one race that could cause a crash if @@slave_parallel_threads was changed several times quickly in succession. --- include/mysql/plugin.h | 2 +- include/mysql/plugin_audit.h.pp | 2 +- include/mysql/plugin_auth.h.pp | 2 +- include/mysql/plugin_ftparser.h.pp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/mysql') diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index ab72a9d106b..9ac63f08f73 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -716,7 +716,7 @@ void thd_set_ha_data(MYSQL_THD thd, const struct handlerton *hton, thd_wakeup_subsequent_commits() is only needed when no transaction coordinator is used, meaning a single storage engine and no binary log. */ -void thd_wakeup_subsequent_commits(MYSQL_THD thd); +void thd_wakeup_subsequent_commits(MYSQL_THD thd, int wakeup_error); #ifdef __cplusplus } diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp index 564dd6272f5..17e5c191672 100644 --- a/include/mysql/plugin_audit.h.pp +++ b/include/mysql/plugin_audit.h.pp @@ -236,7 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); -void thd_wakeup_subsequent_commits(void* thd); +void thd_wakeup_subsequent_commits(void* thd, int wakeup_error); struct mysql_event_general { unsigned int event_subclass; diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp index edfd7095203..33b552ec75b 100644 --- a/include/mysql/plugin_auth.h.pp +++ b/include/mysql/plugin_auth.h.pp @@ -236,7 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const 
void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); -void thd_wakeup_subsequent_commits(void* thd); +void thd_wakeup_subsequent_commits(void* thd, int wakeup_error); #include <mysql/plugin_auth_common.h> typedef struct st_plugin_vio_info { diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp index 0cc51e259dc..b4aa962c51c 100644 --- a/include/mysql/plugin_ftparser.h.pp +++ b/include/mysql/plugin_ftparser.h.pp @@ -189,7 +189,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); -void thd_wakeup_subsequent_commits(void* thd); +void thd_wakeup_subsequent_commits(void* thd, int wakeup_error); enum enum_ftparser_mode { MYSQL_FTPARSER_SIMPLE_MODE= 0, -- cgit v1.2.1