diff options
author | Monty <monty@mariadb.org> | 2020-07-20 15:19:25 +0300 |
---|---|---|
committer | Monty <monty@mariadb.org> | 2020-07-21 12:42:42 +0300 |
commit | fc48c8ff4c6cb9aa7a0fd27e724e6e3acd555b0e (patch) | |
tree | 14b8ef527d76e830ee649c867c8709edfca8b34f | |
parent | c4d5b6b157b06fe22fd7e01967d7a0194c3686a2 (diff) | |
download | mariadb-git-fc48c8ff4c6cb9aa7a0fd27e724e6e3acd555b0e.tar.gz |
MDEV-21953 deadlock between BACKUP STAGE BLOCK_COMMIT and parallel repl.
The issue was:
T1, a parallel slave worker thread, is waiting for another worker thread to
commit. While waiting, it has the MDL_BACKUP_COMMIT lock.
T2, working for mariabackup, is doing BACKUP STAGE BLOCK_COMMIT and blocks
all commits.
This causes a deadlock as the thread T1 is waiting for can't commit.
Fixed by moving locking of MDL_BACKUP_COMMIT from ha_commit_trans() to
commit_one_phase_2()
Other things:
- Added a new argument to ha_comit_one_phase() to signal if the
transaction was a write transaction.
- Ensured that ha_maria::implicit_commit() is always called under
MDL_BACKUP_COMMIT. This code is not needed in 10.5
- Ensure that MDL_Request values 'type' and 'ticket' are always
initialized. This makes it easier to check the state of the MDL_Request.
- Moved thd->store_globals() earlier in handle_rpl_parallel_thread() as
thd->init_for_queries() could use a MDL that could crash if store_globals
where not called.
- Don't call ha_enable_transactions() in THD::init_for_queries() as this
is both slow (uses MDL locks) and not needed.
-rw-r--r-- | mysql-test/suite/rpl/r/parallel_backup.result | 40 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/parallel_backup.test | 75 | ||||
-rw-r--r-- | sql/handler.cc | 117 | ||||
-rw-r--r-- | sql/handler.h | 8 | ||||
-rw-r--r-- | sql/mdl.h | 5 | ||||
-rw-r--r-- | sql/rpl_parallel.cc | 2 | ||||
-rw-r--r-- | sql/sql_class.cc | 7 | ||||
-rw-r--r-- | sql/sql_parse.cc | 8 | ||||
-rw-r--r-- | sql/xa.cc | 2 | ||||
-rw-r--r-- | storage/maria/ha_maria.cc | 4 | ||||
-rw-r--r-- | storage/maria/ha_maria.h | 1 |
11 files changed, 210 insertions, 59 deletions
diff --git a/mysql-test/suite/rpl/r/parallel_backup.result b/mysql-test/suite/rpl/r/parallel_backup.result new file mode 100644 index 00000000000..d87c61f2d0f --- /dev/null +++ b/mysql-test/suite/rpl/r/parallel_backup.result @@ -0,0 +1,40 @@ +include/master-slave.inc +[connection master] +# +# MDEV-21953: deadlock between BACKUP STAGE BLOCK_COMMIT and parallel +# replication +# +connection master; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE = innodb; +connection slave; +include/stop_slave.inc +SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; +SET @@global.slave_parallel_threads= 2; +SET @@global.slave_parallel_mode = 'optimistic'; +connection master; +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +connect aux_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,; +BEGIN; +INSERT INTO t1 VALUES (1); +connection slave; +include/start_slave.inc +connection aux_slave; +connect backup_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,; +BACKUP STAGE START; +BACKUP STAGE BLOCK_COMMIT; +connection aux_slave; +ROLLBACK; +connection backup_slave; +BACKUP STAGE END; +connection slave; +include/diff_tables.inc [master:t1,slave:t1] +connection slave; +include/stop_slave.inc +SET @@global.slave_parallel_threads= @old_parallel_threads; +SET @@global.slave_parallel_mode = @old_parallel_mode; +include/start_slave.inc +connection server_1; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/parallel_backup.test b/mysql-test/suite/rpl/t/parallel_backup.test new file mode 100644 index 00000000000..6ed182c024b --- /dev/null +++ b/mysql-test/suite/rpl/t/parallel_backup.test @@ -0,0 +1,75 @@ +--source include/have_innodb.inc +# The test is not format specific, MIXED is required to optimize testing time +--source include/have_binlog_format_mixed.inc +--source include/master-slave.inc + +--echo # +--echo # MDEV-21953: deadlock between BACKUP STAGE BLOCK_COMMIT and parallel +--echo # replication +--echo # + +--connection master +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE = innodb; + +--sync_slave_with_master +--source include/stop_slave.inc +SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; +SET @@global.slave_parallel_threads= 2; +SET @@global.slave_parallel_mode = 'optimistic'; + +--connection master +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +--save_master_pos + +# The plot: +# Block the 1st of two workers and, at waiting-for-prior-commit by the 2nd, +# issue BACKUP commands. +# BLOCK_COMMIT may hang so it is --send. +# Release the 1st worker to observe a deadlock unless its fixed. + +--connect (aux_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,) +BEGIN; +# block the 1st worker and wait for the 2nd ready to commit +INSERT INTO t1 VALUES (1); + +--connection slave +--source include/start_slave.inc + +--connection aux_slave +--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit" +--source include/wait_condition.inc + +# While the 1st worker is locked out run backup +--connect (backup_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,) +BACKUP STAGE START; +--send BACKUP STAGE BLOCK_COMMIT + +# release the 1st work +--connection aux_slave +--sleep 1 +ROLLBACK; + +--connection backup_slave +--reap +BACKUP STAGE END; + +--connection slave +--sync_with_master + +--let $diff_tables= master:t1,slave:t1 +--source include/diff_tables.inc + + +# Clean up. +--connection slave +--source include/stop_slave.inc +SET @@global.slave_parallel_threads= @old_parallel_threads; +SET @@global.slave_parallel_mode = @old_parallel_mode; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1; + +--source include/rpl_end.inc diff --git a/sql/handler.cc b/sql/handler.cc index cc7eedd18f2..5f94f9e893b 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -114,7 +114,7 @@ static TYPELIB known_extensions= {0,"known_exts", NULL, NULL}; uint known_extensions_id= 0; static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, - bool is_real_trans); + bool is_real_trans, bool rw_trans); static plugin_ref ha_default_plugin(THD *thd) @@ -133,6 +133,23 @@ static plugin_ref ha_default_tmp_plugin(THD *thd) return ha_default_plugin(thd); } +#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500 +void ha_maria_implicit_commit(THD *thd, bool new_trn) +{ + if (ha_maria::has_active_transaction(thd)) + { + int error; + MDL_request mdl_request; + mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT); + error= thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout); + ha_maria::implicit_commit(thd, new_trn); + if (!error) + thd->mdl_context.release_lock(mdl_request.ticket); + } +} +#endif + /** @brief Return the default storage engine handlerton for thread @@ -1445,10 +1462,6 @@ int ha_commit_trans(THD *thd, bool all) DBUG_RETURN(2); } -#ifdef WITH_ARIA_STORAGE_ENGINE - ha_maria::implicit_commit(thd, TRUE); -#endif - if (!ha_info) { /* @@ -1462,7 +1475,9 @@ int ha_commit_trans(THD *thd, bool all) wsrep_commit_empty(thd, all); } #endif /* WITH_WSREP */ - DBUG_RETURN(0); + + ha_maria_implicit_commit(thd, TRUE); + DBUG_RETURN(error); } DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE();); @@ -1475,33 +1490,9 @@ int ha_commit_trans(THD *thd, bool all) /* rw_trans is TRUE when we in a transaction changing data */ bool rw_trans= is_real_trans && (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U)); - MDL_request mdl_request; DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d", is_real_trans, rw_trans, rw_ha_count)); - if (rw_trans) - { - /* - Acquire a metadata lock which will ensure that COMMIT is blocked - by an active FLUSH TABLES WITH READ LOCK (and vice versa: - COMMIT in progress blocks FTWRL). - - We allow the owner of FTWRL to COMMIT; we assume that it knows - what it does. - */ - mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT); - - if (!WSREP(thd) && - thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) - { - ha_rollback_trans(thd, all); - DBUG_RETURN(1); - } - - DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock"); - } - if (rw_trans && opt_readonly && !(thd->security_ctx->master_access & SUPER_ACL) && @@ -1541,7 +1532,7 @@ int ha_commit_trans(THD *thd, bool all) // Here, the call will not commit inside InnoDB. It is only working // around closing thd->transaction.stmt open by TR_table::open(). if (all) - commit_one_phase_2(thd, false, &thd->transaction.stmt, false); + commit_one_phase_2(thd, false, &thd->transaction.stmt, false, false); } } #endif @@ -1561,7 +1552,7 @@ int ha_commit_trans(THD *thd, bool all) goto wsrep_err; } #endif /* WITH_WSREP */ - error= ha_commit_one_phase(thd, all); + error= ha_commit_one_phase(thd, all, rw_trans); #ifdef WITH_WSREP if (run_wsrep_hooks) error= error || wsrep_after_commit(thd, all); @@ -1613,7 +1604,7 @@ int ha_commit_trans(THD *thd, bool all) if (!is_real_trans) { - error= commit_one_phase_2(thd, all, trans, is_real_trans); + error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans); goto done; } #ifdef WITH_WSREP @@ -1631,7 +1622,7 @@ int ha_commit_trans(THD *thd, bool all) DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order"); DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE();); - error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0; + error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans) ? 2 : 0; #ifdef WITH_WSREP if (run_wsrep_hooks && (error || (error = wsrep_after_commit(thd, all)))) { @@ -1694,16 +1685,6 @@ err: thd->rgi_slave->is_parallel_exec); } end: - if (rw_trans && mdl_request.ticket) - { - /* - We do not always immediately release transactional locks - after ha_commit_trans() (see uses of ha_enable_transaction()), - thus we release the commit blocker lock as soon as it's - not needed. - */ - thd->mdl_context.release_lock(mdl_request.ticket); - } #ifdef WITH_WSREP if (wsrep_is_active(thd) && is_real_trans && !error && (rw_ha_count == 0 || all) && @@ -1719,6 +1700,7 @@ end: /** @note This function does not care about global read lock. A caller should. + However backup locks are handled in commit_one_phase_2. @param[in] all Is set in case of explicit commit (COMMIT statement), or implicit commit @@ -1727,7 +1709,7 @@ end: autocommit=1. */ -int ha_commit_one_phase(THD *thd, bool all) +int ha_commit_one_phase(THD *thd, bool all, bool rw_trans) { THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; /* @@ -1753,20 +1735,50 @@ int ha_commit_one_phase(THD *thd, bool all) if ((res= thd->wait_for_prior_commit())) DBUG_RETURN(res); } - res= commit_one_phase_2(thd, all, trans, is_real_trans); + res= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans); DBUG_RETURN(res); } static int -commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) +commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans, + bool rw_trans) { int error= 0; uint count= 0; Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + MDL_request mdl_request; DBUG_ENTER("commit_one_phase_2"); if (is_real_trans) DEBUG_SYNC(thd, "commit_one_phase_2"); + + if (rw_trans) + { + /* + Acquire a metadata lock which will ensure that COMMIT is blocked + by an active FLUSH TABLES WITH READ LOCK (and vice versa: + COMMIT in progress blocks FTWRL). + + We allow the owner of FTWRL to COMMIT; we assume that it knows + what it does. + */ + mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT); + + if (!WSREP(thd) && + thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1); + ha_rollback_trans(thd, all); + DBUG_RETURN(1); + } + DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock"); + } + +#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500 + ha_maria::implicit_commit(thd, TRUE); +#endif + if (ha_info) { for (; ha_info; ha_info= ha_info_next) @@ -1795,6 +1807,17 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) #endif } } + if (mdl_request.ticket) + { + /* + We do not always immediately release transactional locks + after ha_commit_trans() (see uses of ha_enable_transaction()), + thus we release the commit blocker lock as soon as it's + not needed. + */ + thd->mdl_context.release_lock(mdl_request.ticket); + } + /* Free resources and perform other cleanup even for 'empty' transactions. */ if (is_real_trans) { diff --git a/sql/handler.h b/sql/handler.h index e514fcfd60c..3757a3ab2f9 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -5021,7 +5021,7 @@ int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); /* transactions: interface to handlerton functions */ int ha_start_consistent_snapshot(THD *thd); int ha_commit_or_rollback_by_xid(XID *xid, bool commit); -int ha_commit_one_phase(THD *thd, bool all); +int ha_commit_one_phase(THD *thd, bool all, bool rw_trans); int ha_commit_trans(THD *thd, bool all); int ha_rollback_trans(THD *thd, bool all); int ha_prepare(THD *thd); @@ -5093,4 +5093,10 @@ int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *t @note This does not need to be multi-byte safe or anything */ char *xid_to_str(char *buf, const XID &xid); #endif // !DBUG_OFF + +#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500 +extern void ha_maria_implicit_commit(THD *thd, bool new_trans); +#else +#define ha_maria_implicit_commit(A, B) while(0) +#endif #endif /* HANDLER_INCLUDED */ diff --git a/sql/mdl.h b/sql/mdl.h index b084670e5c6..123ffbcade6 100644 --- a/sql/mdl.h +++ b/sql/mdl.h @@ -122,6 +122,8 @@ public: */ enum enum_mdl_type { + /* This means that the MDL_request is not initialized */ + MDL_NOT_INITIALIZED= -1, /* An intention exclusive metadata lock (IX). Used only for scoped locks. Owner of this type of lock can acquire upgradable exclusive locks on @@ -592,12 +594,13 @@ public: */ MDL_request& operator=(const MDL_request &) { + type= MDL_NOT_INITIALIZED; ticket= NULL; /* Do nothing, in particular, don't try to copy the key. */ return *this; } /* Another piece of ugliness for TABLE_LIST constructor */ - MDL_request() {} + MDL_request(): type(MDL_NOT_INITIALIZED), ticket(NULL) {} MDL_request(const MDL_request *rhs) :type(rhs->type), diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 7c0e96bc8ed..eef6f734f1f 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -1051,10 +1051,10 @@ handle_rpl_parallel_thread(void *arg) server_threads.insert(thd); set_current_thd(thd); pthread_detach_this_thread(); + thd->store_globals(); thd->init_for_queries(); thd->variables.binlog_annotate_row_events= 0; init_thr_lock(); - thd->store_globals(); thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; thd->security_ctx->skip_grants(); thd->variables.max_allowed_packet= slave_max_allowed_packet; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 40e606425c5..15088148e02 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1383,7 +1383,11 @@ void THD::update_all_stats() void THD::init_for_queries() { set_time(); - ha_enable_transaction(this,TRUE); + /* + We don't need to call ha_enable_transaction() as we can't have + any active transactions that has to be commited + */ + transaction.on= TRUE; reset_root_defaults(mem_root, variables.query_alloc_block_size, variables.query_prealloc_size); @@ -7309,7 +7313,6 @@ wait_for_commit::~wait_for_commit() mysql_cond_destroy(&COND_wait_commit); } - void wait_for_commit::wakeup(int wakeup_error) { diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 935e054e163..3c5c52e2aa1 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1841,9 +1841,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, */ char *beginning_of_next_stmt= (char*) parser_state.m_lip.found_semicolon; -#ifdef WITH_ARIA_STORAGE_ENGINE - ha_maria::implicit_commit(thd, FALSE); -#endif + ha_maria_implicit_commit(thd, FALSE); /* Finalize server status flags after executing a statement. */ thd->update_server_status(); @@ -6157,9 +6155,7 @@ finish: trans_commit_stmt(thd); thd->get_stmt_da()->set_overwrite_status(false); } -#ifdef WITH_ARIA_STORAGE_ENGINE - ha_maria::implicit_commit(thd, FALSE); -#endif + ha_maria_implicit_commit(thd, FALSE); } /* Free tables. Set stage 'closing tables' */ diff --git a/sql/xa.cc b/sql/xa.cc index b08ab973cc5..15116ba9366 100644 --- a/sql/xa.cc +++ b/sql/xa.cc @@ -582,7 +582,7 @@ bool trans_xa_commit(THD *thd) { DEBUG_SYNC(thd, "trans_xa_commit_after_acquire_commit_lock"); - res= MY_TEST(ha_commit_one_phase(thd, 1)); + res= MY_TEST(ha_commit_one_phase(thd, 1, 1)); if (res) my_error(ER_XAER_RMERR, MYF(0)); } diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 35edfc31914..013c740582b 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2890,6 +2890,10 @@ static void reset_thd_trn(THD *thd, MARIA_HA *first_table) DBUG_VOID_RETURN; } +bool ha_maria::has_active_transaction(THD *thd) +{ + return (maria_hton && THD_TRN); +} /** Performs an implicit commit of the Maria transaction and creates a new diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index f6b00420c9c..73f3576a34e 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -165,6 +165,7 @@ public: { return file; } + static bool has_active_transaction(THD *thd); static int implicit_commit(THD *thd, bool new_trn); /** * Multi Range Read interface |