summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMonty <monty@mariadb.org>2020-07-20 15:19:25 +0300
committerMonty <monty@mariadb.org>2020-07-21 12:42:42 +0300
commitfc48c8ff4c6cb9aa7a0fd27e724e6e3acd555b0e (patch)
tree14b8ef527d76e830ee649c867c8709edfca8b34f
parentc4d5b6b157b06fe22fd7e01967d7a0194c3686a2 (diff)
downloadmariadb-git-fc48c8ff4c6cb9aa7a0fd27e724e6e3acd555b0e.tar.gz
MDEV-21953 deadlock between BACKUP STAGE BLOCK_COMMIT and parallel repl.
The issue was: T1, a parallel slave worker thread, is waiting for another worker thread to commit. While waiting, it has the MDL_BACKUP_COMMIT lock. T2, working for mariabackup, is doing BACKUP STAGE BLOCK_COMMIT and blocks all commits. This causes a deadlock as the thread T1 is waiting for can't commit. Fixed by moving locking of MDL_BACKUP_COMMIT from ha_commit_trans() to commit_one_phase_2() Other things: - Added a new argument to ha_comit_one_phase() to signal if the transaction was a write transaction. - Ensured that ha_maria::implicit_commit() is always called under MDL_BACKUP_COMMIT. This code is not needed in 10.5 - Ensure that MDL_Request values 'type' and 'ticket' are always initialized. This makes it easier to check the state of the MDL_Request. - Moved thd->store_globals() earlier in handle_rpl_parallel_thread() as thd->init_for_queries() could use a MDL that could crash if store_globals where not called. - Don't call ha_enable_transactions() in THD::init_for_queries() as this is both slow (uses MDL locks) and not needed.
-rw-r--r--mysql-test/suite/rpl/r/parallel_backup.result40
-rw-r--r--mysql-test/suite/rpl/t/parallel_backup.test75
-rw-r--r--sql/handler.cc117
-rw-r--r--sql/handler.h8
-rw-r--r--sql/mdl.h5
-rw-r--r--sql/rpl_parallel.cc2
-rw-r--r--sql/sql_class.cc7
-rw-r--r--sql/sql_parse.cc8
-rw-r--r--sql/xa.cc2
-rw-r--r--storage/maria/ha_maria.cc4
-rw-r--r--storage/maria/ha_maria.h1
11 files changed, 210 insertions, 59 deletions
diff --git a/mysql-test/suite/rpl/r/parallel_backup.result b/mysql-test/suite/rpl/r/parallel_backup.result
new file mode 100644
index 00000000000..d87c61f2d0f
--- /dev/null
+++ b/mysql-test/suite/rpl/r/parallel_backup.result
@@ -0,0 +1,40 @@
+include/master-slave.inc
+[connection master]
+#
+# MDEV-21953: deadlock between BACKUP STAGE BLOCK_COMMIT and parallel
+# replication
+#
+connection master;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE = innodb;
+connection slave;
+include/stop_slave.inc
+SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
+SET @@global.slave_parallel_threads= 2;
+SET @@global.slave_parallel_mode = 'optimistic';
+connection master;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+connect aux_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,;
+BEGIN;
+INSERT INTO t1 VALUES (1);
+connection slave;
+include/start_slave.inc
+connection aux_slave;
+connect backup_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,;
+BACKUP STAGE START;
+BACKUP STAGE BLOCK_COMMIT;
+connection aux_slave;
+ROLLBACK;
+connection backup_slave;
+BACKUP STAGE END;
+connection slave;
+include/diff_tables.inc [master:t1,slave:t1]
+connection slave;
+include/stop_slave.inc
+SET @@global.slave_parallel_threads= @old_parallel_threads;
+SET @@global.slave_parallel_mode = @old_parallel_mode;
+include/start_slave.inc
+connection server_1;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/parallel_backup.test b/mysql-test/suite/rpl/t/parallel_backup.test
new file mode 100644
index 00000000000..6ed182c024b
--- /dev/null
+++ b/mysql-test/suite/rpl/t/parallel_backup.test
@@ -0,0 +1,75 @@
+--source include/have_innodb.inc
+# The test is not format specific, MIXED is required to optimize testing time
+--source include/have_binlog_format_mixed.inc
+--source include/master-slave.inc
+
+--echo #
+--echo # MDEV-21953: deadlock between BACKUP STAGE BLOCK_COMMIT and parallel
+--echo # replication
+--echo #
+
+--connection master
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE = innodb;
+
+--sync_slave_with_master
+--source include/stop_slave.inc
+SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
+SET @@global.slave_parallel_threads= 2;
+SET @@global.slave_parallel_mode = 'optimistic';
+
+--connection master
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+--save_master_pos
+
+# The plot:
+# Block the 1st of two workers and, at waiting-for-prior-commit by the 2nd,
+# issue BACKUP commands.
+# BLOCK_COMMIT may hang so it is --send.
+# Release the 1st worker to observe a deadlock unless its fixed.
+
+--connect (aux_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,)
+BEGIN;
+# block the 1st worker and wait for the 2nd ready to commit
+INSERT INTO t1 VALUES (1);
+
+--connection slave
+--source include/start_slave.inc
+
+--connection aux_slave
+--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
+--source include/wait_condition.inc
+
+# While the 1st worker is locked out run backup
+--connect (backup_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,)
+BACKUP STAGE START;
+--send BACKUP STAGE BLOCK_COMMIT
+
+# release the 1st work
+--connection aux_slave
+--sleep 1
+ROLLBACK;
+
+--connection backup_slave
+--reap
+BACKUP STAGE END;
+
+--connection slave
+--sync_with_master
+
+--let $diff_tables= master:t1,slave:t1
+--source include/diff_tables.inc
+
+
+# Clean up.
+--connection slave
+--source include/stop_slave.inc
+SET @@global.slave_parallel_threads= @old_parallel_threads;
+SET @@global.slave_parallel_mode = @old_parallel_mode;
+--source include/start_slave.inc
+
+--connection server_1
+DROP TABLE t1;
+
+--source include/rpl_end.inc
diff --git a/sql/handler.cc b/sql/handler.cc
index cc7eedd18f2..5f94f9e893b 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -114,7 +114,7 @@ static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
- bool is_real_trans);
+ bool is_real_trans, bool rw_trans);
static plugin_ref ha_default_plugin(THD *thd)
@@ -133,6 +133,23 @@ static plugin_ref ha_default_tmp_plugin(THD *thd)
return ha_default_plugin(thd);
}
+#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
+void ha_maria_implicit_commit(THD *thd, bool new_trn)
+{
+ if (ha_maria::has_active_transaction(thd))
+ {
+ int error;
+ MDL_request mdl_request;
+ mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
+ error= thd->mdl_context.acquire_lock(&mdl_request,
+ thd->variables.lock_wait_timeout);
+ ha_maria::implicit_commit(thd, new_trn);
+ if (!error)
+ thd->mdl_context.release_lock(mdl_request.ticket);
+ }
+}
+#endif
+
/** @brief
Return the default storage engine handlerton for thread
@@ -1445,10 +1462,6 @@ int ha_commit_trans(THD *thd, bool all)
DBUG_RETURN(2);
}
-#ifdef WITH_ARIA_STORAGE_ENGINE
- ha_maria::implicit_commit(thd, TRUE);
-#endif
-
if (!ha_info)
{
/*
@@ -1462,7 +1475,9 @@ int ha_commit_trans(THD *thd, bool all)
wsrep_commit_empty(thd, all);
}
#endif /* WITH_WSREP */
- DBUG_RETURN(0);
+
+ ha_maria_implicit_commit(thd, TRUE);
+ DBUG_RETURN(error);
}
DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
@@ -1475,33 +1490,9 @@ int ha_commit_trans(THD *thd, bool all)
/* rw_trans is TRUE when we in a transaction changing data */
bool rw_trans= is_real_trans &&
(rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
- MDL_request mdl_request;
DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
is_real_trans, rw_trans, rw_ha_count));
- if (rw_trans)
- {
- /*
- Acquire a metadata lock which will ensure that COMMIT is blocked
- by an active FLUSH TABLES WITH READ LOCK (and vice versa:
- COMMIT in progress blocks FTWRL).
-
- We allow the owner of FTWRL to COMMIT; we assume that it knows
- what it does.
- */
- mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
-
- if (!WSREP(thd) &&
- thd->mdl_context.acquire_lock(&mdl_request,
- thd->variables.lock_wait_timeout))
- {
- ha_rollback_trans(thd, all);
- DBUG_RETURN(1);
- }
-
- DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
- }
-
if (rw_trans &&
opt_readonly &&
!(thd->security_ctx->master_access & SUPER_ACL) &&
@@ -1541,7 +1532,7 @@ int ha_commit_trans(THD *thd, bool all)
// Here, the call will not commit inside InnoDB. It is only working
// around closing thd->transaction.stmt open by TR_table::open().
if (all)
- commit_one_phase_2(thd, false, &thd->transaction.stmt, false);
+ commit_one_phase_2(thd, false, &thd->transaction.stmt, false, false);
}
}
#endif
@@ -1561,7 +1552,7 @@ int ha_commit_trans(THD *thd, bool all)
goto wsrep_err;
}
#endif /* WITH_WSREP */
- error= ha_commit_one_phase(thd, all);
+ error= ha_commit_one_phase(thd, all, rw_trans);
#ifdef WITH_WSREP
if (run_wsrep_hooks)
error= error || wsrep_after_commit(thd, all);
@@ -1613,7 +1604,7 @@ int ha_commit_trans(THD *thd, bool all)
if (!is_real_trans)
{
- error= commit_one_phase_2(thd, all, trans, is_real_trans);
+ error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans);
goto done;
}
#ifdef WITH_WSREP
@@ -1631,7 +1622,7 @@ int ha_commit_trans(THD *thd, bool all)
DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
- error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
+ error= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans) ? 2 : 0;
#ifdef WITH_WSREP
if (run_wsrep_hooks && (error || (error = wsrep_after_commit(thd, all))))
{
@@ -1694,16 +1685,6 @@ err:
thd->rgi_slave->is_parallel_exec);
}
end:
- if (rw_trans && mdl_request.ticket)
- {
- /*
- We do not always immediately release transactional locks
- after ha_commit_trans() (see uses of ha_enable_transaction()),
- thus we release the commit blocker lock as soon as it's
- not needed.
- */
- thd->mdl_context.release_lock(mdl_request.ticket);
- }
#ifdef WITH_WSREP
if (wsrep_is_active(thd) && is_real_trans && !error &&
(rw_ha_count == 0 || all) &&
@@ -1719,6 +1700,7 @@ end:
/**
@note
This function does not care about global read lock. A caller should.
+ However backup locks are handled in commit_one_phase_2.
@param[in] all Is set in case of explicit commit
(COMMIT statement), or implicit commit
@@ -1727,7 +1709,7 @@ end:
autocommit=1.
*/
-int ha_commit_one_phase(THD *thd, bool all)
+int ha_commit_one_phase(THD *thd, bool all, bool rw_trans)
{
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
/*
@@ -1753,20 +1735,50 @@ int ha_commit_one_phase(THD *thd, bool all)
if ((res= thd->wait_for_prior_commit()))
DBUG_RETURN(res);
}
- res= commit_one_phase_2(thd, all, trans, is_real_trans);
+ res= commit_one_phase_2(thd, all, trans, is_real_trans, rw_trans);
DBUG_RETURN(res);
}
static int
-commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
+commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans,
+ bool rw_trans)
{
int error= 0;
uint count= 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
+ MDL_request mdl_request;
DBUG_ENTER("commit_one_phase_2");
if (is_real_trans)
DEBUG_SYNC(thd, "commit_one_phase_2");
+
+ if (rw_trans)
+ {
+ /*
+ Acquire a metadata lock which will ensure that COMMIT is blocked
+ by an active FLUSH TABLES WITH READ LOCK (and vice versa:
+ COMMIT in progress blocks FTWRL).
+
+ We allow the owner of FTWRL to COMMIT; we assume that it knows
+ what it does.
+ */
+ mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
+
+ if (!WSREP(thd) &&
+ thd->mdl_context.acquire_lock(&mdl_request,
+ thd->variables.lock_wait_timeout))
+ {
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1);
+ ha_rollback_trans(thd, all);
+ DBUG_RETURN(1);
+ }
+ DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
+ }
+
+#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
+ ha_maria::implicit_commit(thd, TRUE);
+#endif
+
if (ha_info)
{
for (; ha_info; ha_info= ha_info_next)
@@ -1795,6 +1807,17 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
#endif
}
}
+ if (mdl_request.ticket)
+ {
+ /*
+ We do not always immediately release transactional locks
+ after ha_commit_trans() (see uses of ha_enable_transaction()),
+ thus we release the commit blocker lock as soon as it's
+ not needed.
+ */
+ thd->mdl_context.release_lock(mdl_request.ticket);
+ }
+
/* Free resources and perform other cleanup even for 'empty' transactions. */
if (is_real_trans)
{
diff --git a/sql/handler.h b/sql/handler.h
index e514fcfd60c..3757a3ab2f9 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -5021,7 +5021,7 @@ int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache);
/* transactions: interface to handlerton functions */
int ha_start_consistent_snapshot(THD *thd);
int ha_commit_or_rollback_by_xid(XID *xid, bool commit);
-int ha_commit_one_phase(THD *thd, bool all);
+int ha_commit_one_phase(THD *thd, bool all, bool rw_trans);
int ha_commit_trans(THD *thd, bool all);
int ha_rollback_trans(THD *thd, bool all);
int ha_prepare(THD *thd);
@@ -5093,4 +5093,10 @@ int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *t
@note This does not need to be multi-byte safe or anything */
char *xid_to_str(char *buf, const XID &xid);
#endif // !DBUG_OFF
+
+#if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
+extern void ha_maria_implicit_commit(THD *thd, bool new_trans);
+#else
+#define ha_maria_implicit_commit(A, B) while(0)
+#endif
#endif /* HANDLER_INCLUDED */
diff --git a/sql/mdl.h b/sql/mdl.h
index b084670e5c6..123ffbcade6 100644
--- a/sql/mdl.h
+++ b/sql/mdl.h
@@ -122,6 +122,8 @@ public:
*/
enum enum_mdl_type {
+ /* This means that the MDL_request is not initialized */
+ MDL_NOT_INITIALIZED= -1,
/*
An intention exclusive metadata lock (IX). Used only for scoped locks.
Owner of this type of lock can acquire upgradable exclusive locks on
@@ -592,12 +594,13 @@ public:
*/
MDL_request& operator=(const MDL_request &)
{
+ type= MDL_NOT_INITIALIZED;
ticket= NULL;
/* Do nothing, in particular, don't try to copy the key. */
return *this;
}
/* Another piece of ugliness for TABLE_LIST constructor */
- MDL_request() {}
+ MDL_request(): type(MDL_NOT_INITIALIZED), ticket(NULL) {}
MDL_request(const MDL_request *rhs)
:type(rhs->type),
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 7c0e96bc8ed..eef6f734f1f 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -1051,10 +1051,10 @@ handle_rpl_parallel_thread(void *arg)
server_threads.insert(thd);
set_current_thd(thd);
pthread_detach_this_thread();
+ thd->store_globals();
thd->init_for_queries();
thd->variables.binlog_annotate_row_events= 0;
init_thr_lock();
- thd->store_globals();
thd->system_thread= SYSTEM_THREAD_SLAVE_SQL;
thd->security_ctx->skip_grants();
thd->variables.max_allowed_packet= slave_max_allowed_packet;
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 40e606425c5..15088148e02 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1383,7 +1383,11 @@ void THD::update_all_stats()
void THD::init_for_queries()
{
set_time();
- ha_enable_transaction(this,TRUE);
+ /*
+ We don't need to call ha_enable_transaction() as we can't have
+ any active transactions that has to be commited
+ */
+ transaction.on= TRUE;
reset_root_defaults(mem_root, variables.query_alloc_block_size,
variables.query_prealloc_size);
@@ -7309,7 +7313,6 @@ wait_for_commit::~wait_for_commit()
mysql_cond_destroy(&COND_wait_commit);
}
-
void
wait_for_commit::wakeup(int wakeup_error)
{
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 935e054e163..3c5c52e2aa1 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -1841,9 +1841,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
*/
char *beginning_of_next_stmt= (char*) parser_state.m_lip.found_semicolon;
-#ifdef WITH_ARIA_STORAGE_ENGINE
- ha_maria::implicit_commit(thd, FALSE);
-#endif
+ ha_maria_implicit_commit(thd, FALSE);
/* Finalize server status flags after executing a statement. */
thd->update_server_status();
@@ -6157,9 +6155,7 @@ finish:
trans_commit_stmt(thd);
thd->get_stmt_da()->set_overwrite_status(false);
}
-#ifdef WITH_ARIA_STORAGE_ENGINE
- ha_maria::implicit_commit(thd, FALSE);
-#endif
+ ha_maria_implicit_commit(thd, FALSE);
}
/* Free tables. Set stage 'closing tables' */
diff --git a/sql/xa.cc b/sql/xa.cc
index b08ab973cc5..15116ba9366 100644
--- a/sql/xa.cc
+++ b/sql/xa.cc
@@ -582,7 +582,7 @@ bool trans_xa_commit(THD *thd)
{
DEBUG_SYNC(thd, "trans_xa_commit_after_acquire_commit_lock");
- res= MY_TEST(ha_commit_one_phase(thd, 1));
+ res= MY_TEST(ha_commit_one_phase(thd, 1, 1));
if (res)
my_error(ER_XAER_RMERR, MYF(0));
}
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 35edfc31914..013c740582b 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -2890,6 +2890,10 @@ static void reset_thd_trn(THD *thd, MARIA_HA *first_table)
DBUG_VOID_RETURN;
}
+bool ha_maria::has_active_transaction(THD *thd)
+{
+ return (maria_hton && THD_TRN);
+}
/**
Performs an implicit commit of the Maria transaction and creates a new
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
index f6b00420c9c..73f3576a34e 100644
--- a/storage/maria/ha_maria.h
+++ b/storage/maria/ha_maria.h
@@ -165,6 +165,7 @@ public:
{
return file;
}
+ static bool has_active_transaction(THD *thd);
static int implicit_commit(THD *thd, bool new_trn);
/**
* Multi Range Read interface