diff options
author | Vladislav Vaintroub <wlad@mariadb.com> | 2018-03-29 22:13:01 +0000 |
---|---|---|
committer | Vladislav Vaintroub <wlad@mariadb.com> | 2018-04-01 14:26:06 +0000 |
commit | 27c24808f7048718debc5e6e93bfef6e7abd7c52 (patch) | |
tree | e4d168426ab1df2ffaeb82f321ec07e69efb17fa /extra/mariabackup/backup_mysql.cc | |
parent | a1d68faa38a0fb6600c925be567f9349cf598c86 (diff) | |
download | mariadb-git-27c24808f7048718debc5e6e93bfef6e7abd7c52.tar.gz |
MDEV-15636 mariabackup --lock-ddl-per-table hangs if ALTER table is running
concurrently.
There is a deadlock between
C1 mariabackup's connection that holds MDL locks
C2 Online ALTER TABLE that wants to have MDL exclusively
and tries to upgrade its mdl lock.
C3 another mariabackup's connection that does FLUSH TABLES (or FTWRL)
C3 waits for C2, which waits for C1, which waits for C3,
thus the deadlock.
MDL locks cannot be released until FLUSH succeeds, because
otherwise it would allow ALTER to sneak in, causing backup to abort and
breaking lock-ddl-per-table's promise.
The fix here works around the deadlock by killing connections in
"Waiting for table metadata lock" state (i.e. ALTER). This killing continues
until FTWRL succeeds.
Killing connections is skipped if the --no-lock parameter
was passed to the backup, because there won't be a FLUSH.
For reference, in Percona's xtrabackup --lock-ddl-per-connection
silently implies --no-lock, i.e. FLUSH is always skipped there.
A rather large part of the fix is introducing a DBUG capability to start
a query on a new connection at the right moment of the backup,
compensating somewhat for mariabackup's lack of send_query or DBUG_SYNC.
Diffstat (limited to 'extra/mariabackup/backup_mysql.cc')
-rw-r--r-- | extra/mariabackup/backup_mysql.cc | 102 |
1 files changed, 79 insertions, 23 deletions
diff --git a/extra/mariabackup/backup_mysql.cc b/extra/mariabackup/backup_mysql.cc index 91e659f679e..6b215a8a1c2 100644 --- a/extra/mariabackup/backup_mysql.cc +++ b/extra/mariabackup/backup_mysql.cc @@ -868,6 +868,76 @@ stop_query_killer() os_event_wait_time(kill_query_thread_stopped, 60000); } + +/* +Killing connections that wait for MDL lock. +If lock-ddl-per-table is used, there can be some DDL statements + +FLUSH TABLES would hang infinitely, if DDL statements are waiting for +MDL lock, which mariabackup currently holds. Therefore we start killing +those statements from a dedicated thread, until FLUSH TABLES WITH READ LOCK +succeeds. +*/ + +static os_event_t mdl_killer_stop_event; +static os_event_t mdl_killer_finished_event; + +static +os_thread_ret_t +DECLARE_THREAD(kill_mdl_waiters_thread(void *)) +{ + MYSQL *mysql; + if ((mysql = xb_mysql_connect()) == NULL) { + msg("Error: kill mdl waiters thread failed to connect\n"); + goto stop_thread; + } + + for(;;){ + if (os_event_wait_time(mdl_killer_stop_event, 1000) == 0) + break; + + MYSQL_RES *result = xb_mysql_query(mysql, + "SELECT ID, COMMAND FROM INFORMATION_SCHEMA.PROCESSLIST " + " WHERE State='Waiting for table metadata lock'", + true, true); + while (MYSQL_ROW row = mysql_fetch_row(result)) + { + char query[64]; + msg_ts("Killing MDL waiting query '%s' on connection '%s'\n", + row[1], row[0]); + snprintf(query, sizeof(query), "KILL QUERY %s", row[0]); + xb_mysql_query(mysql, query, true); + } + } + + mysql_close(mysql); + +stop_thread: + msg_ts("Kill mdl waiters thread stopped\n"); + os_event_set(mdl_killer_finished_event); + os_thread_exit(); + return os_thread_ret_t(0); +} + + +static void start_mdl_waiters_killer() +{ + mdl_killer_stop_event = os_event_create(0); + mdl_killer_finished_event = os_event_create(0); + os_thread_create(kill_mdl_waiters_thread, 0, 0); +} + + +/* Tell MDL killer to stop and finish for its completion*/ +static void stop_mdl_waiters_killer() +{ + 
os_event_set(mdl_killer_stop_event); + os_event_wait(mdl_killer_finished_event); + + os_event_destroy(mdl_killer_stop_event); + os_event_destroy(mdl_killer_finished_event); +} + /*********************************************************************//** Function acquires either a backup tables lock, if supported by the server, or a global read lock (FLUSH TABLES WITH READ LOCK) @@ -890,6 +960,10 @@ lock_tables(MYSQL *connection) return(true); } + if (opt_lock_ddl_per_table) { + start_mdl_waiters_killer(); + } + if (!opt_lock_wait_timeout && !opt_kill_long_queries_timeout) { /* We do first a FLUSH TABLES. If a long update is running, the @@ -930,6 +1004,10 @@ lock_tables(MYSQL *connection) xb_mysql_query(connection, "FLUSH TABLES WITH READ LOCK", false); + if (opt_lock_ddl_per_table) { + stop_mdl_waiters_killer(); + } + if (opt_kill_long_queries_timeout) { stop_query_killer(); } @@ -1647,25 +1725,6 @@ mdl_lock_init() } } -#ifndef DBUG_OFF -/* Test that table is really locked, if lock_ddl_per_table is set. - The test is executed in DBUG_EXECUTE_IF block inside mdl_lock_table(). 
-*/ -static void check_mdl_lock_works(const char *table_name) -{ - MYSQL *test_con= xb_mysql_connect(); - char *query; - xb_a(asprintf(&query, - "SET STATEMENT max_statement_time=1 FOR ALTER TABLE %s" - " ADD COLUMN mdl_lock_column int", table_name)); - int err = mysql_query(test_con, query); - DBUG_ASSERT(err); - int err_no = mysql_errno(test_con); - DBUG_ASSERT(err_no == ER_STATEMENT_TIMEOUT); - mysql_close(test_con); - free(query); -} -#endif void mdl_lock_table(ulint space_id) { @@ -1681,13 +1740,10 @@ mdl_lock_table(ulint space_id) while (MYSQL_ROW row = mysql_fetch_row(mysql_result)) { std::string full_table_name = ut_get_name(0,row[0]); std::ostringstream lock_query; - lock_query << "SELECT * FROM " << full_table_name << " LIMIT 0"; + lock_query << "SELECT 1 FROM " << full_table_name << " LIMIT 0"; msg_ts("Locking MDL for %s\n", full_table_name.c_str()); xb_mysql_query(mdl_con, lock_query.str().c_str(), false, false); - - DBUG_EXECUTE_IF("check_mdl_lock_works", - check_mdl_lock_works(full_table_name.c_str());); } pthread_mutex_unlock(&mdl_lock_con_mutex); |