diff options
author | Kristian Nielsen <knielsen@knielsen-hq.org> | 2015-05-26 12:47:35 +0200 |
---|---|---|
committer | Kristian Nielsen <knielsen@knielsen-hq.org> | 2015-05-26 13:04:15 +0200 |
commit | e5f1e841dc32ccb8e8630876e8073efd778d3efd (patch) | |
tree | d56611270886a591627a8c59ce7747f14a2a6679 | |
parent | ef99edf1a87d5234cf0dd8d496f0adfbf4040dad (diff) | |
download | mariadb-git-e5f1e841dc32ccb8e8630876e8073efd778d3efd.tar.gz |
MDEV-8147: Assertion `m_lock_type == 2' failed in handler::ha_close() during parallel replication
When the slave processes the master restart format_description event,
parallel replication needs to complete any prior events before processing
the restart event (which closes temporary tables and the like).
This happens in wait_for_workers_idle(); however, it was not waiting long
enough. The wait was using wait_for_prior_commit(), but at that point tables
can still be open. This led to the assertion in this case.
So change wait_for_workers_idle() to wait until all worker threads have
reached finish_event_group(), at which point all tables should have been
closed.
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_parallel_partition.result | 42 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_parallel_partition.test | 81 | ||||
-rw-r--r-- | sql/mysqld.cc | 1 | ||||
-rw-r--r-- | sql/mysqld.h | 1 | ||||
-rw-r--r-- | sql/rpl_parallel.cc | 33 | ||||
-rw-r--r-- | sql/rpl_parallel.h | 6 | ||||
-rw-r--r-- | sql/sql_class.cc | 1 |
7 files changed, 151 insertions, 14 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_parallel_partition.result b/mysql-test/suite/rpl/r/rpl_parallel_partition.result new file mode 100644 index 00000000000..e0cfe9882d2 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel_partition.result @@ -0,0 +1,42 @@ +include/master-slave.inc +[connection master] +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,inject_wakeup_subsequent_commits_sleep"; +SET GLOBAL slave_parallel_threads=8; +*** MDEV-8147: Assertion `m_lock_type == 2' failed in handler::ha_close() during parallel replication *** +CREATE TABLE E ( +pk INTEGER AUTO_INCREMENT, +col_int_nokey INTEGER /*! NULL */, +col_int_key INTEGER /*! NULL */, +col_date_key DATE /*! NULL */, +col_date_nokey DATE /*! NULL */, +col_time_key TIME /*! NULL */, +col_time_nokey TIME /*! NULL */, +col_datetime_key DATETIME /*! NULL */, +col_datetime_nokey DATETIME /*! NULL */, +col_varchar_key VARCHAR(1) /*! NULL */, +col_varchar_nokey VARCHAR(1) /*! 
NULL */, +PRIMARY KEY (pk), +KEY (col_int_key), +KEY (col_date_key), +KEY (col_time_key), +KEY (col_datetime_key), +KEY (col_varchar_key, col_int_key) +) ENGINE=InnoDB; +ALTER TABLE `E` PARTITION BY KEY() PARTITIONS 5; +ALTER TABLE `E` REMOVE PARTITIONING; +CREATE TABLE t1 (a INT PRIMARY KEY); +include/start_slave.inc +include/stop_slave.inc +SET GLOBAL debug_dbug=@old_dbug; +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=8; +include/start_slave.inc +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +include/start_slave.inc +DROP TABLE `E`; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_partition.test b/mysql-test/suite/rpl/t/rpl_parallel_partition.test new file mode 100644 index 00000000000..37dce9fef80 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_partition.test @@ -0,0 +1,81 @@ +--source include/have_partition.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/master-slave.inc + +--connection server_2 +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +--source include/stop_slave.inc +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,inject_wakeup_subsequent_commits_sleep"; +SET GLOBAL slave_parallel_threads=8; + +--echo *** MDEV-8147: Assertion `m_lock_type == 2' failed in handler::ha_close() during parallel replication *** +--connection server_1 +CREATE TABLE E ( + pk INTEGER AUTO_INCREMENT, + col_int_nokey INTEGER /*! NULL */, + col_int_key INTEGER /*! NULL */, + + col_date_key DATE /*! NULL */, + col_date_nokey DATE /*! NULL */, + + col_time_key TIME /*! NULL */, + col_time_nokey TIME /*! NULL */, + + col_datetime_key DATETIME /*! NULL */, + col_datetime_nokey DATETIME /*! NULL */, + + col_varchar_key VARCHAR(1) /*! NULL */, + col_varchar_nokey VARCHAR(1) /*! 
NULL */, + + PRIMARY KEY (pk), + KEY (col_int_key), + KEY (col_date_key), + KEY (col_time_key), + KEY (col_datetime_key), + KEY (col_varchar_key, col_int_key) + ) ENGINE=InnoDB; + +ALTER TABLE `E` PARTITION BY KEY() PARTITIONS 5; +ALTER TABLE `E` REMOVE PARTITIONING; +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF +--shutdown_server 30 +--source include/wait_until_disconnected.inc +--connection default +--source include/wait_until_disconnected.inc +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF +--enable_reconnect +--source include/wait_until_connected_again.inc +--connection server_1 +--enable_reconnect +--source include/wait_until_connected_again.inc +CREATE TABLE t1 (a INT PRIMARY KEY); +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master + +# Re-spawn worker threads to clear dbug injection. +--source include/stop_slave.inc +SET GLOBAL debug_dbug=@old_dbug; +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=8; +--source include/start_slave.inc + + +# Clean up. 
+--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE `E`; +DROP TABLE t1; +--source include/rpl_end.inc diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 7138027ff0a..7a8c8f3388d 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -9498,6 +9498,7 @@ PSI_stage_info stage_waiting_for_work_from_sql_thread= { 0, "Waiting for work fr PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for prior transaction to commit", 0}; PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit before starting next transaction", 0}; PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0}; +PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0}; PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0}; PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0}; PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process GTID received on multiple master connections", 0}; diff --git a/sql/mysqld.h b/sql/mysqld.h index f6d9dbea48a..d508023a22b 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -452,6 +452,7 @@ extern PSI_stage_info stage_waiting_for_work_from_sql_thread; extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit; extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit; extern PSI_stage_info stage_waiting_for_room_in_worker_thread; +extern PSI_stage_info stage_waiting_for_workers_idle; extern PSI_stage_info stage_master_gtid_wait_primary; extern PSI_stage_info stage_master_gtid_wait; extern PSI_stage_info stage_gtid_wait_other_connection; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 99ddde95689..305e8053032 100644 --- 
a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -168,6 +168,8 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, done and also no longer need waiting for. */ entry->last_committed_sub_id= sub_id; + if (entry->need_sub_id_signal) + mysql_cond_broadcast(&entry->COND_parallel_entry); /* Now free any GCOs in which all transactions have committed. */ group_commit_orderer *tmp_gco= rgi->gco; @@ -1894,26 +1896,29 @@ rpl_parallel::wait_for_workers_idle(THD *thd) max_i= domain_hash.records; for (i= 0; i < max_i; ++i) { - bool active; - wait_for_commit my_orderer; + PSI_stage_info old_stage; struct rpl_parallel_entry *e; + int err= 0; e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); mysql_mutex_lock(&e->LOCK_parallel_entry); - if ((active= (e->current_sub_id > e->last_committed_sub_id))) - { - wait_for_commit *waitee= &e->current_group_info->commit_orderer; - my_orderer.register_wait_for_prior_commit(waitee); - thd->wait_for_commit_ptr= &my_orderer; - } - mysql_mutex_unlock(&e->LOCK_parallel_entry); - if (active) + e->need_sub_id_signal= true; + thd->ENTER_COND(&e->COND_parallel_entry, &e->LOCK_parallel_entry, + &stage_waiting_for_workers_idle, &old_stage); + while (e->current_sub_id > e->last_committed_sub_id) { - int err= my_orderer.wait_for_prior_commit(thd); - thd->wait_for_commit_ptr= NULL; - if (err) - return err; + if (thd->check_killed()) + { + thd->send_kill_message(); + err= 1; + break; + } + mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); } + e->need_sub_id_signal= false; + thd->EXIT_COND(&old_stage); + if (err) + return err; } return 0; } diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 09e0f39c0cd..0c2e4270646 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -228,6 +228,12 @@ struct rpl_parallel_entry { */ bool force_abort; /* + Set in wait_for_workers_idle() to show that it is waiting, so that + finish_event_group knows to signal it when last_committed_sub_id is + increased. 
+ */ + bool need_sub_id_signal; + /* At STOP SLAVE (force_abort=true), we do not want to process all events in the queue (which could unnecessarily delay stop, if a lot of events happen to be queued). The stop_count provides a safe point at which to stop, so diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8c52d5dd92d..ad096dc60f9 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -6609,6 +6609,7 @@ wait_for_commit::wakeup_subsequent_commits2(int wakeup_error) a mutex), so no extra explicit barrier is needed here. */ wakeup_subsequent_commits_running= false; + DBUG_EXECUTE_IF("inject_wakeup_subsequent_commits_sleep", my_sleep(21000);); } |