summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKristian Nielsen <knielsen@knielsen-hq.org>2015-05-26 12:47:35 +0200
committerKristian Nielsen <knielsen@knielsen-hq.org>2015-05-26 13:04:15 +0200
commite5f1e841dc32ccb8e8630876e8073efd778d3efd (patch)
treed56611270886a591627a8c59ce7747f14a2a6679
parentef99edf1a87d5234cf0dd8d496f0adfbf4040dad (diff)
downloadmariadb-git-e5f1e841dc32ccb8e8630876e8073efd778d3efd.tar.gz
MDEV-8147: Assertion `m_lock_type == 2' failed in handler::ha_close() during parallel replication
When the slave processes the master restart format_description event, parallel replication needs to complete any prior events before processing the restart event (which closes temporary tables and such stuff). This happens in wait_for_workers_idle(), however it was not waiting long enough. The wait was using wait_for_prior_commit(), but at that points table can still be open. This lead to assertion in this case. So change wait_for_workers_idle() to wait until all worker threads have reached finish_event_group(), at which point all tables should have been closed.
-rw-r--r--mysql-test/suite/rpl/r/rpl_parallel_partition.result42
-rw-r--r--mysql-test/suite/rpl/t/rpl_parallel_partition.test81
-rw-r--r--sql/mysqld.cc1
-rw-r--r--sql/mysqld.h1
-rw-r--r--sql/rpl_parallel.cc33
-rw-r--r--sql/rpl_parallel.h6
-rw-r--r--sql/sql_class.cc1
7 files changed, 151 insertions, 14 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_parallel_partition.result b/mysql-test/suite/rpl/r/rpl_parallel_partition.result
new file mode 100644
index 00000000000..e0cfe9882d2
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_parallel_partition.result
@@ -0,0 +1,42 @@
+include/master-slave.inc
+[connection master]
+SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+include/stop_slave.inc
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,inject_wakeup_subsequent_commits_sleep";
+SET GLOBAL slave_parallel_threads=8;
+*** MDEV-8147: Assertion `m_lock_type == 2' failed in handler::ha_close() during parallel replication ***
+CREATE TABLE E (
+pk INTEGER AUTO_INCREMENT,
+col_int_nokey INTEGER /*! NULL */,
+col_int_key INTEGER /*! NULL */,
+col_date_key DATE /*! NULL */,
+col_date_nokey DATE /*! NULL */,
+col_time_key TIME /*! NULL */,
+col_time_nokey TIME /*! NULL */,
+col_datetime_key DATETIME /*! NULL */,
+col_datetime_nokey DATETIME /*! NULL */,
+col_varchar_key VARCHAR(1) /*! NULL */,
+col_varchar_nokey VARCHAR(1) /*! NULL */,
+PRIMARY KEY (pk),
+KEY (col_int_key),
+KEY (col_date_key),
+KEY (col_time_key),
+KEY (col_datetime_key),
+KEY (col_varchar_key, col_int_key)
+) ENGINE=InnoDB;
+ALTER TABLE `E` PARTITION BY KEY() PARTITIONS 5;
+ALTER TABLE `E` REMOVE PARTITIONING;
+CREATE TABLE t1 (a INT PRIMARY KEY);
+include/start_slave.inc
+include/stop_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+SET GLOBAL slave_parallel_threads=0;
+SET GLOBAL slave_parallel_threads=8;
+include/start_slave.inc
+include/stop_slave.inc
+SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+include/start_slave.inc
+DROP TABLE `E`;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_parallel_partition.test b/mysql-test/suite/rpl/t/rpl_parallel_partition.test
new file mode 100644
index 00000000000..37dce9fef80
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_parallel_partition.test
@@ -0,0 +1,81 @@
+--source include/have_partition.inc
+--source include/have_innodb.inc
+--source include/have_debug.inc
+--source include/master-slave.inc
+
+--connection server_2
+SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+--source include/stop_slave.inc
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,inject_wakeup_subsequent_commits_sleep";
+SET GLOBAL slave_parallel_threads=8;
+
+--echo *** MDEV-8147: Assertion `m_lock_type == 2' failed in handler::ha_close() during parallel replication ***
+--connection server_1
+CREATE TABLE E (
+ pk INTEGER AUTO_INCREMENT,
+ col_int_nokey INTEGER /*! NULL */,
+ col_int_key INTEGER /*! NULL */,
+
+ col_date_key DATE /*! NULL */,
+ col_date_nokey DATE /*! NULL */,
+
+ col_time_key TIME /*! NULL */,
+ col_time_nokey TIME /*! NULL */,
+
+ col_datetime_key DATETIME /*! NULL */,
+ col_datetime_nokey DATETIME /*! NULL */,
+
+ col_varchar_key VARCHAR(1) /*! NULL */,
+ col_varchar_nokey VARCHAR(1) /*! NULL */,
+
+ PRIMARY KEY (pk),
+ KEY (col_int_key),
+ KEY (col_date_key),
+ KEY (col_time_key),
+ KEY (col_datetime_key),
+ KEY (col_varchar_key, col_int_key)
+ ) ENGINE=InnoDB;
+
+ALTER TABLE `E` PARTITION BY KEY() PARTITIONS 5;
+ALTER TABLE `E` REMOVE PARTITIONING;
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+--shutdown_server 30
+--source include/wait_until_disconnected.inc
+--connection default
+--source include/wait_until_disconnected.inc
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart:
+EOF
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--connection server_1
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+CREATE TABLE t1 (a INT PRIMARY KEY);
+--save_master_pos
+
+--connection server_2
+--source include/start_slave.inc
+--sync_with_master
+
+# Re-spawn worker threads to clear dbug injection.
+--source include/stop_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+SET GLOBAL slave_parallel_threads=0;
+SET GLOBAL slave_parallel_threads=8;
+--source include/start_slave.inc
+
+
+# Clean up.
+--connection server_2
+--source include/stop_slave.inc
+SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+--source include/start_slave.inc
+
+--connection server_1
+DROP TABLE `E`;
+DROP TABLE t1;
+--source include/rpl_end.inc
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 7138027ff0a..7a8c8f3388d 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -9498,6 +9498,7 @@ PSI_stage_info stage_waiting_for_work_from_sql_thread= { 0, "Waiting for work fr
PSI_stage_info stage_waiting_for_prior_transaction_to_commit= { 0, "Waiting for prior transaction to commit", 0};
PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit= { 0, "Waiting for prior transaction to start commit before starting next transaction", 0};
PSI_stage_info stage_waiting_for_room_in_worker_thread= { 0, "Waiting for room in worker thread event queue", 0};
+PSI_stage_info stage_waiting_for_workers_idle= { 0, "Waiting for worker threads to be idle", 0};
PSI_stage_info stage_master_gtid_wait_primary= { 0, "Waiting in MASTER_GTID_WAIT() (primary waiter)", 0};
PSI_stage_info stage_master_gtid_wait= { 0, "Waiting in MASTER_GTID_WAIT()", 0};
PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master connection to process GTID received on multiple master connections", 0};
diff --git a/sql/mysqld.h b/sql/mysqld.h
index f6d9dbea48a..d508023a22b 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -452,6 +452,7 @@ extern PSI_stage_info stage_waiting_for_work_from_sql_thread;
extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit;
extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit;
extern PSI_stage_info stage_waiting_for_room_in_worker_thread;
+extern PSI_stage_info stage_waiting_for_workers_idle;
extern PSI_stage_info stage_master_gtid_wait_primary;
extern PSI_stage_info stage_master_gtid_wait;
extern PSI_stage_info stage_gtid_wait_other_connection;
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 99ddde95689..305e8053032 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -168,6 +168,8 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
done and also no longer need waiting for.
*/
entry->last_committed_sub_id= sub_id;
+ if (entry->need_sub_id_signal)
+ mysql_cond_broadcast(&entry->COND_parallel_entry);
/* Now free any GCOs in which all transactions have committed. */
group_commit_orderer *tmp_gco= rgi->gco;
@@ -1894,26 +1896,29 @@ rpl_parallel::wait_for_workers_idle(THD *thd)
max_i= domain_hash.records;
for (i= 0; i < max_i; ++i)
{
- bool active;
- wait_for_commit my_orderer;
+ PSI_stage_info old_stage;
struct rpl_parallel_entry *e;
+ int err= 0;
e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i);
mysql_mutex_lock(&e->LOCK_parallel_entry);
- if ((active= (e->current_sub_id > e->last_committed_sub_id)))
- {
- wait_for_commit *waitee= &e->current_group_info->commit_orderer;
- my_orderer.register_wait_for_prior_commit(waitee);
- thd->wait_for_commit_ptr= &my_orderer;
- }
- mysql_mutex_unlock(&e->LOCK_parallel_entry);
- if (active)
+ e->need_sub_id_signal= true;
+ thd->ENTER_COND(&e->COND_parallel_entry, &e->LOCK_parallel_entry,
+ &stage_waiting_for_workers_idle, &old_stage);
+ while (e->current_sub_id > e->last_committed_sub_id)
{
- int err= my_orderer.wait_for_prior_commit(thd);
- thd->wait_for_commit_ptr= NULL;
- if (err)
- return err;
+ if (thd->check_killed())
+ {
+ thd->send_kill_message();
+ err= 1;
+ break;
+ }
+ mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry);
}
+ e->need_sub_id_signal= false;
+ thd->EXIT_COND(&old_stage);
+ if (err)
+ return err;
}
return 0;
}
diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h
index 09e0f39c0cd..0c2e4270646 100644
--- a/sql/rpl_parallel.h
+++ b/sql/rpl_parallel.h
@@ -228,6 +228,12 @@ struct rpl_parallel_entry {
*/
bool force_abort;
/*
+ Set in wait_for_workers_idle() to show that it is waiting, so that
+ finish_event_group knows to signal it when last_committed_sub_id is
+ increased.
+ */
+ bool need_sub_id_signal;
+ /*
At STOP SLAVE (force_abort=true), we do not want to process all events in
the queue (which could unnecessarily delay stop, if a lot of events happen
to be queued). The stop_count provides a safe point at which to stop, so
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 8c52d5dd92d..ad096dc60f9 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -6609,6 +6609,7 @@ wait_for_commit::wakeup_subsequent_commits2(int wakeup_error)
a mutex), so no extra explicit barrier is needed here.
*/
wakeup_subsequent_commits_running= false;
+ DBUG_EXECUTE_IF("inject_wakeup_subsequent_commits_sleep", my_sleep(21000););
}