From 5db56a106b406d63e5fc41db47f98d227bb173eb Mon Sep 17 00:00:00 2001 From: "guilhem@mysql.com" <> Date: Fri, 20 Aug 2004 16:35:23 +0200 Subject: Making FLUSH TABLES WITH READ LOCK block COMMITs of existing transactions, in a deadlock-free manner. This splits locking the global read lock in two steps. This fixes a consequence of this bug, known as: BUG#4953 'mysqldump --master-data may report incorrect binlog position if using InnoDB' And a test. --- mysql-test/r/flush_block_commit.result | 23 +++++++++++++ mysql-test/t/flush_block_commit-master.opt | 1 + mysql-test/t/flush_block_commit.test | 49 +++++++++++++++++++++++++++ sql/handler.cc | 31 ++++++++++++----- sql/lock.cc | 54 ++++++++++++++++++++++++++---- sql/mysql_priv.h | 3 +- sql/sql_class.h | 4 +-- sql/sql_db.cc | 4 +-- sql/sql_parse.cc | 6 +++- 9 files changed, 154 insertions(+), 21 deletions(-) create mode 100644 mysql-test/r/flush_block_commit.result create mode 100644 mysql-test/t/flush_block_commit-master.opt create mode 100644 mysql-test/t/flush_block_commit.test diff --git a/mysql-test/r/flush_block_commit.result b/mysql-test/r/flush_block_commit.result new file mode 100644 index 00000000000..3205dd9dad9 --- /dev/null +++ b/mysql-test/r/flush_block_commit.result @@ -0,0 +1,23 @@ +drop table if exists t1; +create table t1 (a int) type=innodb; +begin; +insert into t1 values(1); +flush tables with read lock; +select * from t1; +a + commit; +select * from t1; +a +unlock tables; +begin; +select * from t1 for update; +a +1 +begin; + select * from t1 for update; + flush tables with read lock; +commit; +a +1 +unlock tables; +drop table t1; diff --git a/mysql-test/t/flush_block_commit-master.opt b/mysql-test/t/flush_block_commit-master.opt new file mode 100644 index 00000000000..d1f6d58e9f7 --- /dev/null +++ b/mysql-test/t/flush_block_commit-master.opt @@ -0,0 +1 @@ +--innodb_lock_wait_timeout=5 diff --git a/mysql-test/t/flush_block_commit.test b/mysql-test/t/flush_block_commit.test new file mode 100644 index 00000000000..20ecec1361c --- /dev/null +++ b/mysql-test/t/flush_block_commit.test @@ -0,0 +1,49 @@ +# Let's see if FLUSH TABLES WITH READ LOCK blocks COMMIT of existing +# transactions. +# We verify that we did not introduce a deadlock. + +-- source include/have_innodb.inc + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); +connect (con3,localhost,root,,); +connection con1; +drop table if exists t1; +create table t1 (a int) type=innodb; + +# blocks COMMIT ? + +begin; +insert into t1 values(1); +connection con2; +flush tables with read lock; +select * from t1; +connection con1; +send commit; # blocked by con2 +sleep 1; +connection con2; +select * from t1; # verify con1 was blocked and data did not move +unlock tables; +connection con1; +reap; + +# No deadlock ? + +connection con1; +begin; +select * from t1 for update; +connection con2; +begin; +send select * from t1 for update; # blocked by con1 +sleep 1; +connection con3; +send flush tables with read lock; # blocked by con2 +connection con1; +commit; # should not be blocked by con3 +connection con2; +reap; +connection con3; +reap; +unlock tables; +connection con1; +drop table t1; diff --git a/sql/handler.cc b/sql/handler.cc index a1e738583fd..9eb129fab45 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -342,17 +342,30 @@ int ha_commit_trans(THD *thd, THD_TRANS* trans) #ifdef USING_TRANSACTIONS if (opt_using_transactions) { - bool operation_done= 0; + bool operation_done= 0, need_start_waiters= 0; bool transaction_commited= 0; - - /* Update the binary log if we have cached some queries */ - if (trans == &thd->transaction.all && mysql_bin_log.is_open() && + /* If transaction has done some updates to tables */ + if (trans == &thd->transaction.all && my_b_tell(&thd->transaction.trans_log)) { - mysql_bin_log.write(thd, &thd->transaction.trans_log, 1); - reinit_io_cache(&thd->transaction.trans_log, - WRITE_CACHE, (my_off_t) 0, 0, 1); - thd->transaction.trans_log.end_of_file= max_binlog_cache_size; + if (error= wait_if_global_read_lock(thd, 0, 0)) + { + /* + Note that ROLLBACK [TO SAVEPOINT] does not have this test; it's + because ROLLBACK never updates data, so needn't wait on the lock. + */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); + error= 1; + } + else + need_start_waiters= 1; + if (mysql_bin_log.is_open()) + { + mysql_bin_log.write(thd, &thd->transaction.trans_log, 1); + reinit_io_cache(&thd->transaction.trans_log, + WRITE_CACHE, (my_off_t) 0, 0, 1); + thd->transaction.trans_log.end_of_file= max_binlog_cache_size; + } } #ifdef HAVE_BERKELEY_DB if (trans->bdb_tid) @@ -393,6 +406,8 @@ int ha_commit_trans(THD *thd, THD_TRANS* trans) statistic_increment(ha_commit_count,&LOCK_status); thd->transaction.cleanup(); } + if (need_start_waiters) + start_waiting_global_read_lock(thd); } #endif // using transactions DBUG_RETURN(error); diff --git a/sql/lock.cc b/sql/lock.cc index 9ea1ce96175..dd2b61b65d2 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -96,7 +96,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd,TABLE **tables,uint count) Someone has issued LOCK ALL TABLES FOR READ and we want a write lock Wait until the lock is gone */ - if (wait_if_global_read_lock(thd, 1)) + if (wait_if_global_read_lock(thd, 1, 1)) { my_free((gptr) sql_lock,MYF(0)); sql_lock=0; @@ -453,7 +453,7 @@ int lock_and_wait_for_table_name(THD *thd, TABLE_LIST *table_list) int error= -1; DBUG_ENTER("lock_and_wait_for_table_name"); - if (wait_if_global_read_lock(thd,0)) + if (wait_if_global_read_lock(thd, 0, 1)) DBUG_RETURN(1); VOID(pthread_mutex_lock(&LOCK_open)); if ((lock_retcode = lock_table_name(thd, table_list)) < 0) @@ -667,14 +667,23 @@ static void print_lock_error(int error) The global locks are handled through the global variables: global_read_lock + global_read_lock_blocks_commit waiting_for_read_lock protect_against_global_read_lock + + Taking the global read lock is TWO steps (2nd step is optional; without + it, COMMIT of existing transactions will be allowed): + lock_global_read_lock() THEN make_global_read_lock_block_commit(). ****************************************************************************/ volatile uint global_read_lock=0; +volatile uint global_read_lock_blocks_commit=0; static volatile uint protect_against_global_read_lock=0; static volatile uint waiting_for_read_lock=0; +#define GOT_GLOBAL_READ_LOCK 1 +#define MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT 2 + bool lock_global_read_lock(THD *thd) { DBUG_ENTER("lock_global_read_lock"); @@ -697,27 +706,40 @@ bool lock_global_read_lock(THD *thd) thd->exit_cond(old_message); DBUG_RETURN(1); } - thd->global_read_lock=1; + thd->global_read_lock= GOT_GLOBAL_READ_LOCK; global_read_lock++; thd->exit_cond(old_message); } + /* + We DON'T set global_read_lock_blocks_commit now, it will be set after + tables are flushed (as the present function serves for FLUSH TABLES WITH + READ LOCK only). Doing things in this order is necessary to avoid + deadlocks (we must allow COMMIT until all tables are closed; we should not + forbid it before, or we can have a 3-thread deadlock if 2 do SELECT FOR + UPDATE and one does FLUSH TABLES WITH READ LOCK). + */ DBUG_RETURN(0); } void unlock_global_read_lock(THD *thd) { uint tmp; - thd->global_read_lock=0; pthread_mutex_lock(&LOCK_open); tmp= --global_read_lock; + if (thd->global_read_lock == MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT) + --global_read_lock_blocks_commit; pthread_mutex_unlock(&LOCK_open); /* Send the signal outside the mutex to avoid a context switch */ if (!tmp) pthread_cond_broadcast(&COND_refresh); + thd->global_read_lock= 0; } +#define must_wait (global_read_lock && \ + (is_not_commit || \ + global_read_lock_blocks_commit)) -bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh) +bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh, bool is_not_commit) { const char *old_message; bool result= 0, need_exit_cond; @@ -725,7 +747,7 @@ bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh) LINT_INIT(old_message); (void) pthread_mutex_lock(&LOCK_open); - if (need_exit_cond= (bool)global_read_lock) + if (need_exit_cond= must_wait) { if (thd->global_read_lock) // This thread had the read locks { @@ -735,7 +757,7 @@ bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh) } old_message=thd->enter_cond(&COND_refresh, &LOCK_open, "Waiting for release of readlock"); - while (global_read_lock && ! thd->killed && + while (must_wait && ! thd->killed && (!abort_on_refresh || thd->version == refresh_version)) (void) pthread_cond_wait(&COND_refresh,&LOCK_open); if (thd->killed) @@ -762,3 +784,21 @@ void start_waiting_global_read_lock(THD *thd) pthread_cond_broadcast(&COND_refresh); DBUG_VOID_RETURN; } + + +void make_global_read_lock_block_commit(THD *thd) +{ + /* + If we didn't succeed lock_global_read_lock(), or if we already suceeded + make_global_read_lock_block_commit(), do nothing. + */ + if (thd->global_read_lock != GOT_GLOBAL_READ_LOCK) + return; + pthread_mutex_lock(&LOCK_open); + /* increment this BEFORE waiting on cond (otherwise race cond) */ + global_read_lock_blocks_commit++; + while (protect_against_global_read_lock) + pthread_cond_wait(&COND_refresh, &LOCK_open); + pthread_mutex_unlock(&LOCK_open); + thd->global_read_lock= MADE_GLOBAL_READ_LOCK_BLOCK_COMMIT; +} diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index ca4b8d2c2b9..f53e14878b0 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -762,8 +762,9 @@ void mysql_lock_abort_for_thread(THD *thd, TABLE *table); MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b); bool lock_global_read_lock(THD *thd); void unlock_global_read_lock(THD *thd); -bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh); +bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh, bool is_not_commit); void start_waiting_global_read_lock(THD *thd); +void make_global_read_lock_block_commit(THD *thd); /* Lock based on name */ int lock_and_wait_for_table_name(THD *thd, TABLE_LIST *table_list); diff --git a/sql/sql_class.h b/sql/sql_class.h index 3c968c6a8ae..30947041b7d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -478,7 +478,7 @@ public: ulong rand_saved_seed1, rand_saved_seed2; long dbug_thread_id; pthread_t real_id; - uint current_tablenr,tmp_table,cond_count; + uint current_tablenr,tmp_table,cond_count,global_read_lock; uint server_status,open_options,system_thread; uint32 query_length; uint32 db_length; @@ -489,7 +489,7 @@ public: bool set_query_id,locked,count_cuted_fields,some_tables_deleted; bool no_errors, allow_sum_func, password, fatal_error; bool query_start_used,last_insert_id_used,insert_id_used,rand_used; - bool in_lock_tables,global_read_lock; + bool in_lock_tables; bool query_error, bootstrap, cleanup_done; bool safe_to_cache_query; bool volatile killed; diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 2ee725e7432..3d877403813 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -42,7 +42,7 @@ int mysql_create_db(THD *thd, char *db, uint create_options, bool silent) VOID(pthread_mutex_lock(&LOCK_mysql_create_db)); // do not create database if another thread is holding read lock - if (wait_if_global_read_lock(thd,0)) + if (wait_if_global_read_lock(thd, 0, 1)) { error= -1; goto exit2; @@ -146,7 +146,7 @@ int mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) VOID(pthread_mutex_lock(&LOCK_mysql_create_db)); // do not drop database if another thread is holding read lock - if (wait_if_global_read_lock(thd,0)) + if (wait_if_global_read_lock(thd, 0, 1)) { error= -1; goto exit2; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 7a5260a78f0..f4887c6a8e6 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -3708,8 +3708,12 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables) { if (lock_global_read_lock(thd)) return 1; + result=close_cached_tables(thd,(options & REFRESH_FAST) ? 0 : 1, + tables); + make_global_read_lock_block_commit(thd); } - result=close_cached_tables(thd,(options & REFRESH_FAST) ? 0 : 1, tables); + else + result=close_cached_tables(thd,(options & REFRESH_FAST) ? 0 : 1, tables); } if (options & REFRESH_HOSTS) hostname_cache_refresh(); -- cgit v1.2.1