diff options
author | Mattias Jonsson <mattias.jonsson@sun.com> | 2010-03-17 15:10:41 +0100 |
---|---|---|
committer | Mattias Jonsson <mattias.jonsson@sun.com> | 2010-03-17 15:10:41 +0100 |
commit | 3b897f2bc5de31e7f166a02fccf5f3e4017f4a5f (patch) | |
tree | 817537f1993a7a5dd2793a5a9122af60a9041efd | |
parent | 8d514f21459b2bc767d7be00ab05ccf615c33e6b (diff) | |
download | mariadb-git-3b897f2bc5de31e7f166a02fccf5f3e4017f4a5f.tar.gz |
Bug#50561: ALTER PARTITIONS does not have adequate lock, breaks with
concurrent I_S query
There were two problem:
1) MYSQL_LOCK_IGNORE_FLUSH also ignored name locks
2) there was a race between abort_and_upgrade_locks and
alter_close_tables
(i.e. remove_table_from_cache and
close_data_files_and_morph_locks)
Which allowed the table to be opened with MYSQL_LOCK_IGNORE_FLUSH flag
resulting in renaming a partition that was already in use,
which could cause the table to be unusable.
Solution was to not allow IGNORE_FLUSH to skip waiting for
a named locked table.
And to not release the LOCK_open mutex between the
calls to remove_table_from_cache and
close_data_files_and_morph_locks by merging the functions
abort_and_upgrade_locks and alter_close_tables.
-rw-r--r-- | mysql-test/suite/parts/r/partition_debug_sync_innodb.result | 61 | ||||
-rw-r--r-- | mysql-test/suite/parts/t/partition_debug_sync_innodb-master.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/parts/t/partition_debug_sync_innodb.test | 44 | ||||
-rw-r--r-- | sql/authors.h | 1 | ||||
-rw-r--r-- | sql/ha_partition.cc | 4 | ||||
-rw-r--r-- | sql/mysql_priv.h | 2 | ||||
-rw-r--r-- | sql/sql_base.cc | 34 | ||||
-rw-r--r-- | sql/sql_partition.cc | 38 | ||||
-rw-r--r-- | sql/sql_show.cc | 2 |
9 files changed, 140 insertions, 47 deletions
diff --git a/mysql-test/suite/parts/r/partition_debug_sync_innodb.result b/mysql-test/suite/parts/r/partition_debug_sync_innodb.result new file mode 100644 index 00000000000..41c3d1af174 --- /dev/null +++ b/mysql-test/suite/parts/r/partition_debug_sync_innodb.result @@ -0,0 +1,61 @@ +# +# Bug#50561: ALTER PARTITIONS does not have adequate lock, breaks with +# concurrent I_S query +create table t1 (a int) +engine = innodb +partition by range (a) +(partition p0 values less than MAXVALUE); +insert into t1 values (1), (11), (21), (33); +SELECT * FROM t1; +a +1 +11 +21 +33 +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY RANGE (a) +(PARTITION p0 VALUES LESS THAN MAXVALUE ENGINE = InnoDB) */ +t1#P#p0.ibd +t1.frm +t1.par +SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open'; +SET DEBUG_SYNC='partition_open_error SIGNAL alter WAIT_FOR finish'; +SELECT * FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME = 't1' AND TABLE_SCHEMA = 'test'; +SET DEBUG_SYNC = 'now WAIT_FOR parked'; +# When waiting for the name lock in get_all_tables in sql_show.cc +# this will not be concurrent any more, thus the TIMEOUT +SET DEBUG_SYNC = 'before_rename_partitions SIGNAL open WAIT_FOR alter TIMEOUT 1'; +# Needs to be executed twice, since first is this 'SET DEBUG_SYNC ...' +SET DEBUG_SYNC = 'before_close_thread_tables SIGNAL finish EXECUTE 2'; +ALTER TABLE t1 REORGANIZE PARTITION p0 INTO +(PARTITION p0 VALUES LESS THAN (10), +PARTITION p10 VALUES LESS THAN MAXVALUE); +Warnings: +Warning 1639 debug sync point wait timed out +TABLE_CATALOG TABLE_SCHEMA TABLE_NAME PARTITION_NAME SUBPARTITION_NAME PARTITION_ORDINAL_POSITION SUBPARTITION_ORDINAL_POSITION PARTITION_METHOD SUBPARTITION_METHOD PARTITION_EXPRESSION SUBPARTITION_EXPRESSION PARTITION_DESCRIPTION TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE CREATE_TIME UPDATE_TIME CHECK_TIME CHECKSUM PARTITION_COMMENT NODEGROUP TABLESPACE_NAME +NULL test t1 p0 NULL 1 NULL RANGE NULL a NULL 10 1 16384 16384 NULL 0 0 NULL NULL NULL NULL default NULL +NULL test t1 p10 NULL 2 NULL RANGE NULL a NULL MAXVALUE 3 5461 16384 NULL 0 0 NULL NULL NULL NULL default NULL +t1#P#p0.ibd +t1#P#p10.ibd +t1.frm +t1.par +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY RANGE (a) +(PARTITION p0 VALUES LESS THAN (10) ENGINE = InnoDB, + PARTITION p10 VALUES LESS THAN MAXVALUE ENGINE = InnoDB) */ +SELECT * FROM t1; +a +1 +11 +21 +33 +drop table t1; +SET DEBUG_SYNC = 'RESET'; diff --git a/mysql-test/suite/parts/t/partition_debug_sync_innodb-master.opt b/mysql-test/suite/parts/t/partition_debug_sync_innodb-master.opt new file mode 100644 index 00000000000..9854fda301d --- /dev/null +++ b/mysql-test/suite/parts/t/partition_debug_sync_innodb-master.opt @@ -0,0 +1 @@ +--innodb_file_per_table=1 diff --git a/mysql-test/suite/parts/t/partition_debug_sync_innodb.test b/mysql-test/suite/parts/t/partition_debug_sync_innodb.test new file mode 100644 index 00000000000..79ef3d537bf --- /dev/null +++ b/mysql-test/suite/parts/t/partition_debug_sync_innodb.test @@ -0,0 +1,44 @@ +--source include/have_innodb.inc +--source include/have_partition.inc +--source include/have_debug_sync.inc + +let $MYSQLD_DATADIR=`SELECT @@datadir`; + +--echo # +--echo # Bug#50561: ALTER PARTITIONS does not have adequate lock, breaks with +--echo # concurrent I_S query +create table t1 (a int) +engine = innodb +partition by range (a) +(partition p0 values less than MAXVALUE); +insert into t1 values (1), (11), (21), (33); +SELECT * FROM t1; +SHOW CREATE TABLE t1; +--list_files $MYSQLD_DATADIR/test + +SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open'; +SET DEBUG_SYNC='partition_open_error SIGNAL alter WAIT_FOR finish'; +send +SELECT * FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME = 't1' AND TABLE_SCHEMA = 'test'; + +connect (con1, localhost, root,,); +SET DEBUG_SYNC = 'now WAIT_FOR parked'; +--echo # When waiting for the name lock in get_all_tables in sql_show.cc +--echo # this will not be concurrent any more, thus the TIMEOUT +SET DEBUG_SYNC = 'before_rename_partitions SIGNAL open WAIT_FOR alter TIMEOUT 1'; +--echo # Needs to be executed twice, since first is this 'SET DEBUG_SYNC ...' +SET DEBUG_SYNC = 'before_close_thread_tables SIGNAL finish EXECUTE 2'; +--error 0,ER_TABLE_EXISTS_ERROR +ALTER TABLE t1 REORGANIZE PARTITION p0 INTO +(PARTITION p0 VALUES LESS THAN (10), + PARTITION p10 VALUES LESS THAN MAXVALUE); + +disconnect con1; +connection default; +--reap +--list_files $MYSQLD_DATADIR/test +SHOW CREATE TABLE t1; +SELECT * FROM t1; +drop table t1; +--list_files $MYSQLD_DATADIR/test +SET DEBUG_SYNC = 'RESET'; diff --git a/sql/authors.h b/sql/authors.h index dfe3b143e2f..925c942fa24 100644 --- a/sql/authors.h +++ b/sql/authors.h @@ -76,6 +76,7 @@ struct show_table_authors_st show_table_authors[]= { { "Eric Herman", "Amsterdam, Netherlands", "Bug fixing - federated" }, { "Andrey Hristov", "Walldorf, Germany", "Event scheduler (5.1)" }, { "Alexander (Alexi) Ivanov", "St. Petersburg, Russia", "Replication" }, + { "Mattias Jonsson", "Uppsala, Sweden", "Partitioning" }, { "Alexander (Salle) Keremidarski", "Sofia, Bulgaria", "Bug fixing" }, { "Mats Kindahl", "Storvreta, Sweden", "Replication" }, diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 4523d620821..fc6c88e8732 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -58,6 +58,8 @@ #include <mysql/plugin.h> +#include "debug_sync.h" + static const char *ha_par_ext= ".par"; #ifdef NOT_USED static int free_share(PARTITION_SHARE * share); @@ -690,6 +692,7 @@ int ha_partition::rename_partitions(const char *path) DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path, norm_name_buff))); + DEBUG_SYNC(ha_thd(), "before_rename_partitions"); if (temp_partitions) { /* @@ -2607,6 +2610,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(0); err_handler: + DEBUG_SYNC(ha_thd(), "partition_open_error"); while (file-- != m_file) (*file)->close(); bitmap_free(&m_bulk_insert_started); diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 90b02f5337a..604a3589089 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1750,7 +1750,7 @@ extern pthread_mutex_t LOCK_gdl; #define WFRM_PACK_FRM 4 #define WFRM_KEEP_SHARE 8 bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags); -int abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt); +int abort_and_upgrade_lock_and_close_table(ALTER_PARTITION_PARAM_TYPE *lpt); void close_open_tables_and_downgrade(ALTER_PARTITION_PARAM_TYPE *lpt); void mysql_wait_completed_table(ALTER_PARTITION_PARAM_TYPE *lpt, TABLE *my_table); diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 0e70eb93725..91794e061e4 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -1244,6 +1244,7 @@ void close_thread_tables(THD *thd) table->s->table_name.str, (long) table)); #endif + DEBUG_SYNC(thd, "before_close_thread_tables"); /* We are assuming here that thd->derived_tables contains ONLY derived tables for this substatement. i.e. instead of approach which uses @@ -2502,7 +2503,7 @@ bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists) put in the thread-open-list. flags Bitmap of flags to modify how open works: MYSQL_LOCK_IGNORE_FLUSH - Open table even if - someone has done a flush or namelock on it. + someone has done a flush on it. No version number checking is done. MYSQL_OPEN_TEMPORARY_ONLY - Open only temporary table not the base table or view. @@ -2792,8 +2793,10 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, ("Found table '%s.%s' with different refresh version", table_list->db, table_list->table_name)); - if (flags & MYSQL_LOCK_IGNORE_FLUSH) + /* Ignore FLUSH, but not name locks! */ + if (flags & MYSQL_LOCK_IGNORE_FLUSH && !table->open_placeholder) { + DBUG_ASSERT(table->db_stat); /* Force close at once after usage */ thd->version= table->s->version; continue; @@ -4448,7 +4451,7 @@ thr_lock_type read_lock_type_for_table(THD *thd, TABLE *table) counter - number of opened tables will be return using this parameter flags - bitmap of flags to modify how the tables will be open: MYSQL_LOCK_IGNORE_FLUSH - open table even if someone has - done a flush or namelock on it. + done a flush on it. NOTE Unless we are already in prelocked mode, this function will also precache @@ -5047,7 +5050,7 @@ int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived) tables - list of tables for open flags - bitmap of flags to modify how the tables will be open: MYSQL_LOCK_IGNORE_FLUSH - open table even if someone has - done a flush or namelock on it. + done a flush on it. RETURN FALSE - ok @@ -8708,7 +8711,7 @@ bool is_equal(const LEX_STRING *a, const LEX_STRING *b) /* SYNOPSIS - abort_and_upgrade_lock() + abort_and_upgrade_lock_and_close_table() lpt Parameter passing struct All parameters passed through the ALTER_PARTITION_PARAM_TYPE object RETURN VALUE @@ -8717,7 +8720,7 @@ bool is_equal(const LEX_STRING *a, const LEX_STRING *b) Remember old lock level (for possible downgrade later on), abort all waiting threads and ensure that all keeping locks currently are completed such that we own the lock exclusively and no other interaction - is ongoing. + is ongoing. Close the table and hold the name lock. thd Thread object table Table object @@ -8726,17 +8729,26 @@ bool is_equal(const LEX_STRING *a, const LEX_STRING *b) old_lock_level Old lock level */ -int abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt) +int abort_and_upgrade_lock_and_close_table(ALTER_PARTITION_PARAM_TYPE *lpt) { uint flags= RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG; - DBUG_ENTER("abort_and_upgrade_locks"); + const char *db= lpt->db; + const char *table_name= lpt->table_name; + THD *thd= lpt->thd; + DBUG_ENTER("abort_and_upgrade_lock_and_close_table"); lpt->old_lock_type= lpt->table->reginfo.lock_type; + safe_mutex_assert_not_owner(&LOCK_open); VOID(pthread_mutex_lock(&LOCK_open)); /* If MERGE child, forward lock handling to parent. */ - mysql_lock_abort(lpt->thd, lpt->table->parent ? lpt->table->parent : - lpt->table, TRUE); - VOID(remove_table_from_cache(lpt->thd, lpt->db, lpt->table_name, flags)); + mysql_lock_abort(thd, lpt->table->parent ? lpt->table->parent : lpt->table, + TRUE); + if (remove_table_from_cache(thd, db, table_name, flags)) + { + VOID(pthread_mutex_unlock(&LOCK_open)); + DBUG_RETURN(1); + } + close_data_files_and_morph_locks(thd, db, table_name); VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(0); } diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 395156dcd63..679d23b49ad 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -5955,32 +5955,6 @@ static void alter_partition_lock_handling(ALTER_PARTITION_PARAM_TYPE *lpt) } } -/* - Unlock and close table before renaming and dropping partitions - SYNOPSIS - alter_close_tables() - lpt Struct carrying parameters - RETURN VALUES - 0 -*/ - -static int alter_close_tables(ALTER_PARTITION_PARAM_TYPE *lpt) -{ - THD *thd= lpt->thd; - const char *db= lpt->db; - const char *table_name= lpt->table_name; - DBUG_ENTER("alter_close_tables"); - /* - We need to also unlock tables and close all handlers. - We set lock to zero to ensure we don't do this twice - and we set db_stat to zero to ensure we don't close twice. - */ - pthread_mutex_lock(&LOCK_open); - close_data_files_and_morph_locks(thd, db, table_name); - pthread_mutex_unlock(&LOCK_open); - DBUG_RETURN(0); -} - /* Handle errors for ALTER TABLE for partitioning @@ -6278,9 +6252,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, write_log_drop_partition(lpt) || ERROR_INJECT_CRASH("crash_drop_partition_3") || (not_completed= FALSE) || - abort_and_upgrade_lock(lpt) || /* Always returns 0 */ - ERROR_INJECT_CRASH("crash_drop_partition_4") || - alter_close_tables(lpt) || + abort_and_upgrade_lock_and_close_table(lpt) || ERROR_INJECT_CRASH("crash_drop_partition_5") || ((!thd->lex->no_write_to_binlog) && (write_bin_log(thd, FALSE, @@ -6345,9 +6317,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, ERROR_INJECT_CRASH("crash_add_partition_2") || mysql_change_partitions(lpt) || ERROR_INJECT_CRASH("crash_add_partition_3") || - abort_and_upgrade_lock(lpt) || /* Always returns 0 */ - ERROR_INJECT_CRASH("crash_add_partition_4") || - alter_close_tables(lpt) || + abort_and_upgrade_lock_and_close_table(lpt) || ERROR_INJECT_CRASH("crash_add_partition_5") || ((!thd->lex->no_write_to_binlog) && (write_bin_log(thd, FALSE, @@ -6435,9 +6405,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, write_log_final_change_partition(lpt) || ERROR_INJECT_CRASH("crash_change_partition_4") || (not_completed= FALSE) || - abort_and_upgrade_lock(lpt) || /* Always returns 0 */ - ERROR_INJECT_CRASH("crash_change_partition_5") || - alter_close_tables(lpt) || + abort_and_upgrade_lock_and_close_table(lpt) || ERROR_INJECT_CRASH("crash_change_partition_6") || ((!thd->lex->no_write_to_binlog) && (write_bin_log(thd, FALSE, diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 989606300d8..9062f005738 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -30,6 +30,7 @@ #include "event_data_objects.h" #endif #include <my_dir.h> +#include "debug_sync.h" #define STR_OR_NIL(S) ((S) ? (S) : "<nil>") @@ -3447,6 +3448,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) lex->sql_command= SQLCOM_SHOW_FIELDS; show_table_list->i_s_requested_object= schema_table->i_s_requested_object; + DEBUG_SYNC(thd, "before_open_in_get_all_tables"); res= open_normal_and_derived_tables(thd, show_table_list, MYSQL_LOCK_IGNORE_FLUSH); lex->sql_command= save_sql_command; |