diff options
author | Luis Soares <luis.soares@sun.com> | 2010-05-24 17:43:27 +0100 |
---|---|---|
committer | Luis Soares <luis.soares@sun.com> | 2010-05-24 17:43:27 +0100 |
commit | 92064b8116cf42d430d68a2ae13f203db5fb8882 (patch) | |
tree | 662309c6fb769c80aa4c7d772a701824b6edd56a /sql | |
parent | ca0aa95c6d624d861f623b7719c950c9e43a8425 (diff) | |
download | mariadb-git-92064b8116cf42d430d68a2ae13f203db5fb8882.tar.gz |
BUG#53657: Slave crashed with error 22 when trying to lock mutex
at mf_iocache.c, line 1722
The slave crashed while two threads: IO thread and user thread
raced for the same mutex (the append_buffer_lock protecting the
relay log's IO_CACHE). The IO thread was trying to flush the
cache, and for that was grabbing the append_buffer_lock.
However, the other thread was closing and reopening the relay log
when the IO thread tried to lock. Closing and reopening the log
includes destroying and reinitialising the IO_CACHE
mutex. Therefore, the IO thread tried to lock a destroyed mutex.
We fix this by backporting patch for BUG#50364 which fixed this
bug in mysql server 5.5+. The patch deploys missing
synchronization when flush_master_info is called and the relay
log is flushed by the IO thread. In detail the patch backports
revision (from mysql-trunk):
- luis.soares@sun.com-20100203165617-b1yydr0ee24ycpjm
This patch already includes the post-push fix also in BUG#50364:
- luis.soares@sun.com-20100222002629-0cijwqk6baxhj7gr
Diffstat (limited to 'sql')
-rw-r--r-- | sql/repl_failsafe.cc | 2 | ||||
-rw-r--r-- | sql/rpl_mi.cc | 27 | ||||
-rw-r--r-- | sql/rpl_mi.h | 5 | ||||
-rw-r--r-- | sql/rpl_rli.cc | 2 | ||||
-rw-r--r-- | sql/slave.cc | 4 | ||||
-rw-r--r-- | sql/sql_repl.cc | 2 |
6 files changed, 30 insertions, 12 deletions
diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc index c6a05e93bf4..c25d43ea5ba 100644 --- a/sql/repl_failsafe.cc +++ b/sql/repl_failsafe.cc @@ -976,7 +976,7 @@ bool load_master_data(THD* thd) host was specified; there could have been a problem when replication started, which led to relay log's IO_CACHE to not be inited. */ - if (flush_master_info(active_mi, 0)) + if (flush_master_info(active_mi, FALSE, FALSE)) sql_print_error("Failed to flush master info file"); } mysql_free_result(master_status_res); diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 5e46837e948..63f1f21c957 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -312,7 +312,7 @@ file '%s')", fname); mi->inited = 1; // now change cache READ -> WRITE - must do this before flush_master_info reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1); - if ((error=test(flush_master_info(mi, 1)))) + if ((error=test(flush_master_info(mi, TRUE, TRUE)))) sql_print_error("Failed to flush master info file"); pthread_mutex_unlock(&mi->data_lock); DBUG_RETURN(error); @@ -338,10 +338,13 @@ err: 1 - flush master info failed 0 - all ok */ -int flush_master_info(Master_info* mi, bool flush_relay_log_cache) +int flush_master_info(Master_info* mi, + bool flush_relay_log_cache, + bool need_lock_relay_log) { IO_CACHE* file = &mi->file; char lbuf[22]; + int err= 0; DBUG_ENTER("flush_master_info"); DBUG_PRINT("enter",("master_pos: %ld", (long) mi->master_log_pos)); @@ -358,9 +361,23 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache) When we come to this place in code, relay log may or not be initialized; the caller is responsible for setting 'flush_relay_log_cache' accordingly. */ - if (flush_relay_log_cache && - flush_io_cache(mi->rli.relay_log.get_log_file())) - DBUG_RETURN(2); + if (flush_relay_log_cache) + { + pthread_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); + IO_CACHE *log_file= mi->rli.relay_log.get_log_file(); + + if (need_lock_relay_log) + pthread_mutex_lock(log_lock); + + safe_mutex_assert_owner(log_lock); + err= flush_io_cache(log_file); + + if (need_lock_relay_log) + pthread_mutex_unlock(log_lock); + + if (err) + DBUG_RETURN(2); + } /* We flushed the relay log BEFORE the master.info file, because if we crash diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index 93fb0a98198..023879f84fa 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -108,7 +108,8 @@ int init_master_info(Master_info* mi, const char* master_info_fname, bool abort_if_no_master_info_file, int thread_mask); void end_master_info(Master_info* mi); -int flush_master_info(Master_info* mi, bool flush_relay_log_cache); - +int flush_master_info(Master_info* mi, + bool flush_relay_log_cache, + bool need_lock_relay_log); #endif /* HAVE_REPLICATION */ #endif /* RPL_MI_H */ diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 1263b7c52d9..316e26f7e40 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -120,7 +120,7 @@ int init_relay_log_info(Relay_log_info* rli, /* The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. Note that the I/O thread flushes it to disk after writing every - event, in flush_master_info(mi, 1). + event, in flush_master_info(mi, 1, ?). */ /* diff --git a/sql/slave.cc b/sql/slave.cc index 2e4642d179e..af53bc65c0e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1480,7 +1480,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi) " to the relay log, SHOW SLAVE STATUS may be" " inaccurate"); rli->relay_log.harvest_bytes_written(&rli->log_space_total); - if (flush_master_info(mi, 1)) + if (flush_master_info(mi, TRUE, TRUE)) sql_print_error("Failed to flush master info file"); delete ev; } @@ -2731,7 +2731,7 @@ Stopping slave I/O thread due to out-of-memory error from master"); "could not queue event from master"); goto err; } - if (flush_master_info(mi, 1)) + if (flush_master_info(mi, TRUE, TRUE)) { sql_print_error("Failed to flush master info file"); goto err; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c220f609c09..75a738a0073 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1282,7 +1282,7 @@ bool change_master(THD* thd, Master_info* mi) Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never a slave before). */ - if (flush_master_info(mi, 0)) + if (flush_master_info(mi, FALSE, FALSE)) { my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file"); unlock_slave_threads(mi); |