diff options
author | unknown <guilhem@gbichot2> | 2003-11-23 17:02:59 +0100 |
---|---|---|
committer | unknown <guilhem@gbichot2> | 2003-11-23 17:02:59 +0100 |
commit | bd6a70019e906cd49b7f81e6ce7242a0a43b8a37 (patch) | |
tree | 83cd5e156dfb886c15c3d3d13b272fe43f7e430b | |
parent | d0d8ba7815fc75164d79a9758a01e7efbe1b8022 (diff) | |
download | mariadb-git-bd6a70019e906cd49b7f81e6ce7242a0a43b8a37.tar.gz |
Replication:
Now the I/O thread (in flush_master_info()) flushes the relay log to disk
after reading every event. Slower, but provides additional safety in case
of a brutal crash.
I had to make the flush optional (i.e. add an if (some_bool_argument) in the function)
because sometimes flush_master_info() is called when there is no usable
relay log (the relay log's IO_CACHE is not initialized, so it can't be flushed).
mysql-test/r/rpl_loaddata_rule_m.result:
avoid a harmless error in the .err file; we don't need a slave in this test
(even though it's called 'rpl' because it's testing binlog-ignore-db).
mysql-test/t/rpl_loaddata_rule_m.test:
result update
sql/repl_failsafe.cc:
update call to flush_master_info() according to new prototype.
sql/slave.cc:
- Now the I/O thread (in flush_master_info()) flushes the relay log to disk
after reading every event. Slower, but provides additional safety in case
of a brutal crash.
I had to make the flush optional (i.e. add an if (some_bool_argument) in the function)
because sometimes flush_master_info() is called when there is no usable
relay log (the relay log's IO_CACHE is not initialized, so it can't be flushed).
- Update version in message.
- Remove warning about bug as it's not true anymore (since this changeset).
sql/slave.h:
new prototype
sql/sql_repl.cc:
update call to flush_master_info() according to new prototype.
-rw-r--r-- | mysql-test/r/rpl_loaddata_rule_m.result | 2 | ||||
-rw-r--r-- | mysql-test/t/rpl_loaddata_rule_m.test | 2 | ||||
-rw-r--r-- | sql/repl_failsafe.cc | 7 | ||||
-rw-r--r-- | sql/slave.cc | 51 | ||||
-rw-r--r-- | sql/slave.h | 2 | ||||
-rw-r--r-- | sql/sql_repl.cc | 7 |
6 files changed, 35 insertions, 36 deletions
diff --git a/mysql-test/r/rpl_loaddata_rule_m.result b/mysql-test/r/rpl_loaddata_rule_m.result index ed0c96bbfe1..a34453b0a2b 100644 --- a/mysql-test/r/rpl_loaddata_rule_m.result +++ b/mysql-test/r/rpl_loaddata_rule_m.result @@ -5,7 +5,7 @@ reset slave; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; start slave; drop database if exists mysqltest; -reset master; +stop slave; create database mysqltest; create table t1(a int, b int, unique(b)); use mysqltest; diff --git a/mysql-test/t/rpl_loaddata_rule_m.test b/mysql-test/t/rpl_loaddata_rule_m.test index ec3a9259e32..678dae13889 100644 --- a/mysql-test/t/rpl_loaddata_rule_m.test +++ b/mysql-test/t/rpl_loaddata_rule_m.test @@ -9,7 +9,7 @@ drop database if exists mysqltest; --enable_warnings connection slave; -reset master; +stop slave; # don't need slave for this test # Test logging on master diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc index 0d2da91e015..084f7386b7b 100644 --- a/sql/repl_failsafe.cc +++ b/sql/repl_failsafe.cc @@ -908,7 +908,12 @@ int load_master_data(THD* thd) // don't hit the magic number if (active_mi->master_log_pos < BIN_LOG_HEADER_SIZE) active_mi->master_log_pos = BIN_LOG_HEADER_SIZE; - flush_master_info(active_mi); + /* + Relay log's IO_CACHE may not be inited (even if we are sure that some + host was specified; there could have been a problem when replication + started, which led to relay log's IO_CACHE to not be inited. + */ + flush_master_info(active_mi, 0); } mysql_free_result(master_status_res); } diff --git a/sql/slave.cc b/sql/slave.cc index ae524db5511..5fab217762c 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1107,7 +1107,7 @@ static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi) break; default: /* 5.0 is not supported */ - errmsg = "Master reported an unrecognized MySQL version. Note that 4.0 \ + errmsg = "Master reported an unrecognized MySQL version. 
Note that 4.1 \ slaves can't replicate a 5.0 or newer master."; break; } @@ -1368,32 +1368,9 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) } /* - The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. It is - notable that the last kilobytes of it (8 kB for example) may live in - memory, not on disk (depending on what the thread using it does). While - this is efficient, it has a side-effect one must know: - The size of the relay log on disk (displayed by 'ls -l' on Unix) can be a - few kilobytes less than one would expect by doing SHOW SLAVE STATUS; this - happens when only the IO thread is started (not the SQL thread). The - "missing" kilobytes are in memory, are preserved during 'STOP SLAVE; START - SLAVE IO_THREAD', and are flushed to disk when the slave's mysqld stops. So - this does not cause any bug. Example of how disk size grows by leaps: - - Read_Master_Log_Pos: 7811 -rw-rw---- 1 guilhem qq 4 Jun 5 16:19 gbichot2-relay-bin.002 - ...later... - Read_Master_Log_Pos: 9744 -rw-rw---- 1 guilhem qq 8192 Jun 5 16:27 gbichot2-relay-bin.002 - - See how 4 is less than 7811 and 8192 is less than 9744. - - WARNING: this is risky because the slave can stay like this for a long - time; then if it has a power failure, master.info says the I/O thread has - read until 9744 while the relay-log contains only until 8192 (the - in-memory part from 8192 to 9744 has been lost), so the SQL slave thread - will miss some events, silently breaking replication. - Ideally we would like to flush master.info only when we know that the relay - log has no in-memory tail. - Note that the above problem may arise only when only the IO thread is - started, which is unlikely. + The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. + Note that the I/O thread flushes it to disk after writing every event, in + flush_master_info(mi, 1). 
*/ /* @@ -1850,7 +1827,7 @@ file '%s')", fname); mi->inited = 1; // now change cache READ -> WRITE - must do this before flush_master_info reinit_io_cache(&mi->file, WRITE_CACHE,0L,0,1); - if ((error=test(flush_master_info(mi)))) + if ((error=test(flush_master_info(mi, 1)))) sql_print_error("Failed to flush master info file"); pthread_mutex_unlock(&mi->data_lock); DBUG_RETURN(error); @@ -2100,7 +2077,7 @@ int show_master_info(THD* thd, MASTER_INFO* mi) } -bool flush_master_info(MASTER_INFO* mi) +bool flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache) { IO_CACHE* file = &mi->file; char lbuf[22]; @@ -2124,6 +2101,20 @@ bool flush_master_info(MASTER_INFO* mi) (int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert, mi->ssl_cipher, mi->ssl_key); flush_io_cache(file); + /* + Flush the relay log to disk. If we don't do it, then the relay log while + have some part (its last kilobytes) in memory only, so if the slave server + dies now, with, say, from master's position 100 to 150 in memory only (not + on disk), and with position 150 in master.info, then when the slave + restarts, the I/O thread will fetch binlogs from 150, so in the relay log + we will have "[0, 100] U [150, infinity[" and nobody will notice it, so the + SQL thread will jump from 100 to 150, and replication will silently break. + + When we come to this place in code, relay log may or not be initialized; + the caller is responsible for setting 'flush_relay_log_cache' accordingly. + */ + if (flush_relay_log_cache) + flush_io_cache(mi->rli.relay_log.get_log_file()); DBUG_RETURN(0); } @@ -2982,7 +2973,7 @@ reconnect done to recover from failed read"); sql_print_error("Slave I/O thread could not queue event from master"); goto err; } - flush_master_info(mi); + flush_master_info(mi, 1); /* sure that we can flush the relay log */ /* See if the relay logs take too much space. 
We don't lock mi->rli.log_space_lock here; this dirty read saves time diff --git a/sql/slave.h b/sql/slave.h index f8093826f58..e42b93a47ef 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -461,7 +461,7 @@ typedef struct st_table_rule_ent int init_slave(); void init_slave_skip_errors(const char* arg); -bool flush_master_info(MASTER_INFO* mi); +bool flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache); bool flush_relay_log_info(RELAY_LOG_INFO* rli); int register_slave_on_master(MYSQL* mysql); int terminate_slave_threads(MASTER_INFO* mi, int thread_mask, diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c30409fd0cb..3c477afa300 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1085,8 +1085,11 @@ int change_master(THD* thd, MASTER_INFO* mi) strmake(mi->master_log_name, mi->rli.group_master_log_name, sizeof(mi->master_log_name)-1); } - - flush_master_info(mi); + /* + Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never + a slave before). + */ + flush_master_info(mi, 0); if (need_relay_log_purge) { relay_log_purge= 1; |