diff options
author | unknown <guilhem@mysql.com> | 2003-10-03 22:13:01 +0200 |
---|---|---|
committer | unknown <guilhem@mysql.com> | 2003-10-03 22:13:01 +0200 |
commit | 1bd7662b896f31786b5de5dd2fcd309d148d32cf (patch) | |
tree | 09360947cec8862c4740eeb29ec0f1325a6084de | |
parent | 7a58bfee6195cdb2804a75ccd90af81c87f696e4 (diff) | |
download | mariadb-git-1bd7662b896f31786b5de5dd2fcd309d148d32cf.tar.gz |
When the I/O thread was stopped while copying a long transaction, and restarted,
Rotate_log_event::exec_event() believed that the relay log was corrupted. Fixed it
by moving the test for corruption to Start_log_event::exec_event().
Changed Rotate_log_event::exec_event() to not update the master log name and
position when the event is seen in the middle of a transaction (the relay log position is still advanced).
I did a separate commit in 4.1 (so this should not be merged to 4.0) because
the code is a bit different in 4.1.
Added a test to check that the slave detects when the master died while writing a
transaction to the binlog (it uses a forged truncated binlog I made).
sql/log_event.cc:
When the I/O thread was stopped while copying a long transaction, and restarted,
Rotate_log_event::exec_event() believed that the relay log was corrupted. Fixed it
by moving the test for corruption to Start_log_event::exec_event().
Changed Rotate_log_event::exec_event() to not update the master log name and
position when the event is seen in the middle of a transaction (the relay log position is still advanced).
-rw-r--r-- | mysql-test/r/rpl_trunc_binlog.result | 14 | ||||
-rw-r--r-- | mysql-test/std_data/trunc_binlog.001 | bin | 0 -> 119 bytes | |||
-rw-r--r-- | mysql-test/t/rpl_trunc_binlog.test | 22 | ||||
-rw-r--r-- | sql/log_event.cc | 66 |
4 files changed, 69 insertions, 33 deletions
diff --git a/mysql-test/r/rpl_trunc_binlog.result b/mysql-test/r/rpl_trunc_binlog.result new file mode 100644 index 00000000000..6d2158eedfe --- /dev/null +++ b/mysql-test/r/rpl_trunc_binlog.result @@ -0,0 +1,14 @@ +slave stop; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +slave start; +stop slave; +flush logs; +reset slave; +start slave; +show slave status; +Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_do_db Replicate_ignore_db Last_errno Last_error Skip_counter Exec_master_log_pos Relay_log_space +127.0.0.1 root MASTER_PORT 1 master-bin.002 4 slave-relay-bin.002 161 master-bin.001 Yes No 0 there is an unfinished transaction in the relay log (could find neither COMMIT nor ROLLBACK in the relay log); it could be that the master died while writing the transaction to its binary log. Now the slave is rolling back the transaction. 0 79 317 +reset master; diff --git a/mysql-test/std_data/trunc_binlog.001 b/mysql-test/std_data/trunc_binlog.001 Binary files differnew file mode 100644 index 00000000000..2c2b4ec6ce4 --- /dev/null +++ b/mysql-test/std_data/trunc_binlog.001 diff --git a/mysql-test/t/rpl_trunc_binlog.test b/mysql-test/t/rpl_trunc_binlog.test new file mode 100644 index 00000000000..efdc3012471 --- /dev/null +++ b/mysql-test/t/rpl_trunc_binlog.test @@ -0,0 +1,22 @@ +# We are testing if a binlog which contains BEGIN but not COMMIT (the master did +# while writing the transaction to the binlog) triggers an error on slave. +# So we use such a truncated binlog and simulate that the master restarted after +# this. 
+ +source include/master-slave.inc; + +connection slave; +stop slave; +connection master; +flush logs; +system mv -f var/log/master-bin.001 var/log/master-bin.002; +system cp std_data/trunc_binlog.001 var/log/master-bin.001; +connection slave; +reset slave; +start slave; +# can't sync_with_master so we must sleep +sleep 3; +--replace_result $MASTER_MYPORT MASTER_PORT +show slave status; +connection master; +reset master; diff --git a/sql/log_event.cc b/sql/log_event.cc index 292e371e4dd..18d1624dfde 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -2091,6 +2091,23 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli) */ close_temporary_tables(thd); cleanup_load_tmpdir(); + /* + As a transaction NEVER spans on 2 or more binlogs: + if we have an active transaction at this point, the master died while + writing the transaction to the binary log, i.e. while flushing the binlog + cache to the binlog. As the write was started, the transaction had been + committed on the master, so we lack of information to replay this + transaction on the slave; all we can do is stop with error. + */ + if (rli->inside_transaction) + { + slave_print_error(rli, 0, + "there is an unfinished transaction in the relay log \ +(could find neither COMMIT nor ROLLBACK in the relay log); it could be that \ +the master died while writing the transaction to its binary log. Now the slave \ +is rolling back the transaction."); + return(1); + } break; /* Now the older formats; in that case load_tmpdir is cleaned up by the I/O @@ -2166,51 +2183,34 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli) We can't rotate the slave as this will cause infinitive rotations in a A -> B -> A setup. - NOTES - As a transaction NEVER spans on 2 or more binlogs: - if we have an active transaction at this point, the master died while - writing the transaction to the binary log, i.e. while flushing the binlog - cache to the binlog. 
As the write was started, the transaction had been - committed on the master, so we lack of information to replay this - transaction on the slave; all we can do is stop with error. - If we didn't detect it, then positions would start to become garbage (as we - are incrementing rli->relay_log_pos whereas we are in a transaction: the new - rli->relay_log_pos will be - relay_log_pos of the BEGIN + size of the Rotate event = garbage. - - Since MySQL 4.0.14, the master ALWAYS sends a Rotate event when it starts - sending the next binlog, so we are sure to receive a Rotate event just - after the end of the "dead master"'s binlog; so this exec_event() is the - right place to catch the problem. If we would wait until - Start_log_event::exec_event() it would be too late, rli->relay_log_pos would - already be garbage. - RETURN VALUES 0 ok */ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) { - char* log_name = rli->master_log_name; DBUG_ENTER("Rotate_log_event::exec_event"); pthread_mutex_lock(&rli->data_lock); - - if (rli->inside_transaction) + /* + If we are in a transaction: the only normal case is when the I/O thread was + copying a big transaction, then it was stopped and restarted: we have this + in the relay log: + BEGIN + ... + ROTATE (a fake one) + ... + COMMIT or ROLLBACK + In that case, we don't want to touch the coordinates which correspond to the + beginning of the transaction. + */ + if (!rli->inside_transaction) { - slave_print_error(rli, 0, - "there is an unfinished transaction in the relay log \ -(could find neither COMMIT nor ROLLBACK in the relay log); it could be that \ -the master died while writing the transaction to its binary log. 
Now the slave \ -is rolling back the transaction."); - pthread_mutex_unlock(&rli->data_lock); - DBUG_RETURN(1); + memcpy(rli->master_log_name, new_log_ident, ident_len+1); + rli->master_log_pos= pos; + DBUG_PRINT("info", ("master_log_pos: %d", (ulong) rli->master_log_pos)); } - - memcpy(log_name, new_log_ident, ident_len+1); - rli->master_log_pos = pos; rli->relay_log_pos += get_event_len(); - DBUG_PRINT("info", ("master_log_pos: %d", (ulong) rli->master_log_pos)); pthread_mutex_unlock(&rli->data_lock); pthread_cond_broadcast(&rli->data_cond); flush_relay_log_info(rli); |