summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <aelkin/elkin@koti.dsl.inet.fi>2007-10-04 18:46:31 +0300
committerunknown <aelkin/elkin@koti.dsl.inet.fi>2007-10-04 18:46:31 +0300
commitbf8a85aa56fe755a1712a08b45667734765d45c8 (patch)
tree7bea0c44c6bb17183e0c153e64747c11b97e9902
parent69604f4db10f125651ab0620320dbd91253c5bc6 (diff)
downloadmariadb-git-bf8a85aa56fe755a1712a08b45667734765d45c8.tar.gz
Bug #29309 Incorrect "Seconds_Behind_Master" value in SHOW SLAVE STATUS after FLUSH LOGS
Report claims that Seconds_behind_master behaves unexpectedly. Code analysis shows that there is an evident flaw in that treating of FormatDescription event is wrong so that after FLUSH LOGS on slave the Seconds_behind_master's calculation slips and incorrect value can be reported to SHOW SLAVE STATUS. Even worse is that the gap between the correct and incorrect deltas grows with time. Fixed with prohibiting changes to rpl->last_master_timestamp by artifical events (any kind of). suggestion as comments is added how to fight with lack of info on the slave side by means of new heartbeat feature coming. The test can not be done ealily fully determistic. sql/log_event.cc: changing timestamp is affirmed only by non-artificial events. Artifical FD won't change it anymore. The simulation code is off unless server is started with the args from the opt-file. The simulation code assumes that it will execute specific schedule generated by rpl_replication_delay.test. sql/slave.cc: Comments are changed to announce a new possibility to cope with Seconds_behind_master jumping due to EOF special treatment (reset of the timestamp) mysql-test/suite/manual/r/rpl_replication_delay.result: result are not deterministic though there are comments saying the most probable expected value for Seconds_behind_master mysql-test/suite/manual/t/rpl_replication_delay-slave.opt: bug emulation mysql-test/suite/manual/t/rpl_replication_delay.test: specic test for bug#29309. It's hard to make it reliable as it deals with timestamps. (a way to automate the test like this is to have I_S table for show slave status' fields) A possible way to check results is to run grep -i 'show\|seconds' < suite/manual/r/rpl_replication_delay.reject and to get the lines like these: show slave status /* Second_behind reports 0 */;; Seconds_Behind_Master 0 show slave status /* bug emulated: reports slave threads starting time about 3*3 not 3 secs */;; Seconds_Behind_Master 9 show slave status /* reports the correct diff with master query time about 3+3 secs */;; Seconds_Behind_Master 6 Due to time discreteness of time the reported numbers may slightly vary. That's why the test is not reliable.
-rw-r--r--mysql-test/suite/manual/r/rpl_replication_delay.result121
-rw-r--r--mysql-test/suite/manual/t/rpl_replication_delay-slave.opt1
-rw-r--r--mysql-test/suite/manual/t/rpl_replication_delay.test71
-rw-r--r--sql/log_event.cc32
-rw-r--r--sql/slave.cc11
5 files changed, 234 insertions, 2 deletions
diff --git a/mysql-test/suite/manual/r/rpl_replication_delay.result b/mysql-test/suite/manual/r/rpl_replication_delay.result
new file mode 100644
index 00000000000..22447a30cba
--- /dev/null
+++ b/mysql-test/suite/manual/r/rpl_replication_delay.result
@@ -0,0 +1,121 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+show slave status /* Second_behind reports 0 */;;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port 9306
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 98
+Relay_Log_File #
+Relay_Log_Pos #
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running Yes
+Slave_SQL_Running Yes
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 0
+Last_Error
+Skip_Counter 0
+Exec_Master_Log_Pos 98
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master 0
+drop table if exists t1;
+Warnings:
+Note 1051 Unknown table 't1'
+create table t1 (f1 int);
+flush logs /* contaminate rli->last_master_timestamp */;
+lock table t1 write;
+insert into t1 values (1);
+show slave status /* bug emulated: reports slave threads starting time about 3*3 not 3 secs */;;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port 9306
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 359
+Relay_Log_File #
+Relay_Log_Pos #
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running Yes
+Slave_SQL_Running Yes
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 0
+Last_Error
+Skip_Counter 0
+Exec_Master_Log_Pos 271
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master 10
+unlock tables;
+flush logs /* this time rli->last_master_timestamp is not affected */;
+lock table t1 write;
+insert into t1 values (2);
+show slave status /* reports the correct diff with master query time about 3+3 secs */;;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port 9306
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 447
+Relay_Log_File #
+Relay_Log_Pos #
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running Yes
+Slave_SQL_Running Yes
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 0
+Last_Error
+Skip_Counter 0
+Exec_Master_Log_Pos 359
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master 6
+unlock tables;
+drop table t1;
diff --git a/mysql-test/suite/manual/t/rpl_replication_delay-slave.opt b/mysql-test/suite/manual/t/rpl_replication_delay-slave.opt
new file mode 100644
index 00000000000..24a4c5952fe
--- /dev/null
+++ b/mysql-test/suite/manual/t/rpl_replication_delay-slave.opt
@@ -0,0 +1 @@
+--loose-debug=d,let_first_flush_log_change_timestamp
diff --git a/mysql-test/suite/manual/t/rpl_replication_delay.test b/mysql-test/suite/manual/t/rpl_replication_delay.test
new file mode 100644
index 00000000000..8230698c8f9
--- /dev/null
+++ b/mysql-test/suite/manual/t/rpl_replication_delay.test
@@ -0,0 +1,71 @@
+#
+# Testing replication delay reporting (bug#29309)
+# there is an unavoidable non-determinism in the test
+# please compare the results with the comments
+#
+
+
+source include/master-slave.inc;
+
+connection master;
+#connection slave;
+sync_slave_with_master;
+--replace_result $DEFAULT_MASTER_PORT DEFAULT_MASTER_PORT
+--replace_column 1 # 8 # 9 # 23 #
+--query_vertical show slave status /* Second_behind reports 0 */;
+sleep 3;
+
+### bug emulation
+
+connection master;
+drop table if exists t1;
+create table t1 (f1 int);
+sleep 3;
+
+#connection slave;
+sync_slave_with_master;
+flush logs /* contaminate rli->last_master_timestamp */;
+
+connection slave;
+lock table t1 write;
+
+connection master;
+insert into t1 values (1);
+
+sleep 3;
+
+connection slave;
+--replace_result $DEFAULT_MASTER_PORT DEFAULT_MASTER_PORT
+--replace_column 1 # 8 # 9 # 23 #
+--query_vertical show slave status /* bug emulated: reports slave threads starting time about 3*3 not 3 secs */;
+unlock tables;
+
+connection master;
+sync_slave_with_master;
+
+### bugfix
+
+
+connection slave;
+flush logs /* this time rli->last_master_timestamp is not affected */;
+lock table t1 write;
+
+connection master;
+insert into t1 values (2);
+sleep 3;
+
+connection slave;
+--replace_result $DEFAULT_MASTER_PORT DEFAULT_MASTER_PORT
+--replace_column 1 # 8 # 9 # 23 #
+--query_vertical show slave status /* reports the correct diff with master query time about 3+3 secs */;
+unlock tables;
+
+connection master;
+drop table t1;
+
+#connection slave;
+sync_slave_with_master;
+
+
+# End of tests
+
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 1ef765f607f..3899e772bf8 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -27,6 +27,10 @@
#define log_cs &my_charset_latin1
+#ifndef DBUG_OFF
+uint debug_not_change_ts_if_art_event= 1; // bug#29309 simulation
+#endif
+
/*
pretty_print_str()
*/
@@ -481,6 +485,18 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
rli->inc_event_relay_log_pos();
else
{
+ /*
+ bug#29309 simulation: resetting the flag to force
+ wrong behaviour of artificial event to update
+ rli->last_master_timestamp for only one time -
+ the first FLUSH LOGS in the test.
+ */
+ DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp",
+ if (debug_not_change_ts_if_art_event == 1
+ && is_artificial_event())
+ {
+ debug_not_change_ts_if_art_event= 0;
+ });
rli->inc_group_relay_log_pos(log_pos);
flush_relay_log_info(rli);
/*
@@ -491,7 +507,21 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
rare cases, only consequence is that value may take some time to
display in Seconds_Behind_Master - not critical).
*/
- rli->last_master_timestamp= when;
+#ifndef DBUG_OFF
+ if (!(is_artificial_event() && debug_not_change_ts_if_art_event > 0))
+#else
+ if (!is_artificial_event())
+#endif
+ rli->last_master_timestamp= when;
+ /*
+ The flag is set back to be positive so that
+ any further FLUSH LOGS will be handled as prescribed.
+ */
+ DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp",
+ if (debug_not_change_ts_if_art_event == 0)
+ {
+ debug_not_change_ts_if_art_event= 2;
+ });
}
}
DBUG_RETURN(0);
diff --git a/sql/slave.cc b/sql/slave.cc
index 57f6e64ce03..0bc4bf7b32f 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -4931,7 +4931,16 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
a new event and is queuing it; the false "0" will exist until SQL
finishes executing the new event; it will be look abnormal only if
the events have old timestamps (then you get "many", 0, "many").
- Transient phases like this can't really be fixed.
+
+ Transient phases like this can be fixed with implemeting
+ Heartbeat event which provides the slave the status of the
+ master at time the master does not have any new update to send.
+ Seconds_Behind_Master would be zero only when master has no
+ more updates in binlog for slave. The heartbeat can be sent
+ in a (small) fraction of slave_net_timeout. Until it's done
+ rli->last_master_timestamp is temporarely (for time of
+ waiting for the following event) reset whenever EOF is
+ reached.
*/
time_t save_timestamp= rli->last_master_timestamp;
rli->last_master_timestamp= 0;