diff options
author | unknown <knielsen@knielsen-hq.org> | 2014-01-08 11:00:44 +0100 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2014-01-08 11:00:44 +0100 |
commit | 8cc6e90d74f4377491bcb7a0f1acd41ccf9fbcae (patch) | |
tree | 9d9878767d72068230ba15fc934259524b81c34d /sql/slave.cc | |
parent | bfdbb17990b198ff2a7e5eda5e2a365a1c6d8906 (diff) | |
download | mariadb-git-8cc6e90d74f4377491bcb7a0f1acd41ccf9fbcae.tar.gz |
MDEV-5509: Seconds_behind_master incorrect in parallel replication
The problem was a race between the SQL driver thread and the worker threads.
The SQL driver thread would set rli->last_master_timestamp to zero to
mark that it has caught up with the master, while the worker threads would
set it to the timestamp of the executed event. This can happen out-of-order
in parallel replication, causing the "caught up" status to be overwritten
and Seconds_Behind_Master to wrongly grow when the slave is idle.
To fix, introduce a separate flag rli->sql_thread_caught_up to mark that the
SQL driver thread is caught up. This avoids issues with worker threads
overwriting the SQL driver thread status. In parallel replication, we then
make SHOW SLAVE STATUS check in addition that all worker threads are idle
before showing Seconds_Behind_Master as 0 due to slave idle.
Diffstat (limited to 'sql/slave.cc')
-rw-r--r-- | sql/slave.cc | 39 |
1 files changed, 28 insertions, 11 deletions
diff --git a/sql/slave.cc b/sql/slave.cc index a887ad9d924..07209c30ff3 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2619,8 +2619,24 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) && mi->rli.slave_running) { - long time_diff= ((long)(time(0) - mi->rli.last_master_timestamp) - - mi->clock_diff_with_master); + long time_diff; + bool idle; + time_t stamp= mi->rli.last_master_timestamp; + + if (!stamp) + idle= true; + else + { + idle= mi->rli.sql_thread_caught_up; + if (opt_slave_parallel_threads > 0 && idle && + !mi->rli.parallel.workers_idle()) + idle= false; + } + if (idle) + time_diff= 0; + else + { + time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master); /* Apparently on some systems time_diff can be <0. Here are possible reasons related to MySQL: @@ -2636,13 +2652,15 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, slave is 2. At SHOW SLAVE STATUS time, assume that the difference between timestamp of slave and rli->last_master_timestamp is 0 (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result. - This confuses users, so we don't go below 0: hence the max(). + This confuses users, so we don't go below 0. last_master_timestamp == 0 (an "impossible" timestamp 1970) is a special marker to say "consider we have caught up". */ - protocol->store((longlong)(mi->rli.last_master_timestamp ? - max(0, time_diff) : 0)); + if (time_diff < 0) + time_diff= 0; + } + protocol->store((longlong)time_diff); } else { @@ -6100,6 +6118,7 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) if (hot_log) mysql_mutex_unlock(log_lock); + rli->sql_thread_caught_up= false; DBUG_RETURN(ev); } if (opt_reckless_slave) // For mysql-test @@ -6137,12 +6156,10 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) Seconds_Behind_Master would be zero only when master has no more updates in binlog for slave. The heartbeat can be sent in a (small) fraction of slave_net_timeout. Until it's done - rli->last_master_timestamp is temporarely (for time of - waiting for the following event) reset whenever EOF is - reached. + rli->sql_thread_caught_up is temporarely (for time of waiting for + the following event) set whenever EOF is reached. */ - time_t save_timestamp= rli->last_master_timestamp; - rli->last_master_timestamp= 0; + rli->sql_thread_caught_up= true; DBUG_ASSERT(rli->relay_log.get_open_count() == rli->cur_log_old_open_count); @@ -6268,7 +6285,7 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd); // re-acquire data lock since we released it earlier mysql_mutex_lock(&rli->data_lock); - rli->last_master_timestamp= save_timestamp; + rli->sql_thread_caught_up= false; continue; } /* |