diff options
author | unknown <guilhem@mysql.com> | 2004-12-16 18:14:51 +0100 |
---|---|---|
committer | unknown <guilhem@mysql.com> | 2004-12-16 18:14:51 +0100 |
commit | 93f82e8ebf7855ce9b4ecb903f5a49e5fe079090 (patch) | |
tree | 90b3e7f1034bbf847439484ca5e823d4bec73851 /sql | |
parent | ae6f147a4f65b0392e61c6d447523fa9b6db186f (diff) | |
parent | 4869f022fd4761445843135102790691e8d48861 (diff) | |
download | mariadb-git-93f82e8ebf7855ce9b4ecb903f5a49e5fe079090.tar.gz |
Merge gbichot@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/home/mysql_src/mysql-4.1-clean
Diffstat (limited to 'sql')
-rw-r--r-- | sql/slave.cc | 61 | ||||
-rw-r--r-- | sql/slave.h | 27 | ||||
-rw-r--r-- | sql/sql_repl.cc | 8 |
3 files changed, 74 insertions, 22 deletions
diff --git a/sql/slave.cc b/sql/slave.cc index bd9650ed369..90c75243f3d 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -545,7 +545,7 @@ int terminate_slave_threads(MASTER_INFO* mi,int thread_mask,bool skip_lock) int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock, pthread_mutex_t *cond_lock, pthread_cond_t* term_cond, - volatile bool* slave_running) + volatile uint *slave_running) { if (term_lock) { @@ -583,7 +583,7 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock, int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock, pthread_mutex_t *cond_lock, pthread_cond_t *start_cond, - volatile bool *slave_running, + volatile uint *slave_running, volatile ulong *slave_run_id, MASTER_INFO* mi, bool high_priority) @@ -963,7 +963,7 @@ void end_slave() static bool io_slave_killed(THD* thd, MASTER_INFO* mi) { DBUG_ASSERT(mi->io_thd == thd); - DBUG_ASSERT(mi->slave_running == 1); // tracking buffer overrun + DBUG_ASSERT(mi->slave_running); // tracking buffer overrun return mi->abort_slave || abort_loop || thd->killed; } @@ -1767,19 +1767,13 @@ void init_master_info_with_options(MASTER_INFO* mi) strmake(mi->ssl_key, master_ssl_key, sizeof(mi->ssl_key)-1); } -static void clear_slave_error(RELAY_LOG_INFO* rli) +void clear_slave_error(RELAY_LOG_INFO* rli) { /* Clear the errors displayed by SHOW SLAVE STATUS */ rli->last_slave_error[0]= 0; rli->last_slave_errno= 0; } -void clear_slave_error_timestamp(RELAY_LOG_INFO* rli) -{ - rli->last_master_timestamp= 0; - clear_slave_error(rli); -} - /* Reset UNTIL condition for RELAY_LOG_INFO SYNOPSYS @@ -2166,6 +2160,11 @@ int show_master_info(THD* thd, MASTER_INFO* mi) String *packet= &thd->packet; protocol->prepare_for_resend(); + /* + TODO: we read slave_running without run_lock, whereas these variables + are updated under run_lock and not data_lock. In 5.0 we should lock + run_lock on top of data_lock (with good order). + */ pthread_mutex_lock(&mi->data_lock); pthread_mutex_lock(&mi->rli.data_lock); @@ -2226,7 +2225,12 @@ int show_master_info(THD* thd, MASTER_INFO* mi) protocol->store(mi->ssl_cipher, &my_charset_bin); protocol->store(mi->ssl_key, &my_charset_bin); - if (mi->rli.last_master_timestamp) + /* + Seconds_Behind_Master: if SQL thread is running and I/O thread is + connected, we can compute it otherwise show NULL (i.e. unknown). + */ + if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) && + mi->rli.slave_running) { long tmp= (long)((time_t)time((time_t*) 0) - mi->rli.last_master_timestamp) @@ -2246,9 +2250,13 @@ int show_master_info(THD* thd, MASTER_INFO* mi) slave is 2. At SHOW SLAVE STATUS time, assume that the difference between timestamp of slave and rli->last_master_timestamp is 0 (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result. - This confuses users, so we don't go below 0. + This confuses users, so we don't go below 0: hence the max(). + + last_master_timestamp == 0 (an "impossible" timestamp 1970) is a + special marker to say "consider we have caught up". */ - protocol->store((longlong)(max(0, tmp))); + protocol->store((longlong)(mi->rli.last_master_timestamp ? max(0, tmp) + : 0)); } else protocol->store_null(); @@ -3041,6 +3049,8 @@ slave_begin: connected: + // TODO: the assignment below should be under mutex (5.0) + mi->slave_running= MYSQL_SLAVE_RUN_CONNECT; thd->slave_net = &mysql->net; thd->proc_info = "Checking master version"; if (get_master_version_and_clock(mysql, mi)) @@ -3072,6 +3082,7 @@ dump"); goto err; } + mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT; thd->proc_info= "Waiting to reconnect after a failed binlog dump request"; #ifdef SIGNAL_WITH_VIO_CLOSE thd->clear_active_vio(); @@ -3148,6 +3159,7 @@ max_allowed_packet", mysql_error(mysql)); goto err; } + mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT; thd->proc_info = "Waiting to reconnect after a failed master event read"; #ifdef SIGNAL_WITH_VIO_CLOSE thd->clear_active_vio(); @@ -3323,6 +3335,14 @@ slave_begin: pthread_mutex_lock(&LOCK_thread_count); threads.append(thd); pthread_mutex_unlock(&LOCK_thread_count); + /* + We are going to set slave_running to 1. Assuming slave I/O thread is + alive and connected, this is going to make Seconds_Behind_Master be 0 + i.e. "caught up". Even if we're just at start of thread. Well it's ok, at + the moment we start we can think we are caught up, and the next second we + start receiving data so we realize we are not caught up and + Seconds_Behind_Master grows. No big deal. + */ rli->slave_running = 1; rli->abort_slave = 0; pthread_mutex_unlock(&rli->run_lock); @@ -4211,10 +4231,25 @@ Before assert, my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s", */ pthread_mutex_unlock(&rli->log_space_lock); pthread_cond_broadcast(&rli->log_space_cond); + /* + We say in Seconds_Behind_Master that we have "caught up". Note that + for example if network link is broken but I/O slave thread hasn't + noticed it (slave_net_timeout not elapsed), then we'll say "caught + up" whereas we're not really caught up. Fixing that would require + internally cutting timeout in smaller pieces in network read, no + thanks. Another example: SQL has caught up on I/O, now I/O has read + a new event and is queuing it; the false "0" will exist until SQL + finishes executing the new event; it will be look abnormal only if + the events have old timestamps (then you get "many", 0, "many"). + Transient phases like this can't really be fixed. + */ + time_t save_timestamp= rli->last_master_timestamp; + rli->last_master_timestamp= 0; // Note that wait_for_update unlocks lock_log ! rli->relay_log.wait_for_update(rli->sql_thd, 1); // re-acquire data lock since we released it earlier pthread_mutex_lock(&rli->data_lock); + rli->last_master_timestamp= save_timestamp; continue; } /* diff --git a/sql/slave.h b/sql/slave.h index 08cf0806717..bcd79dd4a39 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -98,6 +98,21 @@ enum enum_binlog_formats { BINLOG_FORMAT_323_LESS_57, BINLOG_FORMAT_323_GEQ_57 }; +/* + 3 possible values for MASTER_INFO::slave_running and + RELAY_LOG_INFO::slave_running. + The values 0,1,2 are very important: to keep the diff small, I didn't + substitute places where we use 0/1 with the newly defined symbols. So don't change + these values. + The same way, code is assuming that in RELAY_LOG_INFO we use only values + 0/1. + I started with using an enum, but + enum_variable=1; is not legal so would have required many line changes. +*/ +#define MYSQL_SLAVE_NOT_RUN 0 +#define MYSQL_SLAVE_RUN_NOT_CONNECT 1 +#define MYSQL_SLAVE_RUN_CONNECT 2 + /**************************************************************************** Replication SQL Thread @@ -251,7 +266,8 @@ typedef struct st_relay_log_info /* if not set, the value of other members of the structure are undefined */ bool inited; - volatile bool abort_slave, slave_running; + volatile bool abort_slave; + volatile uint slave_running; /* Condition and its parameters from START SLAVE UNTIL clause. @@ -385,7 +401,8 @@ typedef struct st_master_info #endif bool inited; enum enum_binlog_formats old_format; - volatile bool abort_slave, slave_running; + volatile bool abort_slave; + volatile uint slave_running; volatile ulong slave_run_id; /* The difference in seconds between the clock of the master and the clock of @@ -464,7 +481,7 @@ int terminate_slave_threads(MASTER_INFO* mi, int thread_mask, int terminate_slave_thread(THD* thd, pthread_mutex_t* term_mutex, pthread_mutex_t* cond_lock, pthread_cond_t* term_cond, - volatile bool* slave_running); + volatile uint* slave_running); int start_slave_threads(bool need_slave_mutex, bool wait_for_start, MASTER_INFO* mi, const char* master_info_fname, const char* slave_info_fname, int thread_mask); @@ -477,7 +494,7 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start, int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock, pthread_mutex_t *cond_lock, pthread_cond_t* start_cond, - volatile bool *slave_running, + volatile uint *slave_running, volatile ulong *slave_run_id, MASTER_INFO* mi, bool high_priority); @@ -519,7 +536,7 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...); void end_slave(); /* clean up */ void init_master_info_with_options(MASTER_INFO* mi); void clear_until_condition(RELAY_LOG_INFO* rli); -void clear_slave_error_timestamp(RELAY_LOG_INFO* rli); +void clear_slave_error(RELAY_LOG_INFO* rli); int init_master_info(MASTER_INFO* mi, const char* master_info_fname, const char* slave_info_fname, bool abort_if_no_master_info_file, diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 6854cb24ee9..2f0d8d3aa0d 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -880,10 +880,10 @@ int reset_slave(THD *thd, MASTER_INFO* mi) */ init_master_info_with_options(mi); /* - Reset errors, and master timestamp (the idea is that we forget about the + Reset errors (the idea is that we forget about the old master). */ - clear_slave_error_timestamp(&mi->rli); + clear_slave_error(&mi->rli); clear_until_condition(&mi->rli); // close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0 @@ -1143,8 +1143,8 @@ int change_master(THD* thd, MASTER_INFO* mi) pthread_mutex_lock(&mi->rli.data_lock); mi->rli.abort_pos_wait++; /* for MASTER_POS_WAIT() to abort */ - /* Clear the errors, for a clean start, and master timestamp */ - clear_slave_error_timestamp(&mi->rli); + /* Clear the errors, for a clean start */ + clear_slave_error(&mi->rli); clear_until_condition(&mi->rli); /* If we don't write new coordinates to disk now, then old will remain in |