summaryrefslogtreecommitdiff
path: root/sql/slave.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/slave.cc')
-rw-r--r--sql/slave.cc414
1 files changed, 342 insertions, 72 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index 37979576b73..a6c86b08010 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -74,7 +74,6 @@ static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db,
const char* table_name);
static int check_master_version(MYSQL* mysql, MASTER_INFO* mi);
-char* rewrite_db(char* db);
/*
@@ -139,6 +138,8 @@ int init_slave()
{
DBUG_ENTER("init_slave");
+ /* This is called when mysqld starts */
+
/*
TODO: re-write this to interate through the list of files
for multi-master
@@ -150,11 +151,16 @@ int init_slave()
If master_host is specified, create the master_info file if it doesn't
exists.
*/
- if (!active_mi ||
- init_master_info(active_mi,master_info_file,relay_log_info_file,
+ if (!active_mi)
+ {
+ sql_print_error("Failed to allocate memory for the master info structure");
+ goto err;
+ }
+
+ if (init_master_info(active_mi,master_info_file,relay_log_info_file,
!master_host))
{
- sql_print_error("Note: Failed to initialized master info");
+ sql_print_error("Failed to initialize the master info structure");
goto err;
}
@@ -174,7 +180,7 @@ int init_slave()
relay_log_info_file,
SLAVE_IO | SLAVE_SQL))
{
- sql_print_error("Warning: Can't create threads to handle slave");
+ sql_print_error("Failed to create slave threads");
goto err;
}
}
@@ -300,6 +306,7 @@ err:
pthread_cond_broadcast(&rli->data_cond);
if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock);
+
pthread_mutex_unlock(log_lock);
DBUG_RETURN ((*errmsg) ? 1 : 0);
}
@@ -380,7 +387,10 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
rli->group_master_log_pos= 0;
if (!rli->inited)
+ {
+ DBUG_PRINT("info", ("rli->inited == 0"));
DBUG_RETURN(0);
+ }
DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0);
@@ -872,15 +882,38 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
}
+/*
+ Writes an error message to rli->last_slave_error and rli->last_slave_errno
+ (which will be displayed by SHOW SLAVE STATUS), and prints it to stderr.
+
+ SYNOPSIS
+ slave_print_error()
+ rli The relay log info structure that receives the error
+ err_code The error code
+ msg The error message (usually related to the error code, but can
+ contain more information).
+ ... (this is printf-like format, with % symbols in msg)
+
+ RETURN VALUES
+ void
+*/
+
void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
{
va_list args;
va_start(args,msg);
my_vsnprintf(rli->last_slave_error,
sizeof(rli->last_slave_error), msg, args);
- sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
- err_code);
rli->last_slave_errno = err_code;
+ /* If the error string ends with '.', do not add a ','; it would be ugly */
+ if (rli->last_slave_error[0] &&
+ (*(strend(rli->last_slave_error)-1) == '.'))
+ sql_print_error("Slave: %s Error_code: %d", rli->last_slave_error,
+ err_code);
+ else
+ sql_print_error("Slave: %s, Error_code: %d", rli->last_slave_error,
+ err_code);
+
}
/*
@@ -905,7 +938,7 @@ bool net_request_file(NET* net, const char* fname)
}
-char* rewrite_db(char* db)
+const char *rewrite_db(const char* db)
{
if (replicate_rewrite_db.is_empty() || !db)
return db;
@@ -920,6 +953,17 @@ char* rewrite_db(char* db)
return db;
}
+/*
+ From other comments and tests in code, it looks like
+ sometimes Query_log_event and Load_log_event can have db == 0
+ (see rewrite_db() above for example)
+ (cases where this happens are unclear; it may be when the master is 3.23).
+*/
+
+const char *print_slave_db_safe(const char* db)
+{
+ return (db ? rewrite_db(db) : "");
+}
/*
Checks whether a db matches some do_db and ignore_db rules
@@ -1027,13 +1071,19 @@ static int check_master_version(MYSQL* mysql, MASTER_INFO* mi)
{
const char* errmsg= 0;
+ /*
+ Note: the following switch looks only at the first character of the version string, so it will misbehave when we have MySQL branch 30 ;)
+ */
switch (*mysql->server_version) {
case '3':
- mi->old_format = 1;
+ mi->old_format =
+ (strncmp(mysql->server_version, "3.23.57", 7) < 0) /* < .57 */ ?
+ BINLOG_FORMAT_323_LESS_57 :
+ BINLOG_FORMAT_323_GEQ_57 ;
break;
case '4':
case '5':
- mi->old_format = 0;
+ mi->old_format = BINLOG_FORMAT_CURRENT;
break;
default:
errmsg = "Master reported unrecognized MySQL version";
@@ -1246,11 +1296,55 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
strmov(strcend(tmp,'.'),"-relay-bin");
opt_relay_logname=my_strdup(tmp,MYF(MY_WME));
}
+
+ /*
+ The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. It is
+ notable that the last kilobytes of it (8 kB for example) may live in
+ memory, not on disk (depending on what the thread using it does). While
+ this is efficient, it has a side-effect one must know:
+ The size of the relay log on disk (displayed by 'ls -l' on Unix) can be a
+ few kilobytes less than one would expect by doing SHOW SLAVE STATUS; this
+ happens when only the IO thread is started (not the SQL thread). The
+ "missing" kilobytes are in memory, are preserved during 'STOP SLAVE; START
+ SLAVE IO_THREAD', and are flushed to disk when the slave's mysqld stops. So
+ this does not cause any bug. Example of how disk size grows by leaps:
+
+ Read_Master_Log_Pos: 7811 -rw-rw---- 1 guilhem qq 4 Jun 5 16:19 gbichot2-relay-bin.002
+ ...later...
+ Read_Master_Log_Pos: 9744 -rw-rw---- 1 guilhem qq 8192 Jun 5 16:27 gbichot2-relay-bin.002
+
+ See how 4 is less than 7811 and 8192 is less than 9744.
+
+ WARNING: this is risky because the slave can stay like this for a long
+ time; then if it has a power failure, master.info says the I/O thread has
+ read until 9744 while the relay-log contains only until 8192 (the
+ in-memory part from 8192 to 9744 has been lost), so the SQL slave thread
+ will miss some events, silently breaking replication.
+ Ideally we would like to flush master.info only when we know that the relay
+ log has no in-memory tail.
+ Note that the above problem may arise only when only the IO thread is
+ started, which is unlikely.
+ */
+
+ /*
+ For the maximum log size, we choose max_relay_log_size if it is
+ non-zero, max_binlog_size otherwise. If later the user does SET
+ GLOBAL on one of these variables, fix_max_binlog_size and
+ fix_max_relay_log_size will reconsider the choice (for example
+ if the user changes max_relay_log_size to zero, we have to
+ switch to using max_binlog_size for the relay log) and update
+ rli->relay_log.max_size (and mysql_bin_log.max_size).
+ */
+
if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname,
"-relay-bin", opt_relaylog_index_name,
LOG_BIN, 1 /* read_append cache */,
- 1 /* no auto events */))
+ 1 /* no auto events */,
+ max_relay_log_size ? max_relay_log_size : max_binlog_size))
+ {
+ sql_print_error("Failed in open_log() called from init_relay_log_info()");
DBUG_RETURN(1);
+ }
/* if file does not exist */
if (access(fname,F_OK))
@@ -1261,10 +1355,18 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
*/
if (info_fd >= 0)
my_close(info_fd, MYF(MY_WME));
- if ((info_fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ||
- init_io_cache(&rli->info_file, info_fd, IO_SIZE*2, READ_CACHE, 0L,0,
- MYF(MY_WME)))
+ if ((info_fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0)
{
+ sql_print_error("Failed to create a new relay log info file (\
+file '%s', errno %d)", fname, my_errno);
+ msg= current_thd->net.last_error;
+ goto err;
+ }
+ if (init_io_cache(&rli->info_file, info_fd, IO_SIZE*2, READ_CACHE, 0L,0,
+ MYF(MY_WME)))
+ {
+ sql_print_error("Failed to create a cache on relay log info file '%s'",
+ fname);
msg= current_thd->net.last_error;
goto err;
}
@@ -1272,7 +1374,10 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
/* Init relay log with first entry in the relay index file */
if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */,
&msg))
+ {
+ sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4");
goto err;
+ }
rli->group_master_log_name[0]= 0;
rli->group_master_log_pos= 0;
rli->info_fd= info_fd;
@@ -1281,18 +1386,34 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
{
if (info_fd >= 0)
reinit_io_cache(&rli->info_file, READ_CACHE, 0L,0,0);
- else if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ||
- init_io_cache(&rli->info_file, info_fd,
- IO_SIZE*2, READ_CACHE, 0L, 0, MYF(MY_WME)))
+ else
{
- if (info_fd >= 0)
- my_close(info_fd, MYF(0));
- rli->info_fd= -1;
- rli->relay_log.close(1);
- pthread_mutex_unlock(&rli->data_lock);
- DBUG_RETURN(1);
+ int error=0;
+ if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0)
+ {
+ sql_print_error("\
+Failed to open the existing relay log info file '%s' (errno %d)",
+ fname, my_errno);
+ error= 1;
+ }
+ else if (init_io_cache(&rli->info_file, info_fd,
+ IO_SIZE*2, READ_CACHE, 0L, 0, MYF(MY_WME)))
+ {
+ sql_print_error("Failed to create a cache on relay log info file '%s'",
+ fname);
+ error= 1;
+ }
+ if (error)
+ {
+ if (info_fd >= 0)
+ my_close(info_fd, MYF(0));
+ rli->info_fd= -1;
+ rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
+ pthread_mutex_unlock(&rli->data_lock);
+ DBUG_RETURN(1);
+ }
}
-
+
rli->info_fd = info_fd;
int relay_log_pos, master_log_pos;
if (init_strvar_from_file(rli->group_relay_log_name,
@@ -1318,7 +1439,12 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
rli->group_relay_log_pos,
0 /* no data lock*/,
&msg))
+ {
+ char llbuf[22];
+ sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s)",
+ rli->relay_log_name, llstr(rli->relay_log_pos, llbuf));
goto err;
+ }
}
DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
@@ -1327,7 +1453,8 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
before flush_relay_log_info
*/
reinit_io_cache(&rli->info_file, WRITE_CACHE,0L,0,1);
- error= flush_relay_log_info(rli);
+ if ((error= flush_relay_log_info(rli)))
+ sql_print_error("Failed to flush relay log info file");
if (count_relay_log_space(rli))
{
msg="Error counting relay log space";
@@ -1343,7 +1470,7 @@ err:
if (info_fd >= 0)
my_close(info_fd, MYF(0));
rli->info_fd= -1;
- rli->relay_log.close(1);
+ rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
pthread_mutex_unlock(&rli->data_lock);
DBUG_RETURN(1);
}
@@ -1372,7 +1499,7 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli)
{
bool slave_killed=0;
MASTER_INFO* mi = rli->mi;
- const char* save_proc_info;
+ const char *save_proc_info;
THD* thd = mi->io_thd;
DBUG_ENTER("wait_for_relay_log_space");
@@ -1380,12 +1507,14 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli)
pthread_mutex_lock(&rli->log_space_lock);
save_proc_info = thd->proc_info;
thd->proc_info = "Waiting for relay log space to free";
-
+ save_proc_info= thd->enter_cond(&rli->log_space_cond,
+ &rli->log_space_lock,
+ "Waiting for relay log space to free");
while (rli->log_space_limit < rli->log_space_total &&
!(slave_killed=io_slave_killed(thd,mi)) &&
!rli->ignore_log_space_limit)
pthread_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
- thd->proc_info = save_proc_info;
+ thd->exit_cond(save_proc_info);
pthread_mutex_unlock(&rli->log_space_lock);
DBUG_RETURN(slave_killed);
}
@@ -1406,9 +1535,36 @@ static int count_relay_log_space(RELAY_LOG_INFO* rli)
if (add_relay_log(rli,&linfo))
DBUG_RETURN(1);
} while (!rli->relay_log.find_next_log(&linfo, 1));
+ /*
+ As we have counted everything, including what may have been written in a
+ preceding write, we must reset bytes_written, or we may count some space
+ twice.
+ */
+ rli->relay_log.reset_bytes_written();
DBUG_RETURN(0);
}
+void init_master_info_with_options(MASTER_INFO* mi)
+{
+ mi->master_log_name[0] = 0;
+ mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number
+
+ if (master_host)
+ strmake(mi->host, master_host, sizeof(mi->host) - 1);
+ if (master_user)
+ strmake(mi->user, master_user, sizeof(mi->user) - 1);
+ if (master_password)
+ strmake(mi->password, master_password, HASH_PASSWORD_LENGTH);
+ mi->port = master_port;
+ mi->connect_retry = master_connect_retry;
+}
+
+void clear_last_slave_error(RELAY_LOG_INFO* rli)
+{
+ //Clear the errors displayed by SHOW SLAVE STATUS
+ rli->last_slave_error[0]=0;
+ rli->last_slave_errno=0;
+}
int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
const char* slave_info_fname,
@@ -1431,6 +1587,8 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
pthread_mutex_lock(&mi->data_lock);
fd = mi->fd;
+
+ /* does master.info exist ? */
if (access(fname,F_OK))
{
@@ -1445,32 +1603,44 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
*/
if (fd >= 0)
my_close(fd, MYF(MY_WME));
- if ((fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ||
- init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,0,
+ if ((fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 )
+ {
+ sql_print_error("Failed to create a new master info file (\
+file '%s', errno %d)", fname, my_errno);
+ goto err;
+ }
+ if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,0,
MYF(MY_WME)))
+ {
+ sql_print_error("Failed to create a cache on master info file (\
+file '%s')", fname);
goto err;
+ }
- mi->master_log_name[0] = 0;
- mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number
mi->fd = fd;
-
- if (master_host)
- strmake(mi->host, master_host, sizeof(mi->host) - 1);
- if (master_user)
- strmake(mi->user, master_user, sizeof(mi->user) - 1);
- if (master_password)
- strmake(mi->password, master_password, HASH_PASSWORD_LENGTH);
- mi->port = master_port;
- mi->connect_retry = master_connect_retry;
+ init_master_info_with_options(mi);
+
}
else // file exists
{
if (fd >= 0)
reinit_io_cache(&mi->file, READ_CACHE, 0L,0,0);
- else if ((fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ||
- init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,
- 0, MYF(MY_WME)))
- goto err;
+ else
+ {
+ if ((fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 )
+ {
+ sql_print_error("Failed to open the existing master info file (\
+file '%s', errno %d)", fname, my_errno);
+ goto err;
+ }
+ if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,
+ 0, MYF(MY_WME)))
+ {
+ sql_print_error("Failed to create a cache on master info file (\
+file '%s')", fname);
+ goto err;
+ }
+ }
mi->fd = fd;
int port, connect_retry, master_log_pos;
@@ -1511,7 +1681,8 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
mi->inited = 1;
// now change cache READ -> WRITE - must do this before flush_master_info
reinit_io_cache(&mi->file, WRITE_CACHE,0L,0,1);
- error=test(flush_master_info(mi));
+ if ((error=test(flush_master_info(mi))))
+ sql_print_error("Failed to flush master info file");
pthread_mutex_unlock(&mi->data_lock);
DBUG_RETURN(error);
@@ -1653,6 +1824,7 @@ int show_master_info(THD* thd, MASTER_INFO* mi)
if (mi->host[0])
{
+ DBUG_PRINT("info",("host is set: '%s'", mi->host));
String *packet= &thd->packet;
protocol->prepare_for_resend();
@@ -1732,8 +1904,8 @@ st_relay_log_info::st_relay_log_info()
group_relay_log_name[0]= event_relay_log_name[0]= group_master_log_name[0]= 0;
last_slave_error[0]=0;
- bzero(&info_file,sizeof(info_file));
- bzero(&cache_buf, sizeof(cache_buf));
+ bzero((char*) &info_file, sizeof(info_file));
+ bzero((char*) &cache_buf, sizeof(cache_buf));
pthread_mutex_init(&run_lock, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&data_lock, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&log_space_lock, MY_MUTEX_INIT_FAST);
@@ -1741,6 +1913,7 @@ st_relay_log_info::st_relay_log_info()
pthread_cond_init(&start_cond, NULL);
pthread_cond_init(&stop_cond, NULL);
pthread_cond_init(&log_space_cond, NULL);
+ relay_log.init_pthread_objects();
}
@@ -1799,11 +1972,17 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
pthread_mutex_lock(&data_lock);
/*
- This function will abort when it notices that
- some CHANGE MASTER or RESET MASTER has changed
- the master info. To catch this, these commands
- modify abort_pos_wait ; we just monitor abort_pos_wait
- and see if it has changed.
+ This function will abort when it notices that some CHANGE MASTER or
+ RESET MASTER has changed the master info.
+ To catch this, these commands modify abort_pos_wait; we just monitor
+ abort_pos_wait and see if it has changed.
+ Why do we have this mechanism instead of simply monitoring slave_running
+ in the loop (we do this too), as CHANGE MASTER/RESET SLAVE require that
+ the SQL thread be stopped?
+ This is because if someone does:
+ STOP SLAVE;CHANGE MASTER/RESET SLAVE; START SLAVE;
+ the change may happen very quickly and we may not notice that
+ slave_running briefly switches between 1/0/1.
*/
init_abort_pos_wait= abort_pos_wait;
@@ -1824,7 +2003,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
error= -2; //means improper arguments
goto err;
}
- //p points to '.'
+ /* p points to '.' */
log_name_extension= strtoul(++p, &p_end, 10);
/*
p_end points to the first invalid character.
@@ -1837,10 +2016,10 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
goto err;
}
- //"compare and wait" main loop
+ /* The "compare and wait" main loop */
while (!thd->killed &&
init_abort_pos_wait == abort_pos_wait &&
- mi->slave_running)
+ slave_running)
{
bool pos_reached;
int cmp_result= 0;
@@ -1878,6 +2057,10 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
DBUG_PRINT("info",("Waiting for master update"));
const char* msg = thd->enter_cond(&data_cond, &data_lock,
"Waiting for master update");
+ /*
+ We are going to pthread_cond_(timed)wait(); if the SQL thread stops it
+ will wake us up.
+ */
if (timeout > 0)
{
/*
@@ -1895,6 +2078,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
}
else
pthread_cond_wait(&data_cond, &data_lock);
+ DBUG_PRINT("info",("Got signal of master update"));
thd->exit_cond(msg);
if (error == ETIMEDOUT || error == ETIME)
{
@@ -1903,6 +2087,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
}
error=0;
event_count++;
+ DBUG_PRINT("info",("Testing if killed or SQL thread not running"));
}
err:
@@ -1911,11 +2096,11 @@ err:
improper_arguments: %d timed_out: %d",
(int) thd->killed,
(int) (init_abort_pos_wait != abort_pos_wait),
- (int) mi->slave_running,
+ (int) slave_running,
(int) (error == -2),
(int) (error == -1)));
if (thd->killed || init_abort_pos_wait != abort_pos_wait ||
- !mi->slave_running)
+ !slave_running)
{
error= -2;
}
@@ -2137,14 +2322,13 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
case ER_NET_ERROR_ON_WRITE:
case ER_SERVER_SHUTDOWN:
case ER_NEW_ABORTING_CONNECTION:
- my_snprintf(rli->last_slave_error, sizeof(rli->last_slave_error),
- "Slave: query '%s' partially completed on the master \
+ slave_print_error(rli,expected_error,
+ "query '%s' partially completed on the master \
and was aborted. There is a chance that your master is inconsistent at this \
point. If you are sure that your master is ok, run this query manually on the\
slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\
- SLAVE START;", thd->query);
- rli->last_slave_errno = expected_error;
- sql_print_error("%s",rli->last_slave_error);
+ SLAVE START; .", thd->query);
+ thd->query_error= 1;
return 1;
default:
return 0;
@@ -2158,8 +2342,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
DBUG_ASSERT(rli->sql_thd==thd);
if (sql_slave_killed(thd,rli))
{
- /* do not forget to free ev ! */
- if (ev) delete ev;
+ delete ev;
return 1;
}
if (ev)
@@ -2453,6 +2636,17 @@ reconnect done to recover from failed read");
for no reason, but this function will do a clean read, notice the clean
value and exit immediately.
*/
+#ifndef DBUG_OFF
+ {
+ char llbuf1[22], llbuf2[22];
+ DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \
+ignore_log_space_limit=%d",
+ llstr(mi->rli.log_space_limit,llbuf1),
+ llstr(mi->rli.log_space_total,llbuf2),
+ (int) mi->rli.ignore_log_space_limit));
+ }
+#endif
+
if (mi->rli.log_space_limit && mi->rli.log_space_limit <
mi->rli.log_space_total &&
!mi->rli.ignore_log_space_limit)
@@ -2565,6 +2759,13 @@ slave_begin:
pthread_mutex_unlock(&rli->run_lock);
pthread_cond_broadcast(&rli->start_cond);
+ /*
+ Reset errors for a clean start (otherwise, if the master is idle, the SQL
+ thread may execute no Query_log_event, so the error will remain even
+ though there's no problem anymore).
+ */
+ clear_last_slave_error(rli);
+
//tell the I/O thread to take relay_log_space_limit into account from now on
pthread_mutex_lock(&rli->log_space_lock);
rli->ignore_log_space_limit= 0;
@@ -2624,8 +2825,16 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
VOID(pthread_mutex_unlock(&LOCK_thread_count));
thd->proc_info = "Waiting for slave mutex on exit";
pthread_mutex_lock(&rli->run_lock);
+ /* We need data_lock, at least to wake up any waiting master_pos_wait() */
+ pthread_mutex_lock(&rli->data_lock);
DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
- rli->slave_running = 0;
+ /* When master_pos_wait() wakes up it will check this and terminate */
+ rli->slave_running= 0;
+ /* Wake up master_pos_wait() */
+ pthread_mutex_unlock(&rli->data_lock);
+ DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
+ pthread_cond_broadcast(&rli->data_cond);
+ rli->ignore_log_space_limit= 0; /* don't need any lock */
rli->save_temporary_tables = thd->temporary_tables;
/*
@@ -3010,7 +3219,7 @@ void end_relay_log_info(RELAY_LOG_INFO* rli)
rli->cur_log_fd = -1;
}
rli->inited = 0;
- rli->relay_log.close(1);
+ rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
DBUG_VOID_RETURN;
}
@@ -3273,8 +3482,22 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
hot_log=0; // Using old binary log
}
}
- DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
- DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos);
+#ifndef DBUG_OFF
+ {
+ char llbuf1[22], llbuf2[22];
+ DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
+ /*
+ The next assertion sometimes (very rarely) fails; let's try to track
+ it down
+ */
+ DBUG_PRINT("info", ("\
+Before assert, my_b_tell(cur_log)=%s rli->relay_log_pos=%s rli->pending=%lu",
+ llstr(my_b_tell(cur_log),llbuf1),
+ llstr(rli->relay_log_pos,llbuf2),
+ rli->pending));
+ DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos);
+ }
+#endif
/*
Relay log is always in new format - if the master is 3.23, the
I/O thread will convert the format for us
@@ -3334,8 +3557,8 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
log), and also when the SQL thread starts. We should also reset
ignore_log_space_limit to 0 when the user does RESET SLAVE, but in
fact, no need as RESET SLAVE requires that the slave
- be stopped, and when the SQL thread is later restarted
- ignore_log_space_limit will be reset to 0.
+ be stopped, and the SQL thread sets ignore_log_space_limit to 0 when
+ it stops.
*/
pthread_mutex_lock(&rli->log_space_lock);
// prevent the I/O thread from blocking next times
@@ -3466,6 +3689,53 @@ err:
DBUG_RETURN(0);
}
+/*
+ Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
+ because of size is simpler because when we do it we already have all relevant
+ locks; here we don't, so this function is mainly taking locks).
+ Returns nothing as we cannot catch any error (MYSQL_LOG::new_file() is void).
+*/
+
+void rotate_relay_log(MASTER_INFO* mi)
+{
+ DBUG_ENTER("rotate_relay_log");
+ RELAY_LOG_INFO* rli= &mi->rli;
+
+ lock_slave_threads(mi);
+ pthread_mutex_lock(&rli->data_lock);
+ /*
+ We need to test inited because otherwise, new_file() will attempt to lock
+ LOCK_log, which may not be inited (if we're not a slave).
+ */
+ if (!rli->inited)
+ {
+ DBUG_PRINT("info", ("rli->inited == 0"));
+ goto end;
+ }
+
+ /* If the relay log is closed, new_file() will do nothing. */
+ rli->relay_log.new_file(1);
+
+ /*
+ We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
+ be counted, so imagine a succession of FLUSH LOGS and assume the slave
+ threads are started:
+ relay_log_space decreases by the size of the deleted relay log, but does
+ not increase, so flush-after-flush we may become negative, which is wrong.
+ Even if this will be corrected as soon as a query is replicated on the
+ slave (because the I/O thread will then call harvest_bytes_written() which
+ will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
+ output in SHOW SLAVE STATUS meanwhile. So we harvest now.
+ If the log is closed, then this will just harvest the last writes, probably
+ 0 as they probably have been harvested.
+ */
+ rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+end:
+ pthread_mutex_unlock(&rli->data_lock);
+ unlock_slave_threads(mi);
+ DBUG_VOID_RETURN;
+}
+
#ifdef __GNUC__
template class I_List_iterator<i_string>;