summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--innobase/os/os0file.c7
-rw-r--r--sql/log.cc11
-rw-r--r--sql/log_event.cc43
-rw-r--r--sql/slave.cc53
-rw-r--r--sql/sql_class.h2
-rw-r--r--sql/sql_repl.cc14
6 files changed, 90 insertions, 40 deletions
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index 7936b06c24d..311937f2145 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -226,13 +226,8 @@ os_file_get_last_error(void)
"InnoDB: the directory. It may also be you have created a subdirectory\n"
"InnoDB: of the same name as a data file.\n");
} else {
- if (strerror((int)err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
- }
-
fprintf(stderr,
- "InnoDB: See also section 13.2 at http://www.innodb.com/ibman.html\n"
+ "InnoDB: See section 13.2 at http://www.innodb.com/ibman.html\n"
"InnoDB: about operating system error numbers.\n");
}
}
diff --git a/sql/log.cc b/sql/log.cc
index 6e9fa38c407..ce06092cfb7 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1526,6 +1526,9 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
SYNOPSIS
wait_for_update()
thd Thread variable
+ master_or_slave If 0, the caller is the Binlog_dump thread from master;
+ if 1, the caller is the SQL thread from the slave. This
+ influences only thd->proc_info.
NOTES
One must have a lock on LOCK_log before calling this function.
@@ -1538,11 +1541,15 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
*/
-void MYSQL_LOG:: wait_for_update(THD* thd)
+void MYSQL_LOG:: wait_for_update(THD* thd, bool master_or_slave)
{
safe_mutex_assert_owner(&LOCK_log);
const char* old_msg = thd->enter_cond(&update_cond, &LOCK_log,
- "Slave: waiting for binlog update");
+ master_or_slave ?
+ "Has read all relay log; waiting for \
+the I/O slave thread to update it" :
+ "Has sent all binlog to slave; \
+waiting for binlog to be updated");
pthread_cond_wait(&update_cond, &LOCK_log);
pthread_mutex_unlock(&LOCK_log); // See NOTES
thd->exit_cond(old_msg);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 6b8c1e2db1d..425b3c063d1 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2066,9 +2066,6 @@ Fatal error running LOAD DATA INFILE on table '%s'. Default database: '%s'",
TODO
- Remove all active user locks
- - If we have an active transaction at this point, the master died
- in the middle while writing the transaction to the binary log.
- In this case we should stop the slave.
*/
int Start_log_event::exec_event(struct st_relay_log_info* rli)
@@ -2096,8 +2093,10 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli)
break;
case BINLOG_FORMAT_323_GEQ_57 :
/* Can distinguish, based on the value of 'created' */
- if (created) /* this was generated at master startup*/
- close_temporary_tables(thd);
+ if (!created)
+ break;
+ /* otherwise this was generated at master startup*/
+ close_temporary_tables(thd);
break;
default :
/* this case is impossible */
@@ -2154,10 +2153,28 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli)
We can't rotate the slave as this will cause infinitive rotations
in a A -> B -> A setup.
+ NOTES
+ As a transaction NEVER spans 2 or more binlogs:
+ if we have an active transaction at this point, the master died while
+ writing the transaction to the binary log, i.e. while flushing the binlog
+ cache to the binlog. As the write was started, the transaction had been
+ committed on the master, so we lack the information to replay this
+ transaction on the slave; all we can do is stop with an error.
+ If we didn't detect it, then positions would start to become garbage (as we
+ are incrementing rli->relay_log_pos whereas we are in a transaction, the new
+ rli->relay_log_pos would be
+ relay_log_pos of the BEGIN + size of the Rotate event = garbage).
+
+ Since MySQL 4.0.14, the master ALWAYS sends a Rotate event when it starts
+ sending the next binlog, so we are sure to receive a Rotate event just
+ after the end of the "dead master"'s binlog; so this exec_event() is the
+ right place to catch the problem. If we waited until
+ Start_log_event::exec_event() it would be too late, rli->relay_log_pos would
+ already be garbage.
+
RETURN VALUES
0 ok
- */
-
+*/
int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
{
@@ -2165,6 +2182,18 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
DBUG_ENTER("Rotate_log_event::exec_event");
pthread_mutex_lock(&rli->data_lock);
+
+ if (rli->inside_transaction)
+ {
+ slave_print_error(rli, 0,
+ "there is an unfinished transaction in the relay log \
+(could find neither COMMIT nor ROLLBACK in the relay log); it could be that \
+the master died while writing the transaction to its binary log. Now the slave \
+is rolling back the transaction.");
+ pthread_mutex_unlock(&rli->data_lock);
+ DBUG_RETURN(1);
+ }
+
memcpy(log_name, new_log_ident, ident_len+1);
rli->master_log_pos = pos;
rli->relay_log_pos += get_event_len();
diff --git a/sql/slave.cc b/sql/slave.cc
index 32ed228e119..07c9bb7bd8a 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1443,7 +1443,8 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli)
pthread_mutex_lock(&rli->log_space_lock);
const char* save_proc_info= thd->enter_cond(&rli->log_space_cond,
&rli->log_space_lock,
- "Waiting for relay log space to free");
+ "Waiting for the SQL slave \
+thread to free enough relay log space");
while (rli->log_space_limit < rli->log_space_total &&
!(slave_killed=io_slave_killed(thd,mi)) &&
!rli->ignore_log_space_limit)
@@ -1925,7 +1926,8 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
DBUG_PRINT("info",("Waiting for master update"));
const char* msg = thd->enter_cond(&data_cond, &data_lock,
- "Waiting for master update");
+ "Waiting for the SQL slave thread to \
+advance position");
/*
We are going to pthread_cond_(timed)wait(); if the SQL thread stops it
will wake us up.
@@ -1988,7 +1990,14 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
thd->master_access= ~0;
thd->priv_user = 0;
thd->slave_thread = 1;
- thd->options = (((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | OPTION_AUTO_IS_NULL) ;
+ thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) |
+ OPTION_AUTO_IS_NULL |
+ /*
+ It's nonsense to constrain the slave threads with max_join_size; if a
+ query succeeded on master, we HAVE to execute it.
+ */
+ OPTION_BIG_SELECTS ;
+
thd->client_capabilities = CLIENT_LOCAL_FILES;
thd->real_id=pthread_self();
pthread_mutex_lock(&LOCK_thread_count);
@@ -2008,11 +2017,8 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
VOID(pthread_sigmask(SIG_UNBLOCK,&set,&thd->block_signals));
#endif
- if (thd->variables.max_join_size == HA_POS_ERROR)
- thd->options |= OPTION_BIG_SELECTS;
-
if (thd_type == SLAVE_THD_SQL)
- thd->proc_info= "Waiting for the next event in slave queue";
+ thd->proc_info= "Waiting for the next event in relay log";
else
thd->proc_info= "Waiting for master update";
thd->version=refresh_version;
@@ -2260,7 +2266,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
}
else
{
- sql_print_error("\
+ slave_print_error(rli, 0, "\
Could not parse relay log event entry. The possible reasons are: the master's \
binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
binary log), the slave's relay log is corrupted (you can check this by running \
@@ -2334,7 +2340,7 @@ slave_begin:
}
- thd->proc_info = "connecting to master";
+ thd->proc_info = "Connecting to master";
// we can get killed during safe_connect
if (!safe_connect(thd, mysql, mi))
sql_print_error("Slave I/O thread: connected to master '%s@%s:%d',\
@@ -2381,7 +2387,7 @@ dump");
goto err;
}
- thd->proc_info = "Waiiting to reconnect after a failed dump request";
+ thd->proc_info= "Waiting to reconnect after a failed binlog dump request";
mc_end_server(mysql);
/*
First time retry immediately, assuming that we can recover
@@ -2402,7 +2408,7 @@ dump");
goto err;
}
- thd->proc_info = "Reconnecting after a failed dump request";
+ thd->proc_info = "Reconnecting after a failed binlog dump request";
if (!suppress_warnings)
sql_print_error("Slave I/O thread: failed dump request, \
reconnecting to try again, log '%s' at postion %s", IO_RPL_LOG_NAME,
@@ -2421,7 +2427,13 @@ after reconnect");
while (!io_slave_killed(thd,mi))
{
bool suppress_warnings= 0;
- thd->proc_info = "Reading master update";
+ /*
+ We say "waiting" because read_event() will wait if there's nothing to
+ read. But if there's something to read, it will not wait. The important
+ thing is to not confuse users by saying "reading" whereas we're in fact
+ receiving nothing.
+ */
+ thd->proc_info = "Waiting for master to send event";
ulong event_len = read_event(mysql, mi, &suppress_warnings);
if (io_slave_killed(thd,mi))
{
@@ -2448,7 +2460,8 @@ max_allowed_packet",
mc_mysql_error(mysql));
goto err;
}
- thd->proc_info = "Waiting to reconnect after a failed read";
+ thd->proc_info = "Waiting to reconnect after a failed master event \
+read";
mc_end_server(mysql);
if (retry_count++)
{
@@ -2464,7 +2477,7 @@ max_allowed_packet",
reconnect after a failed read");
goto err;
}
- thd->proc_info = "Reconnecting after a failed read";
+ thd->proc_info = "Reconnecting after a failed master event read";
if (!suppress_warnings)
sql_print_error("Slave I/O thread: Failed reading log event, \
reconnecting to retry, log '%s' position %s", IO_RPL_LOG_NAME,
@@ -2481,7 +2494,7 @@ reconnect done to recover from failed read");
} // if (event_len == packet_error)
retry_count=0; // ok event, reset retry counter
- thd->proc_info = "Queueing event from master";
+ thd->proc_info = "Queueing master event to the relay log";
if (queue_event(mi,(const char*)mysql->net.read_pos + 1,
event_len))
{
@@ -2663,7 +2676,7 @@ log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,
while (!sql_slave_killed(thd,rli))
{
- thd->proc_info = "Processing master log event";
+ thd->proc_info = "Reading event from the relay log";
DBUG_ASSERT(rli->sql_thd == thd);
THD_CHECK_SENTRY(thd);
if (exec_relay_log_event(thd,rli))
@@ -2695,6 +2708,12 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
/* When master_pos_wait() wakes up it will check this and terminate */
rli->slave_running= 0;
+ /*
+ Going out of the transaction. Necessary to mark it, in case the user
+ restarts replication from a non-transactional statement (with CHANGE
+ MASTER).
+ */
+ rli->inside_transaction= 0;
/* Wake up master_pos_wait() */
pthread_mutex_unlock(&rli->data_lock);
DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
@@ -3386,7 +3405,7 @@ rli->relay_log_pos=%s rli->pending=%lu",
pthread_mutex_unlock(&rli->log_space_lock);
pthread_cond_broadcast(&rli->log_space_cond);
// Note that wait_for_update unlocks lock_log !
- rli->relay_log.wait_for_update(rli->sql_thd);
+ rli->relay_log.wait_for_update(rli->sql_thd, 1);
// re-acquire data lock since we released it earlier
pthread_mutex_lock(&rli->data_lock);
continue;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 9bf4dc852d7..64a314911ec 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -123,7 +123,7 @@ public:
}
void set_max_size(ulong max_size_arg);
void signal_update() { pthread_cond_broadcast(&update_cond);}
- void wait_for_update(THD* thd);
+ void wait_for_update(THD* thd, bool master_or_slave);
void set_need_start_event() { need_start_event = 1; }
void init(enum_log_type log_type_arg,
enum cache_type io_cache_type_arg,
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 5a42614dff4..10581431c72 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -532,7 +532,7 @@ Increase max_allowed_packet on master";
if (!thd->killed)
{
/* Note that the following call unlocks lock_log */
- mysql_bin_log.wait_for_update(thd);
+ mysql_bin_log.wait_for_update(thd, 0);
}
else
pthread_mutex_unlock(log_lock);
@@ -547,7 +547,7 @@ Increase max_allowed_packet on master";
if (read_packet)
{
- thd->proc_info = "sending update to slave";
+ thd->proc_info = "Sending binlog event to slave";
if (my_net_write(net, (char*)packet->ptr(), packet->length()) )
{
errmsg = "Failed on my_net_write()";
@@ -584,7 +584,7 @@ Increase max_allowed_packet on master";
{
bool loop_breaker = 0;
// need this to break out of the for loop from switch
- thd->proc_info = "switching to next log";
+ thd->proc_info = "Finished reading one binlog; switching to next binlog";
switch (mysql_bin_log.find_next_log(&linfo, 1)) {
case LOG_INFO_EOF:
loop_breaker = (flags & BINLOG_DUMP_NON_BLOCK);
@@ -623,14 +623,14 @@ end:
(void)my_close(file, MYF(MY_WME));
send_eof(&thd->net);
- thd->proc_info = "waiting to finalize termination";
+ thd->proc_info = "Waiting to finalize termination";
pthread_mutex_lock(&LOCK_thread_count);
thd->current_linfo = 0;
pthread_mutex_unlock(&LOCK_thread_count);
DBUG_VOID_RETURN;
err:
- thd->proc_info = "waiting to finalize termination";
+ thd->proc_info = "Waiting to finalize termination";
end_io_cache(&log);
/*
Exclude iteration through thread list
@@ -866,7 +866,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
DBUG_RETURN(1);
}
- thd->proc_info = "changing master";
+ thd->proc_info = "Changing master";
LEX_MASTER_INFO* lex_mi = &thd->lex.mi;
// TODO: see if needs re-write
if (init_master_info(mi, master_info_file, relay_log_info_file, 0))
@@ -932,7 +932,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
if (need_relay_log_purge)
{
mi->rli.skip_log_purge= 0;
- thd->proc_info="purging old relay logs";
+ thd->proc_info="Purging old relay logs";
if (purge_relay_logs(&mi->rli, thd,
0 /* not only reset, but also reinit */,
&errmsg))