diff options
-rw-r--r-- | innobase/os/os0file.c | 7 | ||||
-rw-r--r-- | sql/log.cc | 11 | ||||
-rw-r--r-- | sql/log_event.cc | 43 | ||||
-rw-r--r-- | sql/slave.cc | 53 | ||||
-rw-r--r-- | sql/sql_class.h | 2 | ||||
-rw-r--r-- | sql/sql_repl.cc | 14 |
6 files changed, 90 insertions, 40 deletions
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index 7936b06c24d..311937f2145 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -226,13 +226,8 @@ os_file_get_last_error(void) "InnoDB: the directory. It may also be you have created a subdirectory\n" "InnoDB: of the same name as a data file.\n"); } else { - if (strerror((int)err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err)); - } - fprintf(stderr, - "InnoDB: See also section 13.2 at http://www.innodb.com/ibman.html\n" + "InnoDB: See section 13.2 at http://www.innodb.com/ibman.html\n" "InnoDB: about operating system error numbers.\n"); } } diff --git a/sql/log.cc b/sql/log.cc index 6e9fa38c407..ce06092cfb7 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -1526,6 +1526,9 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, SYNOPSIS wait_for_update() thd Thread variable + master_or_slave If 0, the caller is the Binlog_dump thread from master; + if 1, the caller is the SQL thread from the slave. This + influences only thd->proc_info. NOTES One must have a lock on LOCK_log before calling this function. @@ -1538,11 +1541,15 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, */ -void MYSQL_LOG:: wait_for_update(THD* thd) +void MYSQL_LOG:: wait_for_update(THD* thd, bool master_or_slave) { safe_mutex_assert_owner(&LOCK_log); const char* old_msg = thd->enter_cond(&update_cond, &LOCK_log, - "Slave: waiting for binlog update"); + master_or_slave ? + "Has read all relay log; waiting for \ +the I/O slave thread to update it" : + "Has sent all binlog to slave; \ +waiting for binlog to be updated"); pthread_cond_wait(&update_cond, &LOCK_log); pthread_mutex_unlock(&LOCK_log); // See NOTES thd->exit_cond(old_msg); diff --git a/sql/log_event.cc b/sql/log_event.cc index 6b8c1e2db1d..425b3c063d1 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -2066,9 +2066,6 @@ Fatal error running LOAD DATA INFILE on table '%s'. Default database: '%s'", TODO - Remove all active user locks - - If we have an active transaction at this point, the master died - in the middle while writing the transaction to the binary log. - In this case we should stop the slave. */ int Start_log_event::exec_event(struct st_relay_log_info* rli) @@ -2096,8 +2093,10 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli) break; case BINLOG_FORMAT_323_GEQ_57 : /* Can distinguish, based on the value of 'created' */ - if (created) /* this was generated at master startup*/ - close_temporary_tables(thd); + if (!created) + break; + /* otherwise this was generated at master startup*/ + close_temporary_tables(thd); break; default : /* this case is impossible */ @@ -2154,10 +2153,28 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli) We can't rotate the slave as this will cause infinitive rotations in a A -> B -> A setup. + NOTES + As a transaction NEVER spans on 2 or more binlogs: + if we have an active transaction at this point, the master died while + writing the transaction to the binary log, i.e. while flushing the binlog + cache to the binlog. As the write was started, the transaction had been + committed on the master, so we lack of information to replay this + transaction on the slave; all we can do is stop with error. + If we didn't detect it, then positions would start to become garbage (as we + are incrementing rli->relay_log_pos whereas we are in a transaction: the new + rli->relay_log_pos will be + relay_log_pos of the BEGIN + size of the Rotate event = garbage. + + Since MySQL 4.0.14, the master ALWAYS sends a Rotate event when it starts + sending the next binlog, so we are sure to receive a Rotate event just + after the end of the "dead master"'s binlog; so this exec_event() is the + right place to catch the problem. If we would wait until + Start_log_event::exec_event() it would be too late, rli->relay_log_pos would + already be garbage. + RETURN VALUES 0 ok - */ - +*/ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) { @@ -2165,6 +2182,18 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) DBUG_ENTER("Rotate_log_event::exec_event"); pthread_mutex_lock(&rli->data_lock); + + if (rli->inside_transaction) + { + slave_print_error(rli, 0, + "there is an unfinished transaction in the relay log \ +(could find neither COMMIT nor ROLLBACK in the relay log); it could be that \ +the master died while writing the transaction to its binary log. Now the slave \ +is rolling back the transaction."); + pthread_mutex_unlock(&rli->data_lock); + DBUG_RETURN(1); + } + memcpy(log_name, new_log_ident, ident_len+1); rli->master_log_pos = pos; rli->relay_log_pos += get_event_len(); diff --git a/sql/slave.cc b/sql/slave.cc index 32ed228e119..07c9bb7bd8a 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1443,7 +1443,8 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli) pthread_mutex_lock(&rli->log_space_lock); const char* save_proc_info= thd->enter_cond(&rli->log_space_cond, &rli->log_space_lock, - "Waiting for relay log space to free"); + "Waiting for the SQL slave \ +thread to free enough relay log space"); while (rli->log_space_limit < rli->log_space_total && !(slave_killed=io_slave_killed(thd,mi)) && !rli->ignore_log_space_limit) @@ -1925,7 +1926,8 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, DBUG_PRINT("info",("Waiting for master update")); const char* msg = thd->enter_cond(&data_cond, &data_lock, - "Waiting for master update"); + "Waiting for the SQL slave thread to \ +advance position"); /* We are going to pthread_cond_(timed)wait(); if the SQL thread stops it will wake us up. @@ -1988,7 +1990,14 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type) thd->master_access= ~0; thd->priv_user = 0; thd->slave_thread = 1; - thd->options = (((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | OPTION_AUTO_IS_NULL) ; + thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | + OPTION_AUTO_IS_NULL | + /* + It's nonsense to constraint the slave threads with max_join_size; if a + query succeeded on master, we HAVE to execute it. + */ + OPTION_BIG_SELECTS ; + thd->client_capabilities = CLIENT_LOCAL_FILES; thd->real_id=pthread_self(); pthread_mutex_lock(&LOCK_thread_count); @@ -2008,11 +2017,8 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type) VOID(pthread_sigmask(SIG_UNBLOCK,&set,&thd->block_signals)); #endif - if (thd->variables.max_join_size == HA_POS_ERROR) - thd->options |= OPTION_BIG_SELECTS; - if (thd_type == SLAVE_THD_SQL) - thd->proc_info= "Waiting for the next event in slave queue"; + thd->proc_info= "Waiting for the next event in relay log"; else thd->proc_info= "Waiting for master update"; thd->version=refresh_version; @@ -2260,7 +2266,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) } else { - sql_print_error("\ + slave_print_error(rli, 0, "\ Could not parse relay log event entry. The possible reasons are: the master's \ binary log is corrupted (you can check this by running 'mysqlbinlog' on the \ binary log), the slave's relay log is corrupted (you can check this by running \ @@ -2334,7 +2340,7 @@ slave_begin: } - thd->proc_info = "connecting to master"; + thd->proc_info = "Connecting to master"; // we can get killed during safe_connect if (!safe_connect(thd, mysql, mi)) sql_print_error("Slave I/O thread: connected to master '%s@%s:%d',\ @@ -2381,7 +2387,7 @@ dump"); goto err; } - thd->proc_info = "Waiiting to reconnect after a failed dump request"; + thd->proc_info= "Waiting to reconnect after a failed binlog dump request"; mc_end_server(mysql); /* First time retry immediately, assuming that we can recover @@ -2402,7 +2408,7 @@ dump"); goto err; } - thd->proc_info = "Reconnecting after a failed dump request"; + thd->proc_info = "Reconnecting after a failed binlog dump request"; if (!suppress_warnings) sql_print_error("Slave I/O thread: failed dump request, \ reconnecting to try again, log '%s' at postion %s", IO_RPL_LOG_NAME, @@ -2421,7 +2427,13 @@ after reconnect"); while (!io_slave_killed(thd,mi)) { bool suppress_warnings= 0; - thd->proc_info = "Reading master update"; + /* + We say "waiting" because read_event() will wait if there's nothing to + read. But if there's something to read, it will not wait. The important + thing is to not confuse users by saying "reading" whereas we're in fact + receiving nothing. + */ + thd->proc_info = "Waiting for master to send event"; ulong event_len = read_event(mysql, mi, &suppress_warnings); if (io_slave_killed(thd,mi)) { @@ -2448,7 +2460,8 @@ max_allowed_packet", mc_mysql_error(mysql)); goto err; } - thd->proc_info = "Waiting to reconnect after a failed read"; + thd->proc_info = "Waiting to reconnect after a failed master event \ +read"; mc_end_server(mysql); if (retry_count++) { @@ -2464,7 +2477,7 @@ max_allowed_packet", reconnect after a failed read"); goto err; } - thd->proc_info = "Reconnecting after a failed read"; + thd->proc_info = "Reconnecting after a failed master event read"; if (!suppress_warnings) sql_print_error("Slave I/O thread: Failed reading log event, \ reconnecting to retry, log '%s' position %s", IO_RPL_LOG_NAME, @@ -2481,7 +2494,7 @@ reconnect done to recover from failed read"); } // if (event_len == packet_error) retry_count=0; // ok event, reset retry counter - thd->proc_info = "Queueing event from master"; + thd->proc_info = "Queueing master event to the relay log"; if (queue_event(mi,(const char*)mysql->net.read_pos + 1, event_len)) { @@ -2663,7 +2676,7 @@ log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME, while (!sql_slave_killed(thd,rli)) { - thd->proc_info = "Processing master log event"; + thd->proc_info = "Reading event from the relay log"; DBUG_ASSERT(rli->sql_thd == thd); THD_CHECK_SENTRY(thd); if (exec_relay_log_event(thd,rli)) @@ -2695,6 +2708,12 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun /* When master_pos_wait() wakes up it will check this and terminate */ rli->slave_running= 0; + /* + Going out of the transaction. Necessary to mark it, in case the user + restarts replication from a non-transactional statement (with CHANGE + MASTER). + */ + rli->inside_transaction= 0; /* Wake up master_pos_wait() */ pthread_mutex_unlock(&rli->data_lock); DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions")); @@ -3386,7 +3405,7 @@ rli->relay_log_pos=%s rli->pending=%lu", pthread_mutex_unlock(&rli->log_space_lock); pthread_cond_broadcast(&rli->log_space_cond); // Note that wait_for_update unlocks lock_log ! - rli->relay_log.wait_for_update(rli->sql_thd); + rli->relay_log.wait_for_update(rli->sql_thd, 1); // re-acquire data lock since we released it earlier pthread_mutex_lock(&rli->data_lock); continue; diff --git a/sql/sql_class.h b/sql/sql_class.h index 9bf4dc852d7..64a314911ec 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -123,7 +123,7 @@ public: } void set_max_size(ulong max_size_arg); void signal_update() { pthread_cond_broadcast(&update_cond);} - void wait_for_update(THD* thd); + void wait_for_update(THD* thd, bool master_or_slave); void set_need_start_event() { need_start_event = 1; } void init(enum_log_type log_type_arg, enum cache_type io_cache_type_arg, diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 5a42614dff4..10581431c72 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -532,7 +532,7 @@ Increase max_allowed_packet on master"; if (!thd->killed) { /* Note that the following call unlocks lock_log */ - mysql_bin_log.wait_for_update(thd); + mysql_bin_log.wait_for_update(thd, 0); } else pthread_mutex_unlock(log_lock); @@ -547,7 +547,7 @@ Increase max_allowed_packet on master"; if (read_packet) { - thd->proc_info = "sending update to slave"; + thd->proc_info = "Sending binlog event to slave"; if (my_net_write(net, (char*)packet->ptr(), packet->length()) ) { errmsg = "Failed on my_net_write()"; @@ -584,7 +584,7 @@ Increase max_allowed_packet on master"; { bool loop_breaker = 0; // need this to break out of the for loop from switch - thd->proc_info = "switching to next log"; + thd->proc_info = "Finished reading one binlog; switching to next binlog"; switch (mysql_bin_log.find_next_log(&linfo, 1)) { case LOG_INFO_EOF: loop_breaker = (flags & BINLOG_DUMP_NON_BLOCK); @@ -623,14 +623,14 @@ end: (void)my_close(file, MYF(MY_WME)); send_eof(&thd->net); - thd->proc_info = "waiting to finalize termination"; + thd->proc_info = "Waiting to finalize termination"; pthread_mutex_lock(&LOCK_thread_count); thd->current_linfo = 0; pthread_mutex_unlock(&LOCK_thread_count); DBUG_VOID_RETURN; err: - thd->proc_info = "waiting to finalize termination"; + thd->proc_info = "Waiting to finalize termination"; end_io_cache(&log); /* Exclude iteration through thread list @@ -866,7 +866,7 @@ int change_master(THD* thd, MASTER_INFO* mi) DBUG_RETURN(1); } - thd->proc_info = "changing master"; + thd->proc_info = "Changing master"; LEX_MASTER_INFO* lex_mi = &thd->lex.mi; // TODO: see if needs re-write if (init_master_info(mi, master_info_file, relay_log_info_file, 0)) @@ -932,7 +932,7 @@ int change_master(THD* thd, MASTER_INFO* mi) if (need_relay_log_purge) { mi->rli.skip_log_purge= 0; - thd->proc_info="purging old relay logs"; + thd->proc_info="Purging old relay logs"; if (purge_relay_logs(&mi->rli, thd, 0 /* not only reset, but also reinit */, &errmsg)) |