6 files changed, 90 insertions, 40 deletions
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index 7936b06c24d..311937f2145 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -226,13 +226,8 @@ os_file_get_last_error(void)
   "InnoDB: the directory. It may also be you have created a subdirectory\n"
   "InnoDB: of the same name as a data file.\n"); 
 		} else {
-			 if (strerror((int)err) != NULL) {
-				fprintf(stderr,
-  "InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
-			 }
-
 			 fprintf(stderr,
-  "InnoDB: See also section 13.2 at http://www.innodb.com/ibman.html\n"
+  "InnoDB: See section 13.2 at http://www.innodb.com/ibman.html\n"
   "InnoDB: about operating system error numbers.\n");
 		}
 	}
diff --git a/sql/log.cc b/sql/log.cc
index 6e9fa38c407..ce06092cfb7 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1526,6 +1526,9 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
   SYNOPSIS
     wait_for_update()
     thd			Thread variable
+    master_or_slave     If 0, the caller is the Binlog_dump thread from master;
+                        if 1, the caller is the SQL thread from the slave. This
+                        influences only thd->proc_info.
 
   NOTES
     One must have a lock on LOCK_log before calling this function.
@@ -1538,11 +1541,15 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
 */
 
 
-void MYSQL_LOG:: wait_for_update(THD* thd)
+void MYSQL_LOG:: wait_for_update(THD* thd, bool master_or_slave)
 {
   safe_mutex_assert_owner(&LOCK_log);
   const char* old_msg = thd->enter_cond(&update_cond, &LOCK_log,
-					"Slave: waiting for binlog update");
+                                        master_or_slave ?
+                                        "Has read all relay log; waiting for \
+the I/O slave thread to update it" : 
+                                        "Has sent all binlog to slave; \
+waiting for binlog to be updated"); 
   pthread_cond_wait(&update_cond, &LOCK_log);
   pthread_mutex_unlock(&LOCK_log);		// See NOTES
   thd->exit_cond(old_msg);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 6b8c1e2db1d..425b3c063d1 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2066,9 +2066,6 @@ Fatal error running LOAD DATA INFILE on table '%s'. Default database: '%s'",
 
   TODO
     - Remove all active user locks
-    - If we have an active transaction at this point, the master died
-      in the middle while writing the transaction to the binary log.
-      In this case we should stop the slave.
 */
 
 int Start_log_event::exec_event(struct st_relay_log_info* rli)
@@ -2096,8 +2093,10 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli)
     break;
  case BINLOG_FORMAT_323_GEQ_57 : 
     /* Can distinguish, based on the value of 'created' */
-    if (created) /* this was generated at master startup*/
-      close_temporary_tables(thd);
+    if (!created) 
+      break;
+    /* otherwise this was generated at master startup*/  
+    close_temporary_tables(thd);
     break;
   default :
     /* this case is impossible */
@@ -2154,10 +2153,28 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli)
     We can't rotate the slave as this will cause infinitive rotations
     in a A -> B -> A setup.
 
+  NOTES
+    As a transaction NEVER spans 2 or more binlogs:
+    if we have an active transaction at this point, the master died while
+    writing the transaction to the binary log, i.e. while flushing the binlog
+    cache to the binlog. As the write was started, the transaction had been
+    committed on the master, so we lack the information to replay this
+    transaction on the slave; all we can do is stop with error.
+    If we didn't detect it, then positions would start to become garbage (as we
+    are incrementing rli->relay_log_pos whereas we are in a transaction: the new
+    rli->relay_log_pos will be
+    relay_log_pos of the BEGIN + size of the Rotate event = garbage).
+
+    Since MySQL 4.0.14, the master ALWAYS sends a Rotate event when it starts
+    sending the next binlog, so we are sure to receive a Rotate event just
+    after the end of the "dead master"'s binlog; so this exec_event() is the
+    right place to catch the problem. If we waited until
+    Start_log_event::exec_event(), it would be too late: rli->relay_log_pos
+    would already be garbage.
+
   RETURN VALUES
     0	ok
- */
-  
+*/
 
 int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
 {
@@ -2165,6 +2182,18 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
   DBUG_ENTER("Rotate_log_event::exec_event");
 
   pthread_mutex_lock(&rli->data_lock);
+
+  if (rli->inside_transaction)
+  {
+    slave_print_error(rli, 0,
+                      "there is an unfinished transaction in the relay log \
+(could find neither COMMIT nor ROLLBACK in the relay log); it could be that \
+the master died while writing the transaction to its binary log. Now the slave \
+is rolling back the transaction.");
+    pthread_mutex_unlock(&rli->data_lock);
+    DBUG_RETURN(1);
+  }
+
   memcpy(log_name, new_log_ident, ident_len+1);
   rli->master_log_pos = pos;
   rli->relay_log_pos += get_event_len();
diff --git a/sql/slave.cc b/sql/slave.cc
index 32ed228e119..07c9bb7bd8a 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1443,7 +1443,8 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli)
   pthread_mutex_lock(&rli->log_space_lock);
   const char* save_proc_info= thd->enter_cond(&rli->log_space_cond,
                                               &rli->log_space_lock, 
-                                              "Waiting for relay log space to free");
+                                              "Waiting for the SQL slave \
+thread to free enough relay log space");
   while (rli->log_space_limit < rli->log_space_total &&
 	 !(slave_killed=io_slave_killed(thd,mi)) &&
          !rli->ignore_log_space_limit)
@@ -1925,7 +1926,8 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
     
     DBUG_PRINT("info",("Waiting for master update"));
     const char* msg = thd->enter_cond(&data_cond, &data_lock,
-                                      "Waiting for master update");
+                                      "Waiting for the SQL slave thread to \
+advance position");
     /*
       We are going to pthread_cond_(timed)wait(); if the SQL thread stops it
       will wake us up.
@@ -1988,7 +1990,14 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
   thd->master_access= ~0;
   thd->priv_user = 0;
   thd->slave_thread = 1;
-  thd->options = (((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | OPTION_AUTO_IS_NULL) ;
+  thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) |
+    OPTION_AUTO_IS_NULL |
+    /* 
+       It's nonsense to constrain the slave threads with max_join_size; if a
+       query succeeded on master, we HAVE to execute it.
+    */
+    OPTION_BIG_SELECTS ; 
+    
   thd->client_capabilities = CLIENT_LOCAL_FILES;
   thd->real_id=pthread_self();
   pthread_mutex_lock(&LOCK_thread_count);
@@ -2008,11 +2017,8 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
   VOID(pthread_sigmask(SIG_UNBLOCK,&set,&thd->block_signals));
 #endif
 
-  if (thd->variables.max_join_size == HA_POS_ERROR)
-    thd->options |= OPTION_BIG_SELECTS;
-
   if (thd_type == SLAVE_THD_SQL)
-    thd->proc_info= "Waiting for the next event in slave queue";
+    thd->proc_info= "Waiting for the next event in relay log";
   else
     thd->proc_info= "Waiting for master update";
   thd->version=refresh_version;
@@ -2260,7 +2266,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
   }
   else
   {
-    sql_print_error("\
+    slave_print_error(rli, 0, "\
 Could not parse relay log event entry. The possible reasons are: the master's \
 binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
 binary log), the slave's relay log is corrupted (you can check this by running \
@@ -2334,7 +2340,7 @@ slave_begin:
   }
   
 
-  thd->proc_info = "connecting to master";
+  thd->proc_info = "Connecting to master";
   // we can get killed during safe_connect
   if (!safe_connect(thd, mysql, mi))
     sql_print_error("Slave I/O thread: connected to master '%s@%s:%d',\
@@ -2381,7 +2387,7 @@ dump");
 	goto err;
       }
 	  
-      thd->proc_info = "Waiiting to reconnect after a failed dump request";
+      thd->proc_info= "Waiting to reconnect after a failed binlog dump request";
       mc_end_server(mysql);
       /*
 	First time retry immediately, assuming that we can recover
@@ -2402,7 +2408,7 @@ dump");
 	goto err;
       }
 
-      thd->proc_info = "Reconnecting after a failed dump request";
+      thd->proc_info = "Reconnecting after a failed binlog dump request";
       if (!suppress_warnings)
 	sql_print_error("Slave I/O thread: failed dump request, \
 reconnecting to try again, log '%s' at postion %s", IO_RPL_LOG_NAME,
@@ -2421,7 +2427,13 @@ after reconnect");
     while (!io_slave_killed(thd,mi))
     {
       bool suppress_warnings= 0;    
-      thd->proc_info = "Reading master update";
+      /* 
+         We say "waiting" because read_event() will wait if there's nothing to
+         read. But if there's something to read, it will not wait. The important
+         thing is to not confuse users by saying "reading" whereas we're in fact
+         receiving nothing.
+      */
+      thd->proc_info = "Waiting for master to send event";
       ulong event_len = read_event(mysql, mi, &suppress_warnings);
       if (io_slave_killed(thd,mi))
       {
@@ -2448,7 +2460,8 @@ max_allowed_packet",
 			  mc_mysql_error(mysql));
 	  goto err;
 	}
-	thd->proc_info = "Waiting to reconnect after a failed read";
+	thd->proc_info = "Waiting to reconnect after a failed master event \
+read";
 	mc_end_server(mysql);
 	if (retry_count++)
 	{
@@ -2464,7 +2477,7 @@ max_allowed_packet",
 reconnect after a failed read");
 	  goto err;
 	}
-	thd->proc_info = "Reconnecting after a failed read";
+	thd->proc_info = "Reconnecting after a failed master event read";
 	if (!suppress_warnings)
 	  sql_print_error("Slave I/O thread: Failed reading log event, \
 reconnecting to retry, log '%s' position %s", IO_RPL_LOG_NAME,
@@ -2481,7 +2494,7 @@ reconnect done to recover from failed read");
       } // if (event_len == packet_error)
 	  
       retry_count=0;			// ok event, reset retry counter
-      thd->proc_info = "Queueing event from master";
+      thd->proc_info = "Queueing master event to the relay log";
       if (queue_event(mi,(const char*)mysql->net.read_pos + 1,
 		      event_len))
       {
@@ -2663,7 +2676,7 @@ log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,
 
   while (!sql_slave_killed(thd,rli))
   {
-    thd->proc_info = "Processing master log event"; 
+    thd->proc_info = "Reading event from the relay log"; 
     DBUG_ASSERT(rli->sql_thd == thd);
     THD_CHECK_SENTRY(thd);
     if (exec_relay_log_event(thd,rli))
@@ -2695,6 +2708,12 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
   DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
   /* When master_pos_wait() wakes up it will check this and terminate */
   rli->slave_running= 0; 
+  /* 
+     Going out of the transaction. Necessary to mark it, in case the user
+     restarts replication from a non-transactional statement (with CHANGE
+     MASTER).
+  */
+  rli->inside_transaction= 0;
   /* Wake up master_pos_wait() */
   pthread_mutex_unlock(&rli->data_lock);
   DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
@@ -3386,7 +3405,7 @@ rli->relay_log_pos=%s rli->pending=%lu",
         pthread_mutex_unlock(&rli->log_space_lock);
         pthread_cond_broadcast(&rli->log_space_cond);
         // Note that wait_for_update unlocks lock_log !
-        rli->relay_log.wait_for_update(rli->sql_thd);
+        rli->relay_log.wait_for_update(rli->sql_thd, 1);
         // re-acquire data lock since we released it earlier
         pthread_mutex_lock(&rli->data_lock);
 	continue;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 9bf4dc852d7..64a314911ec 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -123,7 +123,7 @@ public:
   }
   void set_max_size(ulong max_size_arg);
   void signal_update() { pthread_cond_broadcast(&update_cond);}
-  void wait_for_update(THD* thd);
+  void wait_for_update(THD* thd, bool master_or_slave);
   void set_need_start_event() { need_start_event = 1; }
   void init(enum_log_type log_type_arg,
 	    enum cache_type io_cache_type_arg,
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 5a42614dff4..10581431c72 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -532,7 +532,7 @@ Increase max_allowed_packet on master";
 	  if (!thd->killed)
 	  {
 	    /* Note that the following call unlocks lock_log */
-	    mysql_bin_log.wait_for_update(thd);
+	    mysql_bin_log.wait_for_update(thd, 0);
 	  }
 	  else
 	    pthread_mutex_unlock(log_lock);
@@ -547,7 +547,7 @@ Increase max_allowed_packet on master";
 
 	if (read_packet)
 	{
-	  thd->proc_info = "sending update to slave";
+	  thd->proc_info = "Sending binlog event to slave";
 	  if (my_net_write(net, (char*)packet->ptr(), packet->length()) )
 	  {
 	    errmsg = "Failed on my_net_write()";
@@ -584,7 +584,7 @@ Increase max_allowed_packet on master";
     {
       bool loop_breaker = 0;
       // need this to break out of the for loop from switch
-      thd->proc_info = "switching to next log";
+      thd->proc_info = "Finished reading one binlog; switching to next binlog";
       switch (mysql_bin_log.find_next_log(&linfo, 1)) {
       case LOG_INFO_EOF:
 	loop_breaker = (flags & BINLOG_DUMP_NON_BLOCK);
@@ -623,14 +623,14 @@ end:
   (void)my_close(file, MYF(MY_WME));
 
   send_eof(&thd->net);
-  thd->proc_info = "waiting to finalize termination";
+  thd->proc_info = "Waiting to finalize termination";
   pthread_mutex_lock(&LOCK_thread_count);
   thd->current_linfo = 0;
   pthread_mutex_unlock(&LOCK_thread_count);
   DBUG_VOID_RETURN;
 
 err:
-  thd->proc_info = "waiting to finalize termination";
+  thd->proc_info = "Waiting to finalize termination";
   end_io_cache(&log);
   /*
     Exclude  iteration through thread list
@@ -866,7 +866,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
     DBUG_RETURN(1);
   }
 
-  thd->proc_info = "changing master";
+  thd->proc_info = "Changing master";
   LEX_MASTER_INFO* lex_mi = &thd->lex.mi;
   // TODO: see if needs re-write
   if (init_master_info(mi, master_info_file, relay_log_info_file, 0))
@@ -932,7 +932,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
   if (need_relay_log_purge)
   {
     mi->rli.skip_log_purge= 0;
-    thd->proc_info="purging old relay logs";
+    thd->proc_info="Purging old relay logs";
     if (purge_relay_logs(&mi->rli, thd,
 			 0 /* not only reset, but also reinit */,
 			 &errmsg))