diff options
-rw-r--r-- | sql/log.cc | 8 | ||||
-rw-r--r-- | sql/log_event.cc | 59 | ||||
-rw-r--r-- | sql/log_event.h | 119 | ||||
-rw-r--r-- | sql/slave.cc | 79 | ||||
-rw-r--r-- | sql/slave.h | 2 |
5 files changed, 195 insertions, 72 deletions
diff --git a/sql/log.cc b/sql/log.cc index 0ccb40c5246..223df51d07f 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -966,14 +966,6 @@ void MYSQL_LOG::new_file(bool need_lock) THD* thd = current_thd; Rotate_log_event r(thd,new_name+dirname_length(new_name)); r.set_log_pos(this); - - /* - Because this log rotation could have been initiated by a master of - the slave running with log-bin, we set the flag on rotate - event to prevent infinite log rotation loop - */ - if (thd->slave_thread) - r.flags|= LOG_EVENT_FORCED_ROTATE_F; r.write(&log_file); bytes_written += r.get_event_len(); } diff --git a/sql/log_event.cc b/sql/log_event.cc index 3d500ede462..98a877616e0 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1057,7 +1057,8 @@ int Start_log_event::write_data(IO_CACHE* file) The master started IMPLEMENTATION - - To handle the case where the master died without a stop event, + - To handle the case where the master died without having time to write DROP + TEMPORARY TABLE, DO RELEASE_LOCK (prepared statements' deletion is TODO), we clean up all temporary tables + locks that we got. However, we don't clean temporary tables if the master was 3.23 (this is because a 3.23 master writes a Start_log_event at every @@ -1065,11 +1066,20 @@ int Start_log_event::write_data(IO_CACHE* file) on the slave when FLUSH LOGS is issued on the master). TODO - - Remove all active user locks + - Remove all active user locks. + Guilhem 2003-06: this is true but not urgent: the worst it can cause is + the use of a bit of memory for a user lock which will not be used + anymore. If the user lock is later used, the old one will be released. In + other words, no deadlock problem. - If we have an active transaction at this point, the master died in the middle while writing the transaction to the binary log. In this case we should stop the slave. - + Guilhem 2003-06: I don't think we should. 
As the binlog is written before + the table changes are committed, rollback has occurred on the master; we + should rather rollback on the slave and go on. If we don't rollback, and + the next query is not BEGIN, then it will be considered as part of the + unfinished transaction, and so will be rolled back at next BEGIN, which is + a bug. */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) @@ -1079,6 +1089,11 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli) if (!rli->mi->old_format) { + /* + If the master died before writing the COMMIT to the binlog, rollback; + otherwise it does not hurt to rollback. + */ + ha_rollback(thd); /* If 4.0 master, all temporary tables have been deleted on the master; if 3.23 master, this is far from sure. @@ -1703,8 +1718,6 @@ void Rotate_log_event::pack_info(Protocol *protocol) b_pos+= ident_len; b_pos= strmov(b_pos, ";pos="); b_pos=longlong10_to_str(pos, b_pos, 10); - if (flags & LOG_EVENT_FORCED_ROTATE_F) - b_pos= strmov(b_pos ,"; forced by master"); protocol->store(buf, b_pos-buf, &my_charset_bin); my_free(buf, MYF(MY_ALLOW_ZERO_PTR)); } @@ -1728,8 +1741,6 @@ void Rotate_log_event::print(FILE* file, bool short_form, char* last_db) my_fwrite(file, (byte*) new_log_ident, (uint)ident_len, MYF(MY_NABP | MY_WME)); fprintf(file, " pos: %s", llstr(pos, buf)); - if (flags & LOG_EVENT_FORCED_ROTATE_F) - fprintf(file," forced by master"); fputc('\n', file); fflush(file); } @@ -2399,29 +2410,21 @@ void Stop_log_event::print(FILE* file, bool short_form, char* last_db) /* Stop_log_event::exec_event() - The master stopped. Clean up all temporary tables + locks that the - master may have set. - - TODO - - Remove all active user locks + The master stopped. + We used to clean up all temporary tables but this is useless as, as the master + has shut down properly, it has written all DROP TEMPORARY TABLE and DO + RELEASE_LOCK (prepared statements' deletion is TODO). 
+ We used to clean up slave_load_tmpdir, but this is useless as it has been + cleared at the end of LOAD DATA INFILE. + So we have nothing to do here. + The place where we must do this cleaning is in Start_log_event::exec_event(), + not here. Because if we come here, the master was sane. */ #ifndef MYSQL_CLIENT int Stop_log_event::exec_event(struct st_relay_log_info* rli) { /* - do not clean up immediately after rotate event; - QQ: this should be a useless test: the only case when it is false is when - shutdown occurred just after FLUSH LOGS. It has nothing to do with Rotate? - By the way, immediately after a Rotate the I/O thread does not write - the Stop to the relay log, so we won't come here in that case. - */ - if (rli->group_master_log_pos > BIN_LOG_HEADER_SIZE) - { - close_temporary_tables(thd); - cleanup_load_tmpdir(); - } - /* We do not want to update master_log pos because we get a rotate event before stop, so by now group_master_log_name is set to the next log. If we updated it, we will have incorrect master coordinates and this @@ -2965,10 +2968,10 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) goto err; } /* - We want to disable binary logging in slave thread because we need the file - events to appear in the same order as they do on the master relative to - other events, so that we can preserve ascending order of log sequence - numbers - needed to handle failover . + We are going to create a Load_log_event to finally load into the table. + This event should not go into the binlog: in the binlog we only want the + Create_file, Append_blocks and Execute_load. We disable binary logging and + restore the thread's options just after finishing the load. 
*/ save_options = thd->options; thd->options &= ~ (ulong) (OPTION_BIN_LOG); diff --git a/sql/log_event.h b/sql/log_event.h index 1d2fc741fa8..bd5e1a82be4 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -34,15 +34,21 @@ #define LOG_READ_TOO_LARGE -7 #define LOG_EVENT_OFFSET 4 + #define BINLOG_VERSION 3 /* We could have used SERVER_VERSION_LENGTH, but this introduces an obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH - this would have broke the replication protocol + this would have broken the replication protocol */ #define ST_SERVER_VER_LEN 50 +/* + These are flags and structs to handle all the LOAD DATA INFILE options (LINES + TERMINATED etc). +*/ + #define DUMPFILE_FLAG 0x1 #define OPT_ENCLOSED_FLAG 0x2 #define REPLACE_FLAG 0x4 @@ -121,11 +127,17 @@ struct sql_ex_info See the #defines below for the format specifics. + The events which really update data are Query_log_event and + Load_log_event/Create_file_log_event/Execute_load_log_event (these 3 act + together to replicate LOAD DATA INFILE, with the help of + Append_block_log_event which prepares temporary files to load into the table). + ****************************************************************************/ +#define LOG_EVENT_HEADER_LEN 19 /* the fixed header length */ +#define OLD_HEADER_LEN 13 /* the fixed header length in 3.23 */ + /* event-specific post-header sizes */ -#define LOG_EVENT_HEADER_LEN 19 -#define OLD_HEADER_LEN 13 #define QUERY_HEADER_LEN (4 + 4 + 1 + 2) #define LOAD_HEADER_LEN (4 + 4 + 4 + 1 +1 + 4) #define START_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4) @@ -135,7 +147,10 @@ struct sql_ex_info #define EXEC_LOAD_HEADER_LEN 4 #define DELETE_FILE_HEADER_LEN 4 -/* event header offsets */ +/* + Event header offsets; + these point to places inside the fixed header. 
+*/ #define EVENT_TYPE_OFFSET 4 #define SERVER_ID_OFFSET 5 @@ -149,7 +164,7 @@ struct sql_ex_info #define ST_SERVER_VER_OFFSET 2 #define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN) -/* slave event post-header */ +/* slave event post-header (this event is never written) */ #define SL_MASTER_PORT_OFFSET 8 #define SL_MASTER_POS_OFFSET 0 @@ -197,14 +212,20 @@ struct sql_ex_info #define R_POS_OFFSET 0 #define R_IDENT_OFFSET 8 +/* CF to DF handle LOAD DATA INFILE */ + +/* CF = "Create File" */ #define CF_FILE_ID_OFFSET 0 #define CF_DATA_OFFSET CREATE_FILE_HEADER_LEN +/* AB = "Append Block" */ #define AB_FILE_ID_OFFSET 0 #define AB_DATA_OFFSET APPEND_BLOCK_HEADER_LEN +/* EL = "Execute Load" */ #define EL_FILE_ID_OFFSET 0 +/* DF = "Delete File" */ #define DF_FILE_ID_OFFSET 0 #define QUERY_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN) @@ -217,13 +238,31 @@ struct sql_ex_info #define EXEC_LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+EXEC_LOAD_HEADER_LEN) #define APPEND_BLOCK_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+APPEND_BLOCK_HEADER_LEN) - +/* 4 bytes which all binlogs should begin with */ #define BINLOG_MAGIC "\xfe\x62\x69\x6e" +/* + The 2 flags below were useless : + - the first one was never set + - the second one was set in all Rotate events on the master, but not used for + anything useful. + So they are now removed and their place may later be reused for other + flags. Then one must remember that Rotate events in 4.x have + LOG_EVENT_FORCED_ROTATE_F set, so one should not rely on the value of the + replacing flag when reading a Rotate event. + I keep the defines here just to remember what they were. +*/ +#ifdef TO_BE_REMOVED #define LOG_EVENT_TIME_F 0x1 -#define LOG_EVENT_FORCED_ROTATE_F 0x2 -#define LOG_EVENT_THREAD_SPECIFIC_F 0x4 /* query depends on thread - (for example: TEMPORARY TABLE) */ +#define LOG_EVENT_FORCED_ROTATE_F 0x2 +#endif +/* + If the query depends on the thread (for example: TEMPORARY TABLE). 
+ Currently this is used by mysqlbinlog to know it must print + SET @@PSEUDO_THREAD_ID=xx; before the query (it would not hurt to print it + for every query but this would be slow). +*/ +#define LOG_EVENT_THREAD_SPECIFIC_F 0x4 enum Log_event_type { @@ -258,30 +297,81 @@ struct st_relay_log_info; class Log_event { public: + /* + The offset in the log where this event originally appeared (it is preserved + in relay logs, making SHOW SLAVE STATUS able to print coordinates of the + event in the master's binlog). Note: when a transaction is written by the + master to its binlog (wrapped in BEGIN/COMMIT) the log_pos of all the + queries it contains is the one of the BEGIN (this way, when one does SHOW + SLAVE STATUS it sees the offset of the BEGIN, which is logical as rollback + may occur), except the COMMIT query which has its real offset. + */ my_off_t log_pos; - char *temp_buf; + /* + A temp buffer for read_log_event; it is later analysed according to the + event's type, and its content is distributed in the event-specific fields. + */ + char *temp_buf; + /* + Timestamp on the master(for debugging and replication of NOW()/TIMESTAMP). + It is important for queries and LOAD DATA INFILE. This is set at the event's + creation time, except for Query and Load (et al.) events where this is set + at the query's execution time, which guarantees good replication (otherwise, + we could have a query and its event with different timestamps). + */ time_t when; + /* The number of seconds the query took to run on the master. */ ulong exec_time; + /* + The master's server id (is preserved in the relay log; used to prevent from + infinite loops in circular replication). + */ uint32 server_id; uint cached_event_len; + + /* + Some 16 flags. Only one is really used now; look above for + LOG_EVENT_TIME_F, LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F + for notes. 
+ */ uint16 flags; + bool cache_stmt; #ifndef MYSQL_CLIENT THD* thd; Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt); Log_event(); + /* + read_log_event() functions read an event from a binlog or relay log; used by + SHOW BINLOG EVENTS, the binlog_dump thread on the master (reads master's + binlog), the slave IO thread (reads the event sent by binlog_dump), the + slave SQL thread (reads the event from the relay log). + */ // if mutex is 0, the read will proceed without mutex static Log_event* read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock, bool old_format); static int read_log_event(IO_CACHE* file, String* packet, pthread_mutex_t* log_lock); + /* set_log_pos() is used to fill log_pos with tell(log). */ void set_log_pos(MYSQL_LOG* log); + /* + init_show_field_list() prepares the column names and types for the output of + SHOW BINLOG EVENTS; it is used only by SHOW BINLOG EVENTS. + */ static void init_show_field_list(List<Item>* field_list); #ifdef HAVE_REPLICATION int net_send(Protocol *protocol, const char* log_name, my_off_t pos); + /* + pack_info() is used by SHOW BINLOG EVENTS; like print(), it prepares and sends + a string to display to the user, so it resembles print(). + */ virtual void pack_info(Protocol *protocol); + /* + The SQL slave thread calls exec_event() to execute the event; this is where + the slave's data is modified. 
+ */ virtual int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ virtual const char* get_db() @@ -291,6 +381,7 @@ public: #else // avoid having to link mysqlbinlog against libpthread static Log_event* read_log_event(IO_CACHE* file, bool old_format); + /* print*() functions are used by mysqlbinlog */ virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0; void print_timestamp(FILE* file, time_t *ts = 0); void print_header(FILE* file); @@ -336,6 +427,7 @@ public: } static Log_event* read_log_event(const char* buf, int event_len, const char **error, bool old_format); + /* returns the human readable name of the event's type */ const char* get_type_str(); }; @@ -403,6 +495,8 @@ public: /***************************************************************************** Slave Log Event class + Note that this class is currently not used at all; no code writes a + Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event). ****************************************************************************/ class Slave_log_event: public Log_event @@ -593,7 +687,7 @@ public: Rand Log Event class - Logs random seed used by the next RAND() + Logs random seed used by the next RAND(), and by PASSWORD() in 4.1. ****************************************************************************/ class Rand_log_event: public Log_event @@ -626,6 +720,9 @@ class Rand_log_event: public Log_event User var Log Event class + Every time a query uses the value of a user variable, a User_var_log_event is + written before the Query_log_event, to set the user variable. 
+ ****************************************************************************/ class User_var_log_event: public Log_event { diff --git a/sql/slave.cc b/sql/slave.cc index cc27bb96ab7..504bb0309cc 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1426,7 +1426,6 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, DBUG_RETURN(0); mi->mysql=0; mi->file_id=1; - mi->ignore_stop_event=0; fn_format(fname, master_info_fname, mysql_data_home, "", 4+32); /* @@ -2746,6 +2745,8 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) /* queue_old_event() + Writes a 3.23 event to the relay log. + TODO: Test this code before release - it has to be tested on a separate setup with 3.23 master @@ -2790,8 +2791,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, ev->log_pos = mi->master_log_pos; switch (ev->get_type_code()) { case STOP_EVENT: - ignore_event= mi->ignore_stop_event; - mi->ignore_stop_event=0; + ignore_event= 1; inc_pos= event_len; break; case ROTATE_EVENT: @@ -2801,7 +2801,6 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, pthread_mutex_unlock(&mi->data_lock); DBUG_RETURN(1); } - mi->ignore_stop_event=1; inc_pos= 0; break; case CREATE_FILE_EVENT: @@ -2817,7 +2816,6 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, DBUG_RETURN(error); } default: - mi->ignore_stop_event=0; inc_pos= event_len; break; } @@ -2842,15 +2840,12 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, /* queue_event() - TODO: verify the issue with stop events, see if we need them at all - in the relay log */ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) { int error= 0; ulong inc_pos; - bool ignore_event= 0; RELAY_LOG_INFO *rli= &mi->rli; DBUG_ENTER("queue_event"); @@ -2861,39 +2856,77 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) /* TODO: figure out if other events in addition to Rotate - require special processing + require special processing. 
+ Guilhem 2003-06 : I don't think so. */ switch (buf[EVENT_TYPE_OFFSET]) { case STOP_EVENT: - ignore_event= mi->ignore_stop_event; - mi->ignore_stop_event= 0; - inc_pos= event_len; - break; + /* + We needn't write this event to the relay log. Indeed, it just indicates a + master server shutdown. The only thing this does is cleaning. But cleaning + is already done on a per-master-thread basis (as the master server is + shutting down cleanly, it has written all DROP TEMPORARY TABLE and DO + RELEASE_LOCK; prepared statements' deletion is TODO). + + We don't even increment mi->master_log_pos, because we may be just after a + Rotate event. Btw, in a few milliseconds we are going to have a Start + event from the next binlog (unless the master is presently running without + --log-bin). + */ + goto err; case ROTATE_EVENT: { Rotate_log_event rev(buf,event_len,0); if (unlikely(process_io_rotate(mi,&rev))) { - pthread_mutex_unlock(&mi->data_lock); - DBUG_RETURN(1); + error= 1; + goto err; } - mi->ignore_stop_event= 1; + /* + Now the I/O thread has just changed its mi->master_log_name, so + incrementing mi->master_log_pos is nonsense. + */ inc_pos= 0; break; } default: - mi->ignore_stop_event= 0; inc_pos= event_len; break; } - - if (likely(!ignore_event && - !(error= rli->relay_log.appendv(buf,event_len,0)))) + + /* + If this event is originating from this server, don't queue it. + We don't check this for 3.23 events because it's simpler like this; 3.23 + will be filtered anyway by the SQL slave thread which also tests the server + id (we must also keep this test in the SQL thread, in case somebody + upgrades a 4.0 slave which has a not-filtered relay log). + + ANY event coming from ourselves can be ignored: it is obvious for queries; + for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves + (--log-slave-updates would not log that) unless this slave is also its + direct master (an unsupported, useless setup!). 
+ */ + + if (uint4korr(buf + SERVER_ID_OFFSET) == ::server_id) { + /* + Do not write it to the relay log. + We still want to increment, so that we won't re-read this event from the + master if the slave IO thread is now stopped/restarted (more efficient if + the events we are ignoring are big LOAD DATA INFILE). + */ mi->master_log_pos+= inc_pos; - DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); - rli->relay_log.harvest_bytes_written(&rli->log_space_total); - } + DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos)); + } + else /* write the event to the relay log */ + if (likely(!(error= rli->relay_log.appendv(buf,event_len,0)))) + { + mi->master_log_pos+= inc_pos; + DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); + rli->relay_log.harvest_bytes_written(&rli->log_space_total); + } + +err: pthread_mutex_unlock(&mi->data_lock); DBUG_RETURN(error); } diff --git a/sql/slave.h b/sql/slave.h index 1d00df67b22..429456eb0bb 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -308,8 +308,6 @@ typedef struct st_master_info bool old_format; /* master binlog is in 3.23 format */ volatile bool abort_slave, slave_running; volatile ulong slave_run_id; - bool ignore_stop_event; - st_master_info() :fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0), |