summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sql/log.cc8
-rw-r--r--sql/log_event.cc59
-rw-r--r--sql/log_event.h119
-rw-r--r--sql/slave.cc79
-rw-r--r--sql/slave.h2
5 files changed, 195 insertions, 72 deletions
diff --git a/sql/log.cc b/sql/log.cc
index 0ccb40c5246..223df51d07f 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -966,14 +966,6 @@ void MYSQL_LOG::new_file(bool need_lock)
THD* thd = current_thd;
Rotate_log_event r(thd,new_name+dirname_length(new_name));
r.set_log_pos(this);
-
- /*
- Because this log rotation could have been initiated by a master of
- the slave running with log-bin, we set the flag on rotate
- event to prevent infinite log rotation loop
- */
- if (thd->slave_thread)
- r.flags|= LOG_EVENT_FORCED_ROTATE_F;
r.write(&log_file);
bytes_written += r.get_event_len();
}
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 3d500ede462..98a877616e0 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1057,7 +1057,8 @@ int Start_log_event::write_data(IO_CACHE* file)
The master started
IMPLEMENTATION
- - To handle the case where the master died without a stop event,
+ - To handle the case where the master died without having time to write DROP
+ TEMPORARY TABLE, DO RELEASE_LOCK (prepared statements' deletion is TODO),
we clean up all temporary tables + locks that we got.
However, we don't clean temporary tables if the master was 3.23
(this is because a 3.23 master writes a Start_log_event at every
@@ -1065,11 +1066,20 @@ int Start_log_event::write_data(IO_CACHE* file)
on the slave when FLUSH LOGS is issued on the master).
TODO
- - Remove all active user locks
+ - Remove all active user locks.
+ Guilhem 2003-06: this is true but not urgent: the worst it can cause is
+ the use of a bit of memory for a user lock which will not be used
+ anymore. If the user lock is later used, the old one will be released. In
+ other words, no deadlock problem.
- If we have an active transaction at this point, the master died
in the middle while writing the transaction to the binary log.
In this case we should stop the slave.
-
+ Guilhem 2003-06: I don't think we should. As the binlog is written before
+ the table changes are committed, rollback has occurred on the master; we
+ should rather rollback on the slave and go on. If we don't rollback, and
+ the next query is not BEGIN, then it will be considered as part of the
+ unfinished transaction, and so will be rolled back at next BEGIN, which is
+ a bug.
*/
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
@@ -1079,6 +1089,11 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli)
if (!rli->mi->old_format)
{
+ /*
+ If the master died before writing the COMMIT to the binlog, rollback;
+ otherwise it does not hurt to rollback.
+ */
+ ha_rollback(thd);
/*
If 4.0 master, all temporary tables have been deleted on the master;
if 3.23 master, this is far from sure.
@@ -1703,8 +1718,6 @@ void Rotate_log_event::pack_info(Protocol *protocol)
b_pos+= ident_len;
b_pos= strmov(b_pos, ";pos=");
b_pos=longlong10_to_str(pos, b_pos, 10);
- if (flags & LOG_EVENT_FORCED_ROTATE_F)
- b_pos= strmov(b_pos ,"; forced by master");
protocol->store(buf, b_pos-buf, &my_charset_bin);
my_free(buf, MYF(MY_ALLOW_ZERO_PTR));
}
@@ -1728,8 +1741,6 @@ void Rotate_log_event::print(FILE* file, bool short_form, char* last_db)
my_fwrite(file, (byte*) new_log_ident, (uint)ident_len,
MYF(MY_NABP | MY_WME));
fprintf(file, " pos: %s", llstr(pos, buf));
- if (flags & LOG_EVENT_FORCED_ROTATE_F)
- fprintf(file," forced by master");
fputc('\n', file);
fflush(file);
}
@@ -2399,29 +2410,21 @@ void Stop_log_event::print(FILE* file, bool short_form, char* last_db)
/*
Stop_log_event::exec_event()
- The master stopped. Clean up all temporary tables + locks that the
- master may have set.
-
- TODO
- - Remove all active user locks
+ The master stopped.
+ We used to clean up all temporary tables, but this is useless: since the master
+ has shut down properly, it has written all DROP TEMPORARY TABLE and DO
+ RELEASE_LOCK (prepared statements' deletion is TODO).
+ We used to clean up slave_load_tmpdir, but this is useless as it has been
+ cleared at the end of LOAD DATA INFILE.
+ So we have nothing to do here.
+ The place where we must do this cleaning is in Start_log_event::exec_event(),
+ not here. Because if we come here, the master was sane.
*/
#ifndef MYSQL_CLIENT
int Stop_log_event::exec_event(struct st_relay_log_info* rli)
{
/*
- do not clean up immediately after rotate event;
- QQ: this should be a useless test: the only case when it is false is when
- shutdown occurred just after FLUSH LOGS. It has nothing to do with Rotate?
- By the way, immediately after a Rotate the I/O thread does not write
- the Stop to the relay log, so we won't come here in that case.
- */
- if (rli->group_master_log_pos > BIN_LOG_HEADER_SIZE)
- {
- close_temporary_tables(thd);
- cleanup_load_tmpdir();
- }
- /*
We do not want to update master_log pos because we get a rotate event
before stop, so by now group_master_log_name is set to the next log.
If we updated it, we will have incorrect master coordinates and this
@@ -2965,10 +2968,10 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
goto err;
}
/*
- We want to disable binary logging in slave thread because we need the file
- events to appear in the same order as they do on the master relative to
- other events, so that we can preserve ascending order of log sequence
- numbers - needed to handle failover .
+ We are going to create a Load_log_event to finally load into the table.
+ This event should not go into the binlog: in the binlog we only want the
+ Create_file, Append_blocks and Execute_load. We disable binary logging and
+ restore the thread's options just after finishing the load.
*/
save_options = thd->options;
thd->options &= ~ (ulong) (OPTION_BIN_LOG);
diff --git a/sql/log_event.h b/sql/log_event.h
index 1d2fc741fa8..bd5e1a82be4 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -34,15 +34,21 @@
#define LOG_READ_TOO_LARGE -7
#define LOG_EVENT_OFFSET 4
+
#define BINLOG_VERSION 3
/*
We could have used SERVER_VERSION_LENGTH, but this introduces an
obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH
- this would have broke the replication protocol
+ this would have broken the replication protocol
*/
#define ST_SERVER_VER_LEN 50
+/*
+ These are flags and structs to handle all the LOAD DATA INFILE options (LINES
+ TERMINATED etc).
+*/
+
#define DUMPFILE_FLAG 0x1
#define OPT_ENCLOSED_FLAG 0x2
#define REPLACE_FLAG 0x4
@@ -121,11 +127,17 @@ struct sql_ex_info
See the #defines below for the format specifics.
+ The events which really update data are Query_log_event and
+ Load_log_event/Create_file_log_event/Execute_load_log_event (these 3 act
+ together to replicate LOAD DATA INFILE, with the help of
+ Append_block_log_event which prepares temporary files to load into the table).
+
****************************************************************************/
+#define LOG_EVENT_HEADER_LEN 19 /* the fixed header length */
+#define OLD_HEADER_LEN 13 /* the fixed header length in 3.23 */
+
/* event-specific post-header sizes */
-#define LOG_EVENT_HEADER_LEN 19
-#define OLD_HEADER_LEN 13
#define QUERY_HEADER_LEN (4 + 4 + 1 + 2)
#define LOAD_HEADER_LEN (4 + 4 + 4 + 1 +1 + 4)
#define START_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4)
@@ -135,7 +147,10 @@ struct sql_ex_info
#define EXEC_LOAD_HEADER_LEN 4
#define DELETE_FILE_HEADER_LEN 4
-/* event header offsets */
+/*
+ Event header offsets;
+ these point to places inside the fixed header.
+*/
#define EVENT_TYPE_OFFSET 4
#define SERVER_ID_OFFSET 5
@@ -149,7 +164,7 @@ struct sql_ex_info
#define ST_SERVER_VER_OFFSET 2
#define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN)
-/* slave event post-header */
+/* slave event post-header (this event is never written) */
#define SL_MASTER_PORT_OFFSET 8
#define SL_MASTER_POS_OFFSET 0
@@ -197,14 +212,20 @@ struct sql_ex_info
#define R_POS_OFFSET 0
#define R_IDENT_OFFSET 8
+/* CF to DF handle LOAD DATA INFILE */
+
+/* CF = "Create File" */
#define CF_FILE_ID_OFFSET 0
#define CF_DATA_OFFSET CREATE_FILE_HEADER_LEN
+/* AB = "Append Block" */
#define AB_FILE_ID_OFFSET 0
#define AB_DATA_OFFSET APPEND_BLOCK_HEADER_LEN
+/* EL = "Execute Load" */
#define EL_FILE_ID_OFFSET 0
+/* DF = "Delete File" */
#define DF_FILE_ID_OFFSET 0
#define QUERY_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN)
@@ -217,13 +238,31 @@ struct sql_ex_info
#define EXEC_LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+EXEC_LOAD_HEADER_LEN)
#define APPEND_BLOCK_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+APPEND_BLOCK_HEADER_LEN)
-
+/* 4 bytes which all binlogs should begin with */
#define BINLOG_MAGIC "\xfe\x62\x69\x6e"
+/*
+ The 2 flags below were useless :
+ - the first one was never set
+ - the second one was set in all Rotate events on the master, but not used for
+ anything useful.
+ So they are now removed and their place may later be reused for other
+ flags. Then one must remember that Rotate events in 4.x have
+ LOG_EVENT_FORCED_ROTATE_F set, so one should not rely on the value of the
+ replacing flag when reading a Rotate event.
+ I keep the defines here just to remember what they were.
+*/
+#ifdef TO_BE_REMOVED
#define LOG_EVENT_TIME_F 0x1
-#define LOG_EVENT_FORCED_ROTATE_F 0x2
-#define LOG_EVENT_THREAD_SPECIFIC_F 0x4 /* query depends on thread
- (for example: TEMPORARY TABLE) */
+#define LOG_EVENT_FORCED_ROTATE_F 0x2
+#endif
+/*
+ If the query depends on the thread (for example: TEMPORARY TABLE).
+ Currently this is used by mysqlbinlog to know it must print
+ SET @@PSEUDO_THREAD_ID=xx; before the query (it would not hurt to print it
+ for every query but this would be slow).
+*/
+#define LOG_EVENT_THREAD_SPECIFIC_F 0x4
enum Log_event_type
{
@@ -258,30 +297,81 @@ struct st_relay_log_info;
class Log_event
{
public:
+ /*
+ The offset in the log where this event originally appeared (it is preserved
+ in relay logs, making SHOW SLAVE STATUS able to print coordinates of the
+ event in the master's binlog). Note: when a transaction is written by the
+ master to its binlog (wrapped in BEGIN/COMMIT) the log_pos of all the
+ queries it contains is the one of the BEGIN (this way, when one does SHOW
+ SLAVE STATUS it sees the offset of the BEGIN, which is logical as rollback
+ may occur), except the COMMIT query which has its real offset.
+ */
my_off_t log_pos;
- char *temp_buf;
+ /*
+ A temp buffer for read_log_event; it is later analysed according to the
+ event's type, and its content is distributed in the event-specific fields.
+ */
+ char *temp_buf;
+ /*
+ Timestamp on the master (for debugging and replication of NOW()/TIMESTAMP).
+ It is important for queries and LOAD DATA INFILE. This is set at the event's
+ creation time, except for Query and Load (et al.) events where this is set
+ at the query's execution time, which guarantees good replication (otherwise,
+ we could have a query and its event with different timestamps).
+ */
time_t when;
+ /* The number of seconds the query took to run on the master. */
ulong exec_time;
+ /*
+ The master's server id (is preserved in the relay log; used to prevent
+ infinite loops in circular replication).
+ */
uint32 server_id;
uint cached_event_len;
+
+ /*
+ Some 16 flags. Only one is really used now; look above for
+ LOG_EVENT_TIME_F, LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F
+ for notes.
+ */
uint16 flags;
+
bool cache_stmt;
#ifndef MYSQL_CLIENT
THD* thd;
Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt);
Log_event();
+ /*
+ read_log_event() functions read an event from a binlog or relay log; used by
+ SHOW BINLOG EVENTS, the binlog_dump thread on the master (reads master's
+ binlog), the slave IO thread (reads the event sent by binlog_dump), the
+ slave SQL thread (reads the event from the relay log).
+ */
// if mutex is 0, the read will proceed without mutex
static Log_event* read_log_event(IO_CACHE* file,
pthread_mutex_t* log_lock,
bool old_format);
static int read_log_event(IO_CACHE* file, String* packet,
pthread_mutex_t* log_lock);
+ /* set_log_pos() is used to fill log_pos with tell(log). */
void set_log_pos(MYSQL_LOG* log);
+ /*
+ init_show_field_list() prepares the column names and types for the output of
+ SHOW BINLOG EVENTS; it is used only by SHOW BINLOG EVENTS.
+ */
static void init_show_field_list(List<Item>* field_list);
#ifdef HAVE_REPLICATION
int net_send(Protocol *protocol, const char* log_name, my_off_t pos);
+ /*
+ pack_info() is used by SHOW BINLOG EVENTS; like print(), it prepares and sends
+ a string to display to the user, so it resembles print().
+ */
virtual void pack_info(Protocol *protocol);
+ /*
+ The SQL slave thread calls exec_event() to execute the event; this is where
+ the slave's data is modified.
+ */
virtual int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
virtual const char* get_db()
@@ -291,6 +381,7 @@ public:
#else
// avoid having to link mysqlbinlog against libpthread
static Log_event* read_log_event(IO_CACHE* file, bool old_format);
+ /* print*() functions are used by mysqlbinlog */
virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0;
void print_timestamp(FILE* file, time_t *ts = 0);
void print_header(FILE* file);
@@ -336,6 +427,7 @@ public:
}
static Log_event* read_log_event(const char* buf, int event_len,
const char **error, bool old_format);
+ /* returns the human readable name of the event's type */
const char* get_type_str();
};
@@ -403,6 +495,8 @@ public:
/*****************************************************************************
Slave Log Event class
+ Note that this class is currently not used at all; no code writes a
+ Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event).
****************************************************************************/
class Slave_log_event: public Log_event
@@ -593,7 +687,7 @@ public:
Rand Log Event class
- Logs random seed used by the next RAND()
+ Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.
****************************************************************************/
class Rand_log_event: public Log_event
@@ -626,6 +720,9 @@ class Rand_log_event: public Log_event
User var Log Event class
+ Every time a query uses the value of a user variable, a User_var_log_event is
+ written before the Query_log_event, to set the user variable.
+
****************************************************************************/
class User_var_log_event: public Log_event
{
diff --git a/sql/slave.cc b/sql/slave.cc
index cc27bb96ab7..504bb0309cc 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1426,7 +1426,6 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
DBUG_RETURN(0);
mi->mysql=0;
mi->file_id=1;
- mi->ignore_stop_event=0;
fn_format(fname, master_info_fname, mysql_data_home, "", 4+32);
/*
@@ -2746,6 +2745,8 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
/*
queue_old_event()
+ Writes a 3.23 event to the relay log.
+
TODO:
Test this code before release - it has to be tested on a separate
setup with 3.23 master
@@ -2790,8 +2791,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
ev->log_pos = mi->master_log_pos;
switch (ev->get_type_code()) {
case STOP_EVENT:
- ignore_event= mi->ignore_stop_event;
- mi->ignore_stop_event=0;
+ ignore_event= 1;
inc_pos= event_len;
break;
case ROTATE_EVENT:
@@ -2801,7 +2801,6 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
pthread_mutex_unlock(&mi->data_lock);
DBUG_RETURN(1);
}
- mi->ignore_stop_event=1;
inc_pos= 0;
break;
case CREATE_FILE_EVENT:
@@ -2817,7 +2816,6 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
DBUG_RETURN(error);
}
default:
- mi->ignore_stop_event=0;
inc_pos= event_len;
break;
}
@@ -2842,15 +2840,12 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
/*
queue_event()
- TODO: verify the issue with stop events, see if we need them at all
- in the relay log
*/
int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
{
int error= 0;
ulong inc_pos;
- bool ignore_event= 0;
RELAY_LOG_INFO *rli= &mi->rli;
DBUG_ENTER("queue_event");
@@ -2861,39 +2856,77 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
/*
TODO: figure out if other events in addition to Rotate
- require special processing
+ require special processing.
+ Guilhem 2003-06 : I don't think so.
*/
switch (buf[EVENT_TYPE_OFFSET]) {
case STOP_EVENT:
- ignore_event= mi->ignore_stop_event;
- mi->ignore_stop_event= 0;
- inc_pos= event_len;
- break;
+ /*
+ We needn't write this event to the relay log. Indeed, it just indicates a
+ master server shutdown. The only thing this does is cleaning. But cleaning
+ is already done on a per-master-thread basis (as the master server is
+ shutting down cleanly, it has written all DROP TEMPORARY TABLE and DO
+ RELEASE_LOCK; prepared statements' deletion is TODO).
+
+ We don't even increment mi->master_log_pos, because we may be just after a
+ Rotate event. Btw, in a few milliseconds we are going to have a Start
+ event from the next binlog (unless the master is presently running without
+ --log-bin).
+ */
+ goto err;
case ROTATE_EVENT:
{
Rotate_log_event rev(buf,event_len,0);
if (unlikely(process_io_rotate(mi,&rev)))
{
- pthread_mutex_unlock(&mi->data_lock);
- DBUG_RETURN(1);
+ error= 1;
+ goto err;
}
- mi->ignore_stop_event= 1;
+ /*
+ Now the I/O thread has just changed its mi->master_log_name, so
+ incrementing mi->master_log_pos is nonsense.
+ */
inc_pos= 0;
break;
}
default:
- mi->ignore_stop_event= 0;
inc_pos= event_len;
break;
}
-
- if (likely(!ignore_event &&
- !(error= rli->relay_log.appendv(buf,event_len,0))))
+
+ /*
+ If this event is originating from this server, don't queue it.
+ We don't check this for 3.23 events because it's simpler like this; 3.23
+ events will be filtered anyway by the SQL slave thread, which also tests the
+ server id (we must also keep this test in the SQL thread, in case somebody
+ upgrades a 4.0 slave which has an unfiltered relay log).
+
+ ANY event coming from ourselves can be ignored: it is obvious for queries;
+ for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
+ (--log-slave-updates would not log that) unless this slave is also its
+ direct master (an unsupported, useless setup!).
+ */
+
+ if (uint4korr(buf + SERVER_ID_OFFSET) == ::server_id)
{
+ /*
+ Do not write it to the relay log.
+ We still want to increment, so that we won't re-read this event from the
+ master if the slave IO thread is now stopped/restarted (more efficient if
+ the events we are ignoring are big LOAD DATA INFILE).
+ */
mi->master_log_pos+= inc_pos;
- DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
- rli->relay_log.harvest_bytes_written(&rli->log_space_total);
- }
+ DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos));
+ }
+ else /* write the event to the relay log */
+ if (likely(!(error= rli->relay_log.appendv(buf,event_len,0))))
+ {
+ mi->master_log_pos+= inc_pos;
+ DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
+ rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+ }
+
+err:
pthread_mutex_unlock(&mi->data_lock);
DBUG_RETURN(error);
}
diff --git a/sql/slave.h b/sql/slave.h
index 1d00df67b22..429456eb0bb 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -308,8 +308,6 @@ typedef struct st_master_info
bool old_format; /* master binlog is in 3.23 format */
volatile bool abort_slave, slave_running;
volatile ulong slave_run_id;
- bool ignore_stop_event;
-
st_master_info()
:fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0),