summaryrefslogtreecommitdiff
path: root/sql/log.h
diff options
context:
space:
mode:
Diffstat (limited to 'sql/log.h')
-rw-r--r--sql/log.h196
1 files changed, 176 insertions, 20 deletions
diff --git a/sql/log.h b/sql/log.h
index d20ef2ef491..bfa63b79ffe 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -40,17 +40,58 @@ class TC_LOG
virtual int open(const char *opt_name)=0;
virtual void close()=0;
- virtual int log_xid(THD *thd, my_xid xid)=0;
+ virtual int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered,
+ bool need_commit_ordered) = 0;
virtual int unlog(ulong cookie, my_xid xid)=0;
+
+protected:
+ /*
+ These methods are meant to be invoked from log_and_order() implementations
+ to run any prepare_ordered() respectively commit_ordered() methods in
+ participating handlers.
+
+ They must be called using suitable thread synchronisation to ensure that
+ they are each called in the correct commit order among all
+ transactions. However, it is only necessary to call them if the
+ corresponding flag passed to log_and_order is set (it is safe, but not
+ required, to call them when the flag is false).
+
+ The caller must be holding LOCK_prepare_ordered respectively
+ LOCK_commit_ordered when calling these methods.
+ */
+ void run_prepare_ordered(THD *thd, bool all);
+ void run_commit_ordered(THD *thd, bool all);
};
+/*
+ Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
+ and TC_LOG::run_commit_ordered(), or any other code that calls handler
+ prepare_ordered() or commit_ordered() methods.
+*/
+extern pthread_mutex_t LOCK_prepare_ordered;
+extern pthread_mutex_t LOCK_commit_ordered;
+
+extern void TC_init();
+extern void TC_destroy();
+
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
{
public:
TC_LOG_DUMMY() {}
int open(const char *opt_name) { return 0; }
void close() { }
- int log_xid(THD *thd, my_xid xid) { return 1; }
+ /*
+ TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
+ only use internal XA during commit when >= 2 XA-capable engines
+ participate.
+ */
+ int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered, bool need_commit_ordered)
+ {
+ DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
+ return 1;
+ }
int unlog(ulong cookie, my_xid xid) { return 0; }
};
@@ -76,6 +117,13 @@ class TC_LOG_MMAP: public TC_LOG
pthread_cond_t cond; // to wait for a sync
} PAGE;
+ /* List of THDs for which to invoke commit_ordered(), in order. */
+ struct commit_entry
+ {
+ struct commit_entry *next;
+ THD *thd;
+ };
+
char logname[FN_REFLEN];
File fd;
my_off_t file_length;
@@ -90,16 +138,38 @@ class TC_LOG_MMAP: public TC_LOG
*/
pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
pthread_cond_t COND_pool, COND_active;
+ /*
+ Queue of threads that need to call commit_ordered().
+ Access to this queue must be protected by LOCK_prepare_ordered.
+ */
+ commit_entry *commit_ordered_queue;
+ /*
+ This flag and condition are used to reserve the queue while threads in it
+ each run the commit_ordered() methods one after the other. Only once the
+ last commit_ordered() in the queue is done can we start on a new queue
+ run.
+
+ Since we start this process in the first thread in the queue and finish in
+ the last (and possibly different) thread, we need a condition variable for
+ this (we cannot unlock a mutex in a different thread than the one who
+ locked it).
+
+ The condition is used together with the LOCK_prepare_ordered mutex.
+ */
+ my_bool commit_ordered_queue_busy;
+ pthread_cond_t COND_queue_busy;
public:
TC_LOG_MMAP(): inited(0) {}
int open(const char *opt_name);
void close();
- int log_xid(THD *thd, my_xid xid);
+ int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
int recover();
private:
+ int log_one_transaction(my_xid xid);
void get_active_from_pool();
int sync();
int overflow();
@@ -218,7 +288,7 @@ public:
uint user_host_len, int thread_id,
const char *command_type, uint command_type_len,
const char *sql_text, uint sql_text_len);
- bool write(THD *thd, time_t current_time, time_t query_start_arg,
+ bool write(THD *thd, time_t current_time,
const char *user_host, uint user_host_len,
ulonglong query_utime, ulonglong lock_utime, bool is_command,
const char *sql_text, uint sql_text_len);
@@ -239,9 +309,31 @@ private:
time_t last_time;
};
+class binlog_trx_data;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
{
private:
+ struct group_commit_entry
+ {
+ struct group_commit_entry *next;
+ THD *thd;
+ binlog_trx_data *trx_data;
+ /*
+ Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
+ written during group commit. The incident_event is only valid if
+ trx_data->has_incident() is true.
+ */
+ Log_event *begin_event;
+ Log_event *end_event;
+ Log_event *incident_event;
+ /* Set during group commit to record any per-thread error. */
+ int error;
+ int commit_errno;
+ /* This is the `all' parameter for ha_commit_ordered(). */
+ bool all;
+ /* True if we come in through XA log_and_order(), false otherwise. */
+ };
+
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
pthread_mutex_t LOCK_index;
pthread_mutex_t LOCK_prep_xids;
@@ -283,6 +375,20 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
+ /* Queue of transactions queued up to participate in group commit. */
+ group_commit_entry *group_commit_queue;
+ /*
+ Condition variable to mark that the group commit queue is busy.
+ Used when each thread does its own commit_ordered() (when
+ binlog_optimize_thread_scheduling=1).
+ Used with the LOCK_commit_ordered mutex.
+ */
+ my_bool group_commit_queue_busy;
+ pthread_cond_t COND_queue_busy;
+ /* Total number of committed transactions. */
+ ulonglong num_commits;
+ /* Number of group commits done. */
+ ulonglong num_group_commits;
int write_to_file(IO_CACHE *cache);
/*
@@ -292,6 +398,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
int new_file_without_locking();
int new_file_impl(bool need_lock);
+ int write_transaction(group_commit_entry *entry);
+ bool write_transaction_to_binlog_events(group_commit_entry *entry);
+ void trx_group_commit_leader(group_commit_entry *leader);
+ void mark_xid_done();
+ void mark_xids_active(uint xid_count);
public:
using MYSQL_LOG::generate_name;
@@ -299,7 +410,41 @@ public:
/* This is relay log */
bool is_relay_log;
-
+ uint8 checksum_alg_reset; // to contain a new value when binlog is rotated
+ /*
+ Holds the checksum algorithm value of the last FD seen in the Relay-Log.
+ The initial value comes from the slave's local FD that heads
+ the very first Relay-Log file. Afterwards the value may change
+ with each FD_m received from the master.
+ Besides being used to verify the events that the IO thread receives
+ (except the 1st fake Rotate, see @c Master_info::checksum_alg_before_fd),
+ the value specifies if/how to compute the checksum for the slave's local
+ events and for the first fake Rotate (R_f^1) coming from the master.
+ R_f^1 must be logged checksum-compatibly with the RL's heading FD_s.
+
+ Legends for the checksum related comments:
+
+ FD - Format-Description event,
+ R - Rotate event
+ R_f - the fake Rotate event
+ E - an arbitrary event
+
+ The underscore indexes for any event
+ `_s' indicates the event is generated by Slave
+ `_m' - by Master
+
+ Two special underscore indexes of FD:
+ FD_q - Format Description event for queuing (relay-logging)
+ FD_e - Format Description event for executing (relay-logging)
+
+ Upper indexes:
+ E^n - n:th event in a sequence
+
+ RL - Relay Log
+ (A) - checksum algorithm descriptor value
+ FD.(A) - the value of (A) in FD
+ */
+ uint8 relay_log_checksum_alg;
/*
These describe the log's format. This is used only for relay logs.
_for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
@@ -310,6 +455,12 @@ public:
*/
Format_description_log_event *description_event_for_exec,
*description_event_for_queue;
+ /*
+ Binlog position of last commit (or non-transactional write) to the binlog.
+ Access to this is protected by LOCK_commit_ordered.
+ */
+ char last_commit_pos_file[FN_REFLEN];
+ my_off_t last_commit_pos_offset;
MYSQL_BIN_LOG();
/*
@@ -320,7 +471,8 @@ public:
int open(const char *opt_name);
void close();
- int log_xid(THD *thd, my_xid xid);
+ int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
int recover(IO_CACHE *log, Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
@@ -364,11 +516,14 @@ public:
int new_file();
void reset_gathered_updates(THD *thd);
- bool write(Log_event* event_info); // binary log write
- bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
- bool write_incident(THD *thd, bool lock);
- int write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
- bool flush_and_sync);
+ bool write(Log_event* event_info,
+ my_bool *with_annotate= 0); // binary log write
+ bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev, bool all);
+
+ bool write_incident_already_locked(THD *thd);
+ bool write_incident(THD *thd);
+ int write_cache(THD *thd, IO_CACHE *cache);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
@@ -423,6 +578,7 @@ public:
inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
inline IO_CACHE *get_index_file() { return &index_file;}
inline uint32 get_open_count() { return open_count; }
+ void set_status_variables(THD *thd);
};
class Log_event_handler
@@ -432,14 +588,14 @@ public:
virtual bool init()= 0;
virtual void cleanup()= 0;
- virtual bool log_slow(THD *thd, time_t current_time,
- time_t query_start_arg, const char *user_host,
+ virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+ const char *user_host,
uint user_host_len, ulonglong query_utime,
ulonglong lock_utime, bool is_command,
const char *sql_text, uint sql_text_len)= 0;
virtual bool log_error(enum loglevel level, const char *format,
va_list args)= 0;
- virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+ virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
uint user_host_len, int thread_id,
const char *command_type, uint command_type_len,
const char *sql_text, uint sql_text_len,
@@ -461,14 +617,14 @@ public:
virtual bool init();
virtual void cleanup();
- virtual bool log_slow(THD *thd, time_t current_time,
- time_t query_start_arg, const char *user_host,
+ virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+ const char *user_host,
uint user_host_len, ulonglong query_utime,
ulonglong lock_utime, bool is_command,
const char *sql_text, uint sql_text_len);
virtual bool log_error(enum loglevel level, const char *format,
va_list args);
- virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+ virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
uint user_host_len, int thread_id,
const char *command_type, uint command_type_len,
const char *sql_text, uint sql_text_len,
@@ -493,14 +649,14 @@ public:
virtual bool init();
virtual void cleanup();
- virtual bool log_slow(THD *thd, time_t current_time,
- time_t query_start_arg, const char *user_host,
+ virtual bool log_slow(THD *thd, my_hrtime_t current_time,
+ const char *user_host,
uint user_host_len, ulonglong query_utime,
ulonglong lock_utime, bool is_command,
const char *sql_text, uint sql_text_len);
virtual bool log_error(enum loglevel level, const char *format,
va_list args);
- virtual bool log_general(THD *thd, time_t event_time, const char *user_host,
+ virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host,
uint user_host_len, int thread_id,
const char *command_type, uint command_type_len,
const char *sql_text, uint sql_text_len,