diff options
Diffstat (limited to 'sql/log.h')
-rw-r--r-- | sql/log.h | 196 |
1 files changed, 176 insertions, 20 deletions
diff --git a/sql/log.h b/sql/log.h index d20ef2ef491..bfa63b79ffe 100644 --- a/sql/log.h +++ b/sql/log.h @@ -40,17 +40,58 @@ class TC_LOG virtual int open(const char *opt_name)=0; virtual void close()=0; - virtual int log_xid(THD *thd, my_xid xid)=0; + virtual int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, + bool need_commit_ordered) = 0; virtual int unlog(ulong cookie, my_xid xid)=0; + +protected: + /* + These methods are meant to be invoked from log_and_order() implementations + to run any prepare_ordered() respectively commit_ordered() methods in + participating handlers. + + They must be called using suitable thread syncronisation to ensure that + they are each called in the correct commit order among all + transactions. However, it is only necessary to call them if the + corresponding flag passed to log_and_order is set (it is safe, but not + required, to call them when the flag is false). + + The caller must be holding LOCK_prepare_ordered respectively + LOCK_commit_ordered when calling these methods. + */ + void run_prepare_ordered(THD *thd, bool all); + void run_commit_ordered(THD *thd, bool all); }; +/* + Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered() + and TC_LOG::run_commit_ordered(), or any other code that calls handler + prepare_ordered() or commit_ordered() methods. +*/ +extern pthread_mutex_t LOCK_prepare_ordered; +extern pthread_mutex_t LOCK_commit_ordered; + +extern void TC_init(); +extern void TC_destroy(); + class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging { public: TC_LOG_DUMMY() {} int open(const char *opt_name) { return 0; } void close() { } - int log_xid(THD *thd, my_xid xid) { return 1; } + /* + TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we + only use internal XA during commit when >= 2 XA-capable engines + participate. + */ + int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, bool need_commit_ordered) + { + DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); + return 1; + } int unlog(ulong cookie, my_xid xid) { return 0; } }; @@ -76,6 +117,13 @@ class TC_LOG_MMAP: public TC_LOG pthread_cond_t cond; // to wait for a sync } PAGE; + /* List of THDs for which to invoke commit_ordered(), in order. */ + struct commit_entry + { + struct commit_entry *next; + THD *thd; + }; + char logname[FN_REFLEN]; File fd; my_off_t file_length; @@ -90,16 +138,38 @@ class TC_LOG_MMAP: public TC_LOG */ pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync; pthread_cond_t COND_pool, COND_active; + /* + Queue of threads that need to call commit_ordered(). + Access to this queue must be protected by LOCK_prepare_ordered. + */ + commit_entry *commit_ordered_queue; + /* + This flag and condition is used to reserve the queue while threads in it + each run the commit_ordered() methods one after the other. Only once the + last commit_ordered() in the queue is done can we start on a new queue + run. + + Since we start this process in the first thread in the queue and finish in + the last (and possibly different) thread, we need a condition variable for + this (we cannot unlock a mutex in a different thread than the one who + locked it). + + The condition is used together with the LOCK_prepare_ordered mutex. + */ + my_bool commit_ordered_queue_busy; + pthread_cond_t COND_queue_busy; public: TC_LOG_MMAP(): inited(0) {} int open(const char *opt_name); void close(); - int log_xid(THD *thd, my_xid xid); + int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, bool need_commit_ordered); int unlog(ulong cookie, my_xid xid); int recover(); private: + int log_one_transaction(my_xid xid); void get_active_from_pool(); int sync(); int overflow(); @@ -218,7 +288,7 @@ public: uint user_host_len, int thread_id, const char *command_type, uint command_type_len, const char *sql_text, uint sql_text_len); - bool write(THD *thd, time_t current_time, time_t query_start_arg, + bool write(THD *thd, time_t current_time, const char *user_host, uint user_host_len, ulonglong query_utime, ulonglong lock_utime, bool is_command, const char *sql_text, uint sql_text_len); @@ -239,9 +309,31 @@ private: time_t last_time; }; +class binlog_trx_data; class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG { private: + struct group_commit_entry + { + struct group_commit_entry *next; + THD *thd; + binlog_trx_data *trx_data; + /* + Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be + written during group commit. The incident_event is only valid if + trx_data->has_incident() is true. + */ + Log_event *begin_event; + Log_event *end_event; + Log_event *incident_event; + /* Set during group commit to record any per-thread error. */ + int error; + int commit_errno; + /* This is the `all' parameter for ha_commit_ordered(). */ + bool all; + /* True if we come in through XA log_and_order(), false otherwise. */ + }; + /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ pthread_mutex_t LOCK_index; pthread_mutex_t LOCK_prep_xids; @@ -283,6 +375,20 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG In 5.0 it's 0 for relay logs too! */ bool no_auto_events; + /* Queue of transactions queued up to participate in group commit. */ + group_commit_entry *group_commit_queue; + /* + Condition variable to mark that the group commit queue is busy. + Used when each thread does it's own commit_ordered() (when + binlog_optimize_thread_scheduling=1). + Used with the LOCK_commit_ordered mutex. + */ + my_bool group_commit_queue_busy; + pthread_cond_t COND_queue_busy; + /* Total number of committed transactions. */ + ulonglong num_commits; + /* Number of group commits done. */ + ulonglong num_group_commits; int write_to_file(IO_CACHE *cache); /* @@ -292,6 +398,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG */ int new_file_without_locking(); int new_file_impl(bool need_lock); + int write_transaction(group_commit_entry *entry); + bool write_transaction_to_binlog_events(group_commit_entry *entry); + void trx_group_commit_leader(group_commit_entry *leader); + void mark_xid_done(); + void mark_xids_active(uint xid_count); public: using MYSQL_LOG::generate_name; @@ -299,7 +410,41 @@ public: /* This is relay log */ bool is_relay_log; - + uint8 checksum_alg_reset; // to contain a new value when binlog is rotated + /* + Holds the last seen in Relay-Log FD's checksum alg value. + The initial value comes from the slave's local FD that heads + the very first Relay-Log file. In the following the value may change + with each received master's FD_m. + Besides to be used in verification events that IO thread receives + (except the 1st fake Rotate, see @c Master_info:: checksum_alg_before_fd), + the value specifies if/how to compute checksum for slave's local events + and the first fake Rotate (R_f^1) coming from the master. + R_f^1 needs logging checksum-compatibly with the RL's heading FD_s. + + Legends for the checksum related comments: + + FD - Format-Description event, + R - Rotate event + R_f - the fake Rotate event + E - an arbirary event + + The underscore indexes for any event + `_s' indicates the event is generated by Slave + `_m' - by Master + + Two special underscore indexes of FD: + FD_q - Format Description event for queuing (relay-logging) + FD_e - Format Description event for executing (relay-logging) + + Upper indexes: + E^n - n:th event is a sequence + + RL - Relay Log + (A) - checksum algorithm descriptor value + FD.(A) - the value of (A) in FD + */ + uint8 relay_log_checksum_alg; /* These describe the log's format. This is used only for relay logs. _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's @@ -310,6 +455,12 @@ public: */ Format_description_log_event *description_event_for_exec, *description_event_for_queue; + /* + Binlog position of last commit (or non-transactional write) to the binlog. + Access to this is protected by LOCK_commit_ordered. + */ + char last_commit_pos_file[FN_REFLEN]; + my_off_t last_commit_pos_offset; MYSQL_BIN_LOG(); /* @@ -320,7 +471,8 @@ public: int open(const char *opt_name); void close(); - int log_xid(THD *thd, my_xid xid); + int log_and_order(THD *thd, my_xid xid, bool all, + bool need_prepare_ordered, bool need_commit_ordered); int unlog(ulong cookie, my_xid xid); int recover(IO_CACHE *log, Format_description_log_event *fdle); #if !defined(MYSQL_CLIENT) @@ -364,11 +516,14 @@ public: int new_file(); void reset_gathered_updates(THD *thd); - bool write(Log_event* event_info); // binary log write - bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident); - bool write_incident(THD *thd, bool lock); - int write_cache(THD *thd, IO_CACHE *cache, bool lock_log, - bool flush_and_sync); + bool write(Log_event* event_info, + my_bool *with_annotate= 0); // binary log write + bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data, + Log_event *end_ev, bool all); + + bool write_incident_already_locked(THD *thd); + bool write_incident(THD *thd); + int write_cache(THD *thd, IO_CACHE *cache); void set_write_error(THD *thd); bool check_write_error(THD *thd); @@ -423,6 +578,7 @@ public: inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} inline IO_CACHE *get_index_file() { return &index_file;} inline uint32 get_open_count() { return open_count; } + void set_status_variables(THD *thd); }; class Log_event_handler @@ -432,14 +588,14 @@ public: virtual bool init()= 0; virtual void cleanup()= 0; - virtual bool log_slow(THD *thd, time_t current_time, - time_t query_start_arg, const char *user_host, + virtual bool log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, uint user_host_len, ulonglong query_utime, ulonglong lock_utime, bool is_command, const char *sql_text, uint sql_text_len)= 0; virtual bool log_error(enum loglevel level, const char *format, va_list args)= 0; - virtual bool log_general(THD *thd, time_t event_time, const char *user_host, + virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host, uint user_host_len, int thread_id, const char *command_type, uint command_type_len, const char *sql_text, uint sql_text_len, @@ -461,14 +617,14 @@ public: virtual bool init(); virtual void cleanup(); - virtual bool log_slow(THD *thd, time_t current_time, - time_t query_start_arg, const char *user_host, + virtual bool log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, uint user_host_len, ulonglong query_utime, ulonglong lock_utime, bool is_command, const char *sql_text, uint sql_text_len); virtual bool log_error(enum loglevel level, const char *format, va_list args); - virtual bool log_general(THD *thd, time_t event_time, const char *user_host, + virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host, uint user_host_len, int thread_id, const char *command_type, uint command_type_len, const char *sql_text, uint sql_text_len, @@ -493,14 +649,14 @@ public: virtual bool init(); virtual void cleanup(); - virtual bool log_slow(THD *thd, time_t current_time, - time_t query_start_arg, const char *user_host, + virtual bool log_slow(THD *thd, my_hrtime_t current_time, + const char *user_host, uint user_host_len, ulonglong query_utime, ulonglong lock_utime, bool is_command, const char *sql_text, uint sql_text_len); virtual bool log_error(enum loglevel level, const char *format, va_list args); - virtual bool log_general(THD *thd, time_t event_time, const char *user_host, + virtual bool log_general(THD *thd, my_hrtime_t event_time, const char *user_host, uint user_host_len, int thread_id, const char *command_type, uint command_type_len, const char *sql_text, uint sql_text_len, |