diff options
Diffstat (limited to 'storage/innobase/include/trx0sys.h')
-rw-r--r-- | storage/innobase/include/trx0sys.h | 323 |
1 files changed, 159 insertions, 164 deletions
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 8c6b13f9dd4..7d26bf5b23e 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -28,99 +28,81 @@ Created 3/26/1996 Heikki Tuuri #include "univ.i" -#include "trx0types.h" -#include "fsp0types.h" -#include "fil0fil.h" #include "buf0buf.h" +#include "fil0fil.h" +#include "trx0types.h" #ifndef UNIV_HOTBACKUP +#include "mem0mem.h" #include "mtr0mtr.h" #include "ut0byte.h" #include "mem0mem.h" -#include "sync0sync.h" #include "ut0lst.h" -#include "ut0bh.h" #include "read0types.h" #include "page0types.h" -#include "ut0bh.h" +#include "ut0mutex.h" +#include "trx0trx.h" #ifdef WITH_WSREP #include "trx0xa.h" #endif /* WITH_WSREP */ -typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t; +typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t; -/** In a MySQL replication slave, in crash recovery we store the master log -file name and position here. */ -/* @{ */ -/** Master binlog file name */ -extern char trx_sys_mysql_master_log_name[]; -/** Master binlog file position. We have successfully got the updates -up to this position. -1 means that no crash recovery was needed, or -there was no master log position info inside InnoDB.*/ -extern ib_int64_t trx_sys_mysql_master_log_pos; -/* @} */ - -/** If this MySQL server uses binary logging, after InnoDB has been inited -and if it has done a crash recovery, we store the binlog file name and position -here. */ -/* @{ */ -/** Binlog file name */ -extern char trx_sys_mysql_bin_log_name[]; -/** Binlog file position, or -1 if unknown */ -extern ib_int64_t trx_sys_mysql_bin_log_pos; -/* @} */ +// Forward declaration +class MVCC; +class ReadView; /** The transaction system */ extern trx_sys_t* trx_sys; -/***************************************************************//** -Checks if a page address is the trx sys header page. -@return TRUE if trx sys header page */ +/** Checks if a page address is the trx sys header page. +@param[in] page_id page id +@return true if trx sys header page */ UNIV_INLINE -ibool +bool trx_sys_hdr_page( -/*=============*/ - ulint space, /*!< in: space */ - ulint page_no);/*!< in: page number */ + const page_id_t& page_id); + /*****************************************************************//** Creates and initializes the central memory structures for the transaction system. This is called when the database is started. @return min binary heap of rsegs to purge */ -UNIV_INTERN -ib_bh_t* +purge_pq_t* trx_sys_init_at_db_start(void); /*==========================*/ /*****************************************************************//** -Creates the trx_sys instance and initializes ib_bh and mutex. */ -UNIV_INTERN +Creates the trx_sys instance and initializes purge_queue and mutex. */ void trx_sys_create(void); /*================*/ /*****************************************************************//** Creates and initializes the transaction system at the database creation. */ -UNIV_INTERN void trx_sys_create_sys_pages(void); /*==========================*/ /****************************************************************//** Looks for a free slot for a rollback segment in the trx system file copy. -@return slot index or ULINT_UNDEFINED if not found */ -UNIV_INTERN +@return slot index or ULINT_UNDEFINED if not found */ ulint trx_sysf_rseg_find_free( /*====================*/ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mtr */ + bool include_tmp_slots, /*!< in: if true, report slots reserved + for temp-tablespace as free slots. */ + ulint nth_free_slots); /*!< in: allocate nth free slot. + 0 means next free slot. */ /***************************************************************//** Gets the pointer in the nth slot of the rseg array. -@return pointer to rseg object, NULL if slot not in use */ +@return pointer to rseg object, NULL if slot not in use */ UNIV_INLINE trx_rseg_t* trx_sys_get_nth_rseg( /*=================*/ - trx_sys_t* sys, /*!< in: trx system */ - ulint n); /*!< in: index of slot */ + trx_sys_t* sys, /*!< in: trx system */ + ulint n, /*!< in: index of slot */ + bool is_redo_rseg); /*!< in: true if redo rseg. */ /**********************************************************************//** Gets a pointer to the transaction system file copy and x-locks its page. -@return pointer to system file copy, page x-locked */ +@return pointer to system file copy, page x-locked */ UNIV_INLINE trx_sysf_t* trx_sysf_get( @@ -129,7 +111,7 @@ trx_sysf_get( /*****************************************************************//** Gets the space of the nth rollback segment slot in the trx system file copy. -@return space id */ +@return space id */ UNIV_INLINE ulint trx_sysf_rseg_get_space( @@ -140,7 +122,7 @@ trx_sysf_rseg_get_space( /*****************************************************************//** Gets the page number of the nth rollback segment slot in the trx system file copy. -@return page number, FIL_NULL if slot unused */ +@return page number, FIL_NULL if slot unused */ UNIV_INLINE ulint trx_sysf_rseg_get_page_no( @@ -173,11 +155,11 @@ trx_sysf_rseg_set_page_no( mtr_t* mtr); /*!< in: mtr */ /*****************************************************************//** Allocates a new transaction id. -@return new, allocated trx id */ +@return new, allocated trx id */ UNIV_INLINE trx_id_t -trx_sys_get_new_trx_id(void); -/*========================*/ +trx_sys_get_new_trx_id(); +/*===================*/ /*****************************************************************//** Determines the maximum transaction id. @return maximum currently allocated trx id; will be stale after the @@ -193,6 +175,14 @@ extern uint trx_rseg_n_slots_debug; #endif /*****************************************************************//** +Check if slot-id is reserved slot-id for noredo rsegs. */ +UNIV_INLINE +bool +trx_sys_is_noredo_rseg_slot( +/*========================*/ + ulint slot_id); /*!< in: slot_id to check */ + +/*****************************************************************//** Writes a trx id to an index page. In case that the id size changes in some future version, this function should be used instead of mach_write_... */ @@ -206,7 +196,7 @@ trx_write_trx_id( Reads a trx id from an index page. In case that the id size changes in some future version, this function should be used instead of mach_read_... -@return id */ +@return id */ UNIV_INLINE trx_id_t trx_read_trx_id( @@ -214,10 +204,7 @@ trx_read_trx_id( const byte* ptr); /*!< in: pointer to memory from where to read */ /****************************************************************//** Looks for the trx instance with the given id in the rw trx_list. -The caller must be holding trx_sys->mutex. -@return the trx handle or NULL if not found; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ +@return the trx handle or NULL if not found */ UNIV_INLINE trx_t* trx_get_rw_trx_by_id( @@ -228,18 +215,14 @@ Returns the minimum trx id in rw trx list. This is the smallest id for which the trx can possibly be active. (But, you must look at the trx->state to find out if the minimum trx id transaction itself is active, or already committed.) -@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ +@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ UNIV_INLINE trx_id_t trx_rw_min_trx_id(void); /*===================*/ /****************************************************************//** -Checks if a rw transaction with the given id is active. Caller must hold -trx_sys->mutex in shared mode. If the caller is not holding -lock_sys->mutex, the transaction may already have been committed. -@return transaction instance if active, or NULL; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ +Checks if a rw transaction with the given id is active. +@return transaction instance if active, or NULL */ UNIV_INLINE trx_t* trx_rw_is_active_low( @@ -249,25 +232,24 @@ trx_rw_is_active_low( that will be set if corrupt */ /****************************************************************//** Checks if a rw transaction with the given id is active. If the caller is -not holding lock_sys->mutex, the transaction may already have been +not holding trx_sys->mutex, the transaction may already have been committed. -@return transaction instance if active, or NULL; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ +@return transaction instance if active, or NULL; */ UNIV_INLINE trx_t* trx_rw_is_active( /*=============*/ trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt); /*!< in: NULL or pointer to a flag + ibool* corrupt, /*!< in: NULL or pointer to a flag that will be set if corrupt */ + bool do_ref_count); /*!< in: if true then increment the + trx_t::n_ref_count */ #ifdef UNIV_DEBUG /****************************************************************//** -Checks whether a trx is in one of rw_trx_list or ro_trx_list. -@return TRUE if is in */ -UNIV_INTERN -ibool -trx_in_trx_list( +Checks whether a trx is in on of rw_trx_list +@return TRUE if is in */ +bool +trx_in_rw_trx_list( /*============*/ const trx_t* in_trx) /*!< in: transaction */ MY_ATTRIBUTE((nonnull, warn_unused_result)); @@ -288,12 +270,11 @@ Updates the offset information about the end of the MySQL binlog entry which corresponds to the transaction just being committed. In a MySQL replication slave updates the latest master binlog position up to which replication has proceeded. */ -UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ const char* file_name,/*!< in: MySQL log file name */ - ib_int64_t offset, /*!< in: position in that log file */ + int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in the trx sys header */ #ifdef WITH_WSREP @@ -303,7 +284,6 @@ trx_sys_update_mysql_binlog_offset( /*****************************************************************//** Prints to stderr the MySQL binlog offset info in the trx system header if the magic number shows it valid. */ -UNIV_INTERN void trx_sys_print_mysql_binlog_offset(void); /*===================================*/ @@ -321,21 +301,12 @@ trx_sys_read_wsrep_checkpoint( XID* xid); /*!< out: WSREP XID */ #endif /* WITH_WSREP */ /*****************************************************************//** -Prints to stderr the MySQL master log offset info in the trx system header if -the magic number shows it valid. */ -UNIV_INTERN -void -trx_sys_print_mysql_master_log_pos(void); -/*====================================*/ -/*****************************************************************//** Initializes the tablespace tag system. */ -UNIV_INTERN void trx_sys_file_format_init(void); /*==========================*/ /*****************************************************************//** Closes the tablespace tag system. */ -UNIV_INTERN void trx_sys_file_format_close(void); /*===========================*/ @@ -344,20 +315,17 @@ Tags the system table space with minimum format id if it has not been tagged yet. WARNING: This function is only called during the startup and AFTER the redo log application during recovery has finished. */ -UNIV_INTERN void trx_sys_file_format_tag_init(void); /*==============================*/ /*****************************************************************//** Shutdown/Close the transaction system. */ -UNIV_INTERN void trx_sys_close(void); /*===============*/ /*****************************************************************//** Get the name representation of the file format from its id. -@return pointer to the name */ -UNIV_INTERN +@return pointer to the name */ const char* trx_sys_file_format_id_to_name( /*===========================*/ @@ -365,8 +333,7 @@ trx_sys_file_format_id_to_name( /*****************************************************************//** Set the file format id unconditionally except if it's already the same value. -@return TRUE if value updated */ -UNIV_INTERN +@return TRUE if value updated */ ibool trx_sys_file_format_max_set( /*========================*/ @@ -376,12 +343,13 @@ trx_sys_file_format_max_set( /********************************************************************* Creates the rollback segments @return number of rollback segments that are active. */ -UNIV_INTERN ulint trx_sys_create_rsegs( /*=================*/ ulint n_spaces, /*!< number of tablespaces for UNDO logs */ - ulint n_rsegs); /*!< number of rollback segments to create */ + ulint n_rsegs, /*!< number of rollback segments to create */ + ulint n_tmp_rsegs); /*!< number of rollback segments reserved for + temp-tables. */ /*****************************************************************//** Get the number of transaction in the system, independent of their state. @return count of transactions in trx_sys_t::trx_list */ @@ -393,7 +361,6 @@ trx_sys_get_n_rw_trx(void); /********************************************************************* Check if there are any active (non-prepared) transactions. @return total number of active transactions or 0 if none */ -UNIV_INTERN ulint trx_sys_any_active_transactions(void); /*=================================*/ @@ -401,7 +368,6 @@ trx_sys_any_active_transactions(void); /*****************************************************************//** Prints to stderr the MySQL binlog info in the system header if the magic number shows it valid. */ -UNIV_INTERN void trx_sys_print_mysql_binlog_offset_from_page( /*========================================*/ @@ -414,7 +380,6 @@ Even if the call succeeds and returns TRUE, the returned format id may be ULINT_UNDEFINED signalling that the format id was not present in the data file. @return TRUE if call succeeds */ -UNIV_INTERN ibool trx_sys_read_file_format_id( /*========================*/ @@ -425,7 +390,6 @@ trx_sys_read_file_format_id( /*****************************************************************//** Reads the file format id from the given per-table data file. @return TRUE if call succeeds */ -UNIV_INTERN ibool trx_sys_read_pertable_file_format_id( /*=================================*/ @@ -436,15 +400,13 @@ trx_sys_read_pertable_file_format_id( #endif /* !UNIV_HOTBACKUP */ /*****************************************************************//** Get the name representation of the file format from its id. -@return pointer to the max format name */ -UNIV_INTERN +@return pointer to the max format name */ const char* trx_sys_file_format_max_get(void); /*=============================*/ /*****************************************************************//** Check for the max file format tag stored on disk. -@return DB_SUCCESS or error code */ -UNIV_INTERN +@return DB_SUCCESS or error code */ dberr_t trx_sys_file_format_max_check( /*==========================*/ @@ -452,8 +414,7 @@ trx_sys_file_format_max_check( /********************************************************************//** Update the file format tag in the system tablespace only if the given format id is greater than the known max id. -@return TRUE if format_id was bigger than the known max id */ -UNIV_INTERN +@return TRUE if format_id was bigger than the known max id */ ibool trx_sys_file_format_max_upgrade( /*============================*/ @@ -461,31 +422,32 @@ trx_sys_file_format_max_upgrade( ulint format_id); /*!< in: file format identifier */ /*****************************************************************//** Get the name representation of the file format from its id. -@return pointer to the name */ -UNIV_INTERN +@return pointer to the name */ const char* trx_sys_file_format_id_to_name( /*===========================*/ const ulint id); /*!< in: id of the file format */ +/** +Add the transaction to the RW transaction set +@param trx transaction instance to add */ +UNIV_INLINE +void +trx_sys_rw_trx_add(trx_t* trx); + #ifdef UNIV_DEBUG /*************************************************************//** -Validate the trx_sys_t::trx_list. */ -UNIV_INTERN -ibool -trx_sys_validate_trx_list(void); -/*===========================*/ +Validate the trx_sys_t::rw_trx_list. +@return true if the list is valid */ +bool +trx_sys_validate_trx_list(); +/*========================*/ #endif /* UNIV_DEBUG */ -/* The automatically created system rollback segment has this id */ +/** The automatically created system rollback segment has this id */ #define TRX_SYS_SYSTEM_RSEG_ID 0 -/* Space id and page no where the trx system file copy resides */ -#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ -#include "fsp0fsp.h" -#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO - -/* The offset of the transaction system header on the page */ +/** The offset of the transaction system header on the page */ #define TRX_SYS FSEG_PAGE_DATA /** Transaction system header */ @@ -522,9 +484,7 @@ rollback segment. It initialized some arrays with this number of entries. We must remember this limit in order to keep file compatibility. */ #define TRX_SYS_OLD_N_RSEGS 256 -/** Maximum length of MySQL binlog file name, in bytes. -@see trx_sys_mysql_master_log_name -@see trx_sys_mysql_bin_log_name */ +/** Maximum length of MySQL binlog file name, in bytes. */ #define TRX_SYS_MYSQL_LOG_NAME_LEN 512 /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */ #define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 @@ -532,10 +492,6 @@ We must remember this limit in order to keep file compatibility. */ #if UNIV_PAGE_SIZE_MIN < 4096 # error "UNIV_PAGE_SIZE_MIN < 4096" #endif -/** The offset of the MySQL replication info in the trx system header; -this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ -#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000) - /** The offset of the MySQL binlog offset info in the trx system header */ #define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000) #define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is @@ -633,73 +589,112 @@ identifier is added to this 64-bit constant. */ #ifndef UNIV_HOTBACKUP /** The transaction system central memory data structure. */ -struct trx_sys_t{ +struct trx_sys_t { - ib_mutex_t mutex; /*!< mutex protecting most fields in + TrxSysMutex mutex; /*!< mutex protecting most fields in this structure except when noted otherwise */ - ulint n_prepared_trx; /*!< Number of transactions currently - in the XA PREPARED state */ - ulint n_prepared_recovered_trx; /*!< Number of transactions - currently in XA PREPARED state that are - also recovered. Such transactions cannot - be added during runtime. They can only - occur after recovery if mysqld crashed - while there were XA PREPARED - transactions. We disable query cache - if such transactions exist. */ - trx_id_t max_trx_id; /*!< The smallest number not yet + + MVCC* mvcc; /*!< Multi version concurrency control + manager */ + volatile trx_id_t + max_trx_id; /*!< The smallest number not yet assigned as a transaction id or - transaction number */ + transaction number. This is declared + volatile because it can be accessed + without holding any mutex during + AC-NL-RO view creation. */ + trx_ut_list_t serialisation_list; + /*!< Ordered on trx_t::no of all the + currenrtly active RW transactions */ #ifdef UNIV_DEBUG - trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions - which exist or existed */ -#endif - trx_list_t rw_trx_list; /*!< List of active and committed in + trx_id_t rw_max_trx_id; /*!< Max trx id of read-write + transactions which exist or existed */ +#endif /* UNIV_DEBUG */ + + char pad1[64]; /*!< To avoid false sharing */ + trx_ut_list_t rw_trx_list; /*!< List of active and committed in memory read-write transactions, sorted on trx id, biggest first. Recovered transactions are always on this list. */ - trx_list_t ro_trx_list; /*!< List of active and committed in - memory read-only transactions, sorted - on trx id, biggest first. NOTE: - The order for read-only transactions - is not necessary. We should exploit - this and increase concurrency during - add/remove. */ - trx_list_t mysql_trx_list; /*!< List of transactions created - for MySQL. All transactions on - ro_trx_list are on mysql_trx_list. The - rw_trx_list can contain system - transactions and recovered transactions - that will not be in the mysql_trx_list. - There can be active non-locking - auto-commit read only transactions that - are on this list but not on ro_trx_list. + + char pad2[64]; /*!< To avoid false sharing */ + trx_ut_list_t mysql_trx_list; /*!< List of transactions created + for MySQL. All user transactions are + on mysql_trx_list. The rw_trx_list + can contain system transactions and + recovered transactions that will not + be in the mysql_trx_list. mysql_trx_list may additionally contain transactions that have not yet been started in InnoDB. */ - trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS]; + + trx_ids_t rw_trx_ids; /*!< Array of Read write transaction IDs + for MVCC snapshot. A ReadView would take + a snapshot of these transactions whose + changes are not visible to it. We should + remove transactions from the list before + committing in memory and releasing locks + to ensure right order of removal and + consistent snapshot. */ + + char pad3[64]; /*!< To avoid false sharing */ + trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS]; /*!< Pointer array to rollback segments; NULL if slot not in use; created and destroyed in single-threaded mode; not protected by any mutex, because it is read-only during multi-threaded operation */ - ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY + ulint rseg_history_len; + /*!< Length of the TRX_RSEG_HISTORY list (update undo logs for committed transactions), protected by rseg->mutex */ - UT_LIST_BASE_NODE_T(read_view_t) view_list; - /*!< List of read views sorted - on trx no, biggest first */ + + trx_rseg_t* const pending_purge_rseg_array[TRX_SYS_N_RSEGS]; + /*!< Pointer array to rollback segments + between slot-1..slot-srv_tmp_undo_logs + that are now replaced by non-redo + rollback segments. We need them for + scheduling purge if any of the rollback + segment has pending records to purge. */ + + TrxIdSet rw_trx_set; /*!< Mapping from transaction id + to transaction instance */ + + ulint n_prepared_trx; /*!< Number of transactions currently + in the XA PREPARED state */ + + ulint n_prepared_recovered_trx; /*!< Number of transactions + currently in XA PREPARED state that are + also recovered. Such transactions cannot + be added during runtime. They can only + occur after recovery if mysqld crashed + while there were XA PREPARED + transactions. We disable query cache + if such transactions exist. */ }; /** When a trx id which is zero modulo this number (which must be a power of two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system page is updated */ -#define TRX_SYS_TRX_ID_WRITE_MARGIN 256 +#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256) #endif /* !UNIV_HOTBACKUP */ +/** Test if trx_sys->mutex is owned. */ +#define trx_sys_mutex_own() (trx_sys->mutex.is_owned()) + +/** Acquire the trx_sys->mutex. */ +#define trx_sys_mutex_enter() do { \ + mutex_enter(&trx_sys->mutex); \ +} while (0) + +/** Release the trx_sys->mutex. */ +#define trx_sys_mutex_exit() do { \ + trx_sys->mutex.exit(); \ +} while (0) + #ifndef UNIV_NONINL #include "trx0sys.ic" #endif |