diff options
author | Sergei Golubchik <sergii@pisem.net> | 2013-12-22 17:06:50 +0100 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2013-12-22 17:06:50 +0100 |
commit | ffa8c4cfcc41d4f160e3bdfca5cfd4b01a7d6e63 (patch) | |
tree | 728585c36f22a5db3cea796430883d0ebc5c05eb /storage/xtradb/include/trx0trx.h | |
parent | e27c34f9e4ca15c797fcd3191ee5679c2f237a09 (diff) | |
parent | 52c26f7a1f675185d2ef1a28aca7f9bcc67c6414 (diff) | |
download | mariadb-git-ffa8c4cfcc41d4f160e3bdfca5cfd4b01a7d6e63.tar.gz |
Percona-Server-5.6.14-rel62.0 merge
support ha_innodb.so as a dynamic plugin.
* remove obsolete *,innodb_plugin.rdiff files
* s/--plugin-load=/--plugin-load-add=/
* MYSQL_PLUGIN_IMPORT glob_hostname[]
* use my_error instead of push_warning_printf(ER_DEFAULT)
* don't use tdc_size and tc_size in a module
update test cases (XtraDB is 5.6.14, InnoDB is 5.6.10)
* copy new tests over
* disable some tests for (old) InnoDB
* delete XtraDB tests that no longer apply
small compatibility changes:
* s/HTON_EXTENDED_KEYS/HTON_SUPPORTS_EXTENDED_KEYS/
* revert unnecessary InnoDB changes to make it a bit closer to the upstream
fix XtraDB to compile on Windows (both as a static and a dynamic plugin)
disable XtraDB on Windows (where it deadlocks) and on platforms where no atomic ops are available (e.g. CentOS 5)
storage/innobase/handler/ha_innodb.cc:
revert a few unnecessary changes to make it a bit closer to the original InnoDB
storage/innobase/include/univ.i:
correct the version to match what it was merged from
Diffstat (limited to 'storage/xtradb/include/trx0trx.h')
-rw-r--r-- | storage/xtradb/include/trx0trx.h | 952 |
1 file changed, 606 insertions, 346 deletions
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 4ab8e5b2cc5..82e9a90fcfb 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -31,23 +31,18 @@ Created 3/26/1996 Heikki Tuuri #include "dict0types.h" #ifndef UNIV_HOTBACKUP #include "lock0types.h" +#include "log0log.h" #include "usr0types.h" #include "que0types.h" #include "mem0mem.h" #include "read0types.h" #include "trx0xa.h" #include "ut0vec.h" +#include "fts0fts.h" /** Dummy session used currently in MySQL interface */ extern sess_t* trx_dummy_sess; -/** Number of transactions currently allocated for MySQL: protected by -the kernel mutex */ -extern ulint trx_n_mysql_transactions; -/** Number of transactions currently in the XA PREPARED state: protected by -the kernel mutex */ -extern ulint trx_n_prepared; - /********************************************************************//** In XtraDB it is impossible for a transaction to own a search latch outside of InnoDB code, so there is nothing 
to release on demand. We keep this function to @@ -82,15 +77,6 @@ const dict_index_t* trx_get_error_info( /*===============*/ const trx_t* trx); /*!< in: trx object */ -/****************************************************************//** -Creates and initializes a transaction object. -@return own: the transaction */ -UNIV_INTERN -trx_t* -trx_create( -/*=======*/ - sess_t* sess) /*!< in: session */ - __attribute__((nonnull)); /********************************************************************//** Creates a transaction object for MySQL. @return own: transaction object */ @@ -106,11 +92,11 @@ trx_t* trx_allocate_for_background(void); /*=============================*/ /********************************************************************//** -Frees a transaction object. */ +Frees a transaction object of a background operation of the master thread. */ UNIV_INTERN void -trx_free( -/*=====*/ +trx_free_for_background( +/*====================*/ trx_t* trx); /*!< in, own: trx object */ /********************************************************************//** At shutdown, frees a transaction object that is in the PREPARED state. */ @@ -127,13 +113,6 @@ void trx_free_for_mysql( /*===============*/ trx_t* trx); /*!< in, own: trx object */ -/********************************************************************//** -Frees a transaction object of a background operation of the master thread. */ -UNIV_INTERN -void -trx_free_for_background( -/*====================*/ - trx_t* trx); /*!< in, own: trx object */ /****************************************************************//** Creates trx objects for transactions and initializes the trx list of trx_sys at database start. Rollback segment and undo log lists must @@ -144,51 +123,87 @@ UNIV_INTERN void trx_lists_init_at_db_start(void); /*============================*/ -/****************************************************************//** -Starts a new transaction. 
-@return TRUE if success, FALSE if the rollback segment could not -support this many transactions */ -UNIV_INTERN -ibool -trx_start( -/*======*/ - trx_t* trx, /*!< in: transaction */ - ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ -/****************************************************************//** -Starts a new transaction. -@return TRUE */ -UNIV_INTERN -ibool -trx_start_low( -/*==========*/ - trx_t* trx, /*!< in: transaction */ - ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ + +#ifdef UNIV_DEBUG +#define trx_start_if_not_started_xa(t) \ + { \ + (t)->start_line = __LINE__; \ + (t)->start_file = __FILE__; \ + trx_start_if_not_started_xa_low((t)); \ + } +#else +#define trx_start_if_not_started_xa(t) \ + trx_start_if_not_started_xa_low((t)) +#endif /* UNIV_DEBUG */ + /*************************************************************//** Starts the transaction if it is not yet started. */ -UNIV_INLINE +UNIV_INTERN void -trx_start_if_not_started( -/*=====================*/ +trx_start_if_not_started_xa_low( +/*============================*/ trx_t* trx); /*!< in: transaction */ /*************************************************************//** -Starts the transaction if it is not yet started. Assumes we have reserved -the kernel mutex! */ -UNIV_INLINE +Starts the transaction if it is not yet started. 
*/ +UNIV_INTERN void trx_start_if_not_started_low( /*=========================*/ trx_t* trx); /*!< in: transaction */ + +#ifdef UNIV_DEBUG +#define trx_start_if_not_started(t) \ + { \ + (t)->start_line = __LINE__; \ + (t)->start_file = __FILE__; \ + trx_start_if_not_started_low((t)); \ + } +#else +#define trx_start_if_not_started(t) \ + trx_start_if_not_started_low((t)) +#endif /* UNIV_DEBUG */ + +/*************************************************************//** +Starts the transaction for a DDL operation. */ +UNIV_INTERN +void +trx_start_for_ddl_low( +/*==================*/ + trx_t* trx, /*!< in/out: transaction */ + trx_dict_op_t op) /*!< in: dictionary operation type */ + __attribute__((nonnull)); + +#ifdef UNIV_DEBUG +#define trx_start_for_ddl(t, o) \ + { \ + ut_ad((t)->start_file == 0); \ + (t)->start_line = __LINE__; \ + (t)->start_file = __FILE__; \ + trx_start_for_ddl_low((t), (o)); \ + } +#else +#define trx_start_for_ddl(t, o) \ + trx_start_for_ddl_low((t), (o)) +#endif /* UNIV_DEBUG */ + /****************************************************************//** Commits a transaction. */ UNIV_INTERN void -trx_commit_off_kernel( -/*==================*/ - trx_t* trx); /*!< in: transaction */ +trx_commit( +/*=======*/ + trx_t* trx) /*!< in/out: transaction */ + __attribute__((nonnull)); +/****************************************************************//** +Commits a transaction and a mini-transaction. */ +UNIV_INTERN +void +trx_commit_low( +/*===========*/ + trx_t* trx, /*!< in/out: transaction */ + mtr_t* mtr) /*!< in/out: mini-transaction (will be committed), + or NULL if trx made no modifications */ + __attribute__((nonnull(1))); /****************************************************************//** Cleans up a transaction at database startup. The cleanup is needed if the transaction already got to the middle of a commit when the database @@ -202,18 +217,17 @@ trx_cleanup_at_db_startup( Does the transaction commit for MySQL. 
@return DB_SUCCESS or error number */ UNIV_INTERN -ulint +dberr_t trx_commit_for_mysql( /*=================*/ - trx_t* trx); /*!< in: trx handle */ + trx_t* trx); /*!< in/out: transaction */ /**********************************************************************//** -Does the transaction prepare for MySQL. -@return 0 or error number */ +Does the transaction prepare for MySQL. */ UNIV_INTERN -ulint +void trx_prepare_for_mysql( /*==================*/ - trx_t* trx); /*!< in: trx handle */ + trx_t* trx); /*!< in/out: trx handle */ /**********************************************************************//** This function is used to find number of prepared transactions and their transaction objects for a recovery. @@ -227,7 +241,9 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL; on match, the trx->xid will be invalidated */ +@return trx or NULL; on match, the trx->xid will be invalidated; +note that the trx may have been committed, unless the caller is +holding lock_sys->mutex */ UNIV_INTERN trx_t * trx_get_trx_by_xid( @@ -235,13 +251,13 @@ trx_get_trx_by_xid( const XID* xid); /*!< in: X/Open XA transaction identifier */ /**********************************************************************//** If required, flushes the log to disk if we called trx_commit_for_mysql() -with trx->flush_log_later == TRUE. -@return 0 or error number */ +with trx->flush_log_later == TRUE. */ UNIV_INTERN -ulint +void trx_commit_complete_for_mysql( /*==========================*/ - trx_t* trx); /*!< in: trx handle */ + trx_t* trx) /*!< in/out: transaction */ + __attribute__((nonnull)); /**********************************************************************//** Marks the latest SQL statement ended. 
*/ UNIV_INTERN @@ -259,86 +275,20 @@ read_view_t* trx_assign_read_view( /*=================*/ trx_t* trx); /*!< in: active transaction */ -/***********************************************************//** -The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to -the TRX_QUE_RUNNING state and releases query threads which were -waiting for a lock in the wait_thrs list. */ -UNIV_INTERN -void -trx_end_lock_wait( -/*==============*/ - trx_t* trx); /*!< in: transaction */ /****************************************************************//** -Sends a signal to a trx object. */ +Prepares a transaction for commit/rollback. */ UNIV_INTERN void -trx_sig_send( -/*=========*/ - trx_t* trx, /*!< in: trx handle */ - ulint type, /*!< in: signal type */ - ulint sender, /*!< in: TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver_thr, /*!< in: query thread which wants the - reply, or NULL; if type is - TRX_SIG_END_WAIT, this must be NULL */ - trx_savept_t* savept, /*!< in: possible rollback savepoint, or - NULL */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if the parameter - is NULL, it is ignored */ -/****************************************************************//** -Send the reply message when a signal in the queue of the trx has -been handled. */ -UNIV_INTERN -void -trx_sig_reply( -/*==========*/ - trx_sig_t* sig, /*!< in: signal */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/****************************************************************//** -Removes the signal object from a trx signal queue. 
*/ -UNIV_INTERN -void -trx_sig_remove( -/*===========*/ - trx_t* trx, /*!< in: trx handle */ - trx_sig_t* sig); /*!< in, own: signal */ -/****************************************************************//** -Starts handling of a trx signal. */ -UNIV_INTERN -void -trx_sig_start_handle( -/*=================*/ - trx_t* trx, /*!< in: trx handle */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/****************************************************************//** -Ends signal handling. If the session is in the error state, and -trx->graph_before_signal_handling != NULL, returns control to the error -handling routine of the graph (currently only returns the control to the -graph root which then sends an error message to the client). */ -UNIV_INTERN -void -trx_end_signal_handling( -/*====================*/ - trx_t* trx); /*!< in: trx */ +trx_commit_or_rollback_prepare( +/*===========================*/ + trx_t* trx); /*!< in/out: transaction */ /*********************************************************************//** Creates a commit command node struct. @return own: commit node struct */ UNIV_INTERN commit_node_t* -commit_node_create( -/*===============*/ +trx_commit_node_create( +/*===================*/ mem_heap_t* heap); /*!< in: mem heap where created */ /***********************************************************//** Performs an execution step for a commit type node in a query graph. @@ -350,37 +300,59 @@ trx_commit_step( que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** -Prints info about a transaction to the given file. The caller must own the -kernel mutex. */ +Prints info about a transaction. +Caller must hold trx_sys->mutex. 
*/ +UNIV_INTERN +void +trx_print_low( +/*==========*/ + FILE* f, + /*!< in: output stream */ + const trx_t* trx, + /*!< in: transaction */ + ulint max_query_len, + /*!< in: max query length to print, + or 0 to use the default max length */ + ulint n_rec_locks, + /*!< in: lock_number_of_rows_locked(&trx->lock) */ + ulint n_trx_locks, + /*!< in: length of trx->lock.trx_locks */ + ulint heap_size) + /*!< in: mem_heap_get_size(trx->lock.lock_heap) */ + __attribute__((nonnull)); + +/**********************************************************************//** +Prints info about a transaction. +The caller must hold lock_sys->mutex and trx_sys->mutex. +When possible, use trx_print() instead. */ +UNIV_INTERN +void +trx_print_latched( +/*==============*/ + FILE* f, /*!< in: output stream */ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ + __attribute__((nonnull)); + +/**********************************************************************//** +Prints info about a transaction. +Acquires and releases lock_sys->mutex and trx_sys->mutex. */ UNIV_INTERN void trx_print( /*======*/ - FILE* f, /*!< in: output stream */ - trx_t* trx, /*!< in: transaction */ - ulint max_query_len); /*!< in: max query length to print, or 0 to - use the default max length */ - -/** Type of data dictionary operation */ -typedef enum trx_dict_op { - /** The transaction is not modifying the data dictionary. */ - TRX_DICT_OP_NONE = 0, - /** The transaction is creating a table or an index, or - dropping a table. The table must be dropped in crash - recovery. This and TRX_DICT_OP_NONE are the only possible - operation modes in crash recovery. */ - TRX_DICT_OP_TABLE = 1, - /** The transaction is creating or dropping an index in an - existing table. In crash recovery, the data dictionary - must be locked, but the table must not be dropped. 
*/ - TRX_DICT_OP_INDEX = 2 -} trx_dict_op_t; + FILE* f, /*!< in: output stream */ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ + __attribute__((nonnull)); /**********************************************************************//** Determine if a transaction is a dictionary operation. @return dictionary operation mode */ UNIV_INLINE -enum trx_dict_op +enum trx_dict_op_t trx_get_dict_operation( /*===================*/ const trx_t* trx) /*!< in: transaction */ @@ -392,18 +364,49 @@ void trx_set_dict_operation( /*===================*/ trx_t* trx, /*!< in/out: transaction */ - enum trx_dict_op op); /*!< in: operation, not + enum trx_dict_op_t op); /*!< in: operation, not TRX_DICT_OP_NONE */ #ifndef UNIV_HOTBACKUP /**********************************************************************//** +Determines if a transaction is in the given state. +The caller must hold trx_sys->mutex, or it must be the thread +that is serving a running transaction. +A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list +unless it is a non-locking autocommit read only transaction, which is only +in trx_sys->mysql_trx_list. +@return TRUE if trx->state == state */ +UNIV_INLINE +ibool +trx_state_eq( +/*=========*/ + const trx_t* trx, /*!< in: transaction */ + trx_state_t state) /*!< in: state; + if state != TRX_STATE_NOT_STARTED + asserts that + trx->state != TRX_STATE_NOT_STARTED */ + __attribute__((nonnull, warn_unused_result)); +# ifdef UNIV_DEBUG +/**********************************************************************//** +Asserts that a transaction has been started. +The caller must hold trx_sys->mutex. 
+@return TRUE if started */ +UNIV_INTERN +ibool +trx_assert_started( +/*===============*/ + const trx_t* trx) /*!< in: transaction */ + __attribute__((nonnull, warn_unused_result)); +# endif /* UNIV_DEBUG */ + +/**********************************************************************//** Determines if the currently running transaction has been interrupted. @return TRUE if interrupted */ UNIV_INTERN ibool trx_is_interrupted( /*===============*/ - trx_t* trx); /*!< in: transaction */ + const trx_t* trx); /*!< in: transaction */ /**********************************************************************//** Determines if the currently running transaction is in strict mode. @return TRUE if strict */ @@ -421,7 +424,7 @@ Calculates the "weight" of a transaction. The weight of one transaction is estimated as the number of altered rows + the number of locked rows. @param t transaction @return transaction weight */ -#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->trx_locks)) +#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks)) /*******************************************************************//** Compares the "weight" (or size) of two transactions. Transactions that @@ -449,6 +452,16 @@ trx_get_que_state_str( /*==================*/ const trx_t* trx); /*!< in: transaction */ +/****************************************************************//** +Assign a read-only transaction a rollback-segment, if it is attempting +to write to a TEMPORARY table. */ +UNIV_INTERN +void +trx_assign_rseg( +/*============*/ + trx_t* trx); /*!< A read-only transaction that + needs to be assigned a RBS. */ + /*************************************************************//** Callback function for trx_find_descriptor() to compare trx IDs. 
*/ UNIV_INTERN @@ -466,53 +479,309 @@ trx_release_descriptor( /*===================*/ trx_t* trx); /*!< in: trx pointer */ -/* Signal to a transaction */ -struct trx_sig_struct{ - unsigned type:3; /*!< signal type */ - unsigned sender:1; /*!< TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver; /*!< non-NULL if the sender of the signal - wants reply after the operation induced - by the signal is completed */ - trx_savept_t savept; /*!< possible rollback savepoint */ - UT_LIST_NODE_T(trx_sig_t) - signals; /*!< queue of pending signals to the - transaction */ - UT_LIST_NODE_T(trx_sig_t) - reply_signals; /*!< list of signals for which the sender - transaction is waiting a reply */ +/*******************************************************************//** +Transactions that aren't started by the MySQL server don't set +the trx_t::mysql_thd field. For such transactions we set the lock +wait timeout to 0 instead of the user configured value that comes +from innodb_lock_wait_timeout via trx_t::mysql_thd. +@param trx transaction +@return lock wait timeout in seconds */ +#define trx_lock_wait_timeout_get(trx) \ + ((trx)->mysql_thd != NULL \ + ? thd_lock_wait_timeout((trx)->mysql_thd) \ + : 0) + +/*******************************************************************//** +Determine if the transaction is a non-locking autocommit select +(implied read-only). +@param t transaction +@return true if non-locking autocommit select transaction. */ +#define trx_is_autocommit_non_locking(t) \ +((t)->auto_commit && (t)->will_lock == 0) + +/*******************************************************************//** +Determine if the transaction is a non-locking autocommit select +with an explicit check for the read-only status. +@param t transaction +@return true if non-locking autocommit read-only transaction. 
*/ +#define trx_is_ac_nl_ro(t) \ +((t)->read_only && trx_is_autocommit_non_locking((t))) + +/*******************************************************************//** +Assert that the transaction is in the trx_sys_t::rw_trx_list */ +#define assert_trx_in_rw_list(t) do { \ + ut_ad(!(t)->read_only); \ + assert_trx_in_list(t); \ +} while (0) + +/*******************************************************************//** +Assert that the transaction is either in trx_sys->ro_trx_list or +trx_sys->rw_trx_list but not both and it cannot be an autocommit +non-locking select */ +#define assert_trx_in_list(t) do { \ + ut_ad((t)->in_ro_trx_list == (t)->read_only); \ + ut_ad((t)->in_rw_trx_list == !(t)->read_only); \ + ut_ad(!trx_is_autocommit_non_locking((t))); \ + switch ((t)->state) { \ + case TRX_STATE_PREPARED: \ + /* fall through */ \ + case TRX_STATE_ACTIVE: \ + case TRX_STATE_COMMITTED_IN_MEMORY: \ + continue; \ + case TRX_STATE_NOT_STARTED: \ + break; \ + } \ + ut_error; \ +} while (0) + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Assert that an autocommit non-locking select cannot be in the +ro_trx_list nor the rw_trx_list and that it is a read-only transaction. +The tranasction must be in the mysql_trx_list. */ +# define assert_trx_nonlocking_or_in_list(t) \ + do { \ + if (trx_is_autocommit_non_locking(t)) { \ + trx_state_t t_state = (t)->state; \ + ut_ad((t)->read_only); \ + ut_ad(!(t)->is_recovered); \ + ut_ad(!(t)->in_ro_trx_list); \ + ut_ad(!(t)->in_rw_trx_list); \ + ut_ad((t)->in_mysql_trx_list); \ + ut_ad(t_state == TRX_STATE_NOT_STARTED \ + || t_state == TRX_STATE_ACTIVE); \ + } else { \ + assert_trx_in_list(t); \ + } \ + } while (0) +#else /* UNIV_DEBUG */ +/*******************************************************************//** +Assert that an autocommit non-locking slect cannot be in the +ro_trx_list nor the rw_trx_list and that it is a read-only transaction. +The tranasction must be in the mysql_trx_list. 
*/ +# define assert_trx_nonlocking_or_in_list(trx) ((void)0) +#endif /* UNIV_DEBUG */ + +/*******************************************************************//** +Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state +captures the state of the query thread during the execution of a query. +This is different from a transaction state. The query state of a transaction +can be updated asynchronously by other threads. The other threads can be +system threads, like the timeout monitor thread or user threads executing +other queries. Another thing to be mindful of is that there is a delay between +when a query thread is put into LOCK_WAIT state and before it actually starts +waiting. Between these two events it is possible that the query thread is +granted the lock it was waiting for, which implies that the state can be changed +asynchronously. + +All these operations take place within the context of locking. Therefore state +changes within the locking code must acquire both the lock mutex and the +trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or +trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient +to only acquire the trx->mutex. +To query the state either of the mutexes is sufficient within the locking +code and no mutex is required when the query thread is no longer waiting. */ + +/** The locks and state of an active transaction. Protected by +lock_sys->mutex, trx->mutex or both. */ +struct trx_lock_t { + ulint n_active_thrs; /*!< number of active query threads */ + + trx_que_t que_state; /*!< valid when trx->state + == TRX_STATE_ACTIVE: TRX_QUE_RUNNING, + TRX_QUE_LOCK_WAIT, ... 
*/ + + lock_t* wait_lock; /*!< if trx execution state is + TRX_QUE_LOCK_WAIT, this points to + the lock request, otherwise this is + NULL; set to non-NULL when holding + both trx->mutex and lock_sys->mutex; + set to NULL when holding + lock_sys->mutex; readers should + hold lock_sys->mutex, except when + they are holding trx->mutex and + wait_lock==NULL */ + ib_uint64_t deadlock_mark; /*!< A mark field that is initialized + to and checked against lock_mark_counter + by lock_deadlock_recursive(). */ + ibool was_chosen_as_deadlock_victim; + /*!< when the transaction decides to + wait for a lock, it sets this to FALSE; + if another transaction chooses this + transaction as a victim in deadlock + resolution, it sets this to TRUE. + Protected by trx->mutex. */ + time_t wait_started; /*!< lock wait started at this time, + protected only by lock_sys->mutex */ + + que_thr_t* wait_thr; /*!< query thread belonging to this + trx that is in QUE_THR_LOCK_WAIT + state. For threads suspended in a + lock wait, this is protected by + lock_sys->mutex. Otherwise, this may + only be modified by the thread that is + serving the running transaction. */ + + mem_heap_t* lock_heap; /*!< memory heap for trx_locks; + protected by lock_sys->mutex */ + + UT_LIST_BASE_NODE_T(lock_t) + trx_locks; /*!< locks requested + by the transaction; + insertions are protected by trx->mutex + and lock_sys->mutex; removals are + protected by lock_sys->mutex */ + + ib_vector_t* table_locks; /*!< All table locks requested by this + transaction, including AUTOINC locks */ + + ibool cancel; /*!< TRUE if the transaction is being + rolled back either via deadlock + detection or due to lock timeout. The + caller has to acquire the trx_t::mutex + in order to cancel the locks. In + lock_trx_table_locks_remove() we + check for this cancel of a transaction's + locks and avoid reacquiring the trx + mutex to prevent recursive deadlocks. + Protected by both the lock sys mutex + and the trx_t::mutex. 
*/ }; #define TRX_MAGIC_N 91118598 -/* The transaction handle; every session has a trx object which is freed only -when the session is freed; in addition there may be session-less transactions -rolling back after a database recovery */ +/** The transaction handle + +Normally, there is a 1:1 relationship between a transaction handle +(trx) and a session (client connection). One session is associated +with exactly one user transaction. There are some exceptions to this: + +* For DDL operations, a subtransaction is allocated that modifies the +data dictionary tables. Lock waits and deadlocks are prevented by +acquiring the dict_operation_lock before starting the subtransaction +and releasing it after committing the subtransaction. + +* The purge system uses a special transaction that is not associated +with any session. + +* If the system crashed or it was quickly shut down while there were +transactions in the ACTIVE or PREPARED state, these transactions would +no longer be associated with a session when the server is restarted. + +A session may be served by at most one thread at a time. The serving +thread of a session might change in some MySQL implementations. +Therefore we do not have os_thread_get_curr_id() assertions in the code. + +Normally, only the thread that is currently associated with a running +transaction may access (read and modify) the trx object, and it may do +so without holding any mutex. The following are exceptions to this: + +* trx_rollback_resurrected() may access resurrected (connectionless) +transactions while the system is already processing new user +transactions. The trx_sys->mutex prevents a race condition between it +and lock_trx_release_locks() [invoked by trx_commit()]. -struct trx_struct{ +* trx_print_low() may access transactions not associated with the current +thread. The caller must be holding trx_sys->mutex and lock_sys->mutex. 
+ +* When a transaction handle is in the trx_sys->mysql_trx_list or +trx_sys->trx_list, some of its fields must not be modified without +holding trx_sys->mutex exclusively. + +* The locking code (in particular, lock_deadlock_recursive() and +lock_rec_convert_impl_to_expl()) will access transactions associated +to other connections. The locks of transactions are protected by +lock_sys->mutex and sometimes by trx->mutex. */ + +struct trx_t{ ulint magic_n; + ib_mutex_t mutex; /*!< Mutex protecting the fields + state and lock + (except some fields of lock, which + are protected by lock_sys->mutex) */ + + /** State of the trx from the point of view of concurrency control + and the valid state transitions. + + Possible states: + + TRX_STATE_NOT_STARTED + TRX_STATE_ACTIVE + TRX_STATE_PREPARED + TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED) + + Valid state transitions are: + + Regular transactions: + * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED + + Auto-commit non-locking read-only: + * NOT_STARTED -> ACTIVE -> NOT_STARTED + + XA (2PC): + * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED + + Recovered XA: + * NOT_STARTED -> PREPARED -> COMMITTED -> (freed) + + XA (2PC) (shutdown before ROLLBACK or COMMIT): + * NOT_STARTED -> PREPARED -> (freed) + + Latching and various transaction lists membership rules: + + XA (2PC) transactions are always treated as non-autocommit. + + Transitions to ACTIVE or NOT_STARTED occur when + !in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed). + + Autocommit non-locking read-only transactions move between states + without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list. + + When a transaction is NOT_STARTED, it can be in_mysql_trx_list if + it is a user transaction. It cannot be in ro_trx_list or rw_trx_list. + + ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list. + The transition ACTIVE->PREPARED is protected by trx_sys->mutex. 
+ + ACTIVE->COMMITTED is possible when the transaction is in + ro_trx_list or rw_trx_list. + + Transitions to COMMITTED are protected by both lock_sys->mutex + and trx->mutex. + + NOTE: Some of these state change constraints are an overkill, + currently only required for a consistent view for printing stats. + This unnecessarily adds a huge cost for the general case. + + NOTE: In the future we should add read only transactions to the + ro_trx_list the first time they try to acquire a lock ie. by default + we treat all read-only transactions as non-locking. */ + trx_state_t state; + + trx_lock_t lock; /*!< Information about the transaction + locks and state. Protected by + trx->mutex or lock_sys->mutex + or both */ + ulint is_recovered; /*!< 0=normal transaction, + 1=recovered, must be rolled back, + protected by trx_sys->mutex when + trx->in_rw_trx_list holds */ + /* These fields are not protected by any mutex. */ const char* op_info; /*!< English text describing the current operation, or an empty string */ - ulint state; /*!< state of the trx from the point of - view of concurrency control: TRX_ACTIVE, - TRX_COMMITTED_IN_MEMORY, ... This was - called 'conc_state' in the upstream and - has been renamed in Percona Server, - because changing it's value to/from - either TRX_ACTIVE or TRX_PREPARED - requires calling - trx_reserve_descriptor() / - trx_release_descriptor(). Different name - ensures we notice any new code changing - the state. */ + ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */ + ulint check_foreigns; /*!< normally TRUE, but if the user + wants to suppress foreign key checks, + (in table imports, for example) we + set this FALSE */ /*------------------------------*/ /* MySQL has a transaction coordinator to coordinate two phase - commit between multiple storage engines and the binary log. When - an engine participates in a transaction, it's responsible for - registering itself using the trans_register_ha() API. 
*/ + commit between multiple storage engines and the binary log. When + an engine participates in a transaction, it's responsible for + registering itself using the trans_register_ha() API. */ unsigned is_registered:1;/* This flag is set to 1 after the transaction has been registered with the coordinator using the XA API, and @@ -521,17 +790,9 @@ struct trx_struct{ this is set to 1 then registered should also be set to 1. This is used in the XA code */ - unsigned is_in_trx_serial_list:1; - /* Set when transaction is in the - trx_serial_list */ /*------------------------------*/ - ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */ - ulint check_foreigns; /* normally TRUE, but if the user - wants to suppress foreign key checks, - (in table imports, for example) we - set this FALSE */ ulint check_unique_secondary; - /* normally TRUE, but if the user + /*!< normally TRUE, but if the user wants to speed up inserts by suppressing unique key checks for secondary indexes when we decide @@ -549,123 +810,120 @@ struct trx_struct{ defer flush of the logs to disk until after we release the mutex. */ - ulint must_flush_log_later;/* this flag is set to TRUE in - trx_commit_off_kernel() if - flush_log_later was TRUE, and there - were modifications by the transaction; - in that case we must flush the log - in trx_commit_complete_for_mysql() */ + ulint must_flush_log_later;/*!< this flag is set to TRUE in + trx_commit() if flush_log_later was + TRUE, and there were modifications by + the transaction; in that case we must + flush the log in + trx_commit_complete_for_mysql() */ ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ - ibool has_search_latch; - /* TRUE if this trx has latched any + bool has_search_latch; + /*!< true if this trx has latched any search system latch in S-mode */ - ulint deadlock_mark; /*!< a mark field used in deadlock - checking algorithm. 
*/ + ulint search_latch_timeout; + /*!< If we notice that someone is + waiting for our S-lock on the search + latch to be released, we wait in + row0sel.cc for BTR_SEA_TIMEOUT new + searches until we try to keep + the search latch again over + calls from MySQL; this is intended + to reduce contention on the search + latch */ trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */ /* Fields protected by the srv_conc_mutex. */ ulint declared_to_be_inside_innodb; - /* this is TRUE if we have declared + /*!< this is TRUE if we have declared this transaction in srv_conc_enter_innodb to be inside the InnoDB engine */ - - /* Fields protected by dict_operation_lock. The very latch - it is used to track. */ + ulint n_tickets_to_enter_innodb; + /*!< this can be > 0 only when + declared_to_... is TRUE; when we come + to srv_conc_innodb_enter, if the value + here is > 0, we decrement this by 1 */ ulint dict_operation_lock_mode; /*!< 0, RW_S_LATCH, or RW_X_LATCH: the latch mode trx currently holds - on dict_operation_lock */ + on dict_operation_lock. Protected + by dict_operation_lock. */ + + trx_id_t no; /*!< transaction serialization number: + max trx id shortly before the + transaction is moved to + COMMITTED_IN_MEMORY state. + Protected by trx_sys_t::mutex + when trx->in_rw_trx_list. Initially + set to TRX_ID_MAX. */ - /* All the next fields are protected by the kernel mutex, except the - undo logs which are protected by undo_mutex */ - ulint is_purge; /*!< 0=user transaction, 1=purge */ - ulint is_recovered; /*!< 0=normal transaction, - 1=recovered, must be rolled back */ - ulint que_state; /*!< valid when conc_state - == TRX_ACTIVE: TRX_QUE_RUNNING, - TRX_QUE_LOCK_WAIT, ... 
*/ - ulint handling_signals;/* this is TRUE as long as the trx - is handling signals */ time_t start_time; /*!< time the trx object was created or the state last time became - TRX_ACTIVE */ + TRX_STATE_ACTIVE */ trx_id_t id; /*!< transaction id */ XID xid; /*!< X/Open XA transaction identification to identify a transaction branch */ - trx_id_t no; /*!< transaction serialization number == - max trx id when the transaction is - moved to COMMITTED_IN_MEMORY state */ - ib_uint64_t commit_lsn; /*!< lsn at the time of the commit */ + lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation - is TRUE, or 0. */ + == TRX_DICT_OP_TABLE, or 0. */ /*------------------------------*/ - void* mysql_thd; /*!< MySQL thread handle corresponding + THD* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ const char* mysql_log_file_name; - /* if MySQL binlog is used, this field + /*!< if MySQL binlog is used, this field contains a pointer to the latest file name; this is NULL if binlog is not used */ - ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field - contains the end offset of the binlog - entry */ - const char* mysql_master_log_file_name; - /* if the database server is a MySQL - replication slave, we have here the - master binlog name up to which - replication has processed; otherwise - this is a pointer to a null - character */ - ib_int64_t mysql_master_log_pos; - /* if the database server is a MySQL - replication slave, this is the - position in the log file up to which - replication has processed */ - const char* mysql_relay_log_file_name; - ib_int64_t mysql_relay_log_pos; + ib_int64_t mysql_log_offset; + /*!< if MySQL binlog is used, this + field contains the end offset of the + binlog entry */ time_t idle_start; ib_int64_t last_stmt_start; /*------------------------------*/ - ulint n_mysql_tables_in_use; /* number of Innobase tables + ulint n_mysql_tables_in_use; /*!< number of 
Innobase tables used in the processing of the current SQL statement in MySQL */ ulint mysql_n_tables_locked; - /* how many tables the current SQL + /*!< how many tables the current SQL statement uses, except those in consistent read */ - ulint search_latch_timeout; - /* If we notice that someone is - waiting for our S-lock on the search - latch to be released, we wait in - row0sel.c for BTR_SEA_TIMEOUT new - searches until we try to keep - the search latch again over - calls from MySQL; this is intended - to reduce contention on the search - latch */ - /*------------------------------*/ - ulint n_tickets_to_enter_innodb; - /* this can be > 0 only when - declared_to_... is TRUE; when we come - to srv_conc_innodb_enter, if the value - here is > 0, we decrement this by 1 */ /*------------------------------*/ UT_LIST_NODE_T(trx_t) - trx_list; /*!< list of transactions */ + trx_list; /*!< list of transactions; + protected by trx_sys->mutex. + The same node is used for both + trx_sys_t::ro_trx_list and + trx_sys_t::rw_trx_list */ +#ifdef UNIV_DEBUG + /** The following two fields are mutually exclusive. 
*/ + /* @{ */ + + ibool in_ro_trx_list; /*!< TRUE if in trx_sys->ro_trx_list */ + ibool in_rw_trx_list; /*!< TRUE if in trx_sys->rw_trx_list */ + /* @} */ +#endif /* UNIV_DEBUG */ UT_LIST_NODE_T(trx_t) mysql_trx_list; /*!< list of transactions created for - MySQL */ + MySQL; protected by trx_sys->mutex */ +#ifdef UNIV_DEBUG + ibool in_mysql_trx_list; + /*!< TRUE if in + trx_sys->mysql_trx_list */ +#endif /* UNIV_DEBUG */ UT_LIST_NODE_T(trx_t) trx_serial_list;/*!< list node for trx_sys->trx_serial_list */ + bool in_trx_serial_list; + /* Set when transaction is in the + trx_serial_list */ /*------------------------------*/ - ulint error_state; /*!< 0 if no error, otherwise error + dberr_t error_state; /*!< 0 if no error, otherwise error number; NOTE That ONLY the thread doing the transaction is allowed to set this field: this is NOT protected - by the kernel mutex */ + by any mutex */ const dict_index_t*error_info; /*!< if the error number indicates a duplicate key error, a pointer to the problematic index is stored here */ @@ -679,47 +937,8 @@ struct trx_struct{ survive over a transaction commit, if it is a stored procedure with a COMMIT WORK statement, for instance */ - ulint n_active_thrs; /*!< number of active query threads */ - que_t* graph_before_signal_handling; - /* value of graph when signal handling - for this trx started: this is used to - return control to the original query - graph for error processing */ - trx_sig_t sig; /*!< one signal object can be allocated - in this space, avoiding mem_alloc */ - UT_LIST_BASE_NODE_T(trx_sig_t) - signals; /*!< queue of processed or pending - signals to the trx */ - UT_LIST_BASE_NODE_T(trx_sig_t) - reply_signals; /*!< list of signals sent by the query - threads of this trx for which a thread - is waiting for a reply; if this trx is - killed, the reply requests in the list - must be canceled */ - /*------------------------------*/ - lock_t* wait_lock; /*!< if trx execution state is - TRX_QUE_LOCK_WAIT, this points to - 
the lock request, otherwise this is - NULL */ - ibool was_chosen_as_deadlock_victim; - /* when the transaction decides to wait - for a lock, it sets this to FALSE; - if another transaction chooses this - transaction as a victim in deadlock - resolution, it sets this to TRUE */ - time_t wait_started; /*!< lock wait started at this time */ - UT_LIST_BASE_NODE_T(que_thr_t) - wait_thrs; /*!< query threads belonging to this - trx that are in the QUE_THR_LOCK_WAIT - state */ - /*------------------------------*/ - mem_heap_t* lock_heap; /*!< memory heap for the locks of the - transaction */ - UT_LIST_BASE_NODE_T(lock_t) - trx_locks; /*!< locks reserved by the transaction */ - /*------------------------------*/ read_view_t* global_read_view; - /* consistent read view associated + /*!< consistent read view associated to a transaction or NULL */ read_view_t* read_view; /*!< consistent read view used in the transaction or NULL, this read view @@ -733,7 +952,7 @@ struct trx_struct{ trx_savepoints; /*!< savepoints set with SAVEPOINT ..., oldest first */ /*------------------------------*/ - mutex_t undo_mutex; /*!< mutex protecting the fields in this + ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this section (down to undo_no_arr), EXCEPT last_sql_stat_start, which can be accessed only when we know that there @@ -747,7 +966,7 @@ struct trx_struct{ the number of modified/inserted rows in a transaction */ trx_savept_t last_sql_stat_start; - /* undo_no when the last sql statement + /*!< undo_no when the last sql statement was started: in case of an error, trx is rolled back down to this undo number; see note at undo_mutex! */ @@ -773,7 +992,39 @@ struct trx_struct{ transaction. Note that these are also in the lock list trx_locks. This vector needs to be freed explicitly - when the trx_t instance is desrtoyed */ + when the trx instance is destroyed. + Protected by lock_sys->mutex. 
*/ + /*------------------------------*/ + ibool read_only; /*!< TRUE if transaction is flagged + as a READ-ONLY transaction. + if !auto_commit || will_lock > 0 + then it will added to the list + trx_sys_t::ro_trx_list. A read only + transaction will not be assigned an + UNDO log. Non-locking auto-commit + read-only transaction will not be on + either list. */ + ibool auto_commit; /*!< TRUE if it is an autocommit */ + ulint will_lock; /*!< Will acquire some locks. Increment + each time we determine that a lock will + be acquired by the MySQL layer. */ + bool ddl; /*!< true if it is a transaction that + is being started for a DDL operation */ + /*------------------------------*/ + fts_trx_t* fts_trx; /*!< FTS information, or NULL if + transaction hasn't modified tables + with FTS indexes (yet). */ + doc_id_t fts_next_doc_id;/* The document id used for updates */ + /*------------------------------*/ + ulint flush_tables; /*!< if "covering" the FLUSH TABLES", + count of tables being flushed. */ + + /*------------------------------*/ +#ifdef UNIV_DEBUG + ulint start_line; /*!< Track where it was started from */ + const char* start_file; /*!< Filename where it was started */ +#endif /* UNIV_DEBUG */ + /*------------------------------*/ char detailed_error[256]; /*!< detailed error message for last error, or empty. 
*/ @@ -790,23 +1041,6 @@ struct trx_struct{ ibool take_stats; }; -#define TRX_MAX_N_THREADS 32 /* maximum number of - concurrent threads running a - single operation of a - transaction, e.g., a parallel - query */ -/* Transaction concurrency states (trx->conc_state) */ -#define TRX_NOT_STARTED 0 -#define TRX_ACTIVE 1 -#define TRX_COMMITTED_IN_MEMORY 2 -#define TRX_PREPARED 3 /* Support for 2PC/XA */ - -/* Transaction execution states when trx->conc_state == TRX_ACTIVE */ -#define TRX_QUE_RUNNING 0 /* transaction is running */ -#define TRX_QUE_LOCK_WAIT 1 /* transaction is waiting for a lock */ -#define TRX_QUE_ROLLING_BACK 2 /* transaction is rolling back */ -#define TRX_QUE_COMMITTING 3 /* transaction is committing */ - /* Transaction isolation levels (trx->isolation_level) */ #define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking SELECTs are performed so that @@ -853,7 +1087,6 @@ Multiple flags can be combined with bitwise OR. */ #define TRX_SIG_TOTAL_ROLLBACK 1 #define TRX_SIG_ROLLBACK_TO_SAVEPT 2 #define TRX_SIG_COMMIT 3 -#define TRX_SIG_ERROR_OCCURRED 4 #define TRX_SIG_BREAK_EXECUTION 5 /* Sender types of a signal */ @@ -876,13 +1109,40 @@ enum commit_node_state { }; /** Commit command node in a query graph */ -struct commit_node_struct{ +struct commit_node_t{ que_common_t common; /*!< node type: QUE_NODE_COMMIT */ enum commit_node_state state; /*!< node execution state */ }; +/** Test if trx->mutex is owned. */ +#define trx_mutex_own(t) mutex_own(&t->mutex) + +/** Acquire the trx->mutex. */ +#define trx_mutex_enter(t) do { \ + mutex_enter(&t->mutex); \ +} while (0) + +/** Release the trx->mutex. 
*/ +#define trx_mutex_exit(t) do { \ + mutex_exit(&t->mutex); \ +} while (0) + +/** @brief The latch protecting the adaptive search system + +This latch protects the +(1) hash index; +(2) columns of a record to which we have a pointer in the hash index; + +but does NOT protect: + +(3) next record offset field in a record; +(4) next or previous records on the same page. + +Bear in mind (3) and (4) when using the hash index. +*/ +extern prio_rw_lock_t* btr_search_latch_arr; #ifndef UNIV_NONINL #include "trx0trx.ic" |