summary refs log tree commit diff
path: root/storage/innobase/include/trx0trx.h
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include/trx0trx.h')
-rw-r--r-- storage/innobase/include/trx0trx.h 703
1 files changed, 372 insertions, 331 deletions
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index ce3eca7593f..5b2b2264a46 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2021, MariaDB Corporation.
+Copyright (c) 2015, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,7 +38,6 @@ Created 3/26/1996 Heikki Tuuri
#include "ilist.h"
#include <vector>
-#include <set>
// Forward declaration
struct mtr_t;
@@ -96,18 +95,11 @@ trx_start_if_not_started_low(
trx_t* trx, /*!< in/out: transaction */
bool read_write); /*!< in: true if read write transaction */
-/*************************************************************//**
-Starts a transaction for internal processing. */
-void
-trx_start_internal_low(
-/*===================*/
- trx_t* trx); /*!< in/out: transaction */
-
-/** Starts a read-only transaction for internal processing.
-@param[in,out] trx transaction to be started */
-void
-trx_start_internal_read_only_low(
- trx_t* trx);
+/**
+Start a transaction for internal processing.
+@param trx transaction
+@param read_write whether writes may be performed */
+void trx_start_internal_low(trx_t *trx, bool read_write);
#ifdef UNIV_DEBUG
#define trx_start_if_not_started_xa(t, rw) \
@@ -128,48 +120,39 @@ trx_start_internal_read_only_low(
do { \
(t)->start_line = __LINE__; \
(t)->start_file = __FILE__; \
- trx_start_internal_low((t)); \
+ trx_start_internal_low(t, true); \
} while (false)
-
#define trx_start_internal_read_only(t) \
do { \
(t)->start_line = __LINE__; \
(t)->start_file = __FILE__; \
- trx_start_internal_read_only_low(t); \
+ trx_start_internal_low(t, false); \
} while (false)
#else
#define trx_start_if_not_started(t, rw) \
trx_start_if_not_started_low((t), rw)
-#define trx_start_internal(t) \
- trx_start_internal_low((t))
-
-#define trx_start_internal_read_only(t) \
- trx_start_internal_read_only_low(t)
+#define trx_start_internal(t) trx_start_internal_low(t, true)
+#define trx_start_internal_read_only(t) trx_start_internal_low(t, false)
#define trx_start_if_not_started_xa(t, rw) \
trx_start_if_not_started_xa_low((t), (rw))
#endif /* UNIV_DEBUG */
-/*************************************************************//**
-Starts the transaction for a DDL operation. */
-void
-trx_start_for_ddl_low(
-/*==================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_dict_op_t op); /*!< in: dictionary operation type */
+/** Start a transaction for a DDL operation.
+@param trx transaction */
+void trx_start_for_ddl_low(trx_t *trx);
#ifdef UNIV_DEBUG
-#define trx_start_for_ddl(t, o) \
+# define trx_start_for_ddl(t) \
do { \
ut_ad((t)->start_file == 0); \
(t)->start_line = __LINE__; \
(t)->start_file = __FILE__; \
- trx_start_for_ddl_low((t), (o)); \
+ trx_start_for_ddl_low(t); \
} while (0)
#else
-#define trx_start_for_ddl(t, o) \
- trx_start_for_ddl_low((t), (o))
+# define trx_start_for_ddl(t) trx_start_for_ddl_low(t)
#endif /* UNIV_DEBUG */
/**********************************************************************//**
@@ -245,7 +228,7 @@ trx_print_low(
/*!< in: max query length to print,
or 0 to use the default max length */
ulint n_rec_locks,
- /*!< in: lock_number_of_rows_locked(&trx->lock) */
+ /*!< in: trx->lock.n_rec_locks */
ulint n_trx_locks,
/*!< in: length of trx->lock.trx_locks */
ulint heap_size);
@@ -264,7 +247,7 @@ trx_print_latched(
/**********************************************************************//**
Prints info about a transaction.
-Acquires and releases lock_sys.mutex. */
+Acquires and releases lock_sys.latch. */
void
trx_print(
/*======*/
@@ -274,25 +257,6 @@ trx_print(
or 0 to use the default max length */
/**********************************************************************//**
-Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
-UNIV_INLINE
-enum trx_dict_op_t
-trx_get_dict_operation(
-/*===================*/
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************************//**
-Flag a transaction a dictionary operation. */
-UNIV_INLINE
-void
-trx_set_dict_operation(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op_t op); /*!< in: operation, not
- TRX_DICT_OP_NONE */
-
-/**********************************************************************//**
Determines if a transaction is in the given state.
The caller must hold trx->mutex, or it must be the thread
that is serving a running transaction.
@@ -328,43 +292,6 @@ is estimated as the number of altered rows + the number of locked rows.
@return transaction weight */
#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
-@return true if weight(a) >= weight(b) */
-bool
-trx_weight_ge(
-/*==========*/
- const trx_t* a, /*!< in: the transaction to be compared */
- const trx_t* b); /*!< in: the transaction to be compared */
-/* Maximum length of a string that can be returned by
-trx_get_que_state_str(). */
-#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */
-
-/*******************************************************************//**
-Retrieves transaction's que state in a human readable string. The string
-should not be free()'d or modified.
-@return string in the data segment */
-UNIV_INLINE
-const char*
-trx_get_que_state_str(
-/*==================*/
- const trx_t* trx); /*!< in: transaction */
-
-/** Retreieves the transaction ID.
-In a given point in time it is guaranteed that IDs of the running
-transactions are unique. The values returned by this function for readonly
-transactions may be reused, so a subsequent RO transaction may get the same ID
-as a RO transaction that existed in the past. The values returned by this
-function should be used for printing purposes only.
-@param[in] trx transaction whose id to retrieve
-@return transaction id */
-UNIV_INLINE
-trx_id_t
-trx_get_id_for_print(
- const trx_t* trx);
-
/** Create the trx_t pool */
void
trx_pool_init();
@@ -395,95 +322,82 @@ from innodb_lock_wait_timeout via trx_t::mysql_thd.
typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> > lock_list;
-/*******************************************************************//**
-Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
-captures the state of the query thread during the execution of a query.
-This is different from a transaction state. The query state of a transaction
-can be updated asynchronously by other threads. The other threads can be
-system threads, like the timeout monitor thread or user threads executing
-other queries. Another thing to be mindful of is that there is a delay between
-when a query thread is put into LOCK_WAIT state and before it actually starts
-waiting. Between these two events it is possible that the query thread is
-granted the lock it was waiting for, which implies that the state can be changed
-asynchronously.
-
-All these operations take place within the context of locking. Therefore state
-changes within the locking code must acquire both the lock mutex and the
-trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
-trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
-to only acquire the trx->mutex.
-To query the state either of the mutexes is sufficient within the locking
-code and no mutex is required when the query thread is no longer waiting. */
-
/** The locks and state of an active transaction. Protected by
-lock_sys.mutex, trx->mutex or both. */
-struct trx_lock_t {
-#ifdef UNIV_DEBUG
- /** number of active query threads; at most 1, except for the
- dummy transaction in trx_purge() */
- ulint n_active_thrs;
-#endif
- trx_que_t que_state; /*!< valid when trx->state
- == TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
- TRX_QUE_LOCK_WAIT, ... */
-
- lock_t* wait_lock; /*!< if trx execution state is
- TRX_QUE_LOCK_WAIT, this points to
- the lock request, otherwise this is
- NULL; set to non-NULL when holding
- both trx->mutex and lock_sys.mutex;
- set to NULL when holding
- lock_sys.mutex; readers should
- hold lock_sys.mutex, except when
- they are holding trx->mutex and
- wait_lock==NULL */
- ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
- to and checked against lock_mark_counter
- by lock_deadlock_recursive(). */
- bool was_chosen_as_deadlock_victim;
- /*!< when the transaction decides to
- wait for a lock, it sets this to false;
- if another transaction chooses this
- transaction as a victim in deadlock
- resolution, it sets this to true.
- Protected by trx->mutex. */
- time_t wait_started; /*!< lock wait started at this time,
- protected only by lock_sys.mutex */
+lock_sys.latch, trx->mutex or both. */
+struct trx_lock_t
+{
+ /** Lock request being waited for.
+ Set to nonnull when holding lock_sys.latch, lock_sys.wait_mutex and
+ trx->mutex, by the thread that is executing the transaction.
+ Set to nullptr when holding lock_sys.wait_mutex. */
+ Atomic_relaxed<lock_t*> wait_lock;
+ /** Transaction being waited for; protected by lock_sys.wait_mutex */
+ trx_t *wait_trx;
+ /** condition variable for !wait_lock; used with lock_sys.wait_mutex */
+ pthread_cond_t cond;
+ /** lock wait start time */
+ Atomic_relaxed<my_hrtime_t> suspend_time;
+
+#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
+ /** 2=high priority WSREP thread has marked this trx to abort;
+ 1=another transaction chose this as a victim in deadlock resolution. */
+ Atomic_relaxed<byte> was_chosen_as_deadlock_victim;
+
+ /** Flag the lock owner as a victim in Galera conflict resolution. */
+ void set_wsrep_victim()
+ {
+# if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ /* There is no 8-bit version of the 80386 BTS instruction.
+ Technically, this is the wrong addressing mode (16-bit), but
+ there are other data members stored after the byte. */
+ __asm__ __volatile__("lock btsw $1, %0"
+ : "+m" (was_chosen_as_deadlock_victim));
+# else
+ was_chosen_as_deadlock_victim.fetch_or(2);
+# endif
+ }
+#else /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
+
+ /** High priority WSREP thread has marked this trx to abort or
+ another transaction chose this as a victim in deadlock resolution. */
+ Atomic_relaxed<bool> was_chosen_as_deadlock_victim;
+
+ /** Flag the lock owner as a victim in Galera conflict resolution. */
+ void set_wsrep_victim() {
+ was_chosen_as_deadlock_victim= true;
+ }
+#endif /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
+
+ /** Next available rec_pool[] entry */
+ byte rec_cached;
+ /** Next available table_pool[] entry */
+ byte table_cached;
que_thr_t* wait_thr; /*!< query thread belonging to this
- trx that is in QUE_THR_LOCK_WAIT
+ trx that is in a waiting
state. For threads suspended in a
lock wait, this is protected by
- lock_sys.mutex. Otherwise, this may
+ lock_sys.latch. Otherwise, this may
only be modified by the thread that is
serving the running transaction. */
-#ifdef WITH_WSREP
- bool was_chosen_as_wsrep_victim;
- /*!< high priority wsrep thread has
- marked this trx to abort */
-#endif /* WITH_WSREP */
-
- /** Pre-allocated record locks */
- struct {
- ib_lock_t lock; byte pad[256];
- } rec_pool[8];
- /** Pre-allocated table locks */
- ib_lock_t table_pool[8];
+ /** Pre-allocated record locks */
+ struct {
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) ib_lock_t lock;
+ } rec_pool[8];
- /** Next available rec_pool[] entry */
- unsigned rec_cached;
+ /** Pre-allocated table locks */
+ ib_lock_t table_pool[8];
- /** Next available table_pool[] entry */
- unsigned table_cached;
+ /** Memory heap for trx_locks. Protected by lock_sys.assert_locked()
+ and lock_sys.is_writer() || trx->mutex_is_owner(). */
+ mem_heap_t *lock_heap;
- mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
- protected by lock_sys.mutex */
-
- trx_lock_list_t trx_locks; /*!< locks requested by the transaction;
- insertions are protected by trx->mutex
- and lock_sys.mutex; removals are
- protected by lock_sys.mutex */
+ /** Locks held by the transaction. Protected by lock_sys.assert_locked()
+ and lock_sys.is_writer() || trx->mutex_is_owner().
+ (If lock_sys.latch is only held in shared mode, then the modification
+ must be protected by trx->mutex.) */
+ trx_lock_list_t trx_locks;
lock_list table_locks; /*!< All table locks requested by this
transaction, including AUTOINC locks */
@@ -491,75 +405,94 @@ struct trx_lock_t {
/** List of pending trx_t::evict_table() */
UT_LIST_BASE_NODE_T(dict_table_t) evicted_tables;
- bool cancel; /*!< true if the transaction is being
- rolled back either via deadlock
- detection or due to lock timeout. The
- caller has to acquire the trx_t::mutex
- in order to cancel the locks. In
- lock_trx_table_locks_remove() we
- check for this cancel of a transaction's
- locks and avoid reacquiring the trx
- mutex to prevent recursive deadlocks.
- Protected by both the lock sys mutex
- and the trx_t::mutex. */
- ulint n_rec_locks; /*!< number of rec locks in this trx */
+ /** number of record locks; protected by lock_sys.assert_locked(page_id) */
+ ulint n_rec_locks;
};
/** Logical first modification time of a table in a transaction */
class trx_mod_table_time_t
{
- /** First modification of the table */
- undo_no_t first;
- /** First modification of a system versioned column */
- undo_no_t first_versioned;
-
- /** Magic value signifying that a system versioned column of a
- table was never modified in a transaction. */
- static const undo_no_t UNVERSIONED = IB_ID_MAX;
-
+ /** Impossible value for trx_t::undo_no */
+ static constexpr undo_no_t NONE= ~undo_no_t{0};
+ /** Theoretical maximum value for trx_t::undo_no.
+ DB_ROLL_PTR is only 7 bytes, so it cannot point to more than
+ this many undo log records. */
+ static constexpr undo_no_t LIMIT= (undo_no_t{1} << (7 * 8)) - 1;
+
+ /** Flag in 'first' to indicate that subsequent operations are
+ covered by a TRX_UNDO_EMPTY record (for the first statement to
+ insert into an empty table) */
+ static constexpr undo_no_t BULK= 1ULL << 63;
+
+ /** First modification of the table, possibly ORed with BULK */
+ undo_no_t first;
+ /** First modification of a system versioned column
+ (NONE= no versioning, BULK= the table was dropped) */
+ undo_no_t first_versioned= NONE;
+#ifdef UNIV_DEBUG
+ /** Whether the modified table is a FTS auxiliary table */
+ bool fts_aux_table= false;
+#endif /* UNIV_DEBUG */
public:
- /** Constructor
- @param[in] rows number of modified rows so far */
- trx_mod_table_time_t(undo_no_t rows)
- : first(rows), first_versioned(UNVERSIONED) {}
+ /** Constructor
+ @param rows number of modified rows so far */
+ trx_mod_table_time_t(undo_no_t rows) : first(rows) { ut_ad(rows < LIMIT); }
#ifdef UNIV_DEBUG
- /** Validation
- @param[in] rows number of modified rows so far
- @return whether the object is valid */
- bool valid(undo_no_t rows = UNVERSIONED) const
- {
- return first <= first_versioned && first <= rows;
- }
+ /** Validation
+ @param rows number of modified rows so far
+ @return whether the object is valid */
+ bool valid(undo_no_t rows= NONE) const
+ { auto f= first & LIMIT; return f <= first_versioned && f <= rows; }
#endif /* UNIV_DEBUG */
- /** @return if versioned columns were modified */
- bool is_versioned() const { return first_versioned != UNVERSIONED; }
+ /** @return if versioned columns were modified */
+ bool is_versioned() const { return (~first_versioned & LIMIT) != 0; }
+ /** @return if the table was dropped */
+ bool is_dropped() const { return first_versioned == BULK; }
+
+ /** After writing an undo log record, set is_versioned() if needed
+ @param rows number of modified rows so far */
+ void set_versioned(undo_no_t rows)
+ {
+ ut_ad(first_versioned == NONE);
+ first_versioned= rows;
+ ut_ad(valid(rows));
+ }
- /** After writing an undo log record, set is_versioned() if needed
- @param[in] rows number of modified rows so far */
- void set_versioned(undo_no_t rows)
- {
- ut_ad(!is_versioned());
- first_versioned = rows;
- ut_ad(valid());
- }
+ /** After writing an undo log record, note that the table will be dropped */
+ void set_dropped()
+ {
+ ut_ad(first_versioned == NONE);
+ first_versioned= BULK;
+ }
- /** Invoked after partial rollback
- @param[in] limit number of surviving modified rows
- @return whether this should be erased from trx_t::mod_tables */
- bool rollback(undo_no_t limit)
- {
- ut_ad(valid());
- if (first >= limit) {
- return true;
- }
+ /** Notify the start of a bulk insert operation */
+ void start_bulk_insert() { first|= BULK; }
- if (first_versioned < limit && is_versioned()) {
- first_versioned = UNVERSIONED;
- }
+ /** Notify the end of a bulk insert operation */
+ void end_bulk_insert() { first&= ~BULK; }
- return false;
- }
+ /** @return whether an insert is covered by TRX_UNDO_EMPTY record */
+ bool is_bulk_insert() const { return first & BULK; }
+
+ /** Invoked after partial rollback
+ @param limit number of surviving modified rows (trx_t::undo_no)
+ @return whether this should be erased from trx_t::mod_tables */
+ bool rollback(undo_no_t limit)
+ {
+ ut_ad(valid());
+ if ((LIMIT & first) >= limit)
+ return true;
+ if (first_versioned < limit)
+ first_versioned= NONE;
+ return false;
+ }
+
+#ifdef UNIV_DEBUG
+ void set_aux_table() { fts_aux_table= true; }
+
+ bool is_aux_table() const { return fts_aux_table; }
+#endif /* UNIV_DEBUG */
};
/** Collection of persistent tables and their first modification
@@ -593,7 +526,7 @@ no longer be associated with a session when the server is restarted.
A session may be served by at most one thread at a time. The serving
thread of a session might change in some MySQL implementations.
-Therefore we do not have os_thread_get_curr_id() assertions in the code.
+Therefore we do not have pthread_self() assertions in the code.
Normally, only the thread that is currently associated with a running
transaction may access (read and modify) the trx object, and it may do
@@ -604,7 +537,7 @@ transactions (state == TRX_STATE_ACTIVE && is_recovered)
while the system is already processing new user transactions (!is_recovered).
* trx_print_low() may access transactions not associated with the current
-thread. The caller must be holding lock_sys.mutex.
+thread. The caller must be holding lock_sys.latch.
* When a transaction handle is in the trx_sys.trx_list, some of its fields
must not be modified without holding trx->mutex.
@@ -612,7 +545,7 @@ must not be modified without holding trx->mutex.
* The locking code (in particular, lock_deadlock_recursive() and
lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
-lock_sys.mutex (insertions also by trx->mutex). */
+lock_sys.latch (insertions also by trx->mutex). */
/** Represents an instance of rollback segment along with its state variables.*/
struct trx_undo_ptr_t {
@@ -643,7 +576,8 @@ struct trx_rsegs_t {
trx_temp_undo_t m_noredo;
};
-struct trx_t : ilist_node<> {
+struct trx_t : ilist_node<>
+{
private:
/**
Least significant 31 bits is count of references.
@@ -658,96 +592,139 @@ private:
we don't want to get blocked on GAP locks taken for protecting
concurrent unique insert or replace operation.
*/
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE)
Atomic_relaxed<uint32_t> skip_lock_inheritance_and_n_ref;
public:
- TrxMutex mutex; /*!< Mutex protecting the fields
- state and lock (except some fields
- of lock, which are protected by
- lock_sys.mutex) */
+ /** Transaction identifier (0 if no locks were acquired).
+ Set by trx_sys_t::register_rw() or trx_resurrect() before
+ the transaction is added to trx_sys.rw_trx_hash.
+ Cleared in commit_in_memory() after commit_state(),
+ trx_sys_t::deregister_rw(), release_locks(). */
+ trx_id_t id;
+ /** The largest encountered transaction identifier for which no
+ transaction was observed to be active. This is a cache to speed up
+ trx_sys_t::find_same_or_older(). */
+ trx_id_t max_inactive_id;
+
+private:
+ /** mutex protecting state and some of lock
+ (some are protected by lock_sys.latch) */
+ srw_spin_mutex mutex;
+#ifdef UNIV_DEBUG
+ /** The owner of mutex (0 if none); protected by mutex */
+ std::atomic<pthread_t> mutex_owner{0};
+#endif /* UNIV_DEBUG */
+public:
+ void mutex_init() { mutex.init(); }
+ void mutex_destroy() { mutex.destroy(); }
+
+ /** Acquire the mutex */
+ void mutex_lock()
+ {
+ ut_ad(!mutex_is_owner());
+ mutex.wr_lock();
+ ut_ad(!mutex_owner.exchange(pthread_self(),
+ std::memory_order_relaxed));
+ }
+ /** Release the mutex */
+ void mutex_unlock()
+ {
+ ut_ad(mutex_owner.exchange(0, std::memory_order_relaxed)
+ == pthread_self());
+ mutex.wr_unlock();
+ }
+#ifndef SUX_LOCK_GENERIC
+ bool mutex_is_locked() const noexcept { return mutex.is_locked(); }
+#endif
+#ifdef UNIV_DEBUG
+ /** @return whether the current thread holds the mutex */
+ bool mutex_is_owner() const
+ {
+ return mutex_owner.load(std::memory_order_relaxed) ==
+ pthread_self();
+ }
+#endif /* UNIV_DEBUG */
+
+ /** State of the trx from the point of view of concurrency control
+ and the valid state transitions.
- trx_id_t id; /*!< transaction id */
+ Possible states:
- /** State of the trx from the point of view of concurrency control
- and the valid state transitions.
+ TRX_STATE_NOT_STARTED
+ TRX_STATE_ACTIVE
+ TRX_STATE_PREPARED
+ TRX_STATE_PREPARED_RECOVERED (special case of TRX_STATE_PREPARED)
+ TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
- Possible states:
+ Valid state transitions are:
- TRX_STATE_NOT_STARTED
- TRX_STATE_ACTIVE
- TRX_STATE_PREPARED
- TRX_STATE_PREPARED_RECOVERED (special case of TRX_STATE_PREPARED)
- TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
+ Regular transactions:
+ * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
- Valid state transitions are:
+ Auto-commit non-locking read-only:
+ * NOT_STARTED -> ACTIVE -> NOT_STARTED
- Regular transactions:
- * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
+ XA (2PC):
+ * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
- Auto-commit non-locking read-only:
- * NOT_STARTED -> ACTIVE -> NOT_STARTED
+ Recovered XA:
+ * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
- XA (2PC):
- * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
+ Recovered XA followed by XA ROLLBACK:
+ * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
- Recovered XA:
- * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
+ XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
+ * NOT_STARTED -> PREPARED -> (freed)
- Recovered XA followed by XA ROLLBACK:
- * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
+ Disconnected XA PREPARE transaction can become recovered:
+ * ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
- XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
- * NOT_STARTED -> PREPARED -> (freed)
+ Latching and various transaction lists membership rules:
- Disconnected XA can become recovered:
- * ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
- Disconnected means from mysql e.g due to the mysql client disconnection.
- Latching and various transaction lists membership rules:
+ XA (2PC) transactions are always treated as non-autocommit.
- XA (2PC) transactions are always treated as non-autocommit.
+ Transitions to ACTIVE or NOT_STARTED occur when transaction
+ is not in rw_trx_hash.
- Transitions to ACTIVE or NOT_STARTED occur when transaction
- is not in rw_trx_hash.
+ Autocommit non-locking read-only transactions move between states
+ without holding any mutex. They are not in rw_trx_hash.
- Autocommit non-locking read-only transactions move between states
- without holding any mutex. They are not in rw_trx_hash.
+ All transactions, unless they are determined to be ac-nl-ro,
+ explicitly tagged as read-only or read-write, will first be put
+ on the read-only transaction list. Only when a !read-only transaction
+ in the read-only list tries to acquire an X or IX lock on a table
+ do we remove it from the read-only list and put it on the read-write
+ list. During this switch we assign it a rollback segment.
- All transactions, unless they are determined to be ac-nl-ro,
- explicitly tagged as read-only or read-write, will first be put
- on the read-only transaction list. Only when a !read-only transaction
- in the read-only list tries to acquire an X or IX lock on a table
- do we remove it from the read-only list and put it on the read-write
- list. During this switch we assign it a rollback segment.
+ When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
+ in rw_trx_hash.
- When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
- in rw_trx_hash.
+ ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
+ The transition ACTIVE->PREPARED is protected by trx->mutex.
- ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
- The transition ACTIVE->PREPARED is protected by trx->mutex.
+ ACTIVE->COMMITTED is possible when the transaction is in
+ rw_trx_hash.
- ACTIVE->COMMITTED is possible when the transaction is in
- rw_trx_hash.
+ Transitions to COMMITTED are protected by trx_t::mutex. */
+ Atomic_relaxed<trx_state_t> state;
+
+ /** The locks of the transaction. Protected by lock_sys.latch
+ (insertions also by trx_t::mutex). */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_lock_t lock;
- Transitions to COMMITTED are protected by trx_t::mutex. */
- trx_state_t state;
#ifdef WITH_WSREP
- /** whether wsrep_on(mysql_thd) held at the start of transaction */
- bool wsrep;
- bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); }
- /** true, if BF thread is performing unique secondary index scanning */
- bool wsrep_UK_scan;
- bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep_UK_scan); }
+ /** nonzero if wsrep_on(mysql_thd) held at the start of transaction;
+ the flag value 2 is what is_wsrep_UK_scan() tests */
+ byte wsrep;
+ bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); }
+ bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep & 2); }
#else /* WITH_WSREP */
- bool is_wsrep() const { return false; }
+ bool is_wsrep() const { return false; }
#endif /* WITH_WSREP */
- ReadView read_view; /*!< consistent read view used in the
- transaction, or NULL if not yet set */
- trx_lock_t lock; /*!< Information about the transaction
- locks and state. Protected by
- lock_sys.mutex (insertions also
- by trx_t::mutex). */
+ /** Consistent read view of the transaction */
+ ReadView read_view;
/* These fields are not protected by any mutex. */
@@ -767,6 +744,8 @@ public:
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
+ /** whether an insert into an empty table is active */
+ bool bulk_insert;
/*------------------------------*/
/* MySQL has a transaction coordinator to coordinate two phase
commit between multiple storage engines and the binary log. When
@@ -800,13 +779,15 @@ public:
flush the log in
trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- trx_dict_op_t dict_operation; /**< @see enum trx_dict_op_t */
-
- ib_uint32_t dict_operation_lock_mode;
- /*!< 0, RW_S_LATCH, or RW_X_LATCH:
- the latch mode trx currently holds
- on dict_sys.latch. Protected
- by dict_sys.latch. */
+ /** whether this modifies InnoDB dictionary tables */
+ bool dict_operation;
+#ifdef UNIV_DEBUG
+ /** copy of dict_operation during commit() */
+ bool was_dict_operation;
+#endif
+ /** whether dict_sys.latch is held exclusively; protected by
+ dict_sys.latch */
+ bool dict_operation_lock_mode;
/** wall-clock time of the latest transition to TRX_STATE_ACTIVE;
used for diagnostic purposes only */
@@ -814,8 +795,6 @@ public:
/** microsecond_interval_timer() of transaction start */
ulonglong start_time_micro;
lsn_t commit_lsn; /*!< lsn at the time of the commit */
- table_id_t table_id; /*!< Table to drop iff dict_operation
- == TRX_DICT_OP_TABLE, or 0. */
/*------------------------------*/
THD* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
@@ -886,7 +865,7 @@ public:
also in the lock list trx_locks. This
vector needs to be freed explicitly
when the trx instance is destroyed.
- Protected by lock_sys.mutex. */
+ Protected by lock_sys.latch. */
/*------------------------------*/
bool read_only; /*!< true if transaction is flagged
as a READ-ONLY transaction.
@@ -899,6 +878,10 @@ public:
bool auto_commit; /*!< true if it is an autocommit */
bool will_lock; /*!< set to inform trx_start_low() that
the transaction may acquire locks */
+ /* True if the transaction has to read the undo log and
+ log the DML changes for a table that is undergoing online DDL */
+ bool apply_online_log = false;
+
/*------------------------------*/
fts_trx_t* fts_trx; /*!< FTS information, or NULL if
transaction hasn't modified tables
@@ -909,20 +892,12 @@ public:
count of tables being flushed. */
/*------------------------------*/
- bool ddl; /*!< true if it is an internal
- transaction for DDL */
- bool internal; /*!< true if it is a system/internal
- transaction background task. This
- includes DDL transactions too. Such
- transactions are always treated as
- read-write. */
- /*------------------------------*/
#ifdef UNIV_DEBUG
unsigned start_line; /*!< Track where it was started from */
const char* start_file; /*!< Filename where it was started */
#endif /* UNIV_DEBUG */
- XID* xid; /*!< X/Open XA transaction
+ XID xid; /*!< X/Open XA transaction
identification to identify a
transaction branch */
trx_mod_tables_t mod_tables; /*!< List of tables that were modified
@@ -964,8 +939,9 @@ public:
inline void release_locks();
/** Evict a table definition due to the rollback of ALTER TABLE.
- @param[in] table_id table identifier */
- void evict_table(table_id_t table_id);
+ @param table_id table identifier
+ @param reset_only whether to only reset dict_table_t::def_trx_id */
+ void evict_table(table_id_t table_id, bool reset_only= false);
/** Initiate rollback.
@param savept savepoint to which to roll back
@@ -979,8 +955,17 @@ public:
@retval false if the rollback was aborted by shutdown */
inline bool rollback_finish();
private:
- /** Mark a transaction committed in the main memory data structures. */
+ /** Apply any changes to tables for which online DDL is in progress. */
+ ATTRIBUTE_COLD void apply_log();
+ /** Process tables that were modified by the committing transaction. */
+ inline void commit_tables();
+ /** Mark a transaction committed in the main memory data structures.
+ @param mtr mini-transaction (if there are any persistent modifications) */
inline void commit_in_memory(const mtr_t *mtr);
+ /** Write log for committing the transaction. */
+ void commit_persist();
+ /** Clean up the transaction after commit_in_memory() */
+ void commit_cleanup();
/** Commit the transaction in a mini-transaction.
@param mtr mini-transaction (if there are any persistent modifications) */
void commit_low(mtr_t *mtr= nullptr);
@@ -988,11 +973,41 @@ public:
/** Commit the transaction. */
void commit();
+
+ /** Try to drop a persistent table.
+ @param table persistent table
+ @return error code */
+ dberr_t drop_table(const dict_table_t &table);
+ /** Try to drop the foreign key constraints for a persistent table.
+ @param name name of persistent table
+ @return error code */
+ dberr_t drop_table_foreign(const table_name_t &name);
+ /** Try to drop the statistics for a persistent table.
+ @param name name of persistent table
+ @return error code */
+ dberr_t drop_table_statistics(const table_name_t &name);
+ /** Commit the transaction, possibly after drop_table().
+ @param deleted handles of data files that were deleted */
+ void commit(std::vector<pfs_os_file_t> &deleted);
+
+
+ /** Discard all savepoints */
+ void savepoints_discard()
+ { savepoints_discard(UT_LIST_GET_FIRST(trx_savepoints)); }
+
+
+ /** Discard all savepoints starting from a particular savepoint.
+ @param savept first savepoint to discard */
+ void savepoints_discard(trx_named_savept_t *savept);
+
+
bool is_referenced() const
{
return (skip_lock_inheritance_and_n_ref & ~(1U << 31)) > 0;
}
+
void reference()
{
ut_d(auto old_n_ref =)
@@ -1032,7 +1047,7 @@ public:
}
/** @return whether the transaction has a lock on
- mysql.innodb_table_stats and mysql.innodb_index_stats */
+ mysql.innodb_table_stats or mysql.innodb_index_stats */
bool has_stats_table_lock() const;
/** Free the memory to trx_pools */
@@ -1043,25 +1058,64 @@ public:
{
ut_ad(state == TRX_STATE_NOT_STARTED);
ut_ad(!id);
+ ut_ad(!mutex_is_owner());
ut_ad(!has_logged());
ut_ad(!is_referenced());
ut_ad(!is_wsrep());
-#ifdef WITH_WSREP
- ut_ad(!lock.was_chosen_as_wsrep_victim);
-#endif
+ ut_ad(!lock.was_chosen_as_deadlock_victim);
+ ut_ad(mod_tables.empty());
ut_ad(!read_view.is_open());
ut_ad(!lock.wait_thr);
+ ut_ad(!lock.wait_lock);
ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0);
ut_ad(lock.table_locks.empty());
ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
- ut_ad(dict_operation == TRX_DICT_OP_NONE);
+ ut_ad(!dict_operation);
+ ut_ad(!apply_online_log);
ut_ad(!is_not_inheriting_locks());
+ ut_ad(check_foreigns);
+ ut_ad(check_unique_secondary);
+ }
+
+ /** This has to be invoked on SAVEPOINT or at the end of a statement.
+ Even if a TRX_UNDO_EMPTY record was written for this table to cover an
+ insert into an empty table, subsequent operations will have to be covered
+ by row-level undo log records, so that ROLLBACK TO SAVEPOINT or a
+ rollback to the start of a statement will work.
+ @param table table on which any preceding bulk insert ended */
+ void end_bulk_insert(const dict_table_t &table)
+ {
+ auto it= mod_tables.find(const_cast<dict_table_t*>(&table));
+ if (it != mod_tables.end())
+ it->second.end_bulk_insert();
}
/** @return whether this is a non-locking autocommit transaction */
bool is_autocommit_non_locking() const { return auto_commit && !will_lock; }
+ /** This has to be invoked on SAVEPOINT or at the start of a statement.
+ Even if TRX_UNDO_EMPTY records were written for any table to cover an
+ insert into an empty table, subsequent operations will have to be covered
+ by row-level undo log records, so that ROLLBACK TO SAVEPOINT or a
+ rollback to the start of a statement will work. */
+ void end_bulk_insert()
+ {
+ for (auto& t : mod_tables)
+ t.second.end_bulk_insert();
+ }
+
+ /** @return whether a bulk insert into empty table is in progress */
+ bool is_bulk_insert() const
+ {
+ if (!bulk_insert || check_unique_secondary || check_foreigns)
+ return false;
+ for (const auto& t : mod_tables)
+ if (t.second.is_bulk_insert())
+ return true;
+ return false;
+ }
+
private:
/** Assign a rollback segment for modifying temporary tables.
@return the assigned rollback segment */
@@ -1134,19 +1188,6 @@ struct commit_node_t{
};
-/** Test if trx->mutex is owned. */
-#define trx_mutex_own(t) mutex_own(&t->mutex)
-
-/** Acquire the trx->mutex. */
-#define trx_mutex_enter(t) do { \
- mutex_enter(&t->mutex); \
-} while (0)
-
-/** Release the trx->mutex. */
-#define trx_mutex_exit(t) do { \
- mutex_exit(&t->mutex); \
-} while (0)
-
#include "trx0trx.inl"
#endif