summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--storage/innobase/include/read0types.h310
-rw-r--r--storage/innobase/include/sync0sync.h1
-rw-r--r--storage/innobase/include/sync0types.h4
-rw-r--r--storage/innobase/include/trx0purge.h29
-rw-r--r--storage/innobase/include/trx0sys.h6
-rw-r--r--storage/innobase/include/trx0trx.h17
-rw-r--r--storage/innobase/include/trx0trx.ic2
-rw-r--r--storage/innobase/lock/lock0lock.cc14
-rw-r--r--storage/innobase/read/read0read.cc128
-rw-r--r--storage/innobase/row/row0row.cc1
-rw-r--r--storage/innobase/row/row0umod.cc7
-rw-r--r--storage/innobase/row/row0vers.cc1
-rw-r--r--storage/innobase/sync/sync0debug.cc3
-rw-r--r--storage/innobase/sync/sync0sync.cc1
-rw-r--r--storage/innobase/trx/trx0i_s.cc2
-rw-r--r--storage/innobase/trx/trx0purge.cc16
-rw-r--r--storage/innobase/trx/trx0rec.cc6
-rw-r--r--storage/innobase/trx/trx0roll.cc6
-rw-r--r--storage/innobase/trx/trx0rseg.cc2
-rw-r--r--storage/innobase/trx/trx0sys.cc2
-rw-r--r--storage/innobase/trx/trx0trx.cc21
-rw-r--r--storage/innobase/trx/trx0undo.cc6
22 files changed, 265 insertions, 320 deletions
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index 48575feda10..0ebdb6a4761 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -32,56 +32,47 @@ Created 2/16/1997 Heikki Tuuri
#include <algorithm>
-/** View is not visible to purge thread. */
-#define READ_VIEW_STATE_CLOSED 0
-
-/** View is being opened, purge thread must wait for state change. */
-#define READ_VIEW_STATE_SNAPSHOT 1
-
-/** View is visible to purge thread. */
-#define READ_VIEW_STATE_OPEN 2
-
-
/**
Read view lists the trx ids of those transactions for which a consistent read
should not see the modifications to the database.
*/
-class ReadView
+class ReadViewBase
{
/**
- View state.
-
- It is not defined as enum as it has to be updated using atomic operations.
- Possible values are READ_VIEW_STATE_CLOSED, READ_VIEW_STATE_SNAPSHOT and
- READ_VIEW_STATE_OPEN.
-
- Possible state transfers...
+ The read should not see any transaction with trx id >= this value.
+ In other words, this is the "high water mark".
+ */
+ trx_id_t m_low_limit_id;
- Start view open:
- READ_VIEW_STATE_CLOSED -> READ_VIEW_STATE_SNAPSHOT
+ /**
+ The read should see all trx ids which are strictly
+ smaller (<) than this value. In other words, this is the
+ "low water mark".
+ */
+ trx_id_t m_up_limit_id;
- Complete view open:
- READ_VIEW_STATE_SNAPSHOT -> READ_VIEW_STATE_OPEN
+ /** Set of RW transactions that were active when this snapshot was taken */
+ trx_ids_t m_ids;
- Close view:
- READ_VIEW_STATE_OPEN -> READ_VIEW_STATE_CLOSED
+ /**
+ The view does not need to see the undo logs for transactions whose
+ transaction number is strictly smaller (<) than this value: they can be
+ removed in purge if not needed by other views.
*/
- std::atomic<uint32_t> m_state;
-
+ trx_id_t m_low_limit_no;
- /** m_state getter for ReadView owner thread */
- uint32_t state() const
- {
- return m_state.load(std::memory_order_relaxed);
- }
+protected:
+ bool empty() { return m_ids.empty(); }
+ /** @return the up limit id */
+ trx_id_t up_limit_id() const { return m_up_limit_id; }
public:
- ReadView(): m_state(READ_VIEW_STATE_CLOSED), m_low_limit_id(0) {}
+ ReadViewBase(): m_low_limit_id(0) {}
/**
- Copy state from another view.
+ Append state from another view.
This method is used to find min(m_low_limit_no), min(m_low_limit_id) and
all transaction ids below min(m_low_limit_id). These values effectively
@@ -89,7 +80,7 @@ public:
@param other view to copy from
*/
- void copy(const ReadView &other)
+ void append(const ReadViewBase &other)
{
ut_ad(&other != this);
if (m_low_limit_no > other.m_low_limit_no)
@@ -98,25 +89,24 @@ public:
m_low_limit_id= other.m_low_limit_id;
trx_ids_t::iterator dst= m_ids.begin();
- for (trx_ids_t::const_iterator src= other.m_ids.begin();
- src != other.m_ids.end(); src++)
+ for (const trx_id_t id : other.m_ids)
{
- if (*src >= m_low_limit_id)
+ if (id >= m_low_limit_id)
break;
loop:
if (dst == m_ids.end())
{
- m_ids.push_back(*src);
+ m_ids.push_back(id);
dst= m_ids.end();
continue;
}
- if (*dst < *src)
+ if (*dst < id)
{
dst++;
goto loop;
}
- else if (*dst > *src)
- dst= m_ids.insert(dst, *src) + 1;
+ else if (*dst > id)
+ dst= m_ids.insert(dst, id) + 1;
}
m_ids.erase(std::lower_bound(dst, m_ids.end(), m_low_limit_id),
m_ids.end());
@@ -127,66 +117,122 @@ loop:
/**
- Opens a read view where exactly the transactions serialized before this
+ Creates a snapshot where exactly the transactions serialized before this
point in time are seen in the view.
- View becomes visible to purge thread.
-
@param[in,out] trx transaction
*/
- void open(trx_t *trx);
+ inline void snapshot(trx_t *trx);
/**
- Closes the view.
+ Check whether transaction id is valid.
+ @param[in] id transaction id to check
+ @param[in] name table name
+
+ @todo changes_visible() was an unfortunate choice for this check.
+ It should be moved towards the functions that load trx id like
+ trx_read_trx_id(). No need to issue a warning, error log message should
+ be enough. Although statement should ideally fail if it sees corrupt
+ data.
+ */
+ static void check_trx_id_sanity(trx_id_t id, const table_name_t &name);
- View becomes not visible to purge thread.
- This method is intended to be called by ReadView owner thread, thus
- m_state cannot change.
+ /**
+ Check whether the changes by id are visible.
+ @param[in] id transaction id to check against the view
+ @param[in] name table name
+ @return whether the view sees the modifications of id.
*/
- void close()
+ bool changes_visible(trx_id_t id, const table_name_t &name) const
+ MY_ATTRIBUTE((warn_unused_result))
{
- ut_ad(state() == READ_VIEW_STATE_CLOSED ||
- state() == READ_VIEW_STATE_OPEN);
- m_state.store(READ_VIEW_STATE_CLOSED, std::memory_order_relaxed);
+ if (id >= m_low_limit_id)
+ {
+ check_trx_id_sanity(id, name);
+ return false;
+ }
+ return id < m_up_limit_id ||
+ m_ids.empty() ||
+ !std::binary_search(m_ids.begin(), m_ids.end(), id);
}
- /** m_state getter for trx_sys::clone_oldest_view() trx_sys::size(). */
- uint32_t get_state() const
- {
- return m_state.load(std::memory_order_acquire);
- }
+ /**
+ @param id transaction to check
+ @return true if view sees transaction id
+ */
+ bool sees(trx_id_t id) const { return id < m_up_limit_id; }
+
+ /** @return the low limit no */
+ trx_id_t low_limit_no() const { return m_low_limit_no; }
+
+ /** @return the low limit id */
+ trx_id_t low_limit_id() const { return m_low_limit_id; }
+};
+/** A ReadView with extra members required for trx_t::read_view. */
+class ReadView: public ReadViewBase
+{
/**
- Returns true if view is open.
+ View state.
- This method is intended to be called by ReadView owner thread, thus
- m_state cannot change.
+ Implemented as atomic to allow mutex-free view close and re-use.
+ Non-owner thread is allowed to call is_open() alone without mutex
+ protection as well. E.g. trx_sys.view_count() does this.
+
+ If non-owner thread intends to access other members as well, both
+ is_open() and other members accesses must be protected by m_mutex.
+ E.g. append_to().
*/
- bool is_open() const
- {
- ut_ad(state() == READ_VIEW_STATE_OPEN ||
- state() == READ_VIEW_STATE_CLOSED);
- return state() == READ_VIEW_STATE_OPEN;
- }
+ std::atomic<bool> m_open;
+ /** For synchronisation with purge coordinator. */
+ mutable ib_mutex_t m_mutex;
/**
- Creates a snapshot where exactly the transactions serialized before this
+ trx id of creating transaction.
+ Used exclusively by the read view owner thread.
+ */
+ trx_id_t m_creator_trx_id;
+
+public:
+ ReadView(): m_open(false) { mutex_create(LATCH_ID_READ_VIEW, &m_mutex); }
+ ~ReadView() { mutex_free(&m_mutex); }
+
+
+ /**
+ Opens a read view where exactly the transactions serialized before this
point in time are seen in the view.
+ View becomes visible to purge thread. Intended to be called by the ReadView
+ owner thread.
+
@param[in,out] trx transaction
*/
- inline void snapshot(trx_t *trx);
+ void open(trx_t *trx);
+
+
+ /**
+ Closes the view.
+
+ View becomes not visible to purge thread. Intended to be called by the
+ ReadView owner thread.
+ */
+ void close() { m_open.store(false, std::memory_order_relaxed); }
+
+
+ /** Returns true if view is open. */
+ bool is_open() const { return m_open.load(std::memory_order_relaxed); }
/**
Sets the creator transaction id.
This should be set only for views created by RW transactions.
+ Intended to be called by the ReadView owner thread.
*/
void set_creator_trx_id(trx_id_t id)
{
@@ -196,97 +242,39 @@ loop:
}
- /** Check whether transaction id is valid.
- @param[in] id transaction id to check
- @param[in] name table name */
- static void check_trx_id_sanity(
- trx_id_t id,
- const table_name_t& name);
-
- /** Check whether the changes by id are visible.
- @param[in] id transaction id to check against the view
- @param[in] name table name
- @return whether the view sees the modifications of id. */
- bool changes_visible(
- trx_id_t id,
- const table_name_t& name) const
- MY_ATTRIBUTE((warn_unused_result))
- {
- if (id < m_up_limit_id || id == m_creator_trx_id) {
-
- return(true);
- }
-
- check_trx_id_sanity(id, name);
-
- if (id >= m_low_limit_id) {
-
- return(false);
-
- } else if (m_ids.empty()) {
-
- return(true);
- }
-
- return(!std::binary_search(m_ids.begin(), m_ids.end(), id));
- }
-
- /**
- @param id transaction to check
- @return true if view sees transaction id */
- bool sees(trx_id_t id) const
- {
- return(id < m_up_limit_id);
- }
-
- /**
- Write the limits to the file.
- @param file file to write to */
- void print_limits(FILE* file) const
- {
- fprintf(file,
- "Trx read view will not see trx with"
- " id >= " TRX_ID_FMT ", sees < " TRX_ID_FMT "\n",
- m_low_limit_id, m_up_limit_id);
- }
-
- /**
- @return the low limit no */
- trx_id_t low_limit_no() const
- {
- return(m_low_limit_no);
- }
-
- /**
- @return the low limit id */
- trx_id_t low_limit_id() const
- {
- return(m_low_limit_id);
- }
-
-
-private:
- /** The read should not see any transaction with trx id >= this
- value. In other words, this is the "high water mark". */
- trx_id_t m_low_limit_id;
-
- /** The read should see all trx ids which are strictly
- smaller (<) than this value. In other words, this is the
- low water mark". */
- trx_id_t m_up_limit_id;
-
- /** trx id of creating transaction, set to TRX_ID_MAX for free
- views. */
- trx_id_t m_creator_trx_id;
-
- /** Set of RW transactions that was active when this snapshot
- was taken */
- trx_ids_t m_ids;
-
- /** The view does not need to see the undo logs for transactions
- whose transaction number is strictly smaller (<) than this value:
- they can be removed in purge if not needed by other views */
- trx_id_t m_low_limit_no;
-};
+ /**
+ Writes the limits to the file.
+ @param file file to write to
+ */
+ void print_limits(FILE *file) const
+ {
+ mutex_enter(&m_mutex);
+ if (is_open())
+ fprintf(file, "Trx read view will not see trx with"
+ " id >= " TRX_ID_FMT ", sees < " TRX_ID_FMT "\n",
+ low_limit_id(), up_limit_id());
+ mutex_exit(&m_mutex);
+ }
+
+ /**
+ A wrapper around ReadViewBase::changes_visible().
+ Intended to be called by the ReadView owner thread.
+ */
+ bool changes_visible(trx_id_t id, const table_name_t &name) const
+ { return id == m_creator_trx_id || ReadViewBase::changes_visible(id, name); }
+
+
+ /**
+ A wrapper around ReadViewBase::append().
+ Intended to be called by the purge coordinator task.
+ */
+ void append_to(ReadViewBase *to) const
+ {
+ mutex_enter(&m_mutex);
+ if (is_open())
+ to->append(*this);
+ mutex_exit(&m_mutex);
+ }
+};
#endif
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index c6f7008e036..6c67fe9182d 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -108,6 +108,7 @@ extern mysql_pfs_key_t thread_mutex_key;
extern mysql_pfs_key_t zip_pad_mutex_key;
extern mysql_pfs_key_t row_drop_list_mutex_key;
extern mysql_pfs_key_t rw_trx_hash_element_mutex_key;
+extern mysql_pfs_key_t read_view_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 4d2a7c8ff28..91908935086 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -147,7 +147,7 @@ V
lock_sys_mutex Mutex protecting lock_sys_t
|
V
-trx_sys.mutex Mutex protecting trx_sys_t
+trx_sys.mutex Mutex protecting trx_sys.trx_list
|
V
Threads mutex Background thread scheduling mutex
@@ -221,6 +221,7 @@ enum latch_level_t {
SYNC_THREADS,
SYNC_TRX,
SYNC_RW_TRX_HASH_ELEMENT,
+ SYNC_READ_VIEW,
SYNC_TRX_SYS,
SYNC_LOCK_SYS,
SYNC_LOCK_WAIT_SYS,
@@ -368,6 +369,7 @@ enum latch_id_t {
LATCH_ID_FIL_CRYPT_DATA_MUTEX,
LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
LATCH_ID_RW_TRX_HASH_ELEMENT,
+ LATCH_ID_READ_VIEW,
LATCH_ID_TEST_MUTEX,
LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
};
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 0c3d4ee5c00..38e9e7207a2 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -128,8 +128,11 @@ class purge_sys_t
public:
/** latch protecting view, m_enabled */
MY_ALIGNED(CACHE_LINE_SIZE)
- rw_lock_t latch;
+ mutable rw_lock_t latch;
private:
+ /** The purge will not remove undo logs which are >= this view */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ ReadViewBase view;
/** whether purge is enabled; protected by latch and std::atomic */
std::atomic<bool> m_enabled;
/** number of pending stop() calls without resume() */
@@ -137,9 +140,6 @@ private:
public:
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
- MY_ALIGNED(CACHE_LINE_SIZE)
- ReadView view; /*!< The purge will not remove undo logs
- which are >= this view (purge view) */
/** Iterator to the undo log records of committed transactions */
struct iterator
@@ -246,6 +246,27 @@ public:
void stop();
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
void resume();
+ /** A wrapper around ReadViewBase::changes_visible(). */
+ bool changes_visible(trx_id_t id, const table_name_t &name) const
+ {
+ ut_ad(rw_lock_own(&latch, RW_LOCK_S));
+ return view.changes_visible(id, name);
+ }
+ /** A wrapper around ReadViewBase::low_limit_no(). */
+ trx_id_t low_limit_no() const
+ {
+#if 0 /* Unfortunately this assertion does not always hold, see MDEV-22718. */
+ ut_ad(rw_lock_own(&latch, RW_LOCK_S));
+#endif
+ return view.low_limit_no();
+ }
+ /** A wrapper around trx_sys_t::clone_oldest_view(). */
+ void clone_oldest_view()
+ {
+ rw_lock_x_lock(&latch);
+ trx_sys.clone_oldest_view(&view);
+ rw_lock_x_unlock(&latch);
+ }
};
/** The global data structure coordinating a purge */
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 73f05eb5d48..a2f63905e30 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -814,7 +814,7 @@ public:
*/
MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter<uint32_t> rseg_history_len;
- /** Mutex protecting trx_list. */
+ /** Mutex protecting trx_list AND NOTHING ELSE. */
MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex;
/** List of all transactions. */
@@ -1086,7 +1086,7 @@ public:
in. This function is called by purge thread to determine whether it should
purge the delete marked record or not.
*/
- void clone_oldest_view();
+ void clone_oldest_view(ReadViewBase *view) const;
/** @return the number of active views */
@@ -1098,7 +1098,7 @@ public:
for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
trx= UT_LIST_GET_NEXT(trx_list, trx))
{
- if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN)
+ if (trx->read_view.is_open())
++count;
}
mutex_exit(&mutex);
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 294670a6f43..cb590886473 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -241,8 +241,7 @@ trx_commit_step(
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
-Prints info about a transaction.
-Caller must hold trx_sys.mutex. */
+Prints info about a transaction. */
void
trx_print_low(
/*==========*/
@@ -262,7 +261,6 @@ trx_print_low(
/**********************************************************************//**
Prints info about a transaction.
-The caller must hold lock_sys.mutex and trx_sys.mutex.
When possible, use trx_print() instead. */
void
trx_print_latched(
@@ -304,7 +302,7 @@ trx_set_dict_operation(
/**********************************************************************//**
Determines if a transaction is in the given state.
-The caller must hold trx_sys.mutex, or it must be the thread
+The caller must hold trx->mutex, or it must be the thread
that is serving a running transaction.
A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */
@@ -740,9 +738,10 @@ public:
max trx id shortly before the
transaction is moved to
COMMITTED_IN_MEMORY state.
- Protected by trx_sys_t::mutex
- when trx is in rw_trx_hash. Initially
- set to TRX_ID_MAX. */
+ Accessed exclusively by trx owner
+ thread. Should be removed in favour of
+ trx->rw_trx_hash_element->no.
+ Initially set to TRX_ID_MAX. */
/** State of the trx from the point of view of concurrency control
and the valid state transitions.
@@ -783,7 +782,7 @@ public:
XA (2PC) transactions are always treated as non-autocommit.
Transitions to ACTIVE or NOT_STARTED occur when transaction
- is not in rw_trx_hash (no trx_sys.mutex needed).
+ is not in rw_trx_hash.
Autocommit non-locking read-only transactions move between states
without holding any mutex. They are not in rw_trx_hash.
@@ -799,7 +798,7 @@ public:
in rw_trx_hash.
ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
- The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
+ The transition ACTIVE->PREPARED is protected by trx->mutex.
ACTIVE->COMMITTED is possible when the transaction is in
rw_trx_hash.
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index f51d5368022..9fa4136f743 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -26,7 +26,7 @@ Created 3/26/1996 Heikki Tuuri
/**********************************************************************//**
Determines if a transaction is in the given state.
-The caller must hold trx_sys.mutex, or it must be the thread
+The caller must hold trx->mutex, or it must be the thread
that is serving a running transaction.
A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 622e5ba7515..82bfdff3917 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -4595,15 +4595,7 @@ lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now)
fprintf(file, "---");
trx_print_latched(file, trx, 600);
-
- /* Note: read_view->get_state() check is race condition. But it
- should "kind of work" because read_view is freed only at shutdown.
- Worst thing that may happen is that it'll get transferred to
- another thread and print wrong values. */
-
- if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN) {
- trx->read_view.print_limits(file);
- }
+ trx->read_view.print_limits(file);
if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
@@ -5169,8 +5161,8 @@ lock_validate()
(lock_validate_table_locks), 0);
/* Iterate over all the record locks and validate the locks. We
- don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
- Release both mutexes during the validation check. */
+ don't want to hog the lock_sys_t::mutex. Release it during the
+ validation check. */
for (ulint i = 0; i < hash_get_n_cells(lock_sys.rec_hash); i++) {
ib_uint64_t limit = 0;
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index 1cc88f4b758..e1d93d74a09 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -161,16 +161,6 @@ but it will never be dereferenced, because the purge view is older
than any active transaction.
For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
-
-Some additional issues:
-
-What if trx_sys.view_list == NULL and some transaction T1 and Purge both
-try to open read_view at same time. Only one can acquire trx_sys.mutex.
-In which order will the views be opened? Should it matter? If no, why?
-
-The order does not matter. No new transactions can be created and no running
-RW transaction can commit or rollback (or free views). AC-NL-RO transactions
-will mark their views as closed but not actually free their views.
*/
@@ -180,7 +170,7 @@ will mark their views as closed but not actually free their views.
@param[in,out] trx transaction
*/
-inline void ReadView::snapshot(trx_t *trx)
+inline void ReadViewBase::snapshot(trx_t *trx)
{
trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no);
std::sort(m_ids.begin(), m_ids.end());
@@ -196,74 +186,52 @@ inline void ReadView::snapshot(trx_t *trx)
View becomes visible to purge thread.
@param[in,out] trx transaction
+
+ Reuses closed view if there were no read-write transactions since (and at)
+ its creation time.
+
+ Original comment states: there is an inherent race here between purge
+ and this thread.
+
+ To avoid this race we should've checked trx_sys.get_max_trx_id() and
+ set m_open atomically under ReadView::m_mutex protection. But we're cutting
+ corners to achieve greater performance.
+
+ There're at least two types of concurrent threads interested in this
+ value: purge coordinator thread (see trx_sys_t::clone_oldest_view()) and
+ InnoDB monitor thread (see lock_trx_print_wait_and_mvcc_state()).
+
+ What bad things can happen because we allow this race?
+
+ Speculative execution may reorder state change before get_max_trx_id().
+ In this case purge thread has short gap to clone outdated view. Which is
+ probably not that bad: it just won't be able to purge things that it was
+ actually allowed to purge for a short while.
+
+ This thread may as well get suspended after trx_sys.get_max_trx_id() and
+ before m_open is set. New read-write transaction may get started, committed
+ and purged meanwhile. It is acceptable as well, since this view doesn't see
+ it.
*/
void ReadView::open(trx_t *trx)
{
ut_ad(this == &trx->read_view);
- switch (state())
- {
- case READ_VIEW_STATE_OPEN:
+ if (is_open())
ut_ad(!srv_read_only_mode);
- return;
- case READ_VIEW_STATE_CLOSED:
- if (srv_read_only_mode)
- return;
- /*
- Reuse closed view if there were no read-write transactions since (and at)
- its creation time.
-
- Original comment states: there is an inherent race here between purge
- and this thread.
-
- To avoid this race we should've checked trx_sys.get_max_trx_id() and
- set state to READ_VIEW_STATE_OPEN atomically under trx_sys.mutex
- protection. But we're cutting edges to achieve great scalability.
-
- There're at least two types of concurrent threads interested in this
- value: purge coordinator thread (see trx_sys_t::clone_oldest_view()) and
- InnoDB monitor thread (see lock_trx_print_wait_and_mvcc_state()).
-
- What bad things can happen because we allow this race?
-
- Speculative execution may reorder state change before get_max_trx_id().
- In this case purge thread has short gap to clone outdated view. Which is
- probably not that bad: it just won't be able to purge things that it was
- actually allowed to purge for a short while.
-
- This thread may as well get suspended after trx_sys.get_max_trx_id() and
- before state is set to READ_VIEW_STATE_OPEN. New read-write transaction
- may get started, committed and purged meanwhile. It is acceptable as
- well, since this view doesn't see it.
- */
- if (trx_is_autocommit_non_locking(trx) && m_ids.empty() &&
- m_low_limit_id == trx_sys.get_max_trx_id())
- goto reopen;
-
- /*
- Can't reuse view, take new snapshot.
-
- Alas this empty critical section is simplest way to make sure concurrent
- purge thread completed snapshot copy. Of course purge thread may come
- again and try to copy once again after we release this mutex, but in
- this case it is guaranteed to see READ_VIEW_STATE_REGISTERED and thus
- it'll skip this view.
-
- This critical section can be replaced with new state, which purge thread
- would set to inform us to wait until it completes snapshot. However it'd
- complicate m_state even further.
- */
- mutex_enter(&trx_sys.mutex);
- mutex_exit(&trx_sys.mutex);
- m_state.store(READ_VIEW_STATE_SNAPSHOT, std::memory_order_relaxed);
- break;
- default:
- ut_ad(0);
+ else if (likely(!srv_read_only_mode))
+ {
+ m_creator_trx_id= trx->id;
+ if (trx_is_autocommit_non_locking(trx) && empty() &&
+ low_limit_id() == trx_sys.get_max_trx_id())
+ m_open.store(true, std::memory_order_relaxed);
+ else
+ {
+ mutex_enter(&m_mutex);
+ snapshot(trx);
+ m_open.store(true, std::memory_order_relaxed);
+ mutex_exit(&m_mutex);
+ }
}
-
- snapshot(trx);
-reopen:
- m_creator_trx_id= trx->id;
- m_state.store(READ_VIEW_STATE_OPEN, std::memory_order_release);
}
@@ -274,21 +242,13 @@ reopen:
in. This function is called by purge thread to determine whether it should
purge the delete marked record or not.
*/
-void trx_sys_t::clone_oldest_view()
+void trx_sys_t::clone_oldest_view(ReadViewBase *view) const
{
- purge_sys.view.snapshot(0);
+ view->snapshot(nullptr);
mutex_enter(&mutex);
/* Find oldest view. */
for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
trx= UT_LIST_GET_NEXT(trx_list, trx))
- {
- uint32_t state;
-
- while ((state= trx->read_view.get_state()) == READ_VIEW_STATE_SNAPSHOT)
- ut_delay(1);
-
- if (state == READ_VIEW_STATE_OPEN)
- purge_sys.view.copy(trx->read_view);
- }
+ trx->read_view.append_to(view);
mutex_exit(&mutex);
}
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index 08572b2a3a1..aec5377ce14 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -435,7 +435,6 @@ row_build_low(
ut_ad(rec != NULL);
ut_ad(heap != NULL);
ut_ad(dict_index_is_clust(index));
- ut_ad(!mutex_own(&trx_sys.mutex));
ut_ad(!col_map || col_table);
if (!offsets) {
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 3df17365560..2fb48f4036a 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -216,8 +216,7 @@ static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
mtr->s_lock(&purge_sys.latch, __FILE__, __LINE__);
- if (!purge_sys.view.changes_visible(node->new_trx_id,
- node->table->name)) {
+ if (!purge_sys.changes_visible(node->new_trx_id, node->table->name)) {
return false;
}
@@ -422,8 +421,8 @@ row_undo_mod_clust(
}
rec_t* rec = btr_pcur_get_rec(pcur);
mtr.s_lock(&purge_sys.latch, __FILE__, __LINE__);
- if (!purge_sys.view.changes_visible(node->new_trx_id,
- node->table->name)) {
+ if (!purge_sys.changes_visible(node->new_trx_id,
+ node->table->name)) {
goto mtr_commit_exit;
}
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index 9295dee931e..86fe3ea9155 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -396,7 +396,6 @@ row_vers_impl_x_locked(
dict_index_t* clust_index;
ut_ad(!lock_mutex_own());
- ut_ad(!mutex_own(&trx_sys.mutex));
mtr_start(&mtr);
diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc
index 837d0846699..2e64fdd732e 100644
--- a/storage/innobase/sync/sync0debug.cc
+++ b/storage/innobase/sync/sync0debug.cc
@@ -478,6 +478,7 @@ LatchDebug::LatchDebug()
LEVEL_MAP_INSERT(SYNC_THREADS);
LEVEL_MAP_INSERT(SYNC_TRX);
LEVEL_MAP_INSERT(SYNC_RW_TRX_HASH_ELEMENT);
+ LEVEL_MAP_INSERT(SYNC_READ_VIEW);
LEVEL_MAP_INSERT(SYNC_TRX_SYS);
LEVEL_MAP_INSERT(SYNC_LOCK_SYS);
LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS);
@@ -759,6 +760,7 @@ LatchDebug::check_order(
case SYNC_LOCK_SYS:
case SYNC_LOCK_WAIT_SYS:
case SYNC_RW_TRX_HASH_ELEMENT:
+ case SYNC_READ_VIEW:
case SYNC_TRX_SYS:
case SYNC_IBUF_BITMAP_MUTEX:
case SYNC_REDO_RSEG:
@@ -1499,6 +1501,7 @@ sync_latch_meta_init()
PFS_NOT_INSTRUMENTED);
LATCH_ADD_MUTEX(RW_TRX_HASH_ELEMENT, SYNC_RW_TRX_HASH_ELEMENT,
rw_trx_hash_element_mutex_key);
+ LATCH_ADD_MUTEX(READ_VIEW, SYNC_READ_VIEW, read_view_mutex_key);
latch_id_t id = LATCH_ID_NONE;
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index 0f028d6400d..af97603d551 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -95,6 +95,7 @@ mysql_pfs_key_t thread_mutex_key;
mysql_pfs_key_t zip_pad_mutex_key;
mysql_pfs_key_t row_drop_list_mutex_key;
mysql_pfs_key_t rw_trx_hash_element_mutex_key;
+mysql_pfs_key_t read_view_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK
mysql_pfs_key_t btr_search_latch_key;
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index 2aaba63885c..27329448091 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -162,7 +162,7 @@ struct trx_i_s_cache_t {
ha_storage_t* storage; /*!< storage for external volatile
data that may become unavailable
when we release
- lock_sys.mutex or trx_sys.mutex */
+ lock_sys.mutex */
ulint mem_allocd; /*!< the amount of memory
allocated with mem_alloc*() */
bool is_truncated; /*!< this is true if the memory
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 6e730faf567..702d6081d0d 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -555,9 +555,9 @@ static void trx_purge_truncate_history()
purge_sys_t::iterator& head = purge_sys.head.commit
? purge_sys.head : purge_sys.tail;
- if (head.trx_no() >= purge_sys.view.low_limit_no()) {
+ if (head.trx_no() >= purge_sys.low_limit_no()) {
/* This is sometimes necessary. TODO: find out why. */
- head.reset_trx_no(purge_sys.view.low_limit_no());
+ head.reset_trx_no(purge_sys.low_limit_no());
head.undo_no = 0;
}
@@ -978,7 +978,7 @@ trx_purge_get_next_rec(
mtr_t mtr;
ut_ad(purge_sys.next_stored);
- ut_ad(purge_sys.tail.trx_no() < purge_sys.view.low_limit_no());
+ ut_ad(purge_sys.tail.trx_no() < purge_sys.low_limit_no());
const ulint space = purge_sys.rseg->space->id;
const uint32_t page_no = purge_sys.page_no;
@@ -1068,7 +1068,7 @@ trx_purge_fetch_next_rec(
}
}
- if (purge_sys.tail.trx_no() >= purge_sys.view.low_limit_no()) {
+ if (purge_sys.tail.trx_no() >= purge_sys.low_limit_no()) {
return(NULL);
}
@@ -1213,9 +1213,7 @@ trx_purge_dml_delay(void)
thread. */
ulint delay = 0; /* in microseconds; default: no delay */
- /* If purge lag is set (ie. > 0) then calculate the new DML delay.
- Note: we do a dirty read of the trx_sys_t data structure here,
- without holding trx_sys.mutex. */
+ /* If purge lag is set then calculate the new DML delay. */
if (srv_max_purge_lag > 0) {
double ratio = static_cast<double>(trx_sys.rseg_history_len) /
@@ -1273,9 +1271,7 @@ ulint trx_purge(ulint n_tasks, bool truncate)
srv_dml_needed_delay = trx_purge_dml_delay();
- rw_lock_x_lock(&purge_sys.latch);
- trx_sys.clone_oldest_view();
- rw_lock_x_unlock(&purge_sys.latch);
+ purge_sys.clone_oldest_view();
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index cf18c249a3b..5a05ce988f9 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -2174,11 +2174,9 @@ trx_undo_get_undo_rec(
const table_name_t& name,
trx_undo_rec_t** undo_rec)
{
- bool missing_history;
-
rw_lock_s_lock(&purge_sys.latch);
- missing_history = purge_sys.view.changes_visible(trx_id, name);
+ bool missing_history = purge_sys.changes_visible(trx_id, name);
if (!missing_history) {
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
@@ -2344,7 +2342,7 @@ trx_undo_prev_version_build(
rw_lock_s_lock(&purge_sys.latch);
- missing_extern = purge_sys.view.changes_visible(
+ missing_extern = purge_sys.changes_visible(
trx_id, index->table->name);
rw_lock_s_unlock(&purge_sys.latch);
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index 202a5c2e009..79d4fa21b9b 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -199,7 +199,7 @@ trx_rollback_for_mysql_low(
@return error code or DB_SUCCESS */
dberr_t trx_rollback_for_mysql(trx_t* trx)
{
- /* We are reading trx->state without holding trx_sys.mutex
+ /* We are reading trx->state without holding trx->mutex
here, because the rollback should be invoked for a running
active MySQL transaction (or recovered prepared transaction)
that is associated with the current thread. */
@@ -286,7 +286,7 @@ trx_rollback_last_sql_stat_for_mysql(
{
dberr_t err;
- /* We are reading trx->state without holding trx_sys.mutex
+ /* We are reading trx->state without holding trx->mutex
here, because the statement rollback should be invoked for a
running active MySQL transaction that is associated with the
current thread. */
@@ -460,7 +460,7 @@ trx_rollback_to_savepoint_for_mysql(
{
trx_named_savept_t* savep;
- /* We are reading trx->state without holding trx_sys.mutex
+ /* We are reading trx->state without holding trx->mutex
here, because the savepoint rollback should be invoked for a
running active MySQL transaction that is associated with the
current thread. */
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index 25dfaded978..3790b89bf35 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -671,8 +671,6 @@ trx_rseg_create(ulint space_id)
mtr.start();
- /* To obey the latching order, acquire the file space
- x-latch before the trx_sys.mutex. */
fil_space_t* space = mtr_x_lock_space(space_id, &mtr);
ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index aa2fe4bff45..5131a07d4a5 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -48,7 +48,7 @@ trx_sys_t trx_sys;
@param[in] id transaction id to check
@param[in] name table name */
void
-ReadView::check_trx_id_sanity(
+ReadViewBase::check_trx_id_sanity(
trx_id_t id,
const table_name_t& name)
{
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 47cd014a1c3..80aa716e7e1 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -650,7 +650,7 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
trx_state_t state;
/*
This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys.mutex here.
+ protection of trx->mutex here.
*/
switch (undo->state)
{
@@ -804,7 +804,7 @@ trx_lists_init_at_db_start()
ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id();
}
- trx_sys.clone_oldest_view();
+ purge_sys.clone_oldest_view();
}
/** Assign a persistent rollback segment in a round-robin fashion,
@@ -974,9 +974,8 @@ trx_start_low(
ut_a(ib_vector_is_empty(trx->autoinc_locks));
ut_a(trx->lock.table_locks.empty());
- /* No other thread can access this trx object through rw_trx_hash, thus
- we don't need trx_sys.mutex protection for that purpose. Still this
- trx can be found through trx_sys.trx_list, which means state
+ /* No other thread can access this trx object through rw_trx_hash,
+ but it can still be found through trx_sys.trx_list, which means state
change must be protected by e.g. trx->mutex.
For now we update it without mutex protection, because original code
@@ -1582,7 +1581,7 @@ trx_commit_or_rollback_prepare(
/*===========================*/
trx_t* trx) /*!< in/out: transaction */
{
- /* We are reading trx->state without holding trx_sys.mutex
+ /* We are reading trx->state without holding trx->mutex
here, because the commit or rollback should be invoked for a
running (or recovered prepared) transaction that is associated
with the current thread. */
@@ -1789,9 +1788,6 @@ trx_print_low(
fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
- /* trx->state cannot change from or to NOT_STARTED while we
- are holding the trx_sys.mutex. It may change from ACTIVE to
- PREPARED or COMMITTED. */
switch (trx->state) {
case TRX_STATE_NOT_STARTED:
fputs(", not started", f);
@@ -2366,13 +2362,6 @@ trx_set_rw_mode(
return;
}
- /* Function is promoting existing trx from ro mode to rw mode.
- In this process it has acquired trx_sys.mutex as it plan to
- move trx from ro list to rw list. If in future, some other thread
- looks at this trx object while it is being promoted then ensure
- that both threads are synced by acquring trx->mutex to avoid decision
- based on in-consistent view formed during promotion. */
-
trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
ut_ad(trx->rsegs.m_redo.rseg != 0);
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index aeefa2bf7ca..c5dab0be836 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -74,9 +74,9 @@ can still remove old versions from the bottom of the stack. */
-------------------------------------------------------------------
latches?
-------
-The contention of the trx_sys.mutex should be minimized. When a transaction
-does its first insert or modify in an index, an undo log is assigned for it.
-Then we must have an x-latch to the rollback segment header.
+When a transaction does its first insert or modify in the clustered index, an
+undo log is assigned for it. Then we must have an x-latch to the rollback
+segment header.
When the transaction performs modifications or rolls back, its
undo log is protected by undo page latches.
Only the thread that is associated with the transaction may hold multiple