diff options
Diffstat (limited to 'storage/innobase/include/trx0sys.h')
-rw-r--r-- | storage/innobase/include/trx0sys.h | 163 |
1 files changed, 139 insertions, 24 deletions
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index e7b0e75de10..0d3c916ee81 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -804,6 +804,10 @@ private: MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_max_trx_id; + + /** Solves race condition between register_rw() and snapshot_ids(). */ + MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_rw_trx_hash_version; + bool m_initialised; public: @@ -831,16 +835,6 @@ public: started in InnoDB. */ MY_ALIGNED(CACHE_LINE_SIZE) - trx_ids_t rw_trx_ids; /*!< Array of Read write transaction IDs - for MVCC snapshot. A ReadView would take - a snapshot of these transactions whose - changes are not visible to it. We should - remove transactions from the list before - committing in memory and releasing locks - to ensure right order of removal and - consistent snapshot. */ - - MY_ALIGNED(CACHE_LINE_SIZE) /** Temporary rollback segments */ trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS]; @@ -870,13 +864,11 @@ public: /** Constructor. - We only initialise rw_trx_ids here as it is impossible to postpone it's - initialisation to create(). + Some members may require late initialisation, thus we just mark object as + uninitialised. Real initialisation happens in create(). */ - trx_sys_t(): m_initialised(false), - rw_trx_ids(ut_allocator<trx_id_t>(mem_key_trx_sys_t_rw_trx_ids)) - {} + trx_sys_t(): m_initialised(false) {} /** @@ -920,15 +912,54 @@ public: trx_id_t get_new_trx_id() { - ut_ad(mutex_own(&mutex)); - return static_cast<trx_id_t>(my_atomic_add64_explicit( - reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED)); + trx_id_t id= get_new_trx_id_no_refresh(); + refresh_rw_trx_hash_version(); + return id; + } + + + /** + Takes MVCC snapshot. + + To reduce malloc probablility we reserver rw_trx_hash.size() + 32 elements + in ids. + + For details about get_rw_trx_hash_version() != get_max_trx_id() spin + @sa register_rw(). + + We rely on get_rw_trx_hash_version() to issue ACQUIRE memory barrier so + that loading of m_rw_trx_hash_version happens before accessing rw_trx_hash. + + To optimise snapshot creation rw_trx_hash.iterate() is being used instead + of rw_trx_hash.iterate_no_dups(). It means that some transaction + identifiers may appear multiple times in ids. + + @param[in,out] caller_trx used to get access to rw_trx_hash_pins + @param[out] ids array to store registered transaction identifiers + @param[out] max_trx_id variable to store m_max_trx_id value + */ + + void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id) + { + snapshot_ids_arg arg(ids); + + while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id()) + ut_delay(1); + + ids->clear(); + ids->reserve(rw_trx_hash.size() + 32); + *max_trx_id= arg.m_id; + rw_trx_hash.iterate(caller_trx, + reinterpret_cast<my_hash_walk_action>(copy_one_id), + &arg); + std::sort(ids->begin(), ids->end()); } + /** Initialiser for m_max_trx_id and m_rw_trx_hash_version. */ void init_max_trx_id(trx_id_t value) { - m_max_trx_id= value; + m_max_trx_id= m_rw_trx_hash_version= value; } @@ -945,14 +976,44 @@ public: ulint any_active_transactions(); - /** Registers read-write transaction. */ + /** + Registers read-write transaction. + + Transaction becomes visible to MVCC. + + There's a gap between m_max_trx_id increment and transaction becoming + visible through rw_trx_hash. While we're in this gap concurrent thread may + come and do MVCC snapshot. As a result concurrent read view will be able to + observe records owned by this transaction even before it was committed. + + m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has + to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively + means that all transactions up to m_max_trx_id are available through + rw_trx_hash. + + We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so + that m_rw_trx_hash_version increment happens after transaction becomes + visible through rw_trx_hash. + */ + void register_rw(trx_t *trx) { - mutex_enter(&mutex); - trx->id= get_new_trx_id(); - rw_trx_ids.push_back(trx->id); - mutex_exit(&mutex); + trx->id= get_new_trx_id_no_refresh(); rw_trx_hash.insert(trx); + refresh_rw_trx_hash_version(); + } + + + /** + Deregisters read-write transaction. + + Transaction is removed from rw_trx_hash, which releases all implicit locks. + MVCC snapshot won't see this transaction anymore. + */ + + void deregister_rw(trx_t *trx) + { + rw_trx_hash.erase(trx); } @@ -982,6 +1043,60 @@ private: } return 0; } + + + struct snapshot_ids_arg + { + snapshot_ids_arg(trx_ids_t *ids): m_ids(ids) {} + trx_ids_t *m_ids; + trx_id_t m_id; + }; + + + static my_bool copy_one_id(rw_trx_hash_element_t *element, + snapshot_ids_arg *arg) + { + if (element->id < arg->m_id) + arg->m_ids->push_back(element->id); + return 0; + } + + + /** Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */ + trx_id_t get_rw_trx_hash_version() + { + return static_cast<trx_id_t> + (my_atomic_load64_explicit(reinterpret_cast<int64*> + (&m_rw_trx_hash_version), + MY_MEMORY_ORDER_ACQUIRE)); + } + + + /** Increments m_rw_trx_hash_version, must issue RELEASE memory barrier. */ + void refresh_rw_trx_hash_version() + { + my_atomic_add64_explicit(reinterpret_cast<int64*>(&m_rw_trx_hash_version), + 1, MY_MEMORY_ORDER_RELEASE); + } + + + /** + Allocates new transaction id without refreshing rw_trx_hash version. + + This method is extracted for exclusive use by register_rw() where + transaction must be inserted into rw_trx_hash between new transaction id + allocation and rw_trx_hash version refresh. + + @sa get_new_trx_id() + + @return new transaction id + */ + + trx_id_t get_new_trx_id_no_refresh() + { + return static_cast<trx_id_t>(my_atomic_add64_explicit( + reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED)); + } }; |