author     Sergei Golubchik <vuvova@gmail.com>   2015-05-04 19:17:21 +0200
committer  Sergei Golubchik <vuvova@gmail.com>   2015-05-04 19:17:21 +0200
commit     6d06fbbd1dc25b3c12568f9038060dfdb69f9683 (patch)
tree       21e27f3fddc89f9dda6b337091464ba10c490123 /storage/innobase/lock
parent     1645930d0bd02f79df3ebff412b90acdc15bd9a0 (diff)
download   mariadb-git-6d06fbbd1dc25b3c12568f9038060dfdb69f9683.tar.gz
move to storage/innobase
Diffstat (limited to 'storage/innobase/lock')
-rw-r--r--   storage/innobase/lock/lock0iter.cc    111
-rw-r--r--   storage/innobase/lock/lock0lock.cc   7104
-rw-r--r--   storage/innobase/lock/lock0wait.cc    543
3 files changed, 7758 insertions, 0 deletions
diff --git a/storage/innobase/lock/lock0iter.cc b/storage/innobase/lock/lock0iter.cc new file mode 100644 index 00000000000..b424d2fc757 --- /dev/null +++ b/storage/innobase/lock/lock0iter.cc @@ -0,0 +1,111 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0iter.cc +Lock queue iterator. Can iterate over table and record +lock queues. + +Created July 16, 2007 Vasil Dimov +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "univ.i" +#include "lock0iter.h" +#include "lock0lock.h" +#include "lock0priv.h" +#include "ut0dbg.h" +#include "ut0lst.h" + +/*******************************************************************//** +Initialize lock queue iterator so that it starts to iterate from +"lock". bit_no specifies the record number within the heap where the +record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: +1. If the lock is a table lock, thus we have a table lock queue; +2. If the lock is a record lock and it is a wait lock. In this case + bit_no is calculated in this function by using + lock_rec_find_set_bit(). There is exactly one bit set in the bitmap + of a wait lock. */ +UNIV_INTERN +void +lock_queue_iterator_reset( +/*======================*/ + lock_queue_iterator_t* iter, /*!< out: iterator */ + const lock_t* lock, /*!< in: lock to start from */ + ulint bit_no) /*!< in: record number in the + heap */ +{ + ut_ad(lock_mutex_own()); + + iter->current_lock = lock; + + if (bit_no != ULINT_UNDEFINED) { + + iter->bit_no = bit_no; + } else { + + switch (lock_get_type_low(lock)) { + case LOCK_TABLE: + iter->bit_no = ULINT_UNDEFINED; + break; + case LOCK_REC: + iter->bit_no = lock_rec_find_set_bit(lock); + ut_a(iter->bit_no != ULINT_UNDEFINED); + break; + default: + ut_error; + } + } +} + +/*******************************************************************//** +Gets the previous lock in the lock queue, returns NULL if there are no +more locks (i.e. the current lock is the first one). The iterator is +receded (if not-NULL is returned). 
+@return previous lock or NULL */ +UNIV_INTERN +const lock_t* +lock_queue_iterator_get_prev( +/*=========================*/ + lock_queue_iterator_t* iter) /*!< in/out: iterator */ +{ + const lock_t* prev_lock; + + ut_ad(lock_mutex_own()); + + switch (lock_get_type_low(iter->current_lock)) { + case LOCK_REC: + prev_lock = lock_rec_get_prev( + iter->current_lock, iter->bit_no); + break; + case LOCK_TABLE: + prev_lock = UT_LIST_GET_PREV( + un_member.tab_lock.locks, iter->current_lock); + break; + default: + ut_error; + } + + if (prev_lock != NULL) { + + iter->current_lock = prev_lock; + } + + return(prev_lock); +} diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc new file mode 100644 index 00000000000..bf7ca1607d1 --- /dev/null +++ b/storage/innobase/lock/lock0lock.cc @@ -0,0 +1,7104 @@ +/***************************************************************************** + +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0lock.cc +The transaction lock system + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "lock0lock.h" +#include "lock0priv.h" + +#ifdef UNIV_NONINL +#include "lock0lock.ic" +#include "lock0priv.ic" +#endif + +#include "ha_prototypes.h" +#include "usr0sess.h" +#include "trx0purge.h" +#include "dict0mem.h" +#include "dict0boot.h" +#include "trx0sys.h" +#include "pars0pars.h" /* pars_complete_graph_for_exec() */ +#include "que0que.h" /* que_node_get_parent() */ +#include "row0mysql.h" /* row_mysql_handle_errors() */ +#include "row0sel.h" /* sel_node_create(), sel_node_t */ +#include "row0types.h" /* sel_node_t */ +#include "srv0mon.h" +#include "ut0vec.h" +#include "btr0btr.h" +#include "dict0boot.h" +#include <set> + +/* Restricts the length of search we will do in the waits-for +graph of transactions */ +#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 + +/* Restricts the search depth we will do in the waits-for graph of +transactions */ +#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200 + +/* When releasing transaction locks, this specifies how often we release +the lock mutex for a moment to give also others access to it */ + +#define LOCK_RELEASE_INTERVAL 1000 + +/* Safety margin when creating a new record lock: this many extra records +can be inserted to the page without need to create a lock with a bigger +bitmap */ + +#define LOCK_PAGE_BITMAP_MARGIN 64 + +/* An explicit record lock affects both the record and the gap before it. +An implicit x-lock does not affect the gap, it only locks the index +record from read or update. + +If a transaction has modified or inserted an index record, then +it owns an implicit x-lock on the record. 
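[Editorial aside, not part of the patch: for a clustered index record the implicit x-lock is never stored anywhere; it is deduced from the trx id field of the record. A minimal sketch of that deduction, with an invented helper name; row_get_rec_trx_id() and trx_rw_is_active() are real functions used elsewhere in this file.]

	/* Sketch: deduce the implicit x-lock holder of a clustered
	index record; 0 means no active transaction holds one. */
	static trx_id_t
	clust_rec_implicit_x_lock_holder(
		const rec_t*	rec,	/* in: clustered index record */
		dict_index_t*	index,	/* in: clustered index */
		const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
	{
		trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);

		/* If that transaction has already committed, the
		implicit lock is gone with it. */
		return(trx_rw_is_active(trx_id, NULL) != NULL ? trx_id : 0);
	}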
On a secondary index record,
+a transaction has an implicit x-lock also if it has modified the
+clustered index record, the max trx id of the page where the secondary
+index record resides is >= trx id of the transaction (or database recovery
+is running), and there are no explicit non-gap lock requests on the
+secondary index record.
+
+This complicated definition for a secondary index comes from the
+implementation: we want to be able to determine if a secondary index
+record has an implicit x-lock, just by looking at the present clustered
+index record, not at the historical versions of the record. The
+complicated definition can be explained to the user so that there is
+nondeterminism in the access path when a query is answered: we may,
+or may not, access the clustered index record and thus may, or may not,
+bump into an x-lock set there.
+
+Different transactions can have conflicting locks set on the gap at the
+same time. The locks on the gap are purely inhibitive: an insert cannot
+be made, or a select cursor may have to wait if a different transaction
+has a conflicting lock on the gap. An x-lock on the gap does not give
+the right to insert into the gap.
+
+An explicit lock can be placed on a user record or the supremum record of
+a page. The locks on the supremum record are always thought to be of the gap
+type, though the gap bit is not set. When we perform an update of a record
+where the size of the record changes, we may temporarily store its explicit
+locks on the infimum record of the page, though the infimum otherwise never
+carries locks.
+
+A waiting record lock can also be of the gap type. A waiting lock request
+can be granted when there is no conflicting mode lock request by another
+transaction ahead of it in the explicit lock queue.
+
+In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
+It only locks the record it is placed on, not the gap before the record.
+This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
+level.
+
+-------------------------------------------------------------------------
+RULE 1: If there is an implicit x-lock on a record, and there are non-gap
+-------
+lock requests waiting in the queue, then the transaction holding the implicit
+x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
+released, we can grant locks to waiting lock requests purely by looking at
+the explicit lock requests in the queue.
+
+RULE 3: Different transactions cannot have conflicting granted non-gap locks
+-------
+on a record at the same time. However, they can have conflicting granted gap
+locks.
+RULE 4: If there is a waiting lock request in a queue, no lock request,
+-------
+gap or not, can be inserted ahead of it in the queue. In record deletes
+and page splits new gap type locks can be created by the database manager
+for a transaction, and without rule 4, the waits-for graph of transactions
+might become cyclic without the database noticing it, as the deadlock check
+is only performed when a transaction itself requests a lock!
+-------------------------------------------------------------------------
+
+An insert is allowed to a gap if there are no explicit lock requests by
+other transactions on the next record. It does not matter if these lock
+requests are granted or waiting, gap bit set or not, with the exception
+that a gap type request set by another transaction to wait for
+its turn to do an insert is ignored.
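[Editorial illustration, not part of the patch: the gap rules above condense into the predicate sketched here. It mirrors the early returns of lock_rec_has_to_wait() later in this file; the function name is invented, the supremum and LOCK_REC_NOT_GAP special cases are omitted, and the sketch assumes the two lock modes already conflict.]

	static ibool
	gap_request_must_wait(
		ulint		type_mode,	/* in: mode of the new request */
		const lock_t*	lock2)		/* in: conflicting lock already
						in the queue */
	{
		if ((type_mode & LOCK_GAP)
		    && !(type_mode & LOCK_INSERT_INTENTION)) {
			/* Plain gap locks coexist, even in
			conflicting modes. */
			return(FALSE);
		}

		if (lock_rec_get_insert_intention(lock2)) {
			/* No request ever waits for an insert
			intention lock. */
			return(FALSE);
		}

		return(TRUE);	/* otherwise mode compatibility decides */
	}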
On the other hand, an
+implicit x-lock by another transaction does not prevent an insert, which
+allows for more concurrency when using an Oracle-style sequence number
+generator for the primary key with many transactions doing inserts
+concurrently.
+
+A modify of a record is allowed if the transaction has an x-lock on the
+record, or if other transactions do not have any non-gap lock requests on the
+record.
+
+A read of a single user record with a cursor is allowed if the transaction
+has a non-gap explicit, or an implicit lock on the record, or if the other
+transactions have no x-lock requests on the record. At a page supremum a
+read is always allowed.
+
+In summary, an implicit lock is seen as a granted x-lock only on the
+record, not on the gap. An explicit lock with no gap bit set is a lock
+both on the record and the gap. If the gap bit is set, the lock is only
+on the gap. Different transactions cannot own conflicting locks on the
+record at the same time, but they may own conflicting locks on the gap.
+Granted locks on a record give an access right to the record, but gap type
+locks just inhibit operations.
+
+NOTE: Finding out if some transaction has an implicit x-lock on a secondary
+index record can be cumbersome. We may have to look at previous versions of
+the corresponding clustered index record to find out if a delete marked
+secondary index record was delete marked by an active transaction, not by
+a committed one.
+
+FACT A: If a transaction has inserted a row, it can delete it any time
+without need to wait for locks.
+
+PROOF: The transaction has an implicit x-lock on every index record inserted
+for the row, and can thus modify each record without the need to wait. Q.E.D.
+
+FACT B: If a transaction has read some result set with a cursor, it can read
+it again, and retrieve the same result set, if it has not modified the
+result set in the meantime. Hence, there is no phantom problem. If the
+biggest record, in the alphabetical order, touched by the cursor is removed,
+a lock wait may occur, otherwise not.
+
+PROOF: When a read cursor proceeds, it sets an s-lock on each user record
+it passes, and a gap type s-lock on each page supremum. The cursor must
+wait until it has these locks granted. Then no other transaction can
+have a granted x-lock on any of the user records, and therefore cannot
+modify the user records. Neither can any other transaction insert into
+the gaps which were passed over by the cursor. Page splits and merges,
+and removal of obsolete versions of records do not affect this, because
+when a user record or a page supremum is removed, the next record inherits
+its locks as gap type locks, and therefore blocks inserts to the same gap.
+Also, if a page supremum is inserted, it inherits its locks from the successor
+record. When the cursor is positioned again at the start of the result set,
+the records it will touch on its course are either records it touched
+during the last pass or newly inserted page supremums. It can immediately
+access all these records, and when it arrives at the biggest record, it
+notices that the result set is complete. If the biggest record was removed,
+a lock wait can occur because the next record only inherits a gap type lock,
+and a wait may be needed. Q.E.D. */
+
+/* If an index record should be changed or a new one inserted, we must check
+the lock on the record or the next. When a read cursor starts reading,
+we will set a record level s-lock on each record it passes, except on the
+initial record on which the cursor is positioned before we start to fetch
+records. Our index tree search has the convention that the B-tree
+cursor is positioned BEFORE the first possibly matching record in
+the search. Optimizations are possible here: if the record is searched
+on an equality condition to a unique key, we could actually set a special
+lock on the record, a lock which would not prevent any insert before
+this record. In the next key locking an x-lock set on a record also
+prevents inserts just before that record.
+	There are special infimum and supremum records on each page.
+A supremum record can be locked by a read cursor. This record cannot be
+updated but the lock prevents insert of a user record to the end of
+the page.
+	Next key locks will prevent the phantom problem where new rows
+could appear in SELECT result sets after the select operation has been
+performed. Prevention of phantoms ensures the serializability of
+transactions.
+	What should we check if an insert of a new record is wanted?
+Only the lock on the next record on the same page, because also the
+supremum record can carry a lock. An s-lock prevents insertion, but
+what about an x-lock? If it was set by a searched update, then there
+is implicitly an s-lock, too, and the insert should be prevented.
+What if our transaction owns an x-lock on the next record, but there is
+a waiting s-lock request on the next record? If this s-lock was placed
+by a read cursor moving in the ascending order in the index, we cannot
+do the insert immediately, because when we finally commit our transaction,
+the read cursor should see also the newly inserted record. So we should
+move the read cursor backward from the next record for it to pass over
+the newly inserted record. This move backward may be too cumbersome to
+implement. If we in this situation just enqueue a second x-lock request
+for our transaction on the next record, then the deadlock mechanism
+notices a deadlock between our transaction and the s-lock request
+transaction. This seems to be an ok solution.
+	We could have the convention that granted explicit record locks
+lock the corresponding records from changing, and also lock the gaps
+before them from inserting. A waiting explicit lock request locks the gap
+before from inserting. Implicit record x-locks, which we derive from the
+transaction id in the clustered index record, only lock the record itself
+from modification, not the gap before it from inserting.
+	How should we store update locks? If the search is done by a unique
+key, we could just modify the record trx id. Otherwise, we could put a record
+x-lock on the record. If the update changes ordering fields of the
+clustered index record, the inserted new record needs no record lock in
+lock table, the trx id is enough. The same holds for a secondary index
+record. Searched delete is similar to update.
+
+PROBLEM:
+What about waiting lock requests? If a transaction is waiting to make an
+update to a record which another modified, how does the other transaction
+know to send the end-lock-wait signal to the waiting transaction? If we have
+the convention that a transaction may wait for just one lock at a time, how
+do we preserve it if a lock wait ends?
+
+PROBLEM:
+Checking the trx id label of a secondary index record. In the case of a
+modification, not an insert, is this necessary?
A secondary index record +is modified only by setting or resetting its deleted flag. A secondary index +record contains fields to uniquely determine the corresponding clustered +index record. A secondary index record is therefore only modified if we +also modify the clustered index record, and the trx id checking is done +on the clustered index record, before we come to modify the secondary index +record. So, in the case of delete marking or unmarking a secondary index +record, we do not have to care about trx ids, only the locks in the lock +table must be checked. In the case of a select from a secondary index, the +trx id is relevant, and in this case we may have to search the clustered +index record. + +PROBLEM: How to update record locks when page is split or merged, or +-------------------------------------------------------------------- +a record is deleted or updated? +If the size of fields in a record changes, we perform the update by +a delete followed by an insert. How can we retain the locks set or +waiting on the record? Because a record lock is indexed in the bitmap +by the heap number of the record, when we remove the record from the +record list, it is possible still to keep the lock bits. If the page +is reorganized, we could make a table of old and new heap numbers, +and permute the bitmaps in the locks accordingly. We can add to the +table a row telling where the updated record ended. If the update does +not require a reorganization of the page, we can simply move the lock +bits for the updated record to the position determined by its new heap +number (we may have to allocate a new lock, if we run out of the bitmap +in the old one). + A more complicated case is the one where the reinsertion of the +updated record is done pessimistically, because the structure of the +tree may change. + +PROBLEM: If a supremum record is removed in a page merge, or a record +--------------------------------------------------------------------- +removed in a purge, what to do to the waiting lock requests? In a split to +the right, we just move the lock requests to the new supremum. If a record +is removed, we could move the waiting lock request to its inheritor, the +next record in the index. But, the next record may already have lock +requests on its own queue. A new deadlock check should be made then. Maybe +it is easier just to release the waiting transactions. They can then enqueue +new lock requests on appropriate records. + +PROBLEM: When a record is inserted, what locks should it inherit from the +------------------------------------------------------------------------- +upper neighbor? An insert of a new supremum record in a page split is +always possible, but an insert of a new user record requires that the upper +neighbor does not have any lock requests by other transactions, granted or +waiting, in its lock queue. Solution: We can copy the locks as gap type +locks, so that also the waiting locks are transformed to granted gap type +locks on the inserted record. */ + +#define LOCK_STACK_SIZE OS_THREAD_MAX_N + +/* LOCK COMPATIBILITY MATRIX + * IS IX S X AI + * IS + + + - + + * IX + + - - + + * S + - + - - + * X - - - - - + * AI + + - - - + * + * Note that for rows, InnoDB only acquires S or X locks. + * For tables, InnoDB normally acquires IS or IX locks. + * S or X table locks are only acquired for LOCK TABLES. + * Auto-increment (AI) locks are needed because of + * statement-level MySQL binlog. + * See also lock_mode_compatible(). 
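[Editorial illustration, not part of the patch: the two matrices defined just below are indexed as matrix[mode1][mode2], with mode1 as the row and mode2 as the column; the compatibility relation is symmetric, the strength relation is not. A few spot checks, written as debug assertions in an invented helper.]

	#ifdef UNIV_DEBUG
	static void
	lock_matrix_spot_checks(void)
	{
		ut_ad(lock_compatibility_matrix[LOCK_IX][LOCK_IS]); /* IX admits IS */
		ut_ad(!lock_compatibility_matrix[LOCK_IX][LOCK_S]); /* IX blocks S */
		ut_ad(lock_strength_matrix[LOCK_X][LOCK_IS]);	/* X covers IS */
		ut_ad(!lock_strength_matrix[LOCK_S][LOCK_IX]);	/* S does not cover IX */
	}
	#endif /* UNIV_DEBUG */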
+ */
+static const byte lock_compatibility_matrix[5][5] = {
+ /**		IS	IX	S	X	AI */
+ /* IS */	{ TRUE, TRUE, TRUE, FALSE, TRUE},
+ /* IX */	{ TRUE, TRUE, FALSE, FALSE, TRUE},
+ /* S  */	{ TRUE, FALSE, TRUE, FALSE, FALSE},
+ /* X  */	{ FALSE, FALSE, FALSE, FALSE, FALSE},
+ /* AI */	{ TRUE, TRUE, FALSE, FALSE, FALSE}
+};
+
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ *    IS IX S  X  AI
+ * IS +  -  -  -  -
+ * IX +  +  -  -  -
+ * S  +  -  +  -  -
+ * X  +  +  +  +  +
+ * AI -  -  -  -  +
+ * See lock_mode_stronger_or_eq().
+ */
+static const byte lock_strength_matrix[5][5] = {
+ /**		IS	IX	S	X	AI */
+ /* IS */	{ TRUE, FALSE, FALSE, FALSE, FALSE},
+ /* IX */	{ TRUE, TRUE, FALSE, FALSE, FALSE},
+ /* S  */	{ TRUE, FALSE, TRUE, FALSE, FALSE},
+ /* X  */	{ TRUE, TRUE, TRUE, TRUE, TRUE},
+ /* AI */	{ FALSE, FALSE, FALSE, FALSE, TRUE}
+};
+
+/** Deadlock check context. */
+struct lock_deadlock_ctx_t {
+	const trx_t*	start;		/*!< Joining transaction that is
+					requesting a lock in an incompatible
+					mode */
+
+	const lock_t*	wait_lock;	/*!< Lock that trx wants */
+
+	ib_uint64_t	mark_start;	/*!< Value of lock_mark_count at
+					the start of the deadlock check. */
+
+	ulint		depth;		/*!< Stack depth */
+
+	ulint		cost;		/*!< Calculation steps thus far */
+
+	ibool		too_deep;	/*!< TRUE if search was too deep and
+					was aborted */
+};
+
+/** DFS visited node information used during deadlock checking. */
+struct lock_stack_t {
+	const lock_t*	lock;		/*!< Current lock */
+	const lock_t*	wait_lock;	/*!< Waiting for lock */
+	ulint		heap_no;	/*!< heap number if rec lock */
+};
+
+/** Stack to use during DFS search. Currently only a single stack is required
+because there is no parallel deadlock check. This stack is protected by
+the lock_sys_t::mutex. */
+static lock_stack_t*	lock_stack;
+
+/** The count of the types of locks. */
+static const ulint	lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
+
+#ifdef UNIV_PFS_MUTEX
+/* Key to register mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	lock_sys_mutex_key;
+/* Key to register mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	lock_sys_wait_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool	lock_print_waits	= FALSE;
+
+/*********************************************************************//**
+Validates the lock system.
+@return TRUE if ok */
+static
+bool
+lock_validate();
+/*============*/
+
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return TRUE if ok */
+static
+ibool
+lock_rec_validate_page(
+/*===================*/
+	const buf_block_t*	block)	/*!< in: buffer block */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+
+/* The lock system */
+UNIV_INTERN lock_sys_t*	lock_sys = NULL;
+
+/** We store info on the latest deadlock error to this buffer. InnoDB
+Monitor will then fetch it and print */
+UNIV_INTERN ibool	lock_deadlock_found = FALSE;
+/** Only created if !srv_read_only_mode */
+static FILE*		lock_latest_err_file;
+
+/********************************************************************//**
+Checks if a joining lock request results in a deadlock. If a deadlock is
+found this function will resolve the deadlock by choosing a victim transaction
+and rolling it back. It will attempt to resolve all deadlocks. The returned
+transaction id will be the joining transaction id or 0 if some other
+transaction was chosen as a victim and rolled back or no deadlock found.
+
+@return id of transaction chosen as victim or 0 */
+static
+trx_id_t
+lock_deadlock_check_and_resolve(
+/*===========================*/
+	const lock_t*	lock,	/*!< in: lock the transaction is requesting */
+	const trx_t*	trx);	/*!< in: transaction */
+
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if the bit is set; FALSE if not, also when i == ULINT_UNDEFINED */
+UNIV_INLINE
+ibool
+lock_rec_get_nth_bit(
+/*=================*/
+	const lock_t*	lock,	/*!< in: record lock */
+	ulint		i)	/*!< in: index of the bit */
+{
+	const byte*	b;
+
+	ut_ad(lock);
+	ut_ad(lock_get_type_low(lock) == LOCK_REC);
+
+	if (i >= lock->un_member.rec_lock.n_bits) {
+
+		return(FALSE);
+	}
+
+	b = ((const byte*) &lock[1]) + (i / 8);
+
+	return(1 & *b >> (i % 8));
+}
+
+/*********************************************************************//**
+Reports that a transaction id is insensible, i.e., in the future. */
+UNIV_INTERN
+void
+lock_report_trx_id_insanity(
+/*========================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
+{
+	ut_print_timestamp(stderr);
+	fputs(" InnoDB: Error: transaction id associated with record\n",
+	      stderr);
+	rec_print_new(stderr, rec, offsets);
+	fputs("InnoDB: in ", stderr);
+	dict_index_name_print(stderr, NULL, index);
+	fprintf(stderr, "\n"
+		"InnoDB: is " TRX_ID_FMT " which is higher than the"
+		" global trx id counter " TRX_ID_FMT "!\n"
+		"InnoDB: The table is corrupt. You have to do"
+		" dump + drop + reimport.\n",
+		trx_id, max_trx_id);
+}
+
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return true if ok */
+#ifdef UNIV_DEBUG
+UNIV_INTERN
+#else
+static __attribute__((nonnull, warn_unused_result))
+#endif
+bool
+lock_check_trx_id_sanity(
+/*=====================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
+{
+	bool		is_ok;
+	trx_id_t	max_trx_id;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	max_trx_id = trx_sys_get_max_trx_id();
+	is_ok = trx_id < max_trx_id;
+
+	if (UNIV_UNLIKELY(!is_ok)) {
+		lock_report_trx_id_insanity(trx_id,
+					    rec, index, offsets, max_trx_id);
+	}
+
+	return(is_ok);
+}
+
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return true if it sees, or false if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
+bool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	read_view_t*	view)	/*!< in: consistent read view */
+{
+	trx_id_t	trx_id;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(page_rec_is_user_rec(rec));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	/* NOTE that we call this function while holding the search
+	system latch.
*/ + + trx_id = row_get_rec_trx_id(rec, index, offsets); + + return(read_view_sees_trx_id(view, trx_id)); +} + +/*********************************************************************//** +Checks that a non-clustered index record is seen in a consistent read. + +NOTE that a non-clustered index page contains so little information on +its modifications that also in the case false, the present version of +rec may be the right, but we must check this from the clustered index +record. + +@return true if certainly sees, or false if an earlier version of the +clustered index record might be needed */ +UNIV_INTERN +bool +lock_sec_rec_cons_read_sees( +/*========================*/ + const rec_t* rec, /*!< in: user record which + should be read or passed over + by a read cursor */ + const read_view_t* view) /*!< in: consistent read view */ +{ + trx_id_t max_trx_id; + + ut_ad(page_rec_is_user_rec(rec)); + + /* NOTE that we might call this function while holding the search + system latch. */ + + if (recv_recovery_is_on()) { + + return(false); + } + + max_trx_id = page_get_max_trx_id(page_align(rec)); + ut_ad(max_trx_id); + + return(max_trx_id < view->up_limit_id); +} + +/*********************************************************************//** +Creates the lock system at database start. */ +UNIV_INTERN +void +lock_sys_create( +/*============*/ + ulint n_cells) /*!< in: number of slots in lock hash table */ +{ + ulint lock_sys_sz; + + lock_sys_sz = sizeof(*lock_sys) + + OS_THREAD_MAX_N * sizeof(srv_slot_t); + + lock_sys = static_cast<lock_sys_t*>(mem_zalloc(lock_sys_sz)); + + lock_stack = static_cast<lock_stack_t*>( + mem_zalloc(sizeof(*lock_stack) * LOCK_STACK_SIZE)); + + void* ptr = &lock_sys[1]; + + lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr); + + lock_sys->last_slot = lock_sys->waiting_threads; + + mutex_create(lock_sys_mutex_key, &lock_sys->mutex, SYNC_LOCK_SYS); + + mutex_create(lock_sys_wait_mutex_key, + &lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS); + + lock_sys->timeout_event = os_event_create(); + + lock_sys->rec_hash = hash_create(n_cells); + + if (!srv_read_only_mode) { + lock_latest_err_file = os_file_create_tmpfile(); + ut_a(lock_latest_err_file); + } +} + +/*********************************************************************//** +Closes the lock system at database shutdown. */ +UNIV_INTERN +void +lock_sys_close(void) +/*================*/ +{ + if (lock_latest_err_file != NULL) { + fclose(lock_latest_err_file); + lock_latest_err_file = NULL; + } + + hash_table_free(lock_sys->rec_hash); + + mutex_free(&lock_sys->mutex); + mutex_free(&lock_sys->wait_mutex); + + mem_free(lock_stack); + mem_free(lock_sys); + + lock_sys = NULL; + lock_stack = NULL; +} + +/*********************************************************************//** +Gets the size of a lock struct. +@return size in bytes */ +UNIV_INTERN +ulint +lock_get_size(void) +/*===============*/ +{ + return((ulint) sizeof(lock_t)); +} + +/*********************************************************************//** +Gets the mode of a lock. +@return mode */ +UNIV_INLINE +enum lock_mode +lock_get_mode( +/*==========*/ + const lock_t* lock) /*!< in: lock */ +{ + ut_ad(lock); + + return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK)); +} + +/*********************************************************************//** +Gets the wait flag of a lock. 
+@return LOCK_WAIT if waiting, 0 if not */ +UNIV_INLINE +ulint +lock_get_wait( +/*==========*/ + const lock_t* lock) /*!< in: lock */ +{ + ut_ad(lock); + + return(lock->type_mode & LOCK_WAIT); +} + +/*********************************************************************//** +Gets the source table of an ALTER TABLE transaction. The table must be +covered by an IX or IS table lock. +@return the source table of transaction, if it is covered by an IX or +IS table lock; dest if there is no source table, and NULL if the +transaction is locking more than two tables or an inconsistency is +found */ +UNIV_INTERN +dict_table_t* +lock_get_src_table( +/*===============*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* dest, /*!< in: destination of ALTER TABLE */ + enum lock_mode* mode) /*!< out: lock mode of the source table */ +{ + dict_table_t* src; + lock_t* lock; + + ut_ad(!lock_mutex_own()); + + src = NULL; + *mode = LOCK_NONE; + + /* The trx mutex protects the trx_locks for our purposes. + Other transactions could want to convert one of our implicit + record locks to an explicit one. For that, they would need our + trx mutex. Waiting locks can be removed while only holding + lock_sys->mutex, but this is a running transaction and cannot + thus be holding any waiting locks. */ + trx_mutex_enter(trx); + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + lock_table_t* tab_lock; + enum lock_mode lock_mode; + if (!(lock_get_type_low(lock) & LOCK_TABLE)) { + /* We are only interested in table locks. */ + continue; + } + tab_lock = &lock->un_member.tab_lock; + if (dest == tab_lock->table) { + /* We are not interested in the destination table. */ + continue; + } else if (!src) { + /* This presumably is the source table. */ + src = tab_lock->table; + if (UT_LIST_GET_LEN(src->locks) != 1 + || UT_LIST_GET_FIRST(src->locks) != lock) { + /* We only support the case when + there is only one lock on this table. */ + src = NULL; + goto func_exit; + } + } else if (src != tab_lock->table) { + /* The transaction is locking more than + two tables (src and dest): abort */ + src = NULL; + goto func_exit; + } + + /* Check that the source table is locked by + LOCK_IX or LOCK_IS. */ + lock_mode = lock_get_mode(lock); + if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) { + if (*mode != LOCK_NONE && *mode != lock_mode) { + /* There are multiple locks on src. */ + src = NULL; + goto func_exit; + } + *mode = lock_mode; + } + } + + if (!src) { + /* No source table lock found: flag the situation to caller */ + src = dest; + } + +func_exit: + trx_mutex_exit(trx); + return(src); +} + +/*********************************************************************//** +Determine if the given table is exclusively "owned" by the given +transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC +on the table. +@return TRUE if table is only locked by trx, with LOCK_IX, and +possibly LOCK_AUTO_INC */ +UNIV_INTERN +ibool +lock_is_table_exclusive( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + const trx_t* trx) /*!< in: transaction */ +{ + const lock_t* lock; + ibool ok = FALSE; + + ut_ad(table); + ut_ad(trx); + + lock_mutex_enter(); + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) { + if (lock->trx != trx) { + /* A lock on the table is held + by some other transaction. 
*/ + goto not_ok; + } + + if (!(lock_get_type_low(lock) & LOCK_TABLE)) { + /* We are interested in table locks only. */ + continue; + } + + switch (lock_get_mode(lock)) { + case LOCK_IX: + ok = TRUE; + break; + case LOCK_AUTO_INC: + /* It is allowed for trx to hold an + auto_increment lock. */ + break; + default: +not_ok: + /* Other table locks than LOCK_IX are not allowed. */ + ok = FALSE; + goto func_exit; + } + } + +func_exit: + lock_mutex_exit(); + + return(ok); +} + +/*********************************************************************//** +Sets the wait flag of a lock and the back pointer in trx to lock. */ +UNIV_INLINE +void +lock_set_lock_and_trx_wait( +/*=======================*/ + lock_t* lock, /*!< in: lock */ + trx_t* trx) /*!< in/out: trx */ +{ + ut_ad(lock); + ut_ad(lock->trx == trx); + ut_ad(trx->lock.wait_lock == NULL); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(trx)); + + trx->lock.wait_lock = lock; + lock->type_mode |= LOCK_WAIT; +} + +/**********************************************************************//** +The back pointer to a waiting lock request in the transaction is set to NULL +and the wait bit in lock type_mode is reset. */ +UNIV_INLINE +void +lock_reset_lock_and_trx_wait( +/*=========================*/ + lock_t* lock) /*!< in/out: record lock */ +{ + ut_ad(lock->trx->lock.wait_lock == lock); + ut_ad(lock_get_wait(lock)); + ut_ad(lock_mutex_own()); + + lock->trx->lock.wait_lock = NULL; + lock->type_mode &= ~LOCK_WAIT; +} + +/*********************************************************************//** +Gets the gap flag of a record lock. +@return LOCK_GAP or 0 */ +UNIV_INLINE +ulint +lock_rec_get_gap( +/*=============*/ + const lock_t* lock) /*!< in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + return(lock->type_mode & LOCK_GAP); +} + +/*********************************************************************//** +Gets the LOCK_REC_NOT_GAP flag of a record lock. +@return LOCK_REC_NOT_GAP or 0 */ +UNIV_INLINE +ulint +lock_rec_get_rec_not_gap( +/*=====================*/ + const lock_t* lock) /*!< in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + return(lock->type_mode & LOCK_REC_NOT_GAP); +} + +/*********************************************************************//** +Gets the waiting insert flag of a record lock. +@return LOCK_INSERT_INTENTION or 0 */ +UNIV_INLINE +ulint +lock_rec_get_insert_intention( +/*==========================*/ + const lock_t* lock) /*!< in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + return(lock->type_mode & LOCK_INSERT_INTENTION); +} + +/*********************************************************************//** +Calculates if lock mode 1 is stronger or equal to lock mode 2. +@return nonzero if mode1 stronger or equal to mode2 */ +UNIV_INLINE +ulint +lock_mode_stronger_or_eq( +/*=====================*/ + enum lock_mode mode1, /*!< in: lock mode */ + enum lock_mode mode2) /*!< in: lock mode */ +{ + ut_ad((ulint) mode1 < lock_types); + ut_ad((ulint) mode2 < lock_types); + + return(lock_strength_matrix[mode1][mode2]); +} + +/*********************************************************************//** +Calculates if lock mode 1 is compatible with lock mode 2. 
+@return nonzero if mode1 compatible with mode2 */
+UNIV_INLINE
+ulint
+lock_mode_compatible(
+/*=================*/
+	enum lock_mode	mode1,	/*!< in: lock mode */
+	enum lock_mode	mode2)	/*!< in: lock mode */
+{
+	ut_ad((ulint) mode1 < lock_types);
+	ut_ad((ulint) mode2 < lock_types);
+
+	return(lock_compatibility_matrix[mode1][mode2]);
+}
+
+/*********************************************************************//**
+Checks if a lock request for a new lock has to wait for request lock2.
+@return TRUE if new lock has to wait for lock2 to be removed */
+UNIV_INLINE
+ibool
+lock_rec_has_to_wait(
+/*=================*/
+	const trx_t*	trx,	/*!< in: trx of new lock */
+	ulint		type_mode,/*!< in: precise mode of the new lock
+				to set: LOCK_S or LOCK_X, possibly
+				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
+				LOCK_INSERT_INTENTION */
+	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
+				it is assumed that this has a lock bit
+				set on the same record as in the new
+				lock we are setting */
+	ibool		lock_is_on_supremum)	/*!< in: TRUE if we are setting the
+				lock on the 'supremum' record of an
+				index page: we know then that the lock
+				request is really for a 'gap' type lock */
+{
+	ut_ad(trx && lock2);
+	ut_ad(lock_get_type_low(lock2) == LOCK_REC);
+
+	if (trx != lock2->trx
+	    && !lock_mode_compatible(static_cast<enum lock_mode>(
+				     LOCK_MODE_MASK & type_mode),
+				     lock_get_mode(lock2))) {
+
+		/* We have somewhat complex rules when gap type record locks
+		cause waits */
+
+		if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
+		    && !(type_mode & LOCK_INSERT_INTENTION)) {
+
+			/* Gap type locks without LOCK_INSERT_INTENTION flag
+			do not need to wait for anything. This is because
+			different users can have conflicting lock types
+			on gaps. */
+
+			return(FALSE);
+		}
+
+		if (!(type_mode & LOCK_INSERT_INTENTION)
+		    && lock_rec_get_gap(lock2)) {
+
+			/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
+			does not need to wait for a gap type lock */
+
+			return(FALSE);
+		}
+
+		if ((type_mode & LOCK_GAP)
+		    && lock_rec_get_rec_not_gap(lock2)) {
+
+			/* Lock on gap does not need to wait for
+			a LOCK_REC_NOT_GAP type lock */
+
+			return(FALSE);
+		}
+
+		if (lock_rec_get_insert_intention(lock2)) {
+
+			/* No lock request needs to wait for an insert
+			intention lock to be removed. This is ok since our
+			rules allow conflicting locks on gaps. This eliminates
+			a spurious deadlock caused by a next-key lock waiting
+			for an insert intention lock; when the insert
+			intention lock was granted, the insert deadlocked on
+			the waiting next-key lock.
+
+			Also, insert intention locks do not disturb each
+			other. */
+
+			return(FALSE);
+		}
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return TRUE if lock1 has to wait for lock2 to be removed */ +UNIV_INTERN +ibool +lock_has_to_wait( +/*=============*/ + const lock_t* lock1, /*!< in: waiting lock */ + const lock_t* lock2) /*!< in: another lock; NOTE that it is + assumed that this has a lock bit set + on the same record as in lock1 if the + locks are record locks */ +{ + ut_ad(lock1 && lock2); + + if (lock1->trx != lock2->trx + && !lock_mode_compatible(lock_get_mode(lock1), + lock_get_mode(lock2))) { + if (lock_get_type_low(lock1) == LOCK_REC) { + ut_ad(lock_get_type_low(lock2) == LOCK_REC); + + /* If this lock request is for a supremum record + then the second bit on the lock bitmap is set */ + + return(lock_rec_has_to_wait(lock1->trx, + lock1->type_mode, lock2, + lock_rec_get_nth_bit( + lock1, 1))); + } + + return(TRUE); + } + + return(FALSE); +} + +/*============== RECORD LOCK BASIC FUNCTIONS ============================*/ + +/*********************************************************************//** +Gets the number of bits in a record lock bitmap. +@return number of bits */ +UNIV_INLINE +ulint +lock_rec_get_n_bits( +/*================*/ + const lock_t* lock) /*!< in: record lock */ +{ + return(lock->un_member.rec_lock.n_bits); +} + +/**********************************************************************//** +Sets the nth bit of a record lock to TRUE. */ +UNIV_INLINE +void +lock_rec_set_nth_bit( +/*=================*/ + lock_t* lock, /*!< in: record lock */ + ulint i) /*!< in: index of the bit */ +{ + ulint byte_index; + ulint bit_index; + + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + ut_ad(i < lock->un_member.rec_lock.n_bits); + + byte_index = i / 8; + bit_index = i % 8; + + ((byte*) &lock[1])[byte_index] |= 1 << bit_index; +} + +/**********************************************************************//** +Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, +if none found. +@return bit index == heap number of the record, or ULINT_UNDEFINED if +none found */ +UNIV_INTERN +ulint +lock_rec_find_set_bit( +/*==================*/ + const lock_t* lock) /*!< in: record lock with at least one bit set */ +{ + ulint i; + + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { + + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +/**********************************************************************//** +Resets the nth bit of a record lock. */ +UNIV_INLINE +void +lock_rec_reset_nth_bit( +/*===================*/ + lock_t* lock, /*!< in: record lock */ + ulint i) /*!< in: index of the bit which must be set to TRUE + when this function is called */ +{ + ulint byte_index; + ulint bit_index; + + ut_ad(lock); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + ut_ad(i < lock->un_member.rec_lock.n_bits); + + byte_index = i / 8; + bit_index = i % 8; + + ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index); +} + +/*********************************************************************//** +Gets the first or next record lock on a page. 
+@return next lock, NULL if none exists */ +UNIV_INLINE +const lock_t* +lock_rec_get_next_on_page_const( +/*============================*/ + const lock_t* lock) /*!< in: a record lock */ +{ + ulint space; + ulint page_no; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + for (;;) { + lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock)); + + if (!lock) { + + break; + } + + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Gets the first or next record lock on a page. +@return next lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_next_on_page( +/*======================*/ + lock_t* lock) /*!< in: a record lock */ +{ + return((lock_t*) lock_rec_get_next_on_page_const(lock)); +} + +/*********************************************************************//** +Gets the first record lock on a page, where the page is identified by its +file address. +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_first_on_page_addr( +/*============================*/ + ulint space, /*!< in: space */ + ulint page_no)/*!< in: page number */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + + for (lock = static_cast<lock_t*>( + HASH_GET_FIRST(lock_sys->rec_hash, + lock_rec_hash(space, page_no))); + lock != NULL; + lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) { + + if (lock->un_member.rec_lock.space == space + && lock->un_member.rec_lock.page_no == page_no) { + + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Determines if there are explicit record locks on a page. +@return an explicit record lock on the page, or NULL if there are none */ +UNIV_INTERN +lock_t* +lock_rec_expl_exist_on_page( +/*========================*/ + ulint space, /*!< in: space id */ + ulint page_no)/*!< in: page number */ +{ + lock_t* lock; + + lock_mutex_enter(); + lock = lock_rec_get_first_on_page_addr(space, page_no); + lock_mutex_exit(); + + return(lock); +} + +/*********************************************************************//** +Gets the first record lock on a page, where the page is identified by a +pointer to it. +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_first_on_page( +/*=======================*/ + const buf_block_t* block) /*!< in: buffer block */ +{ + ulint hash; + lock_t* lock; + ulint space = buf_block_get_space(block); + ulint page_no = buf_block_get_page_no(block); + + ut_ad(lock_mutex_own()); + + hash = buf_block_get_lock_hash_val(block); + + for (lock = static_cast<lock_t*>( + HASH_GET_FIRST( lock_sys->rec_hash, hash)); + lock != NULL; + lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) { + + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Gets the next explicit lock request on a record. 
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ +UNIV_INLINE +lock_t* +lock_rec_get_next( +/*==============*/ + ulint heap_no,/*!< in: heap number of the record */ + lock_t* lock) /*!< in: lock */ +{ + ut_ad(lock_mutex_own()); + + do { + ut_ad(lock_get_type_low(lock) == LOCK_REC); + lock = lock_rec_get_next_on_page(lock); + } while (lock && !lock_rec_get_nth_bit(lock, heap_no)); + + return(lock); +} + +/*********************************************************************//** +Gets the next explicit lock request on a record. +@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ +UNIV_INLINE +const lock_t* +lock_rec_get_next_const( +/*====================*/ + ulint heap_no,/*!< in: heap number of the record */ + const lock_t* lock) /*!< in: lock */ +{ + return(lock_rec_get_next(heap_no, (lock_t*) lock)); +} + +/*********************************************************************//** +Gets the first explicit lock request on a record. +@return first lock, NULL if none exists */ +UNIV_INLINE +lock_t* +lock_rec_get_first( +/*===============*/ + const buf_block_t* block, /*!< in: block containing the record */ + ulint heap_no)/*!< in: heap number of the record */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + + for (lock = lock_rec_get_first_on_page(block); lock; + lock = lock_rec_get_next_on_page(lock)) { + if (lock_rec_get_nth_bit(lock, heap_no)) { + break; + } + } + + return(lock); +} + +/*********************************************************************//** +Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock +pointer in the transaction! This function is used in lock object creation +and resetting. */ +static +void +lock_rec_bitmap_reset( +/*==================*/ + lock_t* lock) /*!< in: record lock */ +{ + ulint n_bytes; + + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + /* Reset to zero the bitmap which resides immediately after the lock + struct */ + + n_bytes = lock_rec_get_n_bits(lock) / 8; + + ut_ad((lock_rec_get_n_bits(lock) % 8) == 0); + + memset(&lock[1], 0, n_bytes); +} + +/*********************************************************************//** +Copies a record lock to heap. +@return copy of lock */ +static +lock_t* +lock_rec_copy( +/*==========*/ + const lock_t* lock, /*!< in: record lock */ + mem_heap_t* heap) /*!< in: memory heap */ +{ + ulint size; + + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8; + + return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size))); +} + +/*********************************************************************//** +Gets the previous record lock set on a record. 
+@return previous lock on the same record, NULL if none exists */ +UNIV_INTERN +const lock_t* +lock_rec_get_prev( +/*==============*/ + const lock_t* in_lock,/*!< in: record lock */ + ulint heap_no)/*!< in: heap number of the record */ +{ + lock_t* lock; + ulint space; + ulint page_no; + lock_t* found_lock = NULL; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(in_lock) == LOCK_REC); + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + + for (lock = lock_rec_get_first_on_page_addr(space, page_no); + /* No op */; + lock = lock_rec_get_next_on_page(lock)) { + + ut_ad(lock); + + if (lock == in_lock) { + + return(found_lock); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + + found_lock = lock; + } + } +} + +/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/ + +/*********************************************************************//** +Checks if a transaction has the specified table lock, or stronger. This +function should only be called by the thread that owns the transaction. +@return lock or NULL */ +UNIV_INLINE +const lock_t* +lock_table_has( +/*===========*/ + const trx_t* trx, /*!< in: transaction */ + const dict_table_t* table, /*!< in: table */ + enum lock_mode mode) /*!< in: lock mode */ +{ + lint i; + + if (ib_vector_is_empty(trx->lock.table_locks)) { + return(NULL); + } + + /* Look for stronger locks the same trx already has on the table */ + + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + enum lock_mode lock_mode; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + lock_mode = lock_get_mode(lock); + + ut_ad(trx == lock->trx); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(lock->un_member.tab_lock.table != NULL); + + if (table == lock->un_member.tab_lock.table + && lock_mode_stronger_or_eq(lock_mode, mode)) { + + ut_ad(!lock_get_wait(lock)); + + return(lock); + } + } + + return(NULL); +} + +/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ + +/*********************************************************************//** +Checks if a transaction has a GRANTED explicit lock on rec stronger or equal +to precise_mode. 
+@return lock or NULL */ +UNIV_INLINE +lock_t* +lock_rec_has_expl( +/*==============*/ + ulint precise_mode,/*!< in: LOCK_S or LOCK_X + possibly ORed to LOCK_GAP or + LOCK_REC_NOT_GAP, for a + supremum record we regard this + always a gap type request */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: transaction */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S + || (precise_mode & LOCK_MODE_MASK) == LOCK_X); + ut_ad(!(precise_mode & LOCK_INSERT_INTENTION)); + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + + if (lock->trx == trx + && !lock_rec_get_insert_intention(lock) + && lock_mode_stronger_or_eq( + lock_get_mode(lock), + static_cast<enum lock_mode>( + precise_mode & LOCK_MODE_MASK)) + && !lock_get_wait(lock) + && (!lock_rec_get_rec_not_gap(lock) + || (precise_mode & LOCK_REC_NOT_GAP) + || heap_no == PAGE_HEAP_NO_SUPREMUM) + && (!lock_rec_get_gap(lock) + || (precise_mode & LOCK_GAP) + || heap_no == PAGE_HEAP_NO_SUPREMUM)) { + + return(lock); + } + } + + return(NULL); +} + +#ifdef UNIV_DEBUG +/*********************************************************************//** +Checks if some other transaction has a lock request in the queue. +@return lock or NULL */ +static +const lock_t* +lock_rec_other_has_expl_req( +/*========================*/ + enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */ + ulint gap, /*!< in: LOCK_GAP if also gap + locks are taken into account, + or 0 if not */ + ulint wait, /*!< in: LOCK_WAIT if also + waiting locks are taken into + account, or 0 if not */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: transaction, or NULL if + requests by all transactions + are taken into account */ +{ + const lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_ad(mode == LOCK_X || mode == LOCK_S); + ut_ad(gap == 0 || gap == LOCK_GAP); + ut_ad(wait == 0 || wait == LOCK_WAIT); + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + if (lock->trx != trx + && (gap + || !(lock_rec_get_gap(lock) + || heap_no == PAGE_HEAP_NO_SUPREMUM)) + && (wait || !lock_get_wait(lock)) + && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) { + + return(lock); + } + } + + return(NULL); +} +#endif /* UNIV_DEBUG */ + +/*********************************************************************//** +Checks if some other transaction has a conflicting explicit lock request +in the queue, so that we have to wait. 
+@return lock or NULL */ +static +const lock_t* +lock_rec_other_has_conflicting( +/*===========================*/ + enum lock_mode mode, /*!< in: LOCK_S or LOCK_X, + possibly ORed to LOCK_GAP or + LOC_REC_NOT_GAP, + LOCK_INSERT_INTENTION */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: our transaction */ +{ + const lock_t* lock; + ibool is_supremum; + + ut_ad(lock_mutex_own()); + + is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM); + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) { + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Looks for a suitable type record lock struct by the same trx on the same page. +This can be used to save space when a new record lock should be set on a page: +no new struct is needed, if a suitable old is found. +@return lock or NULL */ +UNIV_INLINE +lock_t* +lock_rec_find_similar_on_page( +/*==========================*/ + ulint type_mode, /*!< in: lock type_mode field */ + ulint heap_no, /*!< in: heap number of the record */ + lock_t* lock, /*!< in: lock_rec_get_first_on_page() */ + const trx_t* trx) /*!< in: transaction */ +{ + ut_ad(lock_mutex_own()); + + for (/* No op */; + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { + + if (lock->trx == trx + && lock->type_mode == type_mode + && lock_rec_get_n_bits(lock) > heap_no) { + + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Checks if some transaction has an implicit x-lock on a record in a secondary +index. +@return transaction id of the transaction which has the x-lock, or 0; +NOTE that this function can return false positives but never false +negatives. The caller must confirm all positive results by calling +trx_is_active(). */ +static +trx_id_t +lock_sec_rec_some_has_impl( +/*=======================*/ + const rec_t* rec, /*!< in: user record */ + dict_index_t* index, /*!< in: secondary index */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + trx_id_t trx_id; + trx_id_t max_trx_id; + const page_t* page = page_align(rec); + + ut_ad(!lock_mutex_own()); + ut_ad(!mutex_own(&trx_sys->mutex)); + ut_ad(!dict_index_is_clust(index)); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + + max_trx_id = page_get_max_trx_id(page); + + /* Some transaction may have an implicit x-lock on the record only + if the max trx id for the page >= min trx id for the trx list, or + database recovery is running. We do not write the changes of a page + max trx id to the log, and therefore during recovery, this value + for a page may be incorrect. */ + + if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) { + + trx_id = 0; + + } else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) { + + buf_page_print(page, 0, 0); + + /* The page is corrupt: try to avoid a crash by returning 0 */ + trx_id = 0; + + /* In this case it is possible that some transaction has an implicit + x-lock. We have to look in the clustered index. 
*/
+
+	} else {
+		trx_id = row_vers_impl_x_locked(rec, index, offsets);
+	}
+
+	return(trx_id);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks if some transaction, other than the given trx_id, has an explicit
+lock on the given rec, in the given precise_mode.
+@return the transaction whose id is not equal to trx_id and that has an
+explicit lock on the given rec in the given precise_mode, or NULL. */
+static
+trx_t*
+lock_rec_other_trx_holds_expl(
+/*==========================*/
+	ulint			precise_mode,	/*!< in: LOCK_S or LOCK_X
+						possibly ORed to LOCK_GAP or
+						LOCK_REC_NOT_GAP. */
+	trx_id_t		trx_id,		/*!< in: trx holding implicit
+						lock on rec */
+	const rec_t*		rec,		/*!< in: user record */
+	const buf_block_t*	block)		/*!< in: buffer block
+						containing the record */
+{
+	trx_t* holds = NULL;
+
+	lock_mutex_enter();
+
+	if (trx_t *impl_trx = trx_rw_is_active(trx_id, NULL)) {
+		ulint heap_no = page_rec_get_heap_no(rec);
+		mutex_enter(&trx_sys->mutex);
+
+		for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+		     t != NULL;
+		     t = UT_LIST_GET_NEXT(trx_list, t)) {
+
+			lock_t *expl_lock = lock_rec_has_expl(
+				precise_mode, block, heap_no, t);
+
+			if (expl_lock && expl_lock->trx != impl_trx) {
+				/* An explicit lock is held by trx other than
+				the trx holding the implicit lock. */
+				holds = expl_lock->trx;
+				break;
+			}
+		}
+
+		mutex_exit(&trx_sys->mutex);
+	}
+
+	lock_mutex_exit();
+
+	return(holds);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Returns the approximate number of record locks (bits set in the bitmap) for
+this transaction. Since delete-marked records may be removed, the
+record count will not be precise.
+The caller must be holding lock_sys->mutex. */
+UNIV_INTERN
+ulint
+lock_number_of_rows_locked(
+/*=======================*/
+	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
+{
+	const lock_t*	lock;
+	ulint		n_records = 0;
+
+	ut_ad(lock_mutex_own());
+
+	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
+
+		if (lock_get_type_low(lock) == LOCK_REC) {
+			ulint	n_bit;
+			ulint	n_bits = lock_rec_get_n_bits(lock);
+
+			for (n_bit = 0; n_bit < n_bits; n_bit++) {
+				if (lock_rec_get_nth_bit(lock, n_bit)) {
+					n_records++;
+				}
+			}
+		}
+	}
+
+	return(n_records);
+}
+
+/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
+
+/*********************************************************************//**
+Creates a new record lock and inserts it to the lock queue. Does NOT check
+for deadlocks or lock compatibility!
+@return created lock */
+static
+lock_t*
+lock_rec_create(
+/*============*/
+	ulint			type_mode,/*!< in: lock mode and wait
+					flag, type is ignored and
+					replaced by LOCK_REC */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no,/*!< in: heap number of the record */
+	dict_index_t*		index,	/*!< in: index of record */
+	trx_t*			trx,	/*!< in/out: transaction */
+	ibool			caller_owns_trx_mutex)
+					/*!< in: TRUE if caller owns
+					trx mutex */
+{
+	lock_t*		lock;
+	ulint		page_no;
+	ulint		space;
+	ulint		n_bits;
+	ulint		n_bytes;
+	const page_t*	page;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
+
+	/* Non-locking autocommit read-only transactions should not set
+	any locks. 
*/ + assert_trx_in_list(trx); + + space = buf_block_get_space(block); + page_no = buf_block_get_page_no(block); + page = block->frame; + + btr_assert_not_corrupted(block, index); + + /* If rec is the supremum record, then we reset the gap and + LOCK_REC_NOT_GAP bits, as all locks on the supremum are + automatically of the gap type */ + + if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { + ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); + + type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); + } + + /* Make lock bitmap bigger by a safety margin */ + n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN; + n_bytes = 1 + n_bits / 8; + + lock = static_cast<lock_t*>( + mem_heap_alloc(trx->lock.lock_heap, sizeof(lock_t) + n_bytes)); + + lock->trx = trx; + + lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; + lock->index = index; + + lock->un_member.rec_lock.space = space; + lock->un_member.rec_lock.page_no = page_no; + lock->un_member.rec_lock.n_bits = n_bytes * 8; + + /* Reset to zero the bitmap which resides immediately after the + lock struct */ + + lock_rec_bitmap_reset(lock); + + /* Set the bit corresponding to rec */ + lock_rec_set_nth_bit(lock, heap_no); + + index->table->n_rec_locks++; + + ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted); + + HASH_INSERT(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), lock); + + if (!caller_owns_trx_mutex) { + trx_mutex_enter(trx); + } + ut_ad(trx_mutex_own(trx)); + + if (type_mode & LOCK_WAIT) { + + lock_set_lock_and_trx_wait(lock, trx); + } + + UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); + + if (!caller_owns_trx_mutex) { + trx_mutex_exit(trx); + } + + MONITOR_INC(MONITOR_RECLOCK_CREATED); + MONITOR_INC(MONITOR_NUM_RECLOCK); + + return(lock); +} + +/*********************************************************************//** +Enqueues a waiting request for a lock which cannot be granted immediately. +Checks for deadlocks. 
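+
+An illustrative sketch of how a caller dispatches on the result (compare
+lock_rec_lock_slow(); the surrounding variables are assumed, and the
+caller holds both lock_sys->mutex and trx->mutex):
+@code
+	dberr_t	err = lock_rec_enqueue_waiting(
+		LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, index, thr);
+
+	switch (err) {
+	case DB_SUCCESS_LOCKED_REC:
+		// granted after all: another trx was chosen as victim
+		break;
+	case DB_LOCK_WAIT:
+		// suspend this query thread until the lock is granted
+		break;
+	case DB_DEADLOCK:
+		// this transaction was chosen as the deadlock victim
+		break;
+	default:
+		break;
+	}
+@endcode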
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ +static +dberr_t +lock_rec_enqueue_waiting( +/*=====================*/ + ulint type_mode,/*!< in: lock mode this + transaction is requesting: + LOCK_S or LOCK_X, possibly + ORed with LOCK_GAP or + LOCK_REC_NOT_GAP, ORed with + LOCK_INSERT_INTENTION if this + waiting lock request is set + when performing an insert of + an index record */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t* trx; + lock_t* lock; + trx_id_t victim_trx_id; + + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + trx = thr_get_trx(thr); + + ut_ad(trx_mutex_own(trx)); + + /* Test if there already is some other reason to suspend thread: + we do not enqueue a lock request if the query thread should be + stopped anyway */ + + if (que_thr_stop(thr)) { + ut_error; + + return(DB_QUE_THR_SUSPENDED); + } + + switch (trx_get_dict_operation(trx)) { + case TRX_DICT_OP_NONE: + break; + case TRX_DICT_OP_TABLE: + case TRX_DICT_OP_INDEX: + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: a record lock wait happens" + " in a dictionary operation!\n" + "InnoDB: ", stderr); + dict_index_name_print(stderr, trx, index); + fputs(".\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + stderr); + ut_ad(0); + } + + /* Enqueue the lock request that will wait to be granted, note that + we already own the trx mutex. */ + lock = lock_rec_create( + type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE); + + /* Release the mutex to obey the latching order. + This is safe, because lock_deadlock_check_and_resolve() + is invoked when a lock wait is enqueued for the currently + running transaction. Because trx is a running transaction + (it is not currently suspended because of a lock wait), + its state can only be changed by this thread, which is + currently associated with the transaction. */ + + trx_mutex_exit(trx); + + victim_trx_id = lock_deadlock_check_and_resolve(lock, trx); + + trx_mutex_enter(trx); + + if (victim_trx_id != 0) { + + ut_ad(victim_trx_id == trx->id); + + lock_reset_lock_and_trx_wait(lock); + lock_rec_reset_nth_bit(lock, heap_no); + + return(DB_DEADLOCK); + + } else if (trx->lock.wait_lock == NULL) { + + /* If there was a deadlock but we chose another + transaction as a victim, it is possible that we + already have the lock now granted! */ + + return(DB_SUCCESS_LOCKED_REC); + } + + trx->lock.que_state = TRX_QUE_LOCK_WAIT; + + trx->lock.was_chosen_as_deadlock_victim = FALSE; + trx->lock.wait_started = ut_time(); + + ut_a(que_thr_stop(thr)); + +#ifdef UNIV_DEBUG + if (lock_print_waits) { + fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ", + trx->id); + ut_print_name(stderr, trx, FALSE, index->name); + } +#endif /* UNIV_DEBUG */ + + MONITOR_INC(MONITOR_LOCKREC_WAIT); + + return(DB_LOCK_WAIT); +} + +/*********************************************************************//** +Adds a record lock request in the record queue. 
The request is normally +added as the last in the queue, but if there are no waiting lock requests +on the record, and the request to be added is not a waiting request, we +can reuse a suitable record lock object already existing on the same page, +just setting the appropriate bit in its bitmap. This is a low-level function +which does NOT check for deadlocks or lock compatibility! +@return lock where the bit was set */ +static +lock_t* +lock_rec_add_to_queue( +/*==================*/ + ulint type_mode,/*!< in: lock mode, wait, gap + etc. flags; type is ignored + and replaced by LOCK_REC */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of the record */ + dict_index_t* index, /*!< in: index of record */ + trx_t* trx, /*!< in/out: transaction */ + ibool caller_owns_trx_mutex) + /*!< in: TRUE if caller owns the + transaction mutex */ +{ + lock_t* lock; + lock_t* first_lock; + + ut_ad(lock_mutex_own()); + ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx)); + ut_ad(dict_index_is_clust(index) + || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION); +#ifdef UNIV_DEBUG + switch (type_mode & LOCK_MODE_MASK) { + case LOCK_X: + case LOCK_S: + break; + default: + ut_error; + } + + if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) { + enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S + ? LOCK_X + : LOCK_S; + const lock_t* other_lock + = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT, + block, heap_no, trx); + ut_a(!other_lock); + } +#endif /* UNIV_DEBUG */ + + type_mode |= LOCK_REC; + + /* If rec is the supremum record, then we can reset the gap bit, as + all locks on the supremum are automatically of the gap type, and we + try to avoid unnecessary memory consumption of a new record lock + struct for a gap type lock */ + + if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { + ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); + + /* There should never be LOCK_REC_NOT_GAP on a supremum + record, but let us play safe */ + + type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); + } + + /* Look for a waiting lock request on the same record or on a gap */ + + for (first_lock = lock = lock_rec_get_first_on_page(block); + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { + + if (lock_get_wait(lock) + && lock_rec_get_nth_bit(lock, heap_no)) { + + goto somebody_waits; + } + } + + if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) { + + /* Look for a similar record lock on the same page: + if one is found and there are no waiting lock requests, + we can just set the bit */ + + lock = lock_rec_find_similar_on_page( + type_mode, heap_no, first_lock, trx); + + if (lock) { + + lock_rec_set_nth_bit(lock, heap_no); + + return(lock); + } + } + +somebody_waits: + return(lock_rec_create( + type_mode, block, heap_no, index, trx, + caller_owns_trx_mutex)); +} + +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + +/*********************************************************************//** +This is a fast routine for locking a record in the most common cases: +there are no explicit locks on the page, or there is just one lock, owned +by this transaction, and of the right type_mode. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. 
This function sets a normal next-key lock, or in the case of +a page supremum record, a gap type lock. +@return whether the locking succeeded */ +UNIV_INLINE +enum lock_rec_req_status +lock_rec_lock_fast( +/*===============*/ + ibool impl, /*!< in: if TRUE, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + ulint mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + lock_t* lock; + trx_t* trx; + enum lock_rec_req_status status = LOCK_REC_SUCCESS; + + ut_ad(lock_mutex_own()); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((LOCK_MODE_MASK & mode) == LOCK_S + || (LOCK_MODE_MASK & mode) == LOCK_X); + ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP + || mode - (LOCK_MODE_MASK & mode) == 0 + || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL);); + + lock = lock_rec_get_first_on_page(block); + + trx = thr_get_trx(thr); + + if (lock == NULL) { + if (!impl) { + /* Note that we don't own the trx mutex. */ + lock = lock_rec_create( + mode, block, heap_no, index, trx, FALSE); + + } + status = LOCK_REC_SUCCESS_CREATED; + } else { + trx_mutex_enter(trx); + + if (lock_rec_get_next_on_page(lock) + || lock->trx != trx + || lock->type_mode != (mode | LOCK_REC) + || lock_rec_get_n_bits(lock) <= heap_no) { + + status = LOCK_REC_FAIL; + } else if (!impl) { + /* If the nth bit of the record lock is already set + then we do not set a new lock bit, otherwise we do + set */ + if (!lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_set_nth_bit(lock, heap_no); + status = LOCK_REC_SUCCESS_CREATED; + } + } + + trx_mutex_exit(trx); + } + + return(status); +} + +/*********************************************************************//** +This is the general, and slower, routine for locking a record. This is a +low-level function which does NOT look at implicit locks! Checks lock +compatibility within explicit locks. This function sets a normal next-key +lock, or in the case of a page supremum record, a gap type lock. 
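+
+The mode parameter is composed as asserted below: exactly one of LOCK_S
+or LOCK_X, optionally ORed with one of LOCK_GAP or LOCK_REC_NOT_GAP.
+Illustrative calls (the other arguments are assumed):
+@code
+	lock_rec_lock_slow(FALSE, LOCK_X,
+			   block, heap_no, index, thr);
+	// next-key lock: the record and the gap before it
+
+	lock_rec_lock_slow(FALSE, LOCK_X | LOCK_REC_NOT_GAP,
+			   block, heap_no, index, thr);
+	// lock on the record only
+
+	lock_rec_lock_slow(FALSE, LOCK_S | LOCK_GAP,
+			   block, heap_no, index, thr);
+	// lock on the gap before the record only
+@endcode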
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +static +dberr_t +lock_rec_lock_slow( +/*===============*/ + ibool impl, /*!< in: if TRUE, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + ulint mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t* trx; + dberr_t err = DB_SUCCESS; + + ut_ad(lock_mutex_own()); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((LOCK_MODE_MASK & mode) == LOCK_S + || (LOCK_MODE_MASK & mode) == LOCK_X); + ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP + || mode - (LOCK_MODE_MASK & mode) == 0 + || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK);); + + trx = thr_get_trx(thr); + trx_mutex_enter(trx); + + if (lock_rec_has_expl(mode, block, heap_no, trx)) { + + /* The trx already has a strong enough lock on rec: do + nothing */ + + } else if (lock_rec_other_has_conflicting( + static_cast<enum lock_mode>(mode), + block, heap_no, trx)) { + + /* If another transaction has a non-gap conflicting + request in the queue, as this transaction does not + have a lock strong enough already granted on the + record, we have to wait. */ + + err = lock_rec_enqueue_waiting( + mode, block, heap_no, index, thr); + + } else if (!impl) { + /* Set the requested lock on the record, note that + we already own the transaction mutex. */ + + lock_rec_add_to_queue( + LOCK_REC | mode, block, heap_no, index, trx, TRUE); + + err = DB_SUCCESS_LOCKED_REC; + } + + trx_mutex_exit(trx); + + return(err); +} + +/*********************************************************************//** +Tries to lock the specified record in the mode requested. If not immediately +possible, enqueues a waiting lock request. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. This function sets a normal next-key lock, or in the case +of a page supremum record, a gap type lock. 
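+Of the success codes below, DB_SUCCESS means that no new explicit lock
+was set, while DB_SUCCESS_LOCKED_REC means that a new lock or lock bit
+was created (compare LOCK_REC_SUCCESS and LOCK_REC_SUCCESS_CREATED in
+lock_rec_lock_fast() above).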
+@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +static +dberr_t +lock_rec_lock( +/*==========*/ + ibool impl, /*!< in: if TRUE, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + ulint mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + ut_ad(lock_mutex_own()); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((LOCK_MODE_MASK & mode) == LOCK_S + || (LOCK_MODE_MASK & mode) == LOCK_X); + ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP + || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP + || mode - (LOCK_MODE_MASK & mode) == 0); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); + } + + ut_error; + return(DB_ERROR); +} + +/*********************************************************************//** +Checks if a waiting record lock request still has to wait in a queue. +@return lock that is causing the wait */ +static +const lock_t* +lock_rec_has_to_wait_in_queue( +/*==========================*/ + const lock_t* wait_lock) /*!< in: waiting record lock */ +{ + const lock_t* lock; + ulint space; + ulint page_no; + ulint heap_no; + ulint bit_mask; + ulint bit_offset; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_wait(wait_lock)); + ut_ad(lock_get_type_low(wait_lock) == LOCK_REC); + + space = wait_lock->un_member.rec_lock.space; + page_no = wait_lock->un_member.rec_lock.page_no; + heap_no = lock_rec_find_set_bit(wait_lock); + + bit_offset = heap_no / 8; + bit_mask = static_cast<ulint>(1 << (heap_no % 8)); + + for (lock = lock_rec_get_first_on_page_addr(space, page_no); + lock != wait_lock; + lock = lock_rec_get_next_on_page_const(lock)) { + + const byte* p = (const byte*) &lock[1]; + + if (heap_no < lock_rec_get_n_bits(lock) + && (p[bit_offset] & bit_mask) + && lock_has_to_wait(wait_lock, lock)) { + + return(lock); + } + } + + return(NULL); +} + +/*************************************************************//** +Grants a lock to a waiting lock request and releases the waiting transaction. +The caller must hold lock_sys->mutex but not lock->trx->mutex. 
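+lock_grant() itself acquires and releases lock->trx->mutex, which is why
+the caller must not be holding it (see the latching order).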
*/ +static +void +lock_grant( +/*=======*/ + lock_t* lock) /*!< in/out: waiting lock request */ +{ + ut_ad(lock_mutex_own()); + + lock_reset_lock_and_trx_wait(lock); + + trx_mutex_enter(lock->trx); + + if (lock_get_mode(lock) == LOCK_AUTO_INC) { + dict_table_t* table = lock->un_member.tab_lock.table; + + if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) { + fprintf(stderr, + "InnoDB: Error: trx already had" + " an AUTO-INC lock!\n"); + } else { + table->autoinc_trx = lock->trx; + + ib_vector_push(lock->trx->autoinc_locks, &lock); + } + } + +#ifdef UNIV_DEBUG + if (lock_print_waits) { + fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " ends\n", + lock->trx->id); + } +#endif /* UNIV_DEBUG */ + + /* If we are resolving a deadlock by choosing another transaction + as a victim, then our original transaction may not be in the + TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait + for it */ + + if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + que_thr_t* thr; + + thr = que_thr_end_lock_wait(lock->trx); + + if (thr != NULL) { + lock_wait_release_thread_if_suspended(thr); + } + } + + trx_mutex_exit(lock->trx); +} + +/*************************************************************//** +Cancels a waiting record lock request and releases the waiting transaction +that requested it. NOTE: does NOT check if waiting lock requests behind this +one can now be granted! */ +static +void +lock_rec_cancel( +/*============*/ + lock_t* lock) /*!< in: waiting record lock request */ +{ + que_thr_t* thr; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(lock) == LOCK_REC); + + /* Reset the bit (there can be only one set bit) in the lock bitmap */ + lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); + + /* Reset the wait flag and the back pointer to lock in trx */ + + lock_reset_lock_and_trx_wait(lock); + + /* The following function releases the trx from lock wait */ + + trx_mutex_enter(lock->trx); + + thr = que_thr_end_lock_wait(lock->trx); + + if (thr != NULL) { + lock_wait_release_thread_if_suspended(thr); + } + + trx_mutex_exit(lock->trx); +} + +/*************************************************************//** +Removes a record lock request, waiting or granted, from the queue and +grants locks to other transactions in the queue if they now are entitled +to a lock. NOTE: all record locks contained in in_lock are removed. */ +static +void +lock_rec_dequeue_from_page( +/*=======================*/ + lock_t* in_lock) /*!< in: record lock object: all + record locks which are contained in + this lock object are removed; + transactions waiting behind will + get their lock requests granted, + if they are now qualified to it */ +{ + ulint space; + ulint page_no; + lock_t* lock; + trx_lock_t* trx_lock; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_type_low(in_lock) == LOCK_REC); + /* We may or may not be holding in_lock->trx->mutex here. */ + + trx_lock = &in_lock->trx->lock; + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + + in_lock->index->table->n_rec_locks--; + + HASH_DELETE(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), in_lock); + + UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock); + + MONITOR_INC(MONITOR_RECLOCK_REMOVED); + MONITOR_DEC(MONITOR_NUM_RECLOCK); + + /* Check if waiting locks in the queue can now be granted: grant + locks if there are no conflicting locks ahead. Stop at the first + X lock that is waiting or has been granted. 
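+	Each waiting lock is re-checked with lock_rec_has_to_wait_in_queue(),
+	which only looks at the locks ahead of it in the queue.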
*/
+
+	for (lock = lock_rec_get_first_on_page_addr(space, page_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next_on_page(lock)) {
+
+		if (lock_get_wait(lock)
+		    && !lock_rec_has_to_wait_in_queue(lock)) {
+
+			/* Grant the lock */
+			ut_ad(lock->trx != in_lock->trx);
+			lock_grant(lock);
+		}
+	}
+}
+
+/*************************************************************//**
+Removes a record lock request, waiting or granted, from the queue. */
+static
+void
+lock_rec_discard(
+/*=============*/
+	lock_t*	in_lock)	/*!< in: record lock object: all
+				record locks which are contained
+				in this lock object are removed */
+{
+	ulint		space;
+	ulint		page_no;
+	trx_lock_t*	trx_lock;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
+
+	trx_lock = &in_lock->trx->lock;
+
+	space = in_lock->un_member.rec_lock.space;
+	page_no = in_lock->un_member.rec_lock.page_no;
+
+	in_lock->index->table->n_rec_locks--;
+
+	HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+		    lock_rec_fold(space, page_no), in_lock);
+
+	UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock);
+
+	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
+	MONITOR_DEC(MONITOR_NUM_RECLOCK);
+}
+
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function neither moves locks nor checks for waiting locks; therefore the
+lock bitmaps must already be reset when this function is called. */
+static
+void
+lock_rec_free_all_from_discard_page(
+/*================================*/
+	const buf_block_t*	block)	/*!< in: page to be discarded */
+{
+	ulint	space;
+	ulint	page_no;
+	lock_t*	lock;
+	lock_t*	next_lock;
+
+	ut_ad(lock_mutex_own());
+
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+
+	lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+	while (lock != NULL) {
+		ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
+		ut_ad(!lock_get_wait(lock));
+
+		next_lock = lock_rec_get_next_on_page(lock);
+
+		lock_rec_discard(lock);
+
+		lock = next_lock;
+	}
+}
+
+/*============= RECORD LOCK MOVING AND INHERITING ===================*/
+
+/*************************************************************//**
+Resets the lock bits for a single record. Releases transactions waiting for
+lock requests here. */
+static
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	ulint			heap_no)/*!< in: heap number of record */
+{
+	lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	for (lock = lock_rec_get_first(block, heap_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+
+		if (lock_get_wait(lock)) {
+			lock_rec_cancel(lock);
+		} else {
+			lock_rec_reset_nth_bit(lock, heap_no);
+		}
+	}
+}
+
+/*************************************************************//**
+Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of
+the other record. Also waiting lock requests on rec are inherited as
+GRANTED gap locks. 
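+
+For example (illustrative): when a record is purged from a page, its
+successor inherits the purged record's locks in gap mode, so that the
+gap protected by any next-key locks remains protected; see
+lock_update_delete().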
*/
+static
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+	const buf_block_t*	heir_block,	/*!< in: block containing the
+						record which inherits */
+	const buf_block_t*	block,		/*!< in: block containing the
+						record from which inherited;
+						does NOT reset the locks on
+						this record */
+	ulint			heir_heap_no,	/*!< in: heap_no of the
+						inheriting record */
+	ulint			heap_no)	/*!< in: heap_no of the
+						donating record */
+{
+	lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
+	the READ COMMITTED isolation level, we do not want locks set
+	by an UPDATE or a DELETE to be inherited as gap type locks. But we
+	DO want S-locks set by a consistency constraint to be inherited also
+	then. */
+
+	for (lock = lock_rec_get_first(block, heap_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+
+		if (!lock_rec_get_insert_intention(lock)
+		    && !((srv_locks_unsafe_for_binlog
+			  || lock->trx->isolation_level
+			  <= TRX_ISO_READ_COMMITTED)
+			 && lock_get_mode(lock) == LOCK_X)) {
+
+			lock_rec_add_to_queue(
+				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+				heir_block, heir_heap_no, lock->index,
+				lock->trx, FALSE);
+		}
+	}
+}
+
+/*************************************************************//**
+Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of the
+other record. Also waiting lock requests are inherited as GRANTED gap locks. */
+static
+void
+lock_rec_inherit_to_gap_if_gap_lock(
+/*================================*/
+	const buf_block_t*	block,		/*!< in: buffer block */
+	ulint			heir_heap_no,	/*!< in: heap_no of
+						record which inherits */
+	ulint			heap_no)	/*!< in: heap_no of record
+						from which inherited;
+						does NOT reset the locks
+						on this record */
+{
+	lock_t*	lock;
+
+	lock_mutex_enter();
+
+	for (lock = lock_rec_get_first(block, heap_no);
+	     lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+
+		if (!lock_rec_get_insert_intention(lock)
+		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
+			|| !lock_rec_get_rec_not_gap(lock))) {
+
+			lock_rec_add_to_queue(
+				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+				block, heir_heap_no, lock->index,
+				lock->trx, FALSE);
+		}
+	}
+
+	lock_mutex_exit();
+}
+
+/*************************************************************//**
+Moves the locks of a record to another record and resets the lock bits of
+the donating record. */
+static
+void
+lock_rec_move(
+/*==========*/
+	const buf_block_t*	receiver,	/*!< in: buffer block containing
+						the receiving record */
+	const buf_block_t*	donator,	/*!< in: buffer block containing
+						the donating record */
+	ulint			receiver_heap_no,/*!< in: heap_no of the record
+						which gets the locks; there
+						must be no lock requests
+						on it! 
*/ + ulint donator_heap_no)/*!< in: heap_no of the record + which gives the locks */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + + ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL); + + for (lock = lock_rec_get_first(donator, donator_heap_no); + lock != NULL; + lock = lock_rec_get_next(donator_heap_no, lock)) { + + const ulint type_mode = lock->type_mode; + + lock_rec_reset_nth_bit(lock, donator_heap_no); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + lock_reset_lock_and_trx_wait(lock); + } + + /* Note that we FIRST reset the bit, and then set the lock: + the function works also if donator == receiver */ + + lock_rec_add_to_queue( + type_mode, receiver, receiver_heap_no, + lock->index, lock->trx, FALSE); + } + + ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL); +} + +/*************************************************************//** +Updates the lock table when we have reorganized a page. NOTE: we copy +also the locks set on the infimum of the page; the infimum may carry +locks if an update of a record is occurring on the page, and its locks +were temporarily stored on the infimum. */ +UNIV_INTERN +void +lock_move_reorganize_page( +/*======================*/ + const buf_block_t* block, /*!< in: old index page, now + reorganized */ + const buf_block_t* oblock) /*!< in: copy of the old, not + reorganized page */ +{ + lock_t* lock; + UT_LIST_BASE_NODE_T(lock_t) old_locks; + mem_heap_t* heap = NULL; + ulint comp; + + lock_mutex_enter(); + + lock = lock_rec_get_first_on_page(block); + + if (lock == NULL) { + lock_mutex_exit(); + + return; + } + + heap = mem_heap_create(256); + + /* Copy first all the locks on the page to heap and reset the + bitmaps in the original locks; chain the copies of the locks + using the trx_locks field in them. */ + + UT_LIST_INIT(old_locks); + + do { + /* Make a copy of the lock */ + lock_t* old_lock = lock_rec_copy(lock, heap); + + UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock); + + /* Reset bitmap of lock */ + lock_rec_bitmap_reset(lock); + + if (lock_get_wait(lock)) { + + lock_reset_lock_and_trx_wait(lock); + } + + lock = lock_rec_get_next_on_page(lock); + } while (lock != NULL); + + comp = page_is_comp(block->frame); + ut_ad(comp == page_is_comp(oblock->frame)); + + for (lock = UT_LIST_GET_FIRST(old_locks); lock; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + /* NOTE: we copy also the locks set on the infimum and + supremum of the page; the infimum may carry locks if an + update of a record is occurring on the page, and its locks + were temporarily stored on the infimum */ + page_cur_t cur1; + page_cur_t cur2; + + page_cur_set_before_first(block, &cur1); + page_cur_set_before_first(oblock, &cur2); + + /* Set locks according to old locks */ + for (;;) { + ulint old_heap_no; + ulint new_heap_no; + + ut_ad(comp || !memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size_old( + page_cur_get_rec( + &cur2)))); + if (UNIV_LIKELY(comp)) { + old_heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur2)); + new_heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur1)); + } else { + old_heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur2)); + new_heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur1)); + } + + if (lock_rec_get_nth_bit(lock, old_heap_no)) { + + /* Clear the bit in old_lock. */ + ut_d(lock_rec_reset_nth_bit(lock, + old_heap_no)); + + /* NOTE that the old lock bitmap could be too + small for the new heap number! 
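+				In that case lock_rec_add_to_queue()
+				cannot reuse the old lock struct: it
+				creates a fresh one whose bitmap is
+				sized from the current page heap.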
*/ + + lock_rec_add_to_queue( + lock->type_mode, block, new_heap_no, + lock->index, lock->trx, FALSE); + + /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM + && lock_get_wait(lock)) { + fprintf(stderr, + "---\n--\n!!!Lock reorg: supr type %lu\n", + lock->type_mode); + } */ + } + + if (UNIV_UNLIKELY + (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) { + + ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM); + break; + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + +#ifdef UNIV_DEBUG + { + ulint i = lock_rec_find_set_bit(lock); + + /* Check that all locks were moved. */ + if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) { + fprintf(stderr, + "lock_move_reorganize_page():" + " %lu not moved in %p\n", + (ulong) i, (void*) lock); + ut_error; + } + } +#endif /* UNIV_DEBUG */ + } + + lock_mutex_exit(); + + mem_heap_free(heap); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list end is moved to another page. */ +UNIV_INTERN +void +lock_move_rec_list_end( +/*===================*/ + const buf_block_t* new_block, /*!< in: index page to move to */ + const buf_block_t* block, /*!< in: index page */ + const rec_t* rec) /*!< in: record on page: this + is the first record moved */ +{ + lock_t* lock; + const ulint comp = page_rec_is_comp(rec); + + lock_mutex_enter(); + + /* Note: when we move locks from record to record, waiting locks + and possible granted gap type locks behind them are enqueued in + the original order, because new elements are inserted to a hash + table to the end of the hash chain, and lock_rec_add_to_queue + does not reuse locks if there are waiters in the queue. */ + + for (lock = lock_rec_get_first_on_page(block); lock; + lock = lock_rec_get_next_on_page(lock)) { + page_cur_t cur1; + page_cur_t cur2; + const ulint type_mode = lock->type_mode; + + page_cur_position(rec, block, &cur1); + + if (page_cur_is_before_first(&cur1)) { + page_cur_move_to_next(&cur1); + } + + page_cur_set_before_first(new_block, &cur2); + page_cur_move_to_next(&cur2); + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (!page_cur_is_after_last(&cur1)) { + ulint heap_no; + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur1)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur1)); + ut_ad(!memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size_old( + page_cur_get_rec(&cur2)))); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_reset_nth_bit(lock, heap_no); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + lock_reset_lock_and_trx_wait(lock); + } + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur2)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur2)); + } + + lock_rec_add_to_queue( + type_mode, new_block, heap_no, + lock->index, lock->trx, FALSE); + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + } + + lock_mutex_exit(); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); + ut_ad(lock_rec_validate_page(new_block)); +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list start is moved to another page. 
*/ +UNIV_INTERN +void +lock_move_rec_list_start( +/*=====================*/ + const buf_block_t* new_block, /*!< in: index page to + move to */ + const buf_block_t* block, /*!< in: index page */ + const rec_t* rec, /*!< in: record on page: + this is the first + record NOT copied */ + const rec_t* old_end) /*!< in: old + previous-to-last + record on new_page + before the records + were copied */ +{ + lock_t* lock; + const ulint comp = page_rec_is_comp(rec); + + ut_ad(block->frame == page_align(rec)); + ut_ad(new_block->frame == page_align(old_end)); + + lock_mutex_enter(); + + for (lock = lock_rec_get_first_on_page(block); lock; + lock = lock_rec_get_next_on_page(lock)) { + page_cur_t cur1; + page_cur_t cur2; + const ulint type_mode = lock->type_mode; + + page_cur_set_before_first(block, &cur1); + page_cur_move_to_next(&cur1); + + page_cur_position(old_end, new_block, &cur2); + page_cur_move_to_next(&cur2); + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (page_cur_get_rec(&cur1) != rec) { + ulint heap_no; + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur1)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur1)); + ut_ad(!memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size_old( + page_cur_get_rec( + &cur2)))); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_reset_nth_bit(lock, heap_no); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + lock_reset_lock_and_trx_wait(lock); + } + + if (comp) { + heap_no = rec_get_heap_no_new( + page_cur_get_rec(&cur2)); + } else { + heap_no = rec_get_heap_no_old( + page_cur_get_rec(&cur2)); + } + + lock_rec_add_to_queue( + type_mode, new_block, heap_no, + lock->index, lock->trx, FALSE); + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + +#ifdef UNIV_DEBUG + if (page_rec_is_supremum(rec)) { + ulint i; + + for (i = PAGE_HEAP_NO_USER_LOW; + i < lock_rec_get_n_bits(lock); i++) { + if (UNIV_UNLIKELY + (lock_rec_get_nth_bit(lock, i))) { + + fprintf(stderr, + "lock_move_rec_list_start():" + " %lu not moved in %p\n", + (ulong) i, (void*) lock); + ut_error; + } + } + } +#endif /* UNIV_DEBUG */ + } + + lock_mutex_exit(); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); +#endif +} + +/*************************************************************//** +Updates the lock table when a page is split to the right. */ +UNIV_INTERN +void +lock_update_split_right( +/*====================*/ + const buf_block_t* right_block, /*!< in: right page */ + const buf_block_t* left_block) /*!< in: left page */ +{ + ulint heap_no = lock_get_min_heap_no(right_block); + + lock_mutex_enter(); + + /* Move the locks on the supremum of the left page to the supremum + of the right page */ + + lock_rec_move(right_block, left_block, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + + /* Inherit the locks to the supremum of left page from the successor + of the infimum on right page */ + + lock_rec_inherit_to_gap(left_block, right_block, + PAGE_HEAP_NO_SUPREMUM, heap_no); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is merged to the right. 
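+The locks on the left page's supremum protected the gap just before
+orig_succ; they are inherited by orig_succ in gap mode, and the lock
+structs of the discarded left page are then freed.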
*/ +UNIV_INTERN +void +lock_update_merge_right( +/*====================*/ + const buf_block_t* right_block, /*!< in: right page to + which merged */ + const rec_t* orig_succ, /*!< in: original + successor of infimum + on the right page + before merge */ + const buf_block_t* left_block) /*!< in: merged index + page which will be + discarded */ +{ + lock_mutex_enter(); + + /* Inherit the locks from the supremum of the left page to the + original successor of infimum on the right page, to which the left + page was merged */ + + lock_rec_inherit_to_gap(right_block, left_block, + page_rec_get_heap_no(orig_succ), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, releasing + waiting transactions */ + + lock_rec_reset_and_release_wait(left_block, + PAGE_HEAP_NO_SUPREMUM); + + lock_rec_free_all_from_discard_page(left_block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when the root page is copied to another in +btr_root_raise_and_insert. Note that we leave lock structs on the +root page, even though they do not make sense on other than leaf +pages: the reason is that in a pessimistic update the infimum record +of the root page will act as a dummy carrier of the locks of the record +to be updated. */ +UNIV_INTERN +void +lock_update_root_raise( +/*===================*/ + const buf_block_t* block, /*!< in: index page to which copied */ + const buf_block_t* root) /*!< in: root page */ +{ + lock_mutex_enter(); + + /* Move the locks on the supremum of the root to the supremum + of block */ + + lock_rec_move(block, root, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is copied to another and the original page +is removed from the chain of leaf pages, except if page is the root! */ +UNIV_INTERN +void +lock_update_copy_and_discard( +/*=========================*/ + const buf_block_t* new_block, /*!< in: index page to + which copied */ + const buf_block_t* block) /*!< in: index page; + NOT the root! */ +{ + lock_mutex_enter(); + + /* Move the locks on the supremum of the old page to the supremum + of new_page */ + + lock_rec_move(new_block, block, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + lock_rec_free_all_from_discard_page(block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is split to the left. */ +UNIV_INTERN +void +lock_update_split_left( +/*===================*/ + const buf_block_t* right_block, /*!< in: right page */ + const buf_block_t* left_block) /*!< in: left page */ +{ + ulint heap_no = lock_get_min_heap_no(right_block); + + lock_mutex_enter(); + + /* Inherit the locks to the supremum of the left page from the + successor of the infimum on the right page */ + + lock_rec_inherit_to_gap(left_block, right_block, + PAGE_HEAP_NO_SUPREMUM, heap_no); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is merged to the left. 
*/ +UNIV_INTERN +void +lock_update_merge_left( +/*===================*/ + const buf_block_t* left_block, /*!< in: left page to + which merged */ + const rec_t* orig_pred, /*!< in: original predecessor + of supremum on the left page + before merge */ + const buf_block_t* right_block) /*!< in: merged index page + which will be discarded */ +{ + const rec_t* left_next_rec; + + ut_ad(left_block->frame == page_align(orig_pred)); + + lock_mutex_enter(); + + left_next_rec = page_rec_get_next_const(orig_pred); + + if (!page_rec_is_supremum(left_next_rec)) { + + /* Inherit the locks on the supremum of the left page to the + first record which was moved from the right page */ + + lock_rec_inherit_to_gap(left_block, left_block, + page_rec_get_heap_no(left_next_rec), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, + releasing waiting transactions */ + + lock_rec_reset_and_release_wait(left_block, + PAGE_HEAP_NO_SUPREMUM); + } + + /* Move the locks from the supremum of right page to the supremum + of the left page */ + + lock_rec_move(left_block, right_block, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + + lock_rec_free_all_from_discard_page(right_block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Resets the original locks on heir and replaces them with gap type locks +inherited from rec. */ +UNIV_INTERN +void +lock_rec_reset_and_inherit_gap_locks( +/*=================================*/ + const buf_block_t* heir_block, /*!< in: block containing the + record which inherits */ + const buf_block_t* block, /*!< in: block containing the + record from which inherited; + does NOT reset the locks on + this record */ + ulint heir_heap_no, /*!< in: heap_no of the + inheriting record */ + ulint heap_no) /*!< in: heap_no of the + donating record */ +{ + lock_mutex_enter(); + + lock_rec_reset_and_release_wait(heir_block, heir_heap_no); + + lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a page is discarded. 
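+Every record on the discarded page, from the infimum on, donates its
+locks to heir_heap_no on heir_block as gap type locks, after which the
+page's lock structs are freed.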
*/ +UNIV_INTERN +void +lock_update_discard( +/*================*/ + const buf_block_t* heir_block, /*!< in: index page + which will inherit the locks */ + ulint heir_heap_no, /*!< in: heap_no of the record + which will inherit the locks */ + const buf_block_t* block) /*!< in: index page + which will be discarded */ +{ + const page_t* page = block->frame; + const rec_t* rec; + ulint heap_no; + + lock_mutex_enter(); + + if (!lock_rec_get_first_on_page(block)) { + /* No locks exist on page, nothing to do */ + + lock_mutex_exit(); + + return; + } + + /* Inherit all the locks on the page to the record and reset all + the locks on the page */ + + if (page_is_comp(page)) { + rec = page + PAGE_NEW_INFIMUM; + + do { + heap_no = rec_get_heap_no_new(rec); + + lock_rec_inherit_to_gap(heir_block, block, + heir_heap_no, heap_no); + + lock_rec_reset_and_release_wait(block, heap_no); + + rec = page + rec_get_next_offs(rec, TRUE); + } while (heap_no != PAGE_HEAP_NO_SUPREMUM); + } else { + rec = page + PAGE_OLD_INFIMUM; + + do { + heap_no = rec_get_heap_no_old(rec); + + lock_rec_inherit_to_gap(heir_block, block, + heir_heap_no, heap_no); + + lock_rec_reset_and_release_wait(block, heap_no); + + rec = page + rec_get_next_offs(rec, FALSE); + } while (heap_no != PAGE_HEAP_NO_SUPREMUM); + } + + lock_rec_free_all_from_discard_page(block); + + lock_mutex_exit(); +} + +/*************************************************************//** +Updates the lock table when a new user record is inserted. */ +UNIV_INTERN +void +lock_update_insert( +/*===============*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: the inserted record */ +{ + ulint receiver_heap_no; + ulint donator_heap_no; + + ut_ad(block->frame == page_align(rec)); + + /* Inherit the gap-locking locks for rec, in gap mode, from the next + record */ + + if (page_rec_is_comp(rec)) { + receiver_heap_no = rec_get_heap_no_new(rec); + donator_heap_no = rec_get_heap_no_new( + page_rec_get_next_low(rec, TRUE)); + } else { + receiver_heap_no = rec_get_heap_no_old(rec); + donator_heap_no = rec_get_heap_no_old( + page_rec_get_next_low(rec, FALSE)); + } + + lock_rec_inherit_to_gap_if_gap_lock( + block, receiver_heap_no, donator_heap_no); +} + +/*************************************************************//** +Updates the lock table when a record is removed. */ +UNIV_INTERN +void +lock_update_delete( +/*===============*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: the record to be removed */ +{ + const page_t* page = block->frame; + ulint heap_no; + ulint next_heap_no; + + ut_ad(page == page_align(rec)); + + if (page_is_comp(page)) { + heap_no = rec_get_heap_no_new(rec); + next_heap_no = rec_get_heap_no_new(page + + rec_get_next_offs(rec, + TRUE)); + } else { + heap_no = rec_get_heap_no_old(rec); + next_heap_no = rec_get_heap_no_old(page + + rec_get_next_offs(rec, + FALSE)); + } + + lock_mutex_enter(); + + /* Let the next record inherit the locks from rec, in gap mode */ + + lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no); + + /* Reset the lock bits on rec and release waiting transactions */ + + lock_rec_reset_and_release_wait(block, heap_no); + + lock_mutex_exit(); +} + +/*********************************************************************//** +Stores on the page infimum record the explicit locks of another record. +This function is used to store the lock state of a record when it is +updated and the size of the record changes in the update. 
The record +is moved in such an update, perhaps to another page. The infimum record +acts as a dummy carrier record, taking care of lock releases while the +actual record is being moved. */ +UNIV_INTERN +void +lock_rec_store_on_page_infimum( +/*===========================*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: record whose lock state + is stored on the infimum + record of the same page; lock + bits are reset on the + record */ +{ + ulint heap_no = page_rec_get_heap_no(rec); + + ut_ad(block->frame == page_align(rec)); + + lock_mutex_enter(); + + lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no); + + lock_mutex_exit(); +} + +/*********************************************************************//** +Restores the state of explicit lock requests on a single record, where the +state was stored on the infimum of the page. */ +UNIV_INTERN +void +lock_rec_restore_from_page_infimum( +/*===============================*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec, /*!< in: record whose lock state + is restored */ + const buf_block_t* donator)/*!< in: page (rec is not + necessarily on this page) + whose infimum stored the lock + state; lock bits are reset on + the infimum */ +{ + ulint heap_no = page_rec_get_heap_no(rec); + + lock_mutex_enter(); + + lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM); + + lock_mutex_exit(); +} + +/*=========== DEADLOCK CHECKING ======================================*/ + +/*********************************************************************//** +rewind(3) the file used for storing the latest detected deadlock and +print a heading message to stderr if printing of all deadlocks to stderr +is enabled. */ +UNIV_INLINE +void +lock_deadlock_start_print() +/*=======================*/ +{ + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + rewind(lock_latest_err_file); + ut_print_timestamp(lock_latest_err_file); + + if (srv_print_all_deadlocks) { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: transactions deadlock detected, " + "dumping detailed information.\n"); + ut_print_timestamp(stderr); + } +} + +/*********************************************************************//** +Print a message to the deadlock file and possibly to stderr. */ +UNIV_INLINE +void +lock_deadlock_fputs( +/*================*/ + const char* msg) /*!< in: message to print */ +{ + if (!srv_read_only_mode) { + fputs(msg, lock_latest_err_file); + + if (srv_print_all_deadlocks) { + fputs(msg, stderr); + } + } +} + +/*********************************************************************//** +Print transaction data to the deadlock file and possibly to stderr. 
*/ +UNIV_INLINE +void +lock_deadlock_trx_print( +/*====================*/ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ +{ + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock); + ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); + ulint heap_size = mem_heap_get_size(trx->lock.lock_heap); + + mutex_enter(&trx_sys->mutex); + + trx_print_low(lock_latest_err_file, trx, max_query_len, + n_rec_locks, n_trx_locks, heap_size); + + if (srv_print_all_deadlocks) { + trx_print_low(stderr, trx, max_query_len, + n_rec_locks, n_trx_locks, heap_size); + } + + mutex_exit(&trx_sys->mutex); +} + +/*********************************************************************//** +Print lock data to the deadlock file and possibly to stderr. */ +UNIV_INLINE +void +lock_deadlock_lock_print( +/*=====================*/ + const lock_t* lock) /*!< in: record or table type lock */ +{ + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + if (lock_get_type_low(lock) == LOCK_REC) { + lock_rec_print(lock_latest_err_file, lock); + + if (srv_print_all_deadlocks) { + lock_rec_print(stderr, lock); + } + } else { + lock_table_print(lock_latest_err_file, lock); + + if (srv_print_all_deadlocks) { + lock_table_print(stderr, lock); + } + } +} + +/** Used in deadlock tracking. Protected by lock_sys->mutex. */ +static ib_uint64_t lock_mark_counter = 0; + +/** Check if the search is too deep. */ +#define lock_deadlock_too_deep(c) \ + (c->depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK \ + || c->cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK) + +/********************************************************************//** +Get the next lock in the queue that is owned by a transaction whose +sub-tree has not already been searched. +@return next lock or NULL if at end of queue */ +static +const lock_t* +lock_get_next_lock( +/*===============*/ + const lock_deadlock_ctx_t* + ctx, /*!< in: deadlock context */ + const lock_t* lock, /*!< in: lock in the queue */ + ulint heap_no)/*!< in: heap no if rec lock else + ULINT_UNDEFINED */ +{ + ut_ad(lock_mutex_own()); + + do { + if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(heap_no != ULINT_UNDEFINED); + lock = lock_rec_get_next_const(heap_no, lock); + } else { + ut_ad(heap_no == ULINT_UNDEFINED); + ut_ad(lock_get_type_low(lock) == LOCK_TABLE); + + lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + } + } while (lock != NULL + && lock->trx->lock.deadlock_mark > ctx->mark_start); + + ut_ad(lock == NULL + || lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock)); + + return(lock); +} + +/********************************************************************//** +Get the first lock to search. The search starts from the current +wait_lock. What we are really interested in is an edge from the +current wait_lock's owning transaction to another transaction that has +a lock ahead in the queue. We skip locks where the owning transaction's +sub-tree has already been searched. 
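+In waits-for graph terms, each lock ahead in the queue that the current
+wait_lock conflicts with is a candidate outgoing edge from the waiting
+transaction to the lock's owner; this function positions the search on
+the first candidate.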
+@return first lock or NULL */
+static
+const lock_t*
+lock_get_first_lock(
+/*================*/
+	const lock_deadlock_ctx_t*
+			ctx,	/*!< in: deadlock context */
+	ulint*		heap_no)/*!< out: heap no if rec lock,
+				else ULINT_UNDEFINED */
+{
+	const lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	lock = ctx->wait_lock;
+
+	if (lock_get_type_low(lock) == LOCK_REC) {
+
+		*heap_no = lock_rec_find_set_bit(lock);
+		ut_ad(*heap_no != ULINT_UNDEFINED);
+
+		lock = lock_rec_get_first_on_page_addr(
+			lock->un_member.rec_lock.space,
+			lock->un_member.rec_lock.page_no);
+
+		/* Position on the first lock on the physical record. */
+		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
+			lock = lock_rec_get_next_const(*heap_no, lock);
+		}
+
+	} else {
+		*heap_no = ULINT_UNDEFINED;
+		ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
+		lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
+	}
+
+	ut_a(lock != NULL);
+	ut_a(lock != ctx->wait_lock);
+	ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
+
+	return(lock);
+}
+
+/********************************************************************//**
+Notify that a deadlock has been detected and print the conflicting
+transaction info. */
+static
+void
+lock_deadlock_notify(
+/*=================*/
+	const lock_deadlock_ctx_t*	ctx,	/*!< in: deadlock context */
+	const lock_t*			lock)	/*!< in: lock causing
+						deadlock */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(!srv_read_only_mode);
+
+	lock_deadlock_start_print();
+
+	lock_deadlock_fputs("\n*** (1) TRANSACTION:\n");
+
+	lock_deadlock_trx_print(ctx->wait_lock->trx, 3000);
+
+	lock_deadlock_fputs("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+	lock_deadlock_lock_print(ctx->wait_lock);
+
+	lock_deadlock_fputs("*** (2) TRANSACTION:\n");
+
+	lock_deadlock_trx_print(lock->trx, 3000);
+
+	lock_deadlock_fputs("*** (2) HOLDS THE LOCK(S):\n");
+
+	lock_deadlock_lock_print(lock);
+
+	/* It is possible that the joining transaction was granted its
+	lock when we rolled back some other waiting transaction. */
+
+	if (ctx->start->lock.wait_lock != 0) {
+		lock_deadlock_fputs(
+			"*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+		lock_deadlock_lock_print(ctx->start->lock.wait_lock);
+	}
+
+#ifdef UNIV_DEBUG
+	if (lock_print_waits) {
+		fputs("Deadlock detected\n", stderr);
+	}
+#endif /* UNIV_DEBUG */
+}
+
+/********************************************************************//**
+Select the victim transaction that should be rolled back.
+@return victim transaction */
+static
+const trx_t*
+lock_deadlock_select_victim(
+/*========================*/
+	const lock_deadlock_ctx_t*	ctx)	/*!< in: deadlock context */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(ctx->start->lock.wait_lock != 0);
+	ut_ad(ctx->wait_lock->trx != ctx->start);
+
+	if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) {
+		/* The joining transaction is 'smaller',
+		choose it as the victim and roll it back. */
+
+		return(ctx->start);
+	}
+
+	return(ctx->wait_lock->trx);
+}
+
+/********************************************************************//**
+Pop the deadlock search state from the stack.
+@return stack slot instance that was on top of the stack. */
+static
+const lock_stack_t*
+lock_deadlock_pop(
+/*==============*/
+	lock_deadlock_ctx_t*	ctx)	/*!< in/out: context */
+{
+	ut_ad(lock_mutex_own());
+
+	ut_ad(ctx->depth > 0);
+
+	return(&lock_stack[--ctx->depth]);
+}
+
+/********************************************************************//**
+Push the deadlock search state onto the stack. 
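+Returns NULL if the fixed-size stack (LOCK_STACK_SIZE slots) is full;
+the caller then treats the search as too deep and gives up.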
+@return slot that was used in the stack */ +static +lock_stack_t* +lock_deadlock_push( +/*===============*/ + lock_deadlock_ctx_t* ctx, /*!< in/out: context */ + const lock_t* lock, /*!< in: current lock */ + ulint heap_no) /*!< in: heap number */ +{ + ut_ad(lock_mutex_own()); + + /* Save current search state. */ + + if (LOCK_STACK_SIZE > ctx->depth) { + lock_stack_t* stack; + + stack = &lock_stack[ctx->depth++]; + + stack->lock = lock; + stack->heap_no = heap_no; + stack->wait_lock = ctx->wait_lock; + + return(stack); + } + + return(NULL); +} + +/********************************************************************//** +Looks iteratively for a deadlock. Note: the joining transaction may +have been granted its lock by the deadlock checks. +@return 0 if no deadlock else the victim transaction id.*/ +static +trx_id_t +lock_deadlock_search( +/*=================*/ + lock_deadlock_ctx_t* ctx) /*!< in/out: deadlock context */ +{ + const lock_t* lock; + ulint heap_no; + + ut_ad(lock_mutex_own()); + ut_ad(!trx_mutex_own(ctx->start)); + + ut_ad(ctx->start != NULL); + ut_ad(ctx->wait_lock != NULL); + assert_trx_in_list(ctx->wait_lock->trx); + ut_ad(ctx->mark_start <= lock_mark_counter); + + /* Look at the locks ahead of wait_lock in the lock queue. */ + lock = lock_get_first_lock(ctx, &heap_no); + + for (;;) { + + /* We should never visit the same sub-tree more than once. */ + ut_ad(lock == NULL + || lock->trx->lock.deadlock_mark <= ctx->mark_start); + + while (ctx->depth > 0 && lock == NULL) { + const lock_stack_t* stack; + + /* Restore previous search state. */ + + stack = lock_deadlock_pop(ctx); + + lock = stack->lock; + heap_no = stack->heap_no; + ctx->wait_lock = stack->wait_lock; + + lock = lock_get_next_lock(ctx, lock, heap_no); + } + + if (lock == NULL) { + break; + } else if (lock == ctx->wait_lock) { + + /* We can mark this subtree as searched */ + ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start); + + lock->trx->lock.deadlock_mark = ++lock_mark_counter; + + /* We are not prepared for an overflow. This 64-bit + counter should never wrap around. At 10^9 increments + per second, it would take 10^3 years of uptime. */ + + ut_ad(lock_mark_counter > 0); + + lock = NULL; + + } else if (!lock_has_to_wait(ctx->wait_lock, lock)) { + + /* No conflict, next lock */ + lock = lock_get_next_lock(ctx, lock, heap_no); + + } else if (lock->trx == ctx->start) { + + /* Found a cycle. */ + + lock_deadlock_notify(ctx, lock); + + return(lock_deadlock_select_victim(ctx)->id); + + } else if (lock_deadlock_too_deep(ctx)) { + + /* Search too deep to continue. */ + + ctx->too_deep = TRUE; + + /* Select the joining transaction as the victim. */ + return(ctx->start->id); + + } else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + + /* Another trx ahead has requested a lock in an + incompatible mode, and is itself waiting for a lock. */ + + ++ctx->cost; + + /* Save current search state. */ + if (!lock_deadlock_push(ctx, lock, heap_no)) { + + /* Unable to save current search state, stack + size not big enough. */ + + ctx->too_deep = TRUE; + + return(ctx->start->id); + } + + ctx->wait_lock = lock->trx->lock.wait_lock; + lock = lock_get_first_lock(ctx, &heap_no); + + if (lock->trx->lock.deadlock_mark > ctx->mark_start) { + lock = lock_get_next_lock(ctx, lock, heap_no); + } + + } else { + lock = lock_get_next_lock(ctx, lock, heap_no); + } + } + + ut_a(lock == NULL && ctx->depth == 0); + + /* No deadlock found. 
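+	Note that the deadlock_mark values assigned during this search
+	only suppress revisits within the search itself: the next search
+	starts with a fresh ctx->mark_start taken from the then-current
+	lock_mark_counter, which is at least as large as any mark
+	assigned here.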
	*/
+	return(0);
+}
+
+/********************************************************************//**
+Print info about transaction that was rolled back. */
+static
+void
+lock_deadlock_joining_trx_print(
+/*============================*/
+	const trx_t*	trx,		/*!< in: transaction rolled back */
+	const lock_t*	lock)		/*!< in: lock trx wants */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(!srv_read_only_mode);
+
+	/* If the lock search exceeds the max step
+	or the max depth, the current trx will be
+	the victim. Print its information. */
+	lock_deadlock_start_print();
+
+	lock_deadlock_fputs(
+		"TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+		" WAITS-FOR GRAPH, WE WILL ROLL BACK"
+		" FOLLOWING TRANSACTION \n\n"
+		"*** TRANSACTION:\n");
+
+	lock_deadlock_trx_print(trx, 3000);
+
+	lock_deadlock_fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
+
+	lock_deadlock_lock_print(lock);
+}
+
+/********************************************************************//**
+Rollback transaction selected as the victim. */
+static
+void
+lock_deadlock_trx_rollback(
+/*=======================*/
+	lock_deadlock_ctx_t*	ctx)	/*!< in: deadlock context */
+{
+	trx_t*	trx;
+
+	ut_ad(lock_mutex_own());
+
+	trx = ctx->wait_lock->trx;
+
+	lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (1)\n");
+
+	trx_mutex_enter(trx);
+
+	trx->lock.was_chosen_as_deadlock_victim = TRUE;
+
+	lock_cancel_waiting_and_release(trx->lock.wait_lock);
+
+	trx_mutex_exit(trx);
+}
+
+/********************************************************************//**
+Checks if a joining lock request results in a deadlock. If a deadlock is
+found this function will resolve the deadlock by choosing a victim transaction
+and rolling it back. It will attempt to resolve all deadlocks. The returned
+transaction id will be the joining transaction id or 0 if some other
+transaction was chosen as a victim and rolled back or no deadlock found.
+
+@return id of transaction chosen as victim or 0 */
+static
+trx_id_t
+lock_deadlock_check_and_resolve(
+/*============================*/
+	const lock_t*	lock,	/*!< in: lock the transaction is requesting */
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	trx_id_t	victim_trx_id;
+
+	ut_ad(trx != NULL);
+	ut_ad(lock != NULL);
+	ut_ad(lock_mutex_own());
+	assert_trx_in_list(trx);
+
+	/* Try and resolve as many deadlocks as possible. */
+	do {
+		lock_deadlock_ctx_t	ctx;
+
+		/* Reset the context. */
+		ctx.cost = 0;
+		ctx.depth = 0;
+		ctx.start = trx;
+		ctx.too_deep = FALSE;
+		ctx.wait_lock = lock;
+		ctx.mark_start = lock_mark_counter;
+
+		victim_trx_id = lock_deadlock_search(&ctx);
+
+		/* Search too deep, we roll back the joining transaction. */
+		if (ctx.too_deep) {
+
+			ut_a(trx == ctx.start);
+			ut_a(victim_trx_id == trx->id);
+
+			if (!srv_read_only_mode) {
+				lock_deadlock_joining_trx_print(trx, lock);
+			}
+
+			MONITOR_INC(MONITOR_DEADLOCK);
+
+		} else if (victim_trx_id != 0 && victim_trx_id != trx->id) {
+
+			ut_ad(victim_trx_id == ctx.wait_lock->trx->id);
+			lock_deadlock_trx_rollback(&ctx);
+
+			lock_deadlock_found = TRUE;
+
+			MONITOR_INC(MONITOR_DEADLOCK);
+		}
+
+	} while (victim_trx_id != 0 && victim_trx_id != trx->id);
+
+	/* If the joining transaction was selected as the victim.
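+	Both the too-deep case and a regular cycle in which the joining
+	transaction was chosen end up here with victim_trx_id == trx->id.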
*/ + if (victim_trx_id != 0) { + ut_a(victim_trx_id == trx->id); + + lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n"); + + lock_deadlock_found = TRUE; + } + + return(victim_trx_id); +} + +/*========================= TABLE LOCKS ==============================*/ + +/*********************************************************************//** +Creates a table lock object and adds it as the last in the lock queue +of the table. Does NOT check for deadlocks or lock compatibility. +@return own: new lock object */ +UNIV_INLINE +lock_t* +lock_table_create( +/*==============*/ + dict_table_t* table, /*!< in/out: database table + in dictionary cache */ + ulint type_mode,/*!< in: lock mode possibly ORed with + LOCK_WAIT */ + trx_t* trx) /*!< in: trx */ +{ + lock_t* lock; + + ut_ad(table && trx); + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(trx)); + + /* Non-locking autocommit read-only transactions should not set + any locks. */ + assert_trx_in_list(trx); + + if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) { + ++table->n_waiting_or_granted_auto_inc_locks; + } + + /* For AUTOINC locking we reuse the lock instance only if + there is no wait involved else we allocate the waiting lock + from the transaction lock heap. */ + if (type_mode == LOCK_AUTO_INC) { + + lock = table->autoinc_lock; + + table->autoinc_trx = trx; + + ib_vector_push(trx->autoinc_locks, &lock); + } else { + lock = static_cast<lock_t*>( + mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock))); + } + + lock->type_mode = type_mode | LOCK_TABLE; + lock->trx = trx; + + lock->un_member.tab_lock.table = table; + + ut_ad(table->n_ref_count > 0 || !table->can_be_evicted); + + UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); + UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); + + if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { + + lock_set_lock_and_trx_wait(lock, trx); + } + + ib_vector_push(lock->trx->lock.table_locks, &lock); + + MONITOR_INC(MONITOR_TABLELOCK_CREATED); + MONITOR_INC(MONITOR_NUM_TABLELOCK); + + return(lock); +} + +/*************************************************************//** +Pops autoinc lock requests from the transaction's autoinc_locks. We +handle the case where there are gaps in the array and they need to +be popped off the stack. */ +UNIV_INLINE +void +lock_table_pop_autoinc_locks( +/*=========================*/ + trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */ +{ + ut_ad(lock_mutex_own()); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* Skip any gaps, gaps are NULL lock entries in the + trx->autoinc_locks vector. */ + + do { + ib_vector_pop(trx->autoinc_locks); + + if (ib_vector_is_empty(trx->autoinc_locks)) { + return; + } + + } while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL); +} + +/*************************************************************//** +Removes an autoinc lock request from the transaction's autoinc_locks. */ +UNIV_INLINE +void +lock_table_remove_autoinc_lock( +/*===========================*/ + lock_t* lock, /*!< in: table lock */ + trx_t* trx) /*!< in/out: transaction that owns the lock */ +{ + lock_t* autoinc_lock; + lint i = ib_vector_size(trx->autoinc_locks) - 1; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* With stored functions and procedures the user may drop + a table within the same "statement". 
This special case has + to be handled by deleting only those AUTOINC locks that were + held by the table being dropped. */ + + autoinc_lock = *static_cast<lock_t**>( + ib_vector_get(trx->autoinc_locks, i)); + + /* This is the default fast case. */ + + if (autoinc_lock == lock) { + lock_table_pop_autoinc_locks(trx); + } else { + /* The last element should never be NULL */ + ut_a(autoinc_lock != NULL); + + /* Handle freeing the locks from within the stack. */ + + while (--i >= 0) { + autoinc_lock = *static_cast<lock_t**>( + ib_vector_get(trx->autoinc_locks, i)); + + if (UNIV_LIKELY(autoinc_lock == lock)) { + void* null_var = NULL; + ib_vector_set(trx->autoinc_locks, i, &null_var); + return; + } + } + + /* Must find the autoinc lock. */ + ut_error; + } +} + +/*************************************************************//** +Removes a table lock request from the queue and the trx list of locks; +this is a low-level function which does NOT check if waiting requests +can now be granted. */ +UNIV_INLINE +void +lock_table_remove_low( +/*==================*/ + lock_t* lock) /*!< in/out: table lock */ +{ + trx_t* trx; + dict_table_t* table; + + ut_ad(lock_mutex_own()); + + trx = lock->trx; + table = lock->un_member.tab_lock.table; + + /* Remove the table from the transaction's AUTOINC vector, if + the lock that is being released is an AUTOINC lock. */ + if (lock_get_mode(lock) == LOCK_AUTO_INC) { + + /* The table's AUTOINC lock can get transferred to + another transaction before we get here. */ + if (table->autoinc_trx == trx) { + table->autoinc_trx = NULL; + } + + /* The locks must be freed in the reverse order from + the one in which they were acquired. This is to avoid + traversing the AUTOINC lock vector unnecessarily. + + We only store locks that were granted in the + trx->autoinc_locks vector (see lock_table_create() + and lock_grant()). Therefore it can be empty and we + need to check for that. */ + + if (!lock_get_wait(lock) + && !ib_vector_is_empty(trx->autoinc_locks)) { + + lock_table_remove_autoinc_lock(lock, trx); + } + + ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); + table->n_waiting_or_granted_auto_inc_locks--; + } + + UT_LIST_REMOVE(trx_locks, trx->lock.trx_locks, lock); + UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); + + MONITOR_INC(MONITOR_TABLELOCK_REMOVED); + MONITOR_DEC(MONITOR_NUM_TABLELOCK); +} + +/*********************************************************************//** +Enqueues a waiting request for a table lock which cannot be granted +immediately. Checks for deadlocks. 
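+
+As an informal summary of the cases handled below (illustrative, not
+an exhaustive specification):
+
+	deadlock found, joining trx chosen as victim	=> DB_DEADLOCK
+	deadlock found, other trx rolled back and our
+	lock was granted in the meantime		=> DB_SUCCESS
+	no deadlock, request stays in the queue		=> DB_LOCK_WAIT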
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or +DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another +transaction was chosen as a victim, and we got the lock immediately: +no need to wait then */ +static +dberr_t +lock_table_enqueue_waiting( +/*=======================*/ + ulint mode, /*!< in: lock mode this transaction is + requesting */ + dict_table_t* table, /*!< in/out: table */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t* trx; + lock_t* lock; + trx_id_t victim_trx_id; + + ut_ad(lock_mutex_own()); + ut_ad(!srv_read_only_mode); + + trx = thr_get_trx(thr); + ut_ad(trx_mutex_own(trx)); + + /* Test if there already is some other reason to suspend thread: + we do not enqueue a lock request if the query thread should be + stopped anyway */ + + if (que_thr_stop(thr)) { + ut_error; + + return(DB_QUE_THR_SUSPENDED); + } + + switch (trx_get_dict_operation(trx)) { + case TRX_DICT_OP_NONE: + break; + case TRX_DICT_OP_TABLE: + case TRX_DICT_OP_INDEX: + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: a table lock wait happens" + " in a dictionary operation!\n" + "InnoDB: Table name ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs(".\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + stderr); + ut_ad(0); + } + + /* Enqueue the lock request that will wait to be granted */ + + lock = lock_table_create(table, mode | LOCK_WAIT, trx); + + /* Release the mutex to obey the latching order. + This is safe, because lock_deadlock_check_and_resolve() + is invoked when a lock wait is enqueued for the currently + running transaction. Because trx is a running transaction + (it is not currently suspended because of a lock wait), + its state can only be changed by this thread, which is + currently associated with the transaction. */ + + trx_mutex_exit(trx); + + victim_trx_id = lock_deadlock_check_and_resolve(lock, trx); + + trx_mutex_enter(trx); + + if (victim_trx_id != 0) { + ut_ad(victim_trx_id == trx->id); + + /* The order here is important, we don't want to + lose the state of the lock before calling remove. */ + lock_table_remove_low(lock); + lock_reset_lock_and_trx_wait(lock); + + return(DB_DEADLOCK); + } else if (trx->lock.wait_lock == NULL) { + /* Deadlock resolution chose another transaction as a victim, + and we accidentally got our lock granted! */ + + return(DB_SUCCESS); + } + + trx->lock.que_state = TRX_QUE_LOCK_WAIT; + + trx->lock.wait_started = ut_time(); + trx->lock.was_chosen_as_deadlock_victim = FALSE; + + ut_a(que_thr_stop(thr)); + + MONITOR_INC(MONITOR_TABLELOCK_WAIT); + + return(DB_LOCK_WAIT); +} + +/*********************************************************************//** +Checks if other transactions have an incompatible mode lock request in +the lock queue. 
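+
+Compatibility is decided by lock_mode_compatible(), i.e. by the lock
+compatibility matrix defined earlier in this file, reproduced here for
+convenience (+ means compatible, - means conflicting):
+
+	   IS IX S  X  AI
+	IS +  +  +  -  +
+	IX +  +  -  -  +
+	S  +  -  +  -  -
+	X  -  -  -  -  -
+	AI +  +  -  -  -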
+@return lock or NULL */
+UNIV_INLINE
+const lock_t*
+lock_table_other_has_incompatible(
+/*==============================*/
+	const trx_t*		trx,	/*!< in: transaction, or NULL if all
+					transactions should be included */
+	ulint			wait,	/*!< in: LOCK_WAIT if also
+					waiting locks are taken into
+					account, or 0 if not */
+	const dict_table_t*	table,	/*!< in: table */
+	enum lock_mode		mode)	/*!< in: lock mode */
+{
+	const lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+
+	for (lock = UT_LIST_GET_LAST(table->locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
+
+		if (lock->trx != trx
+		    && !lock_mode_compatible(lock_get_mode(lock), mode)
+		    && (wait || !lock_get_wait(lock))) {
+
+			return(lock);
+		}
+	}
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_table(
+/*=======*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	dict_table_t*	table,	/*!< in/out: database table
+				in dictionary cache */
+	enum lock_mode	mode,	/*!< in: lock mode */
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	trx_t*		trx;
+	dberr_t		err;
+	const lock_t*	wait_for;
+
+	ut_ad(table && thr);
+
+	if (flags & BTR_NO_LOCKING_FLAG) {
+
+		return(DB_SUCCESS);
+	}
+
+	ut_a(flags == 0);
+
+	trx = thr_get_trx(thr);
+
+	/* Look for equal or stronger locks the same trx already
+	has on the table. No need to acquire the lock mutex here
+	because only this transaction can add/access table locks
+	to/from trx_t::table_locks. */
+
+	if (lock_table_has(trx, table, mode)) {
+
+		return(DB_SUCCESS);
+	}
+
+	lock_mutex_enter();
+
+	/* We have to check if the new lock is compatible with any locks
+	other transactions have in the table lock queue. */
+
+	wait_for = lock_table_other_has_incompatible(
+		trx, LOCK_WAIT, table, mode);
+
+	trx_mutex_enter(trx);
+
+	/* Another trx has a request on the table in an incompatible
+	mode: this trx may have to wait */
+
+	if (wait_for != NULL) {
+		err = lock_table_enqueue_waiting(mode | flags, table, thr);
+	} else {
+		lock_table_create(table, mode | flags, trx);
+
+		ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
+
+		err = DB_SUCCESS;
+	}
+
+	lock_mutex_exit();
+
+	trx_mutex_exit(trx);
+
+	return(err);
+}
+
+/*********************************************************************//**
+Creates a table IX lock object for a resurrected transaction. */
+UNIV_INTERN
+void
+lock_table_ix_resurrect(
+/*====================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	trx_t*		trx)	/*!< in/out: transaction */
+{
+	ut_ad(trx->is_recovered);
+
+	if (lock_table_has(trx, table, LOCK_IX)) {
+		return;
+	}
+
+	lock_mutex_enter();
+
+	/* We have to check if the new lock is compatible with any locks
+	other transactions have in the table lock queue. */
+
+	ut_ad(!lock_table_other_has_incompatible(
+		      trx, LOCK_WAIT, table, LOCK_IX));
+
+	trx_mutex_enter(trx);
+	lock_table_create(table, LOCK_IX, trx);
+	lock_mutex_exit();
+	trx_mutex_exit(trx);
+}
+
+/*********************************************************************//**
+Checks if a waiting table lock request still has to wait in a queue.
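+
+An illustrative example: with a queue {T1: granted S, T2: waiting X,
+T3: waiting S}, T3 still has to wait even though S is compatible with
+T1's granted S, because lock_has_to_wait() also reports a conflict
+with T2's waiting X request ahead of it in the queue. This keeps the
+queue FIFO-fair.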
+@return TRUE if still has to wait */ +static +ibool +lock_table_has_to_wait_in_queue( +/*============================*/ + const lock_t* wait_lock) /*!< in: waiting table lock */ +{ + const dict_table_t* table; + const lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_ad(lock_get_wait(wait_lock)); + + table = wait_lock->un_member.tab_lock.table; + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock != wait_lock; + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { + + if (lock_has_to_wait(wait_lock, lock)) { + + return(TRUE); + } + } + + return(FALSE); +} + +/*************************************************************//** +Removes a table lock request, waiting or granted, from the queue and grants +locks to other transactions in the queue, if they now are entitled to a +lock. */ +static +void +lock_table_dequeue( +/*===============*/ + lock_t* in_lock)/*!< in/out: table lock object; transactions waiting + behind will get their lock requests granted, if + they are now qualified to it */ +{ + lock_t* lock; + + ut_ad(lock_mutex_own()); + ut_a(lock_get_type_low(in_lock) == LOCK_TABLE); + + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock); + + lock_table_remove_low(in_lock); + + /* Check if waiting locks in the queue can now be granted: grant + locks if there are no conflicting locks ahead. */ + + for (/* No op */; + lock != NULL; + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { + + if (lock_get_wait(lock) + && !lock_table_has_to_wait_in_queue(lock)) { + + /* Grant the lock */ + ut_ad(in_lock->trx != lock->trx); + lock_grant(lock); + } + } +} + +/*=========================== LOCK RELEASE ==============================*/ + +/*************************************************************//** +Removes a granted record lock of a transaction from the queue and grants +locks to other transactions waiting in the queue if they now are entitled +to a lock. */ +UNIV_INTERN +void +lock_rec_unlock( +/*============*/ + trx_t* trx, /*!< in/out: transaction that has + set a record lock */ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec, /*!< in: record */ + enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ +{ + lock_t* first_lock; + lock_t* lock; + ulint heap_no; + const char* stmt; + size_t stmt_len; + + ut_ad(trx); + ut_ad(rec); + ut_ad(block->frame == page_align(rec)); + ut_ad(!trx->lock.wait_lock); + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); + + heap_no = page_rec_get_heap_no(rec); + + lock_mutex_enter(); + trx_mutex_enter(trx); + + first_lock = lock_rec_get_first(block, heap_no); + + /* Find the last lock with the same lock_mode and transaction + on the record. 
	*/
+
+	for (lock = first_lock; lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+		if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
+			goto released;
+		}
+	}
+
+	lock_mutex_exit();
+	trx_mutex_exit(trx);
+
+	stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		" InnoDB: Error: unlock row could not"
+		" find a %lu mode lock on the record\n",
+		(ulong) lock_mode);
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: current statement: %.*s\n",
+		(int) stmt_len, stmt);
+
+	return;
+
+released:
+	ut_a(!lock_get_wait(lock));
+	lock_rec_reset_nth_bit(lock, heap_no);
+
+	/* Check if we can now grant waiting lock requests */
+
+	for (lock = first_lock; lock != NULL;
+	     lock = lock_rec_get_next(heap_no, lock)) {
+		if (lock_get_wait(lock)
+		    && !lock_rec_has_to_wait_in_queue(lock)) {
+
+			/* Grant the lock */
+			ut_ad(trx != lock->trx);
+			lock_grant(lock);
+		}
+	}
+
+	lock_mutex_exit();
+	trx_mutex_exit(trx);
+}
+
+/*********************************************************************//**
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. */
+static
+void
+lock_release(
+/*=========*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	lock_t*		lock;
+	ulint		count = 0;
+	trx_id_t	max_trx_id;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(!trx_mutex_own(trx));
+
+	max_trx_id = trx_sys_get_max_trx_id();
+
+	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
+
+		if (lock_get_type_low(lock) == LOCK_REC) {
+
+#ifdef UNIV_DEBUG
+			/* Check if the transaction locked a record
+			in a system table in X mode. It should have set
+			the dict_op code correctly if it did. */
+			if (lock->index->table->id < DICT_HDR_FIRST_ID
+			    && lock_get_mode(lock) == LOCK_X) {
+
+				ut_ad(lock_get_mode(lock) != LOCK_IX);
+				ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+			}
+#endif /* UNIV_DEBUG */
+
+			lock_rec_dequeue_from_page(lock);
+		} else {
+			dict_table_t*	table;
+
+			table = lock->un_member.tab_lock.table;
+#ifdef UNIV_DEBUG
+			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+			/* Check if the transaction locked a system table
+			in IX mode. It should have set the dict_op code
+			correctly if it did. */
+			if (table->id < DICT_HDR_FIRST_ID
+			    && (lock_get_mode(lock) == LOCK_X
+				|| lock_get_mode(lock) == LOCK_IX)) {
+
+				ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+			}
+#endif /* UNIV_DEBUG */
+
+			if (lock_get_mode(lock) != LOCK_IS
+			    && trx->undo_no != 0) {
+
+				/* The trx may have modified the table. We
+				block the use of the MySQL query cache for
+				all currently active transactions. */
+
+				table->query_cache_inv_trx_id = max_trx_id;
+			}
+
+			lock_table_dequeue(lock);
+		}
+
+		if (count == LOCK_RELEASE_INTERVAL) {
+			/* Release the mutex for a while, so that we
+			do not monopolize it */
+
+			lock_mutex_exit();
+
+			lock_mutex_enter();
+
+			count = 0;
+		}
+
+		++count;
+	}
+
+	/* We don't remove the locks one by one from the vector for
+	efficiency reasons. We simply reset it because we would have
+	released all the locks anyway.
*/ + + ib_vector_reset(trx->lock.table_locks); + + ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); + ut_a(ib_vector_is_empty(trx->autoinc_locks)); + ut_a(ib_vector_is_empty(trx->lock.table_locks)); + + mem_heap_empty(trx->lock.lock_heap); +} + +/* True if a lock mode is S or X */ +#define IS_LOCK_S_OR_X(lock) \ + (lock_get_mode(lock) == LOCK_S \ + || lock_get_mode(lock) == LOCK_X) + +/*********************************************************************//** +Removes table locks of the transaction on a table to be dropped. */ +static +void +lock_trx_table_locks_remove( +/*========================*/ + const lock_t* lock_to_remove) /*!< in: lock to remove */ +{ + lint i; + trx_t* trx = lock_to_remove->trx; + + ut_ad(lock_mutex_own()); + + /* It is safe to read this because we are holding the lock mutex */ + if (!trx->lock.cancel) { + trx_mutex_enter(trx); + } else { + ut_ad(trx_mutex_own(trx)); + } + + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + ut_a(trx == lock->trx); + ut_a(lock_get_type_low(lock) & LOCK_TABLE); + ut_a(lock->un_member.tab_lock.table != NULL); + + if (lock == lock_to_remove) { + void* null_var = NULL; + ib_vector_set(trx->lock.table_locks, i, &null_var); + + if (!trx->lock.cancel) { + trx_mutex_exit(trx); + } + + return; + } + } + + if (!trx->lock.cancel) { + trx_mutex_exit(trx); + } + + /* Lock must exist in the vector. */ + ut_error; +} + +/*********************************************************************//** +Removes locks of a transaction on a table to be dropped. +If remove_also_table_sx_locks is TRUE then table-level S and X locks are +also removed in addition to other table-level and record-level locks. +No lock that is going to be removed is allowed to be a wait lock. */ +static +void +lock_remove_all_on_table_for_trx( +/*=============================*/ + dict_table_t* table, /*!< in: table to be dropped */ + trx_t* trx, /*!< in: a transaction */ + ibool remove_also_table_sx_locks)/*!< in: also removes + table S and X locks */ +{ + lock_t* lock; + lock_t* prev_lock; + + ut_ad(lock_mutex_own()); + + for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks); + lock != NULL; + lock = prev_lock) { + + prev_lock = UT_LIST_GET_PREV(trx_locks, lock); + + if (lock_get_type_low(lock) == LOCK_REC + && lock->index->table == table) { + ut_a(!lock_get_wait(lock)); + + lock_rec_discard(lock); + } else if (lock_get_type_low(lock) & LOCK_TABLE + && lock->un_member.tab_lock.table == table + && (remove_also_table_sx_locks + || !IS_LOCK_S_OR_X(lock))) { + + ut_a(!lock_get_wait(lock)); + + lock_trx_table_locks_remove(lock); + lock_table_remove_low(lock); + } + } +} + +/*******************************************************************//** +Remove any explicit record locks held by recovering transactions on +the table. 
+@return number of recovered transactions examined */ +static +ulint +lock_remove_recovered_trx_record_locks( +/*===================================*/ + dict_table_t* table) /*!< in: check if there are any locks + held on records in this table or on the + table itself */ +{ + trx_t* trx; + ulint n_recovered_trx = 0; + + ut_a(table != NULL); + ut_ad(lock_mutex_own()); + + mutex_enter(&trx_sys->mutex); + + for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + + lock_t* lock; + lock_t* next_lock; + + assert_trx_in_rw_list(trx); + + if (!trx->is_recovered) { + continue; + } + + /* Because we are holding the lock_sys->mutex, + implicit locks cannot be converted to explicit ones + while we are scanning the explicit locks. */ + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = next_lock) { + + ut_a(lock->trx == trx); + + /* Recovered transactions can't wait on a lock. */ + + ut_a(!lock_get_wait(lock)); + + next_lock = UT_LIST_GET_NEXT(trx_locks, lock); + + switch (lock_get_type_low(lock)) { + default: + ut_error; + case LOCK_TABLE: + if (lock->un_member.tab_lock.table == table) { + lock_trx_table_locks_remove(lock); + lock_table_remove_low(lock); + } + break; + case LOCK_REC: + if (lock->index->table == table) { + lock_rec_discard(lock); + } + } + } + + ++n_recovered_trx; + } + + mutex_exit(&trx_sys->mutex); + + return(n_recovered_trx); +} + +/*********************************************************************//** +Removes locks on a table to be dropped or truncated. +If remove_also_table_sx_locks is TRUE then table-level S and X locks are +also removed in addition to other table-level and record-level locks. +No lock, that is going to be removed, is allowed to be a wait lock. */ +UNIV_INTERN +void +lock_remove_all_on_table( +/*=====================*/ + dict_table_t* table, /*!< in: table to be dropped + or truncated */ + ibool remove_also_table_sx_locks)/*!< in: also removes + table S and X locks */ +{ + lock_t* lock; + + lock_mutex_enter(); + + for (lock = UT_LIST_GET_FIRST(table->locks); + lock != NULL; + /* No op */) { + + lock_t* prev_lock; + + prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + + /* If we should remove all locks (remove_also_table_sx_locks + is TRUE), or if the lock is not table-level S or X lock, + then check we are not going to remove a wait lock. */ + if (remove_also_table_sx_locks + || !(lock_get_type(lock) == LOCK_TABLE + && IS_LOCK_S_OR_X(lock))) { + + ut_a(!lock_get_wait(lock)); + } + + lock_remove_all_on_table_for_trx( + table, lock->trx, remove_also_table_sx_locks); + + if (prev_lock == NULL) { + if (lock == UT_LIST_GET_FIRST(table->locks)) { + /* lock was not removed, pick its successor */ + lock = UT_LIST_GET_NEXT( + un_member.tab_lock.locks, lock); + } else { + /* lock was removed, pick the first one */ + lock = UT_LIST_GET_FIRST(table->locks); + } + } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks, + prev_lock) != lock) { + /* If lock was removed by + lock_remove_all_on_table_for_trx() then pick the + successor of prev_lock ... */ + lock = UT_LIST_GET_NEXT( + un_member.tab_lock.locks, prev_lock); + } else { + /* ... otherwise pick the successor of lock. */ + lock = UT_LIST_GET_NEXT( + un_member.tab_lock.locks, lock); + } + } + + /* Note: Recovered transactions don't have table level IX or IS locks + but can have implicit record locks that have been converted to explicit + record locks. 
Such record locks cannot be freed by traversing the + transaction lock list in dict_table_t (as above). */ + + if (!lock_sys->rollback_complete + && lock_remove_recovered_trx_record_locks(table) == 0) { + + lock_sys->rollback_complete = TRUE; + } + + lock_mutex_exit(); +} + +/*===================== VALIDATION AND DEBUGGING ====================*/ + +/*********************************************************************//** +Prints info of a table lock. */ +UNIV_INTERN +void +lock_table_print( +/*=============*/ + FILE* file, /*!< in: file where to print */ + const lock_t* lock) /*!< in: table type lock */ +{ + ut_ad(lock_mutex_own()); + ut_a(lock_get_type_low(lock) == LOCK_TABLE); + + fputs("TABLE LOCK table ", file); + ut_print_name(file, lock->trx, TRUE, + lock->un_member.tab_lock.table->name); + fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id); + + if (lock_get_mode(lock) == LOCK_S) { + fputs(" lock mode S", file); + } else if (lock_get_mode(lock) == LOCK_X) { + fputs(" lock mode X", file); + } else if (lock_get_mode(lock) == LOCK_IS) { + fputs(" lock mode IS", file); + } else if (lock_get_mode(lock) == LOCK_IX) { + fputs(" lock mode IX", file); + } else if (lock_get_mode(lock) == LOCK_AUTO_INC) { + fputs(" lock mode AUTO-INC", file); + } else { + fprintf(file, " unknown lock mode %lu", + (ulong) lock_get_mode(lock)); + } + + if (lock_get_wait(lock)) { + fputs(" waiting", file); + } + + putc('\n', file); +} + +/*********************************************************************//** +Prints info of a record lock. */ +UNIV_INTERN +void +lock_rec_print( +/*===========*/ + FILE* file, /*!< in: file where to print */ + const lock_t* lock) /*!< in: record type lock */ +{ + const buf_block_t* block; + ulint space; + ulint page_no; + ulint i; + mtr_t mtr; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(lock_mutex_own()); + ut_a(lock_get_type_low(lock) == LOCK_REC); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ", + (ulong) space, (ulong) page_no, + (ulong) lock_rec_get_n_bits(lock)); + dict_index_name_print(file, lock->trx, lock->index); + fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id); + + if (lock_get_mode(lock) == LOCK_S) { + fputs(" lock mode S", file); + } else if (lock_get_mode(lock) == LOCK_X) { + fputs(" lock_mode X", file); + } else { + ut_error; + } + + if (lock_rec_get_gap(lock)) { + fputs(" locks gap before rec", file); + } + + if (lock_rec_get_rec_not_gap(lock)) { + fputs(" locks rec but not gap", file); + } + + if (lock_rec_get_insert_intention(lock)) { + fputs(" insert intention", file); + } + + if (lock_get_wait(lock)) { + fputs(" waiting", file); + } + + mtr_start(&mtr); + + putc('\n', file); + + block = buf_page_try_get(space, page_no, &mtr); + + for (i = 0; i < lock_rec_get_n_bits(lock); ++i) { + + if (!lock_rec_get_nth_bit(lock, i)) { + continue; + } + + fprintf(file, "Record lock, heap no %lu", (ulong) i); + + if (block) { + const rec_t* rec; + + rec = page_find_rec_with_heap_no( + buf_block_get_frame(block), i); + + offsets = rec_get_offsets( + rec, lock->index, offsets, + ULINT_UNDEFINED, &heap); + + putc(' ', file); + rec_print_new(file, rec, offsets); + } + + putc('\n', file); + } + + mtr_commit(&mtr); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } +} + +#ifdef UNIV_DEBUG +/* Print the number of lock structs from lock_print_info_summary() only +in 
non-production builds for performance reasons, see +http://bugs.mysql.com/36942 */ +#define PRINT_NUM_OF_LOCK_STRUCTS +#endif /* UNIV_DEBUG */ + +#ifdef PRINT_NUM_OF_LOCK_STRUCTS +/*********************************************************************//** +Calculates the number of record lock structs in the record lock hash table. +@return number of record locks */ +static +ulint +lock_get_n_rec_locks(void) +/*======================*/ +{ + ulint n_locks = 0; + ulint i; + + ut_ad(lock_mutex_own()); + + for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + const lock_t* lock; + + for (lock = static_cast<const lock_t*>( + HASH_GET_FIRST(lock_sys->rec_hash, i)); + lock != 0; + lock = static_cast<const lock_t*>( + HASH_GET_NEXT(hash, lock))) { + + n_locks++; + } + } + + return(n_locks); +} +#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ + +/*********************************************************************//** +Prints info of locks for all transactions. +@return FALSE if not able to obtain lock mutex +and exits without printing info */ +UNIV_INTERN +ibool +lock_print_info_summary( +/*====================*/ + FILE* file, /*!< in: file where to print */ + ibool nowait) /*!< in: whether to wait for the lock mutex */ +{ + /* if nowait is FALSE, wait on the lock mutex, + otherwise return immediately if fail to obtain the + mutex. */ + if (!nowait) { + lock_mutex_enter(); + } else if (lock_mutex_enter_nowait()) { + fputs("FAIL TO OBTAIN LOCK MUTEX, " + "SKIP LOCK INFO PRINTING\n", file); + return(FALSE); + } + + if (lock_deadlock_found) { + fputs("------------------------\n" + "LATEST DETECTED DEADLOCK\n" + "------------------------\n", file); + + if (!srv_read_only_mode) { + ut_copy_file(file, lock_latest_err_file); + } + } + + fputs("------------\n" + "TRANSACTIONS\n" + "------------\n", file); + + fprintf(file, "Trx id counter " TRX_ID_FMT "\n", + trx_sys_get_max_trx_id()); + + fprintf(file, + "Purge done for trx's n:o < " TRX_ID_FMT + " undo n:o < " TRX_ID_FMT " state: ", + purge_sys->iter.trx_no, + purge_sys->iter.undo_no); + + /* Note: We are reading the state without the latch. One because it + will violate the latching order and two because we are merely querying + the state of the variable for display. */ + + switch (purge_sys->state){ + case PURGE_STATE_INIT: + /* Should never be in this state while the system is running. */ + ut_error; + + case PURGE_STATE_EXIT: + fprintf(file, "exited"); + break; + + case PURGE_STATE_DISABLED: + fprintf(file, "disabled"); + break; + + case PURGE_STATE_RUN: + fprintf(file, "running"); + /* Check if it is waiting for more data to arrive. */ + if (!purge_sys->running) { + fprintf(file, " but idle"); + } + break; + + case PURGE_STATE_STOP: + fprintf(file, "stopped"); + break; + } + + fprintf(file, "\n"); + + fprintf(file, + "History list length %lu\n", + (ulong) trx_sys->rseg_history_len); + +#ifdef PRINT_NUM_OF_LOCK_STRUCTS + fprintf(file, + "Total number of lock structs in row lock hash table %lu\n", + (ulong) lock_get_n_rec_locks()); +#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ + return(TRUE); +} + +/*********************************************************************//** +Prints info of locks for each transaction. This function assumes that the +caller holds the lock mutex and more importantly it will release the lock +mutex on behalf of the caller. (This should be fixed in the future). 
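+To cope with that release, the function keeps plain cursors (nth_trx,
+nth_lock) and rescans the transaction lists from the start after each
+page read, instead of holding list pointers across the release of the
+mutexes.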
*/ +UNIV_INTERN +void +lock_print_info_all_transactions( +/*=============================*/ + FILE* file) /*!< in: file where to print */ +{ + const lock_t* lock; + ibool load_page_first = TRUE; + ulint nth_trx = 0; + ulint nth_lock = 0; + ulint i; + mtr_t mtr; + const trx_t* trx; + trx_list_t* trx_list = &trx_sys->rw_trx_list; + + fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); + + ut_ad(lock_mutex_own()); + + mutex_enter(&trx_sys->mutex); + + /* First print info on non-active transactions */ + + /* NOTE: information of auto-commit non-locking read-only + transactions will be omitted here. The information will be + available from INFORMATION_SCHEMA.INNODB_TRX. */ + + for (trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) { + + ut_ad(trx->in_mysql_trx_list); + + /* See state transitions and locking rules in trx0trx.h */ + + if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) { + fputs("---", file); + trx_print_latched(file, trx, 600); + } + } + +loop: + /* Since we temporarily release lock_sys->mutex and + trx_sys->mutex when reading a database page in below, + variable trx may be obsolete now and we must loop + through the trx list to get probably the same trx, + or some other trx. */ + + for (trx = UT_LIST_GET_FIRST(*trx_list), i = 0; + trx && (i < nth_trx); + trx = UT_LIST_GET_NEXT(trx_list, trx), i++) { + + assert_trx_in_list(trx); + ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + } + + ut_ad(trx == NULL + || trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + + if (trx == NULL) { + /* Check the read-only transaction list next. */ + if (trx_list == &trx_sys->rw_trx_list) { + trx_list = &trx_sys->ro_trx_list; + nth_trx = 0; + nth_lock = 0; + goto loop; + } + + lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + + ut_ad(lock_validate()); + + return; + } + + assert_trx_in_list(trx); + + if (nth_lock == 0) { + fputs("---", file); + + trx_print_latched(file, trx, 600); + + if (trx->read_view) { + fprintf(file, + "Trx read view will not see trx with" + " id >= " TRX_ID_FMT + ", sees < " TRX_ID_FMT "\n", + trx->read_view->low_limit_id, + trx->read_view->up_limit_id); + } + + if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + + fprintf(file, + "------- TRX HAS BEEN WAITING %lu SEC" + " FOR THIS LOCK TO BE GRANTED:\n", + (ulong) difftime(ut_time(), + trx->lock.wait_started)); + + if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) { + lock_rec_print(file, trx->lock.wait_lock); + } else { + lock_table_print(file, trx->lock.wait_lock); + } + + fputs("------------------\n", file); + } + } + + if (!srv_print_innodb_lock_monitor) { + nth_trx++; + goto loop; + } + + i = 0; + + /* Look at the note about the trx loop above why we loop here: + lock may be an obsolete pointer now. */ + + lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + + while (lock && (i < nth_lock)) { + lock = UT_LIST_GET_NEXT(trx_locks, lock); + i++; + } + + if (lock == NULL) { + nth_trx++; + nth_lock = 0; + + goto loop; + } + + if (lock_get_type_low(lock) == LOCK_REC) { + if (load_page_first) { + ulint space = lock->un_member.rec_lock.space; + ulint zip_size= fil_space_get_zip_size(space); + ulint page_no = lock->un_member.rec_lock.page_no; + ibool tablespace_being_deleted = FALSE; + + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + + /* It is a single table tablespace and + the .ibd file is missing (TRUNCATE + TABLE probably stole the locks): just + print the lock without attempting to + load the page in the buffer pool. 
	*/
+
+					fprintf(file, "RECORD LOCKS on"
+						" non-existing space %lu\n",
+						(ulong) space);
+					goto print_rec;
+				}
+
+				lock_mutex_exit();
+				mutex_exit(&trx_sys->mutex);
+
+				DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
+
+				/* Check whether the space exists or not.
+				Only when the space is valid do we try to
+				get the page. */
+				tablespace_being_deleted = fil_inc_pending_ops(space, false);
+
+				if (!tablespace_being_deleted) {
+					mtr_start(&mtr);
+
+					buf_page_get_gen(space, zip_size, page_no,
+							 RW_NO_LATCH, NULL,
+							 BUF_GET_POSSIBLY_FREED,
+							 __FILE__, __LINE__, &mtr);
+
+					mtr_commit(&mtr);
+
+					fil_decr_pending_ops(space);
+				} else {
+					fprintf(file, "RECORD LOCKS on"
+						" non-existing space %lu\n",
+						(ulong) space);
+				}
+
+				load_page_first = FALSE;
+
+				lock_mutex_enter();
+
+				mutex_enter(&trx_sys->mutex);
+
+				goto loop;
+			}
+
+print_rec:
+			lock_rec_print(file, lock);
+		} else {
+			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+			lock_table_print(file, lock);
+		}
+
+		load_page_first = TRUE;
+
+		nth_lock++;
+
+		if (nth_lock >= 10) {
+			fputs("10 LOCKS PRINTED FOR THIS TRX:"
+			      " SUPPRESSING FURTHER PRINTS\n",
+			      file);
+
+			nth_trx++;
+			nth_lock = 0;
+		}
+
+		goto loop;
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Find the lock in the trx_t::trx_lock_t::table_locks vector.
+@return TRUE if found */
+static
+ibool
+lock_trx_table_locks_find(
+/*======================*/
+	trx_t*		trx,		/*!< in: trx to validate */
+	const lock_t*	find_lock)	/*!< in: lock to find */
+{
+	lint		i;
+	ibool		found = FALSE;
+
+	trx_mutex_enter(trx);
+
+	for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
+		const lock_t*	lock;
+
+		lock = *static_cast<const lock_t**>(
+			ib_vector_get(trx->lock.table_locks, i));
+
+		if (lock == NULL) {
+			continue;
+		} else if (lock == find_lock) {
+			/* Can't be duplicates. */
+			ut_a(!found);
+			found = TRUE;
+		}
+
+		ut_a(trx == lock->trx);
+		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
+		ut_a(lock->un_member.tab_lock.table != NULL);
+	}
+
+	trx_mutex_exit(trx);
+
+	return(found);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a table.
+@return TRUE if ok */
+static
+ibool
+lock_table_queue_validate(
+/*======================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	const lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	for (lock = UT_LIST_GET_FIRST(table->locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
+
+		/* lock->trx->state cannot change from or to NOT_STARTED
+		while we are holding the trx_sys->mutex. It may change
+		from ACTIVE to PREPARED, but it may not change to
+		COMMITTED, because we are holding the lock_sys->mutex. */
+		ut_ad(trx_assert_started(lock->trx));
+
+		if (!lock_get_wait(lock)) {
+
+			ut_a(!lock_table_other_has_incompatible(
+				     lock->trx, 0, table,
+				     lock_get_mode(lock)));
+		} else {
+
+			ut_a(lock_table_has_to_wait_in_queue(lock));
+		}
+
+		ut_a(lock_trx_table_locks_find(lock->trx, lock));
+	}
+
+	return(TRUE);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a single record.
+@return TRUE if ok */
+static
+ibool
+lock_rec_queue_validate(
+/*====================*/
+	ibool			locked_lock_trx_sys,
+					/*!< in: if the caller holds
+					both the lock mutex and
+					trx_sys_t->lock.
*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec, /*!< in: record to look at */ + const dict_index_t* index, /*!< in: index, or NULL if not known */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + const trx_t* impl_trx; + const lock_t* lock; + ulint heap_no; + + ut_a(rec); + ut_a(block->frame == page_align(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); + ut_ad(lock_mutex_own() == locked_lock_trx_sys); + ut_ad(!index || dict_index_is_clust(index) + || !dict_index_is_online_ddl(index)); + + heap_no = page_rec_get_heap_no(rec); + + if (!locked_lock_trx_sys) { + lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); + } + + if (!page_rec_is_user_rec(rec)) { + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + ut_a(trx_in_trx_list(lock->trx)); + + if (lock_get_wait(lock)) { + ut_a(lock_rec_has_to_wait_in_queue(lock)); + } + + if (index) { + ut_a(lock->index == index); + } + } + + goto func_exit; + } + + if (!index); + else if (dict_index_is_clust(index)) { + trx_id_t trx_id; + + /* Unlike the non-debug code, this invariant can only succeed + if the check and assertion are covered by the lock mutex. */ + + trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); + impl_trx = trx_rw_is_active_low(trx_id, NULL); + + ut_ad(lock_mutex_own()); + /* impl_trx cannot be committed until lock_mutex_exit() + because lock_trx_release_locks() acquires lock_sys->mutex */ + + if (impl_trx != NULL + && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, + block, heap_no, impl_trx)) { + + ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, impl_trx)); + } + } + + for (lock = lock_rec_get_first(block, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + ut_a(trx_in_trx_list(lock->trx)); + + if (index) { + ut_a(lock->index == index); + } + + if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { + + enum lock_mode mode; + + if (lock_get_mode(lock) == LOCK_S) { + mode = LOCK_X; + } else { + mode = LOCK_S; + } + ut_a(!lock_rec_other_has_expl_req( + mode, 0, 0, block, heap_no, lock->trx)); + + } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { + + ut_a(lock_rec_has_to_wait_in_queue(lock)); + } + } + +func_exit: + if (!locked_lock_trx_sys) { + lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + } + + return(TRUE); +} + +/*********************************************************************//** +Validates the record lock queues on a page. 
+@return TRUE if ok */ +static +ibool +lock_rec_validate_page( +/*===================*/ + const buf_block_t* block) /*!< in: buffer block */ +{ + const lock_t* lock; + const rec_t* rec; + ulint nth_lock = 0; + ulint nth_bit = 0; + ulint i; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(!lock_mutex_own()); + + lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); +loop: + lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block), + buf_block_get_page_no(block)); + + if (!lock) { + goto function_exit; + } + +#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG + ut_a(!block->page.file_page_was_freed); +#endif + + for (i = 0; i < nth_lock; i++) { + + lock = lock_rec_get_next_on_page_const(lock); + + if (!lock) { + goto function_exit; + } + } + + ut_a(trx_in_trx_list(lock->trx)); + +# ifdef UNIV_SYNC_DEBUG + /* Only validate the record queues when this thread is not + holding a space->latch. Deadlocks are possible due to + latching order violation when UNIV_DEBUG is defined while + UNIV_SYNC_DEBUG is not. */ + if (!sync_thread_levels_contains(SYNC_FSP)) +# endif /* UNIV_SYNC_DEBUG */ + for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { + + if (i == 1 || lock_rec_get_nth_bit(lock, i)) { + + rec = page_find_rec_with_heap_no(block->frame, i); + ut_a(rec); + offsets = rec_get_offsets(rec, lock->index, offsets, + ULINT_UNDEFINED, &heap); +#if 0 + fprintf(stderr, + "Validating %u %u\n", + block->page.space, block->page.offset); +#endif + /* If this thread is holding the file space + latch (fil_space_t::latch), the following + check WILL break the latching order and may + cause a deadlock of threads. */ + + lock_rec_queue_validate( + TRUE, block, rec, lock->index, offsets); + + nth_bit = i + 1; + + goto loop; + } + } + + nth_bit = 0; + nth_lock++; + + goto loop; + +function_exit: + lock_mutex_exit(); + mutex_exit(&trx_sys->mutex); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return(TRUE); +} + +/*********************************************************************//** +Validates the table locks. +@return TRUE if ok */ +static +ibool +lock_validate_table_locks( +/*======================*/ + const trx_list_t* trx_list) /*!< in: trx list */ +{ + const trx_t* trx; + + ut_ad(lock_mutex_own()); + ut_ad(mutex_own(&trx_sys->mutex)); + + ut_ad(trx_list == &trx_sys->rw_trx_list + || trx_list == &trx_sys->ro_trx_list); + + for (trx = UT_LIST_GET_FIRST(*trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + + const lock_t* lock; + + assert_trx_in_list(trx); + ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + + if (lock_get_type_low(lock) & LOCK_TABLE) { + + lock_table_queue_validate( + lock->un_member.tab_lock.table); + } + } + } + + return(TRUE); +} + +/*********************************************************************//** +Validate record locks up to a limit. 
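+
+The 64-bit limit packs a page address; ut_ull_create() computes, in
+effect,
+
+	current = ((ib_uint64_t) space << 32) | page_no;
+
+so *limit acts as a watermark: each call returns some lock whose
+packed (space, page_no) lies above the previous limit and bumps the
+limit past it, so the scan never depends on a saved pointer into the
+hash bucket.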
+@return lock at limit or NULL if no more locks in the hash bucket */ +static __attribute__((nonnull, warn_unused_result)) +const lock_t* +lock_rec_validate( +/*==============*/ + ulint start, /*!< in: lock_sys->rec_hash + bucket */ + ib_uint64_t* limit) /*!< in/out: upper limit of + (space, page_no) */ +{ + ut_ad(lock_mutex_own()); + ut_ad(mutex_own(&trx_sys->mutex)); + + for (const lock_t* lock = static_cast<const lock_t*>( + HASH_GET_FIRST(lock_sys->rec_hash, start)); + lock != NULL; + lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) { + + ib_uint64_t current; + + ut_a(trx_in_trx_list(lock->trx)); + ut_a(lock_get_type(lock) == LOCK_REC); + + current = ut_ull_create( + lock->un_member.rec_lock.space, + lock->un_member.rec_lock.page_no); + + if (current > *limit) { + *limit = current + 1; + return(lock); + } + } + + return(0); +} + +/*********************************************************************//** +Validate a record lock's block */ +static +void +lock_rec_block_validate( +/*====================*/ + ulint space, + ulint page_no) +{ + /* The lock and the block that it is referring to may be freed at + this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check. + If the lock exists in lock_rec_validate_page() we assert + !block->page.file_page_was_freed. */ + + buf_block_t* block; + mtr_t mtr; + + /* Make sure that the tablespace is not deleted while we are + trying to access the page. */ + if (!fil_inc_pending_ops(space, true)) { + mtr_start(&mtr); + block = buf_page_get_gen( + space, fil_space_get_zip_size(space), + page_no, RW_X_LATCH, NULL, + BUF_GET_POSSIBLY_FREED, + __FILE__, __LINE__, &mtr); + + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + + ut_ad(lock_rec_validate_page(block)); + mtr_commit(&mtr); + + fil_decr_pending_ops(space); + } +} + +/*********************************************************************//** +Validates the lock system. +@return TRUE if ok */ +static +bool +lock_validate() +/*===========*/ +{ + typedef std::pair<ulint, ulint> page_addr_t; + typedef std::set<page_addr_t> page_addr_set; + page_addr_set pages; + + lock_mutex_enter(); + mutex_enter(&trx_sys->mutex); + + ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list)); + ut_a(lock_validate_table_locks(&trx_sys->ro_trx_list)); + + /* Iterate over all the record locks and validate the locks. We + don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex. + Release both mutexes during the validation check. */ + + for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + const lock_t* lock; + ib_uint64_t limit = 0; + + while ((lock = lock_rec_validate(i, &limit)) != 0) { + + ulint space = lock->un_member.rec_lock.space; + ulint page_no = lock->un_member.rec_lock.page_no; + + pages.insert(std::make_pair(space, page_no)); + } + } + + mutex_exit(&trx_sys->mutex); + lock_mutex_exit(); + + for (page_addr_set::const_iterator it = pages.begin(); + it != pages.end(); + ++it) { + lock_rec_block_validate((*it).first, (*it).second); + } + + return(true); +} +#endif /* UNIV_DEBUG */ +/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/ + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate insert of +a record. If they do, first tests if the query thread should anyway +be suspended for some reason; if not, then puts the transaction and +the query thread to the lock wait state and inserts a waiting request +for a gap x-lock to the lock queue. 
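+
+A purely illustrative example: if a page holds records with keys 5 and
+10, an insert of key 7 requests LOCK_X | LOCK_GAP |
+LOCK_INSERT_INTENTION on the successor record (key 10). A plain gap
+lock held on that gap by another transaction blocks the insert, while
+another transaction's waiting insert-intention lock on the same gap
+does not.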
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_rec_insert_check_and_lock( +/*===========================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is + set, does nothing */ + const rec_t* rec, /*!< in: record after which to insert */ + buf_block_t* block, /*!< in/out: buffer block of rec */ + dict_index_t* index, /*!< in: index */ + que_thr_t* thr, /*!< in: query thread */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + ibool* inherit)/*!< out: set to TRUE if the new + inserted record maybe should inherit + LOCK_GAP type locks from the successor + record */ +{ + const rec_t* next_rec; + trx_t* trx; + lock_t* lock; + dberr_t err; + ulint next_rec_heap_no; + ibool inherit_in = *inherit; + + ut_ad(block->frame == page_align(rec)); + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + trx = thr_get_trx(thr); + next_rec = page_rec_get_next_const(rec); + next_rec_heap_no = page_rec_get_heap_no(next_rec); + + lock_mutex_enter(); + /* Because this code is invoked for a running transaction by + the thread that is serving the transaction, it is not necessary + to hold trx->mutex here. */ + + /* When inserting a record into an index, the table must be at + least IX-locked. When we are building an index, we would pass + BTR_NO_LOCKING_FLAG and skip the locking altogether. */ + ut_ad(lock_table_has(trx, index->table, LOCK_IX)); + + lock = lock_rec_get_first(block, next_rec_heap_no); + + if (UNIV_LIKELY(lock == NULL)) { + /* We optimize CPU time usage in the simplest case */ + + lock_mutex_exit(); + + if (inherit_in && !dict_index_is_clust(index)) { + /* Update the page max trx id field */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + trx->id, mtr); + } + + *inherit = FALSE; + + return(DB_SUCCESS); + } + + *inherit = TRUE; + + /* If another transaction has an explicit lock request which locks + the gap, waiting or granted, on the successor, the insert has to wait. + + An exception is the case where the lock by the another transaction + is a gap type lock which it placed to wait for its turn to insert. We + do not consider that kind of a lock conflicting with our insert. This + eliminates an unnecessary deadlock which resulted when 2 transactions + had to wait for their insert. Both had waiting gap type lock requests + on the successor, which produced an unnecessary deadlock. */ + + if (lock_rec_other_has_conflicting( + static_cast<enum lock_mode>( + LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION), + block, next_rec_heap_no, trx)) { + + /* Note that we may get DB_SUCCESS also here! */ + trx_mutex_enter(trx); + + err = lock_rec_enqueue_waiting( + LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, + block, next_rec_heap_no, index, thr); + + trx_mutex_exit(trx); + } else { + err = DB_SUCCESS; + } + + lock_mutex_exit(); + + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (!inherit_in || dict_index_is_clust(index)) { + break; + } + /* Update the page max trx id field */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + trx->id, mtr); + default: + /* We only care about the two return values. 
*/ + break; + } + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + const ulint* offsets; + rec_offs_init(offsets_); + + offsets = rec_get_offsets(next_rec, index, offsets_, + ULINT_UNDEFINED, &heap); + + ut_ad(lock_rec_queue_validate( + FALSE, block, next_rec, index, offsets)); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + } +#endif /* UNIV_DEBUG */ + + return(err); +} + +/*********************************************************************//** +If a transaction has an implicit x-lock on a record, but no explicit x-lock +set on the record, sets one for it. */ +static +void +lock_rec_convert_impl_to_expl( +/*==========================*/ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record on page */ + dict_index_t* index, /*!< in: index of record */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + trx_id_t trx_id; + + ut_ad(!lock_mutex_own()); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); + + if (dict_index_is_clust(index)) { + trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); + /* The clustered index record was last modified by + this transaction. The transaction may have been + committed a long time ago. */ + } else { + ut_ad(!dict_index_is_online_ddl(index)); + trx_id = lock_sec_rec_some_has_impl(rec, index, offsets); + /* The transaction can be committed before the + trx_is_active(trx_id, NULL) check below, because we are not + holding lock_mutex. */ + + ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP, + trx_id, rec, block)); + } + + if (trx_id != 0) { + trx_t* impl_trx; + ulint heap_no = page_rec_get_heap_no(rec); + + lock_mutex_enter(); + + /* If the transaction is still active and has no + explicit x-lock set on the record, set one for it */ + + impl_trx = trx_rw_is_active(trx_id, NULL); + + /* impl_trx cannot be committed until lock_mutex_exit() + because lock_trx_release_locks() acquires lock_sys->mutex */ + + if (impl_trx != NULL + && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, + heap_no, impl_trx)) { + ulint type_mode = (LOCK_REC | LOCK_X + | LOCK_REC_NOT_GAP); + + lock_rec_add_to_queue( + type_mode, block, heap_no, index, + impl_trx, FALSE); + } + + lock_mutex_exit(); + } +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate modify (update, +delete mark, or delete unmark) of a clustered index record. If they do, +first tests if the query thread should anyway be suspended for some +reason; if not, then puts the transaction and the query thread to the +lock wait state and inserts a waiting request for a record x-lock to the +lock queue. 
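+
+An illustrative call (a hedged sketch, not code from this file; the
+offsets argument would come from rec_get_offsets() as in the real
+callers):
+
+	err = lock_clust_rec_modify_check_and_lock(
+		0, block, rec, index, offsets, thr);
+
+DB_LOCK_WAIT here means the requesting transaction was enqueued behind
+a conflicting lock on the record.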
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_clust_rec_modify_check_and_lock( +/*=================================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: record which should be + modified */ + dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(dict_index_is_clust(index)); + ut_ad(block->frame == page_align(rec)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = rec_offs_comp(offsets) + ? rec_get_heap_no_new(rec) + : rec_get_heap_no_old(rec); + + /* If a transaction has no explicit x-lock set on the record, set one + for it */ + + lock_rec_convert_impl_to_expl(block, rec, index, offsets); + + lock_mutex_enter(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + + ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate modify (delete +mark or delete unmark) of a secondary index record. +@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_sec_rec_modify_check_and_lock( +/*===============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + buf_block_t* block, /*!< in/out: buffer block of rec */ + const rec_t* rec, /*!< in: record which should be + modified; NOTE: as this is a secondary + index, we always have to modify the + clustered index record first: see the + comment below */ + dict_index_t* index, /*!< in: secondary index */ + que_thr_t* thr, /*!< in: query thread + (can be NULL if BTR_NO_LOCKING_FLAG) */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG)); + ut_ad(block->frame == page_align(rec)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = page_rec_get_heap_no(rec); + + /* Another transaction cannot have an implicit lock on the record, + because when we come here, we already have modified the clustered + index record, and this would not have been possible if another active + transaction had modified this secondary index record. 
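+	(Roughly: an implicit lock on a secondary index record belongs
+	to the active transaction that last modified it, and any such
+	modification also modifies the corresponding clustered index
+	record. Having just modified that clustered index record
+	ourselves, no other active transaction can hold such a lock.)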
*/ + + lock_mutex_enter(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + const ulint* offsets; + rec_offs_init(offsets_); + + offsets = rec_get_offsets(rec, index, offsets_, + ULINT_UNDEFINED, &heap); + + ut_ad(lock_rec_queue_validate( + FALSE, block, rec, index, offsets)); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + } +#endif /* UNIV_DEBUG */ + + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { + /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; + } + + return(err); +} + +/*********************************************************************//** +Like lock_clust_rec_read_check_and_lock(), but reads a +secondary index record. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_sec_rec_read_check_and_lock( +/*=============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: secondary index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + enum lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(block->frame == page_align(rec)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(mode == LOCK_X || mode == LOCK_S); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = page_rec_get_heap_no(rec); + + /* Some transaction may have an implicit x-lock on the record only + if the max trx id for the page >= min trx id for the trx list or a + database recovery is running. */ + + if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id() + || recv_recovery_is_on()) + && !page_rec_is_supremum(rec)) { + + lock_rec_convert_impl_to_expl(block, rec, index, offsets); + } + + lock_mutex_enter(); + + ut_ad(mode != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad(mode != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + + err = lock_rec_lock(FALSE, mode | gap_mode, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + + ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + + return(err); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. 
If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ +UNIV_INTERN +dberr_t +lock_clust_rec_read_check_and_lock( +/*===============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + enum lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(dict_index_is_clust(index)); + ut_ad(block->frame == page_align(rec)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP + || gap_mode == LOCK_REC_NOT_GAP); + ut_ad(rec_offs_validate(rec, index, offsets)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + heap_no = page_rec_get_heap_no(rec); + + if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) { + + lock_rec_convert_impl_to_expl(block, rec, index, offsets); + } + + lock_mutex_enter(); + + ut_ad(mode != LOCK_X + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad(mode != LOCK_S + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + + err = lock_rec_lock(FALSE, mode | gap_mode, + block, heap_no, index, thr); + + MONITOR_INC(MONITOR_NUM_RECLOCK_REQ); + + lock_mutex_exit(); + + ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets)); + + return(err); +} +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. This is an alternative version of +lock_clust_rec_read_check_and_lock() that does not require the parameter +"offsets". 
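+
+Side-by-side sketch of the two variants (illustrative only; offsets in
+the first call would come from rec_get_offsets()):
+
+	err = lock_clust_rec_read_check_and_lock(
+		0, block, rec, index, offsets, LOCK_S, LOCK_ORDINARY, thr);
+
+	err = lock_clust_rec_read_check_and_lock_alt(
+		0, block, rec, index, LOCK_S, LOCK_ORDINARY, thr);
+
+The _alt variant computes the offsets internally, at the cost of a
+possible temporary heap allocation, and folds DB_SUCCESS_LOCKED_REC
+into DB_SUCCESS.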
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
+{
+	mem_heap_t*	tmp_heap	= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	dberr_t		err;
+	rec_offs_init(offsets_);
+
+	offsets = rec_get_offsets(rec, index, offsets,
+				  ULINT_UNDEFINED, &tmp_heap);
+	err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
+						 offsets, mode, gap_mode, thr);
+	if (tmp_heap) {
+		mem_heap_free(tmp_heap);
+	}
+
+	if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
+		err = DB_SUCCESS;
+	}
+
+	return(err);
+}
+
+/*******************************************************************//**
+Release the last lock from the transaction's autoinc locks. */
+UNIV_INLINE
+void
+lock_release_autoinc_last_lock(
+/*===========================*/
+	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
+{
+	ulint	last;
+	lock_t*	lock;
+
+	ut_ad(lock_mutex_own());
+	ut_a(!ib_vector_is_empty(autoinc_locks));
+
+	/* The lock to be released must be the last lock acquired. */
+	last = ib_vector_size(autoinc_locks) - 1;
+	lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
+
+	/* The vector should contain only AUTOINC locks. */
+	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
+	ut_a(lock_get_type(lock) == LOCK_TABLE);
+
+	ut_a(lock->un_member.tab_lock.table != NULL);
+
+	/* This will remove the lock from the trx autoinc_locks too. */
+	lock_table_dequeue(lock);
+
+	/* Remove from the table vector too. */
+	lock_trx_table_locks_remove(lock);
+}
+
+/*******************************************************************//**
+Check if a transaction holds any autoinc locks.
+@return TRUE if the transaction holds any AUTOINC locks. */
+static
+ibool
+lock_trx_holds_autoinc_locks(
+/*=========================*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	ut_a(trx->autoinc_locks != NULL);
+
+	return(!ib_vector_is_empty(trx->autoinc_locks));
+}
+
+/*******************************************************************//**
+Release all the transaction's autoinc locks. */
+static
+void
+lock_release_autoinc_locks(
+/*=======================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	ut_ad(lock_mutex_own());
+	/* If this is invoked for a running transaction by the thread
+	that is serving the transaction, then it is not necessary to
+	hold trx->mutex here. */
+
+	ut_a(trx->autoinc_locks != NULL);
+
+	/* We release the locks in reverse order. This is to
+	avoid searching the vector for the element to delete at
+	the lower level. See lock_table_remove_low() for details. */
+	while (!ib_vector_is_empty(trx->autoinc_locks)) {
+
+		/* lock_table_remove_low() will also remove the lock from
+		the transaction's autoinc_locks vector. */
+		lock_release_autoinc_last_lock(trx->autoinc_locks);
+	}
+
+	/* Should have released all locks.
*/ + ut_a(ib_vector_is_empty(trx->autoinc_locks)); +} + +/*******************************************************************//** +Gets the type of a lock. Non-inline version for using outside of the +lock module. +@return LOCK_TABLE or LOCK_REC */ +UNIV_INTERN +ulint +lock_get_type( +/*==========*/ + const lock_t* lock) /*!< in: lock */ +{ + return(lock_get_type_low(lock)); +} + +/*******************************************************************//** +Gets the id of the transaction owning a lock. +@return transaction id */ +UNIV_INTERN +trx_id_t +lock_get_trx_id( +/*============*/ + const lock_t* lock) /*!< in: lock */ +{ + return(lock->trx->id); +} + +/*******************************************************************//** +Gets the mode of a lock in a human readable string. +The string should not be free()'d or modified. +@return lock mode */ +UNIV_INTERN +const char* +lock_get_mode_str( +/*==============*/ + const lock_t* lock) /*!< in: lock */ +{ + ibool is_gap_lock; + + is_gap_lock = lock_get_type_low(lock) == LOCK_REC + && lock_rec_get_gap(lock); + + switch (lock_get_mode(lock)) { + case LOCK_S: + if (is_gap_lock) { + return("S,GAP"); + } else { + return("S"); + } + case LOCK_X: + if (is_gap_lock) { + return("X,GAP"); + } else { + return("X"); + } + case LOCK_IS: + if (is_gap_lock) { + return("IS,GAP"); + } else { + return("IS"); + } + case LOCK_IX: + if (is_gap_lock) { + return("IX,GAP"); + } else { + return("IX"); + } + case LOCK_AUTO_INC: + return("AUTO_INC"); + default: + return("UNKNOWN"); + } +} + +/*******************************************************************//** +Gets the type of a lock in a human readable string. +The string should not be free()'d or modified. +@return lock type */ +UNIV_INTERN +const char* +lock_get_type_str( +/*==============*/ + const lock_t* lock) /*!< in: lock */ +{ + switch (lock_get_type_low(lock)) { + case LOCK_REC: + return("RECORD"); + case LOCK_TABLE: + return("TABLE"); + default: + return("UNKNOWN"); + } +} + +/*******************************************************************//** +Gets the table on which the lock is. +@return table */ +UNIV_INLINE +dict_table_t* +lock_get_table( +/*===========*/ + const lock_t* lock) /*!< in: lock */ +{ + switch (lock_get_type_low(lock)) { + case LOCK_REC: + ut_ad(dict_index_is_clust(lock->index) + || !dict_index_is_online_ddl(lock->index)); + return(lock->index->table); + case LOCK_TABLE: + return(lock->un_member.tab_lock.table); + default: + ut_error; + return(NULL); + } +} + +/*******************************************************************//** +Gets the id of the table on which the lock is. +@return id of the table */ +UNIV_INTERN +table_id_t +lock_get_table_id( +/*==============*/ + const lock_t* lock) /*!< in: lock */ +{ + dict_table_t* table; + + table = lock_get_table(lock); + + return(table->id); +} + +/*******************************************************************//** +Gets the name of the table on which the lock is. +The string should not be free()'d or modified. +@return name of the table */ +UNIV_INTERN +const char* +lock_get_table_name( +/*================*/ + const lock_t* lock) /*!< in: lock */ +{ + dict_table_t* table; + + table = lock_get_table(lock); + + return(table->name); +} + +/*******************************************************************//** +For a record lock, gets the index on which the lock is. 
+@return index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+	ut_ad(dict_index_is_clust(lock->index)
+	      || !dict_index_is_online_ddl(lock->index));
+
+	return(lock->index);
+}
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+	ut_ad(dict_index_is_clust(lock->index)
+	      || !dict_index_is_online_ddl(lock->index));
+
+	return(lock->index->name);
+}
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->un_member.rec_lock.space);
+}
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+	return(lock->un_member.rec_lock.page_no);
+}
+
+/*********************************************************************//**
+Cancels a waiting lock request and releases any other transactions
+waiting behind it. */
+UNIV_INTERN
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+	lock_t*	lock)	/*!< in/out: waiting lock request */
+{
+	que_thr_t*	thr;
+
+	ut_ad(lock_mutex_own());
+	ut_ad(trx_mutex_own(lock->trx));
+
+	lock->trx->lock.cancel = TRUE;
+
+	if (lock_get_type_low(lock) == LOCK_REC) {
+
+		lock_rec_dequeue_from_page(lock);
+	} else {
+		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+		if (lock->trx->autoinc_locks != NULL) {
+			/* Release the transaction's AUTOINC locks. */
+			lock_release_autoinc_locks(lock->trx);
+		}
+
+		lock_table_dequeue(lock);
+	}
+
+	/* Reset the wait flag and the back pointer to lock in trx. */
+
+	lock_reset_lock_and_trx_wait(lock);
+
+	/* The following function releases the trx from lock wait. */
+
+	thr = que_thr_end_lock_wait(lock->trx);
+
+	if (thr != NULL) {
+		lock_wait_release_thread_if_suspended(thr);
+	}
+
+	lock->trx->lock.cancel = FALSE;
+}
+
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+UNIV_INTERN
+void
+lock_unlock_table_autoinc(
+/*======================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	ut_ad(!lock_mutex_own());
+	ut_ad(!trx_mutex_own(trx));
+	ut_ad(!trx->lock.wait_lock);
+	/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
+	but not COMMITTED transactions. */
+	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
+	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+
+	/* This function is invoked for a running transaction by the
+	thread that is serving the transaction. Therefore it is not
+	necessary to hold trx->mutex here.
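+	AUTO_INC locks are statement-scoped rather than
+	transaction-scoped, which is why they are released here instead
+	of at commit. An illustrative call site (a hedged sketch; the
+	real caller sits in the MySQL handler layer at statement end):
+
+		lock_unlock_table_autoinc(trx);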
 */
+
+	if (lock_trx_holds_autoinc_locks(trx)) {
+		lock_mutex_enter();
+
+		lock_release_autoinc_locks(trx);
+
+		lock_mutex_exit();
+	}
+}
+
+/*********************************************************************//**
+Releases a transaction's locks, and releases any other transactions
+waiting because of these locks. Changes the state of the transaction to
+TRX_STATE_COMMITTED_IN_MEMORY. */
+UNIV_INTERN
+void
+lock_trx_release_locks(
+/*===================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	assert_trx_in_list(trx);
+
+	if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
+		mutex_enter(&trx_sys->mutex);
+		ut_a(trx_sys->n_prepared_trx > 0);
+		trx_sys->n_prepared_trx--;
+		if (trx->is_recovered) {
+			ut_a(trx_sys->n_prepared_recovered_trx > 0);
+			trx_sys->n_prepared_recovered_trx--;
+		}
+		mutex_exit(&trx_sys->mutex);
+	} else {
+		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+	}
+
+	/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
+	is protected by both the lock_sys->mutex and the trx->mutex. */
+	lock_mutex_enter();
+	trx_mutex_enter(trx);
+
+	/* The following assignment makes the transaction committed in memory
+	and makes its changes to data visible to other transactions.
+	NOTE that there is a small discrepancy from the strict formal
+	visibility rules here: a human user of the database can see
+	modifications made by another transaction T even before the necessary
+	log segment has been flushed to the disk. If the database happens to
+	crash before the flush, the user has seen modifications from T which
+	will never be a committed transaction. However, any transaction T2
+	which sees the modifications of the committing transaction T, and
+	which also itself makes modifications to the database, will get an lsn
+	larger than the committing transaction T. In the case where the log
+	flush fails, and T never gets committed, also T2 will never get
+	committed. */
+
+	/*--------------------------------------*/
+	trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
+	/*--------------------------------------*/
+
+	/* If the background thread trx_rollback_or_clean_recovered()
+	is still active, then there is a chance that the rollback
+	thread may see this trx as COMMITTED_IN_MEMORY and go ahead
+	to clean it up by calling trx_cleanup_at_db_startup(). This can
+	happen if we are committing a trx here that was left in the
+	PREPARED state at the crash. Note that the commit of the
+	rollback of a PREPARED trx happens in the recovery thread,
+	while the rollback of other transactions happens in the
+	background thread. To avoid this race we unconditionally unset
+	the is_recovered flag. */
+
+	trx->is_recovered = FALSE;
+
+	trx_mutex_exit(trx);
+
+	lock_release(trx);
+
+	lock_mutex_exit();
+}
+
+/*********************************************************************//**
+Checks whether the transaction has already been rolled back because it
+was selected as a deadlock victim, or, if it still has to wait, cancels
+the wait lock.
+@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+lock_trx_handle_wait(
+/*=================*/
+	trx_t*	trx)	/*!< in/out: trx lock state */
+{
+	dberr_t	err;
+
+	lock_mutex_enter();
+
+	trx_mutex_enter(trx);
+
+	if (trx->lock.was_chosen_as_deadlock_victim) {
+		err = DB_DEADLOCK;
+	} else if (trx->lock.wait_lock != NULL) {
+		lock_cancel_waiting_and_release(trx->lock.wait_lock);
+		err = DB_LOCK_WAIT;
+	} else {
+		/* The lock was probably granted before we got here.
*/ + err = DB_SUCCESS; + } + + lock_mutex_exit(); + trx_mutex_exit(trx); + + return(err); +} + +/*********************************************************************//** +Get the number of locks on a table. +@return number of locks */ +UNIV_INTERN +ulint +lock_table_get_n_locks( +/*===================*/ + const dict_table_t* table) /*!< in: table */ +{ + ulint n_table_locks; + + lock_mutex_enter(); + + n_table_locks = UT_LIST_GET_LEN(table->locks); + + lock_mutex_exit(); + + return(n_table_locks); +} + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Do an exhaustive check for any locks (table or rec) against the table. +@return lock if found */ +static +const lock_t* +lock_table_locks_lookup( +/*====================*/ + const dict_table_t* table, /*!< in: check if there are + any locks held on records in + this table or on the table + itself */ + const trx_list_t* trx_list) /*!< in: trx list to check */ +{ + trx_t* trx; + + ut_a(table != NULL); + ut_ad(lock_mutex_own()); + ut_ad(mutex_own(&trx_sys->mutex)); + + ut_ad(trx_list == &trx_sys->rw_trx_list + || trx_list == &trx_sys->ro_trx_list); + + for (trx = UT_LIST_GET_FIRST(*trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + + const lock_t* lock; + + assert_trx_in_list(trx); + ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); + + for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + + ut_a(lock->trx == trx); + + if (lock_get_type_low(lock) == LOCK_REC) { + ut_ad(!dict_index_is_online_ddl(lock->index) + || dict_index_is_clust(lock->index)); + if (lock->index->table == table) { + return(lock); + } + } else if (lock->un_member.tab_lock.table == table) { + return(lock); + } + } + } + + return(NULL); +} +#endif /* UNIV_DEBUG */ + +/*******************************************************************//** +Check if there are any locks (table or rec) against table. +@return TRUE if table has either table or record locks. */ +UNIV_INTERN +ibool +lock_table_has_locks( +/*=================*/ + const dict_table_t* table) /*!< in: check if there are any locks + held on records in this table or on the + table itself */ +{ + ibool has_locks; + + lock_mutex_enter(); + + has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0; + +#ifdef UNIV_DEBUG + if (!has_locks) { + mutex_enter(&trx_sys->mutex); + + ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list)); + ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list)); + + mutex_exit(&trx_sys->mutex); + } +#endif /* UNIV_DEBUG */ + + lock_mutex_exit(); + + return(has_locks); +} + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Check if the transaction holds any locks on the sys tables +or its records. +@return the strongest lock found on any sys table or 0 for none */ +UNIV_INTERN +const lock_t* +lock_trx_has_sys_table_locks( +/*=========================*/ + const trx_t* trx) /*!< in: transaction to check */ +{ + lint i; + const lock_t* strongest_lock = 0; + lock_mode strongest = LOCK_NONE; + + lock_mutex_enter(); + + /* Find a valid mode. Note: ib_vector_size() can be 0. 
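+	This first backward scan only seeds "strongest" with the most
+	recently acquired lock on a system table; the second loop below
+	then continues over the remaining, earlier entries and keeps
+	the strongest mode seen.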
*/ + for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock != NULL + && dict_is_sys_table(lock->un_member.tab_lock.table->id)) { + + strongest = lock_get_mode(lock); + ut_ad(strongest != LOCK_NONE); + strongest_lock = lock; + break; + } + } + + if (strongest == LOCK_NONE) { + lock_mutex_exit(); + return(NULL); + } + + for (/* No op */; i >= 0; --i) { + const lock_t* lock; + + lock = *static_cast<const lock_t**>( + ib_vector_get(trx->lock.table_locks, i)); + + if (lock == NULL) { + continue; + } + + ut_ad(trx == lock->trx); + ut_ad(lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(lock->un_member.tab_lock.table != NULL); + + lock_mode mode = lock_get_mode(lock); + + if (dict_is_sys_table(lock->un_member.tab_lock.table->id) + && lock_mode_stronger_or_eq(mode, strongest)) { + + strongest = mode; + strongest_lock = lock; + } + } + + lock_mutex_exit(); + + return(strongest_lock); +} + +/*******************************************************************//** +Check if the transaction holds an exclusive lock on a record. +@return whether the locks are held */ +UNIV_INTERN +bool +lock_trx_has_rec_x_lock( +/*====================*/ + const trx_t* trx, /*!< in: transaction to check */ + const dict_table_t* table, /*!< in: table to check */ + const buf_block_t* block, /*!< in: buffer block of the record */ + ulint heap_no)/*!< in: record heap number */ +{ + ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM); + + lock_mutex_enter(); + ut_a(lock_table_has(trx, table, LOCK_IX)); + ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, trx)); + lock_mutex_exit(); + return(true); +} +#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc new file mode 100644 index 00000000000..a1c35e20ead --- /dev/null +++ b/storage/innobase/lock/lock0wait.cc @@ -0,0 +1,543 @@ +/***************************************************************************** + +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0wait.cc +The transaction lock system + +Created 25/5/2010 Sunny Bains +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "srv0mon.h" +#include "que0que.h" +#include "lock0lock.h" +#include "row0mysql.h" +#include "srv0start.h" +#include "ha_prototypes.h" +#include "lock0priv.h" + +/*********************************************************************//** +Print the contents of the lock_sys_t::waiting_threads array. 
*/ +static +void +lock_wait_table_print(void) +/*=======================*/ +{ + ulint i; + const srv_slot_t* slot; + + ut_ad(lock_wait_mutex_own()); + + slot = lock_sys->waiting_threads; + + for (i = 0; i < OS_THREAD_MAX_N; i++, ++slot) { + + fprintf(stderr, + "Slot %lu: thread type %lu," + " in use %lu, susp %lu, timeout %lu, time %lu\n", + (ulong) i, + (ulong) slot->type, + (ulong) slot->in_use, + (ulong) slot->suspended, + slot->wait_timeout, + (ulong) difftime(ut_time(), slot->suspend_time)); + } +} + +/*********************************************************************//** +Release a slot in the lock_sys_t::waiting_threads. Adjust the array last pointer +if there are empty slots towards the end of the table. */ +static +void +lock_wait_table_release_slot( +/*=========================*/ + srv_slot_t* slot) /*!< in: slot to release */ +{ +#ifdef UNIV_DEBUG + srv_slot_t* upper = lock_sys->waiting_threads + OS_THREAD_MAX_N; +#endif /* UNIV_DEBUG */ + + lock_wait_mutex_enter(); + + ut_ad(slot->in_use); + ut_ad(slot->thr != NULL); + ut_ad(slot->thr->slot != NULL); + ut_ad(slot->thr->slot == slot); + + /* Must be within the array boundaries. */ + ut_ad(slot >= lock_sys->waiting_threads); + ut_ad(slot < upper); + + /* Note: When we reserve the slot we use the trx_t::mutex to update + the slot values to change the state to reserved. Here we are using the + lock mutex to change the state of the slot to free. This is by design, + because when we query the slot state we always hold both the lock and + trx_t::mutex. To reduce contention on the lock mutex when reserving the + slot we avoid acquiring the lock mutex. */ + + lock_mutex_enter(); + + slot->thr->slot = NULL; + slot->thr = NULL; + slot->in_use = FALSE; + + lock_mutex_exit(); + + /* Scan backwards and adjust the last free slot pointer. */ + for (slot = lock_sys->last_slot; + slot > lock_sys->waiting_threads && !slot->in_use; + --slot) { + /* No op */ + } + + /* Either the array is empty or the last scanned slot is in use. */ + ut_ad(slot->in_use || slot == lock_sys->waiting_threads); + + lock_sys->last_slot = slot + 1; + + /* The last slot is either outside of the array boundary or it's + on an empty slot. */ + ut_ad(lock_sys->last_slot == upper || !lock_sys->last_slot->in_use); + + ut_ad(lock_sys->last_slot >= lock_sys->waiting_threads); + ut_ad(lock_sys->last_slot <= upper); + + lock_wait_mutex_exit(); +} + +/*********************************************************************//** +Reserves a slot in the thread table for the current user OS thread. 
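+The reservation below is a linear scan over the fixed-size array, so it
+is O(OS_THREAD_MAX_N) in the worst case; the array never grows, which
+is why exhausting it is treated as a fatal error.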
+@return reserved slot */ +static +srv_slot_t* +lock_wait_table_reserve_slot( +/*=========================*/ + que_thr_t* thr, /*!< in: query thread associated + with the user OS thread */ + ulong wait_timeout) /*!< in: lock wait timeout value */ +{ + ulint i; + srv_slot_t* slot; + + ut_ad(lock_wait_mutex_own()); + ut_ad(trx_mutex_own(thr_get_trx(thr))); + + slot = lock_sys->waiting_threads; + + for (i = OS_THREAD_MAX_N; i--; ++slot) { + if (!slot->in_use) { + slot->in_use = TRUE; + slot->thr = thr; + slot->thr->slot = slot; + + if (slot->event == NULL) { + slot->event = os_event_create(); + ut_a(slot->event); + } + + os_event_reset(slot->event); + slot->suspended = TRUE; + slot->suspend_time = ut_time(); + slot->wait_timeout = wait_timeout; + + if (slot == lock_sys->last_slot) { + ++lock_sys->last_slot; + } + + ut_ad(lock_sys->last_slot + <= lock_sys->waiting_threads + OS_THREAD_MAX_N); + + return(slot); + } + } + + ut_print_timestamp(stderr); + + fprintf(stderr, + " InnoDB: There appear to be %lu user" + " threads currently waiting\n" + "InnoDB: inside InnoDB, which is the" + " upper limit. Cannot continue operation.\n" + "InnoDB: As a last thing, we print" + " a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N); + + lock_wait_table_print(); + + ut_error; + return(NULL); +} + +/***************************************************************//** +Puts a user OS thread to wait for a lock to be released. If an error +occurs during the wait trx->error_state associated with thr is +!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK +are possible errors. DB_DEADLOCK is returned if selective deadlock +resolution chose this transaction as a victim. */ +UNIV_INTERN +void +lock_wait_suspend_thread( +/*=====================*/ + que_thr_t* thr) /*!< in: query thread associated with the + user OS thread */ +{ + srv_slot_t* slot; + double wait_time; + trx_t* trx; + ulint had_dict_lock; + ibool was_declared_inside_innodb; + ib_int64_t start_time = 0; + ib_int64_t finish_time; + ulint sec; + ulint ms; + ulong lock_wait_timeout; + + trx = thr_get_trx(thr); + + if (trx->mysql_thd != 0) { + DEBUG_SYNC_C("lock_wait_suspend_thread_enter"); + } + + /* InnoDB system transactions (such as the purge, and + incomplete transactions that are being rolled back after crash + recovery) will use the global value of + innodb_lock_wait_timeout, because trx->mysql_thd == NULL. 
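+	For a regular client connection the per-session value applies
+	instead; e.g. SET innodb_lock_wait_timeout = 5 (an illustrative
+	statement, not taken from this file) bounds each row lock wait
+	to five seconds.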
*/ + lock_wait_timeout = trx_lock_wait_timeout_get(trx); + + lock_wait_mutex_enter(); + + trx_mutex_enter(trx); + + trx->error_state = DB_SUCCESS; + + if (thr->state == QUE_THR_RUNNING) { + + ut_ad(thr->is_active); + + /* The lock has already been released or this transaction + was chosen as a deadlock victim: no need to suspend */ + + if (trx->lock.was_chosen_as_deadlock_victim) { + + trx->error_state = DB_DEADLOCK; + trx->lock.was_chosen_as_deadlock_victim = FALSE; + } + + lock_wait_mutex_exit(); + trx_mutex_exit(trx); + return; + } + + ut_ad(!thr->is_active); + + slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout); + + if (thr->lock_state == QUE_THR_LOCK_ROW) { + srv_stats.n_lock_wait_count.inc(); + srv_stats.n_lock_wait_current_count.inc(); + + if (ut_usectime(&sec, &ms) == -1) { + start_time = -1; + } else { + start_time = (ib_int64_t) sec * 1000000 + ms; + } + } + + /* Wake the lock timeout monitor thread, if it is suspended */ + + os_event_set(lock_sys->timeout_event); + + lock_wait_mutex_exit(); + trx_mutex_exit(trx); + + ulint lock_type = ULINT_UNDEFINED; + + lock_mutex_enter(); + + if (const lock_t* wait_lock = trx->lock.wait_lock) { + lock_type = lock_get_type_low(wait_lock); + } + + lock_mutex_exit(); + + had_dict_lock = trx->dict_operation_lock_mode; + + switch (had_dict_lock) { + case 0: + break; + case RW_S_LATCH: + /* Release foreign key check latch */ + row_mysql_unfreeze_data_dictionary(trx); + + DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep"); + break; + default: + /* There should never be a lock wait when the + dictionary latch is reserved in X mode. Dictionary + transactions should only acquire locks on dictionary + tables, not other tables. All access to dictionary + tables should be covered by dictionary + transactions. */ + ut_error; + } + + ut_a(trx->dict_operation_lock_mode == 0); + + /* Suspend this thread and wait for the event. */ + + was_declared_inside_innodb = trx->declared_to_be_inside_innodb; + + if (was_declared_inside_innodb) { + /* We must declare this OS thread to exit InnoDB, since a + possible other thread holding a lock which this thread waits + for must be allowed to enter, sooner or later */ + + srv_conc_force_exit_innodb(trx); + } + + /* Unknown is also treated like a record lock */ + if (lock_type == ULINT_UNDEFINED || lock_type == LOCK_REC) { + thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK); + } else { + ut_ad(lock_type == LOCK_TABLE); + thd_wait_begin(trx->mysql_thd, THD_WAIT_TABLE_LOCK); + } + + os_event_wait(slot->event); + + thd_wait_end(trx->mysql_thd); + + /* After resuming, reacquire the data dictionary latch if + necessary. */ + + if (was_declared_inside_innodb) { + + /* Return back inside InnoDB */ + + srv_conc_force_enter_innodb(trx); + } + + if (had_dict_lock) { + + row_mysql_freeze_data_dictionary(trx); + } + + wait_time = ut_difftime(ut_time(), slot->suspend_time); + + /* Release the slot for others to use */ + + lock_wait_table_release_slot(slot); + + if (thr->lock_state == QUE_THR_LOCK_ROW) { + ulint diff_time; + + if (ut_usectime(&sec, &ms) == -1) { + finish_time = -1; + } else { + finish_time = (ib_int64_t) sec * 1000000 + ms; + } + + diff_time = (finish_time > start_time) ? + (ulint) (finish_time - start_time) : 0; + + srv_stats.n_lock_wait_current_count.dec(); + srv_stats.n_lock_wait_time.add(diff_time); + + /* Only update the variable if we successfully + retrieved the start and finish times. See Bug#36819. 
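+		A value of -1 means the corresponding ut_usectime() call
+		above failed, so diff_time would be meaningless and must
+		not feed the maximum.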
*/ + if (diff_time > lock_sys->n_lock_max_wait_time + && start_time != -1 + && finish_time != -1) { + + lock_sys->n_lock_max_wait_time = diff_time; + } + + /* Record the lock wait time for this thread */ + thd_set_lock_wait_time(trx->mysql_thd, diff_time); + + } + + if (lock_wait_timeout < 100000000 + && wait_time > (double) lock_wait_timeout) { + + trx->error_state = DB_LOCK_WAIT_TIMEOUT; + + MONITOR_INC(MONITOR_TIMEOUT); + } + + if (trx_is_interrupted(trx)) { + + trx->error_state = DB_INTERRUPTED; + } +} + +/********************************************************************//** +Releases a user OS thread waiting for a lock to be released, if the +thread is already suspended. */ +UNIV_INTERN +void +lock_wait_release_thread_if_suspended( +/*==================================*/ + que_thr_t* thr) /*!< in: query thread associated with the + user OS thread */ +{ + ut_ad(lock_mutex_own()); + ut_ad(trx_mutex_own(thr_get_trx(thr))); + + /* We own both the lock mutex and the trx_t::mutex but not the + lock wait mutex. This is OK because other threads will see the state + of this slot as being in use and no other thread can change the state + of the slot to free unless that thread also owns the lock mutex. */ + + if (thr->slot != NULL && thr->slot->in_use && thr->slot->thr == thr) { + trx_t* trx = thr_get_trx(thr); + + if (trx->lock.was_chosen_as_deadlock_victim) { + + trx->error_state = DB_DEADLOCK; + trx->lock.was_chosen_as_deadlock_victim = FALSE; + } + + os_event_set(thr->slot->event); + } +} + +/*********************************************************************//** +Check if the thread lock wait has timed out. Release its locks if the +wait has actually timed out. */ +static +void +lock_wait_check_and_cancel( +/*=======================*/ + const srv_slot_t* slot) /*!< in: slot reserved by a user + thread when the wait started */ +{ + trx_t* trx; + double wait_time; + ib_time_t suspend_time = slot->suspend_time; + + ut_ad(lock_wait_mutex_own()); + + ut_ad(slot->in_use); + + ut_ad(slot->suspended); + + wait_time = ut_difftime(ut_time(), suspend_time); + + trx = thr_get_trx(slot->thr); + + if (trx_is_interrupted(trx) + || (slot->wait_timeout < 100000000 + && (wait_time > (double) slot->wait_timeout + || wait_time < 0))) { + + /* Timeout exceeded or a wrap-around in system + time counter: cancel the lock request queued + by the transaction and release possible + other transactions waiting behind; it is + possible that the lock has already been + granted: in that case do nothing */ + + lock_mutex_enter(); + + trx_mutex_enter(trx); + + if (trx->lock.wait_lock) { + + ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT); + + lock_cancel_waiting_and_release(trx->lock.wait_lock); + } + + lock_mutex_exit(); + + trx_mutex_exit(trx); + } + +} + +/*********************************************************************//** +A thread which wakes up threads whose lock wait may have lasted too long. 
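+
+The design is a one-second poll rather than a precise per-wait timer:
+os_event_wait_time_low() returns either when a waiter signals
+lock_sys->timeout_event or when the second elapses, and each in-use
+slot is then checked against its own wait_timeout.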
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(lock_wait_timeout_thread)(
+/*=====================================*/
+	void*	arg __attribute__((unused)))
+			/* in: a dummy parameter required by
+			os_thread_create */
+{
+	ib_int64_t	sig_count = 0;
+	os_event_t	event = lock_sys->timeout_event;
+
+	ut_ad(!srv_read_only_mode);
+
+#ifdef UNIV_PFS_THREAD
+	pfs_register_thread(srv_lock_timeout_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+	lock_sys->timeout_thread_active = true;
+
+	do {
+		srv_slot_t*	slot;
+
+		/* When someone is waiting for a lock, we wake up every second
+		and check if a timeout has passed for a lock wait */
+
+		os_event_wait_time_low(event, 1000000, sig_count);
+		sig_count = os_event_reset(event);
+
+		if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
+			break;
+		}
+
+		lock_wait_mutex_enter();
+
+		/* Check all slots for user threads that are waiting
+		on locks, and whether they have exceeded the time limit. */
+
+		for (slot = lock_sys->waiting_threads;
+		     slot < lock_sys->last_slot;
+		     ++slot) {
+
+			/* We are doing a read without the lock mutex
+			and/or the trx mutex. This is OK because a slot
+			can't be freed or reserved without the lock wait
+			mutex. */
+
+			if (slot->in_use) {
+				lock_wait_check_and_cancel(slot);
+			}
+		}
+
+		sig_count = os_event_reset(event);
+
+		lock_wait_mutex_exit();
+
+	} while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP);
+
+	lock_sys->timeout_thread_active = false;
+
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;
+}