diff options
Diffstat (limited to 'innobase/lock/lock0lock.c')
-rw-r--r-- | innobase/lock/lock0lock.c | 3976 |
1 files changed, 3976 insertions, 0 deletions
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c new file mode 100644 index 00000000000..efea160fac1 --- /dev/null +++ b/innobase/lock/lock0lock.c @@ -0,0 +1,3976 @@ +/****************************************************** +The transaction lock system + +(c) 1996 Innobase Oy + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#include "lock0lock.h" + +#ifdef UNIV_NONINL +#include "lock0lock.ic" +#endif + +#include "usr0sess.h" + +/* When releasing transaction locks, this specifies how often we release +the kernel mutex for a moment to give also others access to it */ + +#define LOCK_RELEASE_KERNEL_INTERVAL 1000 + +/* Safety margin when creating a new record lock: this many extra records +can be inserted to the page without need to create a lock with a bigger +bitmap */ + +#define LOCK_PAGE_BITMAP_MARGIN 64 + +/* An explicit record lock affects both the record and the gap before it. +An implicit x-lock does not affect the gap, it only locks the index +record from read or update. + +If a transaction has modified or inserted an index record, then +it owns an implicit x-lock on the record. On a secondary index record, +a transaction has an implicit x-lock also if it has modified the +clustered index record, the max trx id of the page where the secondary +index record resides is >= trx id of the transaction (or database recovery +is running), and there are no explicit non-gap lock requests on the +secondary index record. + +This complicated definition for a secondary index comes from the +implementation: we want to be able to determine if a secondary index +record has an implicit x-lock, just by looking at the present clustered +index record, not at the historical versions of the record. The +complicated definition can be explained to the user so that there is +nondeterminism in the access path when a query is answered: we may, +or may not, access the clustered index record and thus may, or may not, +bump into an x-lock set there. + +Different transaction can have conflicting locks set on the gap at the +same time. The locks on the gap are purely inhibitive: an insert cannot +be made, or a select cursor may have to wait, if a different transaction +has a conflicting lock on the gap. An x-lock on the gap does not give +the right to insert into the gap if there are conflicting locks granted +on the gap at the same time. + +An explicit lock can be placed on a user record or the supremum record of +a page. The locks on the supremum record are always thought to be of the gap +type, though the gap bit is not set. When we perform an update of a record +where the size of the record changes, we may temporarily store its explicit +locks on the infimum record of the page, though the infimum otherwise never +carries locks. + +A waiting record lock can also be of the gap type. A waiting lock request +can be granted when there is no conflicting mode lock request by another +transaction ahead of it in the explicit lock queue. + +------------------------------------------------------------------------- +RULE 1: If there is an implicit x-lock on a record, and there are non-gap +------- +lock requests waiting in the queue, then the transaction holding the implicit +x-lock also has an explicit non-gap record x-lock. Therefore, as locks are +released, we can grant locks to waiting lock requests purely by looking at +the explicit lock requests in the queue. + +RULE 2: Granted non-gap locks on a record are always ahead in the queue +------- +of waiting non-gap locks on a record. + +RULE 3: Different transactions cannot have conflicting granted non-gap locks +------- +on a record at the same time. However, they can have conflicting granted gap +locks. +RULE 4: If a there is a waiting lock request in a queue, no lock request, +------- +gap or not, can be inserted ahead of it in the queue. In record deletes +and page splits, new gap type locks can be created by the database manager +for a transaction, and without rule 4, the waits-for graph of transactions +might become cyclic without the database noticing it, as the deadlock check +is only performed when a transaction itself requests a lock! +------------------------------------------------------------------------- + +An insert is allowed to a gap if there are no explicit lock requests by +other transactions on the next record. It does not matter if these lock +requests are granted or waiting, gap bit set or not. On the other hand, an +implicit x-lock by another transaction does not prevent an insert, which +allows for more concurrency when using an Oracle-style sequence number +generator for the primary key with many transactions doing inserts +concurrently. + +A modify of a record is allowed if the transaction has an x-lock on the +record, or if other transactions do not have any non-gap lock requests on the +record. + +A read of a single user record with a cursor is allowed if the transaction +has a non-gap explicit, or an implicit lock on the record, or if the other +transactions have no x-lock requests on the record. At a page supremum a +read is always allowed. + +In summary, an implicit lock is seen as a granted x-lock only on the +record, not on the gap. An explicit lock with no gap bit set is a lock +both on the record and the gap. If the gap bit is set, the lock is only +on the gap. Different transaction cannot own conflicting locks on the +record at the same time, but they may own conflicting locks on the gap. +Granted locks on a record give an access right to the record, but gap type +locks just inhibit operations. + +NOTE: Finding out if some transaction has an implicit x-lock on a secondary +index record can be cumbersome. We may have to look at previous versions of +the corresponding clustered index record to find out if a delete marked +secondary index record was delete marked by an active transaction, not by +a committed one. + +FACT A: If a transaction has inserted a row, it can delete it any time +without need to wait for locks. + +PROOF: The transaction has an implicit x-lock on every index record inserted +for the row, and can thus modify each record without the need to wait. Q.E.D. + +FACT B: If a transaction has read some result set with a cursor, it can read +it again, and retrieves the same result set, if it has not modified the +result set in the meantime. Hence, there is no phantom problem. If the +biggest record, in the alphabetical order, touched by the cursor is removed, +a lock wait may occur, otherwise not. + +PROOF: When a read cursor proceeds, it sets an s-lock on each user record +it passes, and a gap type s-lock on each page supremum. The cursor must +wait until it has these locks granted. Then no other transaction can +have a granted x-lock on any of the user records, and therefore cannot +modify the user records. Neither can any other transaction insert into +the gaps which were passed over by the cursor. Page splits and merges, +and removal of obsolete versions of records do not affect this, because +when a user record or a page supremum is removed, the next record inherits +its locks as gap type locks, and therefore blocks inserts to the same gap. +Also, if a page supremum is inserted, it inherits its locks from the successor +record. When the cursor is positioned again at the start of the result set, +the records it will touch on its course are either records it touched +during the last pass or new inserted page supremums. It can immediately +access all these records, and when it arrives at the biggest record, it +notices that the result set is complete. If the biggest record was removed, +lock wait can occur because the next record only inherits a gap type lock, +and a wait may be needed. Q.E.D. */ + +/* If an index record should be changed or a new inserted, we must check +the lock on the record or the next. When a read cursor starts reading, +we will set a record level s-lock on each record it passes, except on the +initial record on which the cursor is positioned before we start to fetch +records. Our index tree search has the convention that the B-tree +cursor is positioned BEFORE the first possibly matching record in +the search. Optimizations are possible here: if the record is searched +on an equality condition to a unique key, we could actually set a special +lock on the record, a lock which would not prevent any insert before +this record. In the next key locking an x-lock set on a record also +prevents inserts just before that record. + There are special infimum and supremum records on each page. +A supremum record can be locked by a read cursor. This records cannot be +updated but the lock prevents insert of a user record to the end of +the page. + Next key locks will prevent the phantom problem where new rows +could appear to SELECT result sets after the select operation has been +performed. Prevention of phantoms ensures the serilizability of +transactions. + What should we check if an insert of a new record is wanted? +Only the lock on the next record on the same page, because also the +supremum record can carry a lock. An s-lock prevents insertion, but +what about an x-lock? If it was set by a searched update, then there +is implicitly an s-lock, too, and the insert should be prevented. +What if our transaction owns an x-lock to the next record, but there is +a waiting s-lock request on the next record? If this s-lock was placed +by a read cursor moving in the ascending order in the index, we cannot +do the insert immediately, because when we finally commit our transaction, +the read cursor should see also the new inserted record. So we should +move the read cursor backward from the the next record for it to pass over +the new inserted record. This move backward may be too cumbersome to +implement. If we in this situation just enqueue a second x-lock request +for our transaction on the next record, then the deadlock mechanism +notices a deadlock between our transaction and the s-lock request +transaction. This seems to be an ok solution. + We could have the convention that granted explicit record locks, +lock the corresponding records from changing, and also lock the gaps +before them from inserting. A waiting explicit lock request locks the gap +before from inserting. Implicit record x-locks, which we derive from the +transaction id in the clustered index record, only lock the record itself +from modification, not the gap before it from inserting. + How should we store update locks? If the search is done by a unique +key, we could just modify the record trx id. Otherwise, we could put a record +x-lock on the record. If the update changes ordering fields of the +clustered index record, the inserted new record needs no record lock in +lock table, the trx id is enough. The same holds for a secondary index +record. Searched delete is similar to update. + +PROBLEM: +What about waiting lock requests? If a transaction is waiting to make an +update to a record which another modified, how does the other transaction +know to send the end-lock-wait signal to the waiting transaction? If we have +the convention that a transaction may wait for just one lock at a time, how +do we preserve it if lock wait ends? + +PROBLEM: +Checking the trx id label of a secondary index record. In the case of a +modification, not an insert, is this necessary? A secondary index record +is modified only by setting or resetting its deleted flag. A secondary index +record contains fields to uniquely determine the corresponding clustered +index record. A secondary index record is therefore only modified if we +also modify the clustered index record, and the trx id checking is done +on the clustered index record, before we come to modify the secondary index +record. So, in the case of delete marking or unmarking a secondary index +record, we do not have to care about trx ids, only the locks in the lock +table must be checked. In the case of a select from a secondary index, the +trx id is relevant, and in this case we may have to search the clustered +index record. + +PROBLEM: How to update record locks when page is split or merged, or +-------------------------------------------------------------------- +a record is deleted or updated? +If the size of fields in a record changes, we perform the update by +a delete followed by an insert. How can we retain the locks set or +waiting on the record? Because a record lock is indexed in the bitmap +by the heap number of the record, when we remove the record from the +record list, it is possible still to keep the lock bits. If the page +is reorganized, we could make a table of old and new heap numbers, +and permute the bitmaps in the locks accordingly. We can add to the +table a row telling where the updated record ended. If the update does +not require a reorganization of the page, we can simply move the lock +bits for the updated record to the position determined by its new heap +number (we may have to allocate a new lock, if we run out of the bitmap +in the old one). + A more complicated case is the one where the reinsertion of the +updated record is done pessimistically, because the structure of the +tree may change. + +PROBLEM: If a supremum record is removed in a page merge, or a record +--------------------------------------------------------------------- +removed in a purge, what to do to the waiting lock requests? In a split to +the right, we just move the lock requests to the new supremum. If a record +is removed, we could move the waiting lock request to its inheritor, the +next record in the index. But, the next record may already have lock +requests on its own queue. A new deadlock check should be made then. Maybe +it is easier just to release the waiting transactions. They can then enqueue +new lock requests on appropriate records. + +PROBLEM: When a record is inserted, what locks should it inherit from the +------------------------------------------------------------------------- +upper neighbor? An insert of a new supremum record in a page split is +always possible, but an insert of a new user record requires that the upper +neighbor does not have any lock requests by other transactions, granted or +waiting, in its lock queue. Solution: We can copy the locks as gap type +locks, so that also the waiting locks are transformed to granted gap type +locks on the inserted record. */ + +ibool lock_print_waits = FALSE; + +/* The lock system */ +lock_sys_t* lock_sys = NULL; + +/* A table lock */ +typedef struct lock_table_struct lock_table_t; +struct lock_table_struct{ + dict_table_t* table; /* database table in dictionary cache */ + UT_LIST_NODE_T(lock_t) + locks; /* list of locks on the same table */ +}; + +/* Record lock for a page */ +typedef struct lock_rec_struct lock_rec_t; +struct lock_rec_struct{ + ulint space; /* space id */ + ulint page_no; /* page number */ + ulint n_bits; /* number of bits in the lock bitmap */ + /* NOTE: the lock bitmap is placed immediately + after the lock struct */ +}; + +/* Lock struct */ +struct lock_struct{ + trx_t* trx; /* transaction owning the lock */ + UT_LIST_NODE_T(lock_t) + trx_locks; /* list of the locks of the + transaction */ + ulint type_mode; /* lock type, mode, gap flag, and + wait flag, ORed */ + hash_node_t hash; /* hash chain node for a record lock */ + dict_index_t* index; /* index for a record lock */ + union { + lock_table_t tab_lock;/* table lock */ + lock_rec_t rec_lock;/* record lock */ + } un_member; +}; + +/************************************************************************ +Checks if a lock request results in a deadlock. */ +static +ibool +lock_deadlock_occurs( +/*=================*/ + /* out: TRUE if a deadlock was detected */ + lock_t* lock, /* in: lock the transaction is requesting */ + trx_t* trx); /* in: transaction */ +/************************************************************************ +Looks recursively for a deadlock. */ +static +ibool +lock_deadlock_recursive( +/*====================*/ + /* out: TRUE if a deadlock was detected */ + trx_t* start, /* in: recursion starting point */ + trx_t* trx, /* in: a transaction waiting for a lock */ + lock_t* wait_lock); /* in: the lock trx is waiting to be granted */ + +/************************************************************************* +Reserves the kernel mutex. This function is used in this module to allow +monitoring the contention degree on the kernel mutex caused by the lock +operations. */ +UNIV_INLINE +void +lock_mutex_enter_kernel(void) +/*=========================*/ +{ + mutex_enter(&kernel_mutex); +} + +/************************************************************************* +Releses the kernel mutex. This function is used in this module to allow +monitoring the contention degree on the kernel mutex caused by the lock +operations. */ +UNIV_INLINE +void +lock_mutex_exit_kernel(void) +/*=========================*/ +{ + mutex_exit(&kernel_mutex); +} + +#ifdef notdefined + +/************************************************************************* +Gets the mutex protecting record locks for a page in the buffer pool. */ +UNIV_INLINE +mutex_t* +lock_rec_get_mutex( +/*===============*/ + byte* ptr) /* in: pointer to somewhere within a buffer frame */ +{ + return(buf_frame_get_lock_mutex(ptr)); +} + +/************************************************************************* +Reserves the mutex protecting record locks for a page in the buffer pool. */ +UNIV_INLINE +void +lock_rec_mutex_enter( +/*=================*/ + byte* ptr) /* in: pointer to somewhere within a buffer frame */ +{ + mutex_enter(lock_rec_get_mutex(ptr)); +} + +/************************************************************************* +Releases the mutex protecting record locks for a page in the buffer pool. */ +UNIV_INLINE +void +lock_rec_mutex_exit( +/*================*/ + byte* ptr) /* in: pointer to somewhere within a buffer frame */ +{ + mutex_exit(lock_rec_get_mutex(ptr)); +} + +/************************************************************************* +Checks if the caller owns the mutex to record locks of a page. Works only in +the debug version. */ +UNIV_INLINE +ibool +lock_rec_mutex_own( +/*===============*/ + /* out: TRUE if the current OS thread has reserved the + mutex */ + byte* ptr) /* in: pointer to somewhere within a buffer frame */ +{ + return(mutex_own(lock_rec_get_mutex(ptr))); +} + +/************************************************************************* +Gets the mutex protecting record locks on a given page address. */ + +mutex_t* +lock_rec_get_mutex_for_addr( +/*========================*/ + ulint space, /* in: space id */ + ulint page_no)/* in: page number */ +{ + return(hash_get_mutex(lock_sys->rec_hash, + lock_rec_fold(space, page_no))); +} + +/************************************************************************* +Checks if the caller owns the mutex to record locks of a page. Works only in +the debug version. */ +UNIV_INLINE +ibool +lock_rec_mutex_own_addr( +/*====================*/ + ulint space, /* in: space id */ + ulint page_no)/* in: page number */ +{ + return(mutex_own(lock_rec_get_mutex_for_addr(space, page_no))); +} + +/************************************************************************* +Reserves all the mutexes protecting record locks. */ +UNIV_INLINE +void +lock_rec_mutex_enter_all(void) +/*==========================*/ +{ + hash_table_t* table; + ulint n_mutexes; + ulint i; + + table = lock_sys->rec_hash; + + n_mutexes = table->n_mutexes; + + for (i = 0; i < n_mutexes; i++) { + + mutex_enter(hash_get_nth_mutex(table, i)); + } +} + +/************************************************************************* +Releases all the mutexes protecting record locks. */ +UNIV_INLINE +void +lock_rec_mutex_exit_all(void) +/*=========================*/ +{ + hash_table_t* table; + ulint n_mutexes; + ulint i; + + table = lock_sys->rec_hash; + + n_mutexes = table->n_mutexes; + + for (i = 0; i < n_mutexes; i++) { + + mutex_exit(hash_get_nth_mutex(table, i)); + } +} + +/************************************************************************* +Checks that the current OS thread owns all the mutexes protecting record +locks. */ +UNIV_INLINE +ibool +lock_rec_mutex_own_all(void) +/*========================*/ + /* out: TRUE if owns all */ +{ + hash_table_t* table; + ulint n_mutexes; + ibool owns_yes = TRUE; + ulint i; + + table = lock_sys->rec_hash; + + n_mutexes = table->n_mutexes; + + for (i = 0; i < n_mutexes; i++) { + if (!mutex_own(hash_get_nth_mutex(table, i))) { + + owns_yes = FALSE; + } + } + + return(owns_yes); +} + +#endif + +/************************************************************************* +Checks that a record is seen in a consistent read. */ + +ibool +lock_clust_rec_cons_read_sees( +/*==========================*/ + /* out: TRUE if sees, or FALSE if an earlier + version of the record should be retrieved */ + rec_t* rec, /* in: user record which should be read or + passed over by a read cursor */ + dict_index_t* index, /* in: clustered index */ + read_view_t* view) /* in: consistent read view */ +{ + dulint trx_id; + + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(page_rec_is_user_rec(rec)); + + trx_id = row_get_rec_trx_id(rec, index); + + if (read_view_sees_trx_id(view, trx_id)) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Checks that a non-clustered index record is seen in a consistent read. */ + +ulint +lock_sec_rec_cons_read_sees( +/*========================*/ + /* out: TRUE if certainly sees, or FALSE if an + earlier version of the clustered index record + might be needed: NOTE that a non-clustered + index page contains so little information on + its modifications that also in the case FALSE, + the present version of rec may be the right, + but we must check this from the clustered + index record */ + rec_t* rec, /* in: user record which should be read or + passed over by a read cursor */ + dict_index_t* index, /* in: non-clustered index */ + read_view_t* view) /* in: consistent read view */ +{ + dulint max_trx_id; + + ut_ad(!(index->type & DICT_CLUSTERED)); + ut_ad(page_rec_is_user_rec(rec)); + + if (recv_recovery_is_on()) { + + return(FALSE); + } + + max_trx_id = page_get_max_trx_id(buf_frame_align(rec)); + + if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) { + + return(FALSE); + } + + return(TRUE); +} + +/************************************************************************* +Creates the lock system at database start. */ + +void +lock_sys_create( +/*============*/ + ulint n_cells) /* in: number of slots in lock hash table */ +{ + lock_sys = mem_alloc(sizeof(lock_sys_t)); + + lock_sys->rec_hash = hash_create(n_cells); + + /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */ +} + +/************************************************************************* +Gets the mode of a lock. */ +UNIV_INLINE +ulint +lock_get_mode( +/*==========*/ + /* out: mode */ + lock_t* lock) /* in: lock */ +{ + ut_ad(lock); + + return(lock->type_mode & LOCK_MODE_MASK); +} + +/************************************************************************* +Gets the type of a lock. */ +UNIV_INLINE +ulint +lock_get_type( +/*==========*/ + /* out: LOCK_TABLE or LOCK_RECa */ + lock_t* lock) /* in: lock */ +{ + ut_ad(lock); + + return(lock->type_mode & LOCK_TYPE_MASK); +} + +/************************************************************************* +Gets the wait flag of a lock. */ +UNIV_INLINE +ibool +lock_get_wait( +/*==========*/ + /* out: TRUE if waiting */ + lock_t* lock) /* in: lock */ +{ + ut_ad(lock); + + if (lock->type_mode & LOCK_WAIT) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Sets the wait flag of a lock and the back pointer in trx to lock. */ +UNIV_INLINE +void +lock_set_lock_and_trx_wait( +/*=======================*/ + lock_t* lock, /* in: lock */ + trx_t* trx) /* in: trx */ +{ + ut_ad(lock); + ut_ad(trx->wait_lock == NULL); + + trx->wait_lock = lock; + lock->type_mode = lock->type_mode | LOCK_WAIT; +} + +/************************************************************************** +The back pointer to a waiting lock request in the transaction is set to NULL +and the wait bit in lock type_mode is reset. */ +UNIV_INLINE +void +lock_reset_lock_and_trx_wait( +/*=========================*/ + lock_t* lock) /* in: record lock */ +{ + ut_ad((lock->trx)->wait_lock == lock); + ut_ad(lock_get_wait(lock)); + + /* Reset the back pointer in trx to this waiting lock request */ + + (lock->trx)->wait_lock = NULL; + lock->type_mode = lock->type_mode & ~LOCK_WAIT; +} + +/************************************************************************* +Gets the gap flag of a record lock. */ +UNIV_INLINE +ibool +lock_rec_get_gap( +/*=============*/ + /* out: TRUE if gap flag set */ + lock_t* lock) /* in: record lock */ +{ + ut_ad(lock); + ut_ad(lock_get_type(lock) == LOCK_REC); + + if (lock->type_mode & LOCK_GAP) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Sets the gap flag of a record lock. */ +UNIV_INLINE +void +lock_rec_set_gap( +/*=============*/ + lock_t* lock, /* in: record lock */ + ibool val) /* in: value to set: TRUE or FALSE */ +{ + ut_ad(lock); + ut_ad((val == TRUE) || (val == FALSE)); + ut_ad(lock_get_type(lock) == LOCK_REC); + + if (val) { + lock->type_mode = lock->type_mode | LOCK_GAP; + } else { + lock->type_mode = lock->type_mode & ~LOCK_GAP; + } +} + +/************************************************************************* +Calculates if lock mode 1 is stronger or equal to lock mode 2. */ +UNIV_INLINE +ibool +lock_mode_stronger_or_eq( +/*=====================*/ + /* out: TRUE if mode1 stronger or equal to mode2 */ + ulint mode1, /* in: lock mode */ + ulint mode2) /* in: lock mode */ +{ + ut_ad((mode1 == LOCK_X) || (mode1 == LOCK_S) || (mode1 == LOCK_IX) + || (mode1 == LOCK_IS)); + ut_ad((mode2 == LOCK_X) || (mode2 == LOCK_S) || (mode2 == LOCK_IX) + || (mode2 == LOCK_IS)); + if (mode1 == LOCK_X) { + + return(TRUE); + + } else if ((mode1 == LOCK_S) + && ((mode2 == LOCK_S) || (mode2 == LOCK_IS))) { + return(TRUE); + + } else if ((mode1 == LOCK_IS) && (mode2 == LOCK_IS)) { + + return(TRUE); + + } else if ((mode1 == LOCK_IX) && ((mode2 == LOCK_IX) + || (mode2 == LOCK_IS))) { + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Calculates if lock mode 1 is compatible with lock mode 2. */ +UNIV_INLINE +ibool +lock_mode_compatible( +/*=================*/ + /* out: TRUE if mode1 compatible with mode2 */ + ulint mode1, /* in: lock mode */ + ulint mode2) /* in: lock mode */ +{ + ut_ad((mode1 == LOCK_X) || (mode1 == LOCK_S) || (mode1 == LOCK_IX) + || (mode1 == LOCK_IS)); + ut_ad((mode2 == LOCK_X) || (mode2 == LOCK_S) || (mode2 == LOCK_IX) + || (mode2 == LOCK_IS)); + + if ((mode1 == LOCK_S) && ((mode2 == LOCK_IS) || (mode2 == LOCK_S))) { + + return(TRUE); + + } else if (mode1 == LOCK_X) { + + return(FALSE); + + } else if ((mode1 == LOCK_IS) && ((mode2 == LOCK_IS) + || (mode2 == LOCK_IX) + || (mode2 == LOCK_S))) { + return(TRUE); + + } else if ((mode1 == LOCK_IX) && ((mode2 == LOCK_IS) + || (mode2 == LOCK_IX))) { + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Returns LOCK_X if mode is LOCK_S, and vice versa. */ +UNIV_INLINE +ulint +lock_get_confl_mode( +/*================*/ + /* out: conflicting basic lock mode */ + ulint mode) /* in: LOCK_S or LOCK_X */ +{ + ut_ad((mode == LOCK_X) || (mode == LOCK_S)); + + if (mode == LOCK_S) { + + return(LOCK_X); + } + + return(LOCK_S); +} + +/************************************************************************* +Checks if a lock request lock1 has to wait for request lock2. NOTE that we, +for simplicity, ignore the gap bits in locks, and treat gap type lock +requests like non-gap lock requests. */ +UNIV_INLINE +ibool +lock_has_to_wait( +/*=============*/ + /* out: TRUE if lock1 has to wait lock2 to be removed */ + lock_t* lock1, /* in: waiting record lock */ + lock_t* lock2) /* in: another lock; NOTE that it is assumed that this + has a lock bit set on the same record as in lock1 */ +{ + if ((lock1->trx != lock2->trx) + && !lock_mode_compatible(lock_get_mode(lock1), + lock_get_mode(lock2))) { + return(TRUE); + } + + return(FALSE); +} + +/*============== RECORD LOCK BASIC FUNCTIONS ============================*/ + +/************************************************************************* +Gets the number of bits in a record lock bitmap. */ +UNIV_INLINE +ulint +lock_rec_get_n_bits( +/*================*/ + /* out: number of bits */ + lock_t* lock) /* in: record lock */ +{ + return(lock->un_member.rec_lock.n_bits); +} + +/************************************************************************* +Gets the nth bit of a record lock. */ +UNIV_INLINE +ibool +lock_rec_get_nth_bit( +/*=================*/ + /* out: TRUE if bit set */ + lock_t* lock, /* in: record lock */ + ulint i) /* in: index of the bit */ +{ + ulint byte_index; + ulint bit_index; + ulint b; + + ut_ad(lock); + ut_ad(lock_get_type(lock) == LOCK_REC); + + if (i >= lock->un_member.rec_lock.n_bits) { + + return(FALSE); + } + + byte_index = i / 8; + bit_index = i % 8; + + b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index); + + return(ut_bit_get_nth(b, bit_index)); +} + +/************************************************************************** +Sets the nth bit of a record lock to TRUE. */ +UNIV_INLINE +void +lock_rec_set_nth_bit( +/*==================*/ + lock_t* lock, /* in: record lock */ + ulint i) /* in: index of the bit */ +{ + ulint byte_index; + ulint bit_index; + byte* ptr; + ulint b; + + ut_ad(lock); + ut_ad(lock_get_type(lock) == LOCK_REC); + ut_ad(i < lock->un_member.rec_lock.n_bits); + + byte_index = i / 8; + bit_index = i % 8; + + ptr = (byte*)lock + sizeof(lock_t) + byte_index; + + b = (ulint)*ptr; + + b = ut_bit_set_nth(b, bit_index, TRUE); + + *ptr = (byte)b; +} + +/************************************************************************** +Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, +if none found. */ +static +ulint +lock_rec_find_set_bit( +/*==================*/ + /* out: bit index == heap number of the record, or + ULINT_UNDEFINED if none found */ + lock_t* lock) /* in: record lock with at least one bit set */ +{ + ulint i; + + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { + + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +/************************************************************************** +Resets the nth bit of a record lock. */ +UNIV_INLINE +void +lock_rec_reset_nth_bit( +/*===================*/ + lock_t* lock, /* in: record lock */ + ulint i) /* in: index of the bit which must be set to TRUE + when this function is called */ +{ + ulint byte_index; + ulint bit_index; + byte* ptr; + ulint b; + + ut_ad(lock); + ut_ad(lock_get_type(lock) == LOCK_REC); + ut_ad(i < lock->un_member.rec_lock.n_bits); + + byte_index = i / 8; + bit_index = i % 8; + + ptr = (byte*)lock + sizeof(lock_t) + byte_index; + + b = (ulint)*ptr; + + b = ut_bit_set_nth(b, bit_index, FALSE); + + *ptr = (byte)b; +} + +/************************************************************************* +Gets the first or next record lock on a page. */ +UNIV_INLINE +lock_t* +lock_rec_get_next_on_page( +/*======================*/ + /* out: next lock, NULL if none exists */ + lock_t* lock) /* in: a record lock */ +{ + ulint space; + ulint page_no; + + ut_ad(mutex_own(&kernel_mutex)); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + for (;;) { + lock = HASH_GET_NEXT(hash, lock); + + if (!lock) { + + break; + } + + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + } + + return(lock); +} + +/************************************************************************* +Gets the first record lock on a page, where the page is identified by its +file address. */ +UNIV_INLINE +lock_t* +lock_rec_get_first_on_page_addr( +/*============================*/ + /* out: first lock, NULL if none exists */ + ulint space, /* in: space */ + ulint page_no)/* in: page number */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + + lock = HASH_GET_FIRST(lock_sys->rec_hash, + lock_rec_hash(space, page_no)); + while (lock) { + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + + lock = HASH_GET_NEXT(hash, lock); + } + + return(lock); +} + +/************************************************************************* +Returns TRUE if there are explicit record locks on a page. */ + +ibool +lock_rec_expl_exist_on_page( +/*========================*/ + /* out: TRUE if there are explicit record locks on + the page */ + ulint space, /* in: space id */ + ulint page_no)/* in: page number */ +{ + ibool ret; + + mutex_enter(&kernel_mutex); + + if (lock_rec_get_first_on_page_addr(space, page_no)) { + ret = TRUE; + } else { + ret = FALSE; + } + + mutex_exit(&kernel_mutex); + + return(ret); +} + +/************************************************************************* +Gets the first record lock on a page, where the page is identified by a +pointer to it. */ +UNIV_INLINE +lock_t* +lock_rec_get_first_on_page( +/*=======================*/ + /* out: first lock, NULL if none exists */ + byte* ptr) /* in: pointer to somewhere on the page */ +{ + ulint hash; + lock_t* lock; + ulint space; + ulint page_no; + + ut_ad(mutex_own(&kernel_mutex)); + + hash = buf_frame_get_lock_hash_val(ptr); + + lock = HASH_GET_FIRST(lock_sys->rec_hash, hash); + + while (lock) { + space = buf_frame_get_space_id(ptr); + page_no = buf_frame_get_page_no(ptr); + + if ((lock->un_member.rec_lock.space == space) + && (lock->un_member.rec_lock.page_no == page_no)) { + + break; + } + + lock = HASH_GET_NEXT(hash, lock); + } + + return(lock); +} + +/************************************************************************* +Gets the next explicit lock request on a record. */ +UNIV_INLINE +lock_t* +lock_rec_get_next( +/*==============*/ + /* out: next lock, NULL if none exists */ + rec_t* rec, /* in: record on a page */ + lock_t* lock) /* in: lock */ +{ + ut_ad(mutex_own(&kernel_mutex)); + + for (;;) { + lock = lock_rec_get_next_on_page(lock); + + if (lock == NULL) { + + return(NULL); + } + + if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) { + + return(lock); + } + } +} + +/************************************************************************* +Gets the first explicit lock request on a record. */ +UNIV_INLINE +lock_t* +lock_rec_get_first( +/*===============*/ + /* out: first lock, NULL if none exists */ + rec_t* rec) /* in: record on a page */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + + lock = lock_rec_get_first_on_page(rec); + + while (lock) { + if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) { + + break; + } + + lock = lock_rec_get_next_on_page(lock); + } + + return(lock); +} + +/************************************************************************* +Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock +pointer in the transaction! This function is used in lock object creation +and resetting. */ +static +void +lock_rec_bitmap_reset( +/*==================*/ + lock_t* lock) /* in: record lock */ +{ + byte* ptr; + ulint n_bytes; + ulint i; + + /* Reset to zero the bitmap which resides immediately after the lock + struct */ + + ptr = (byte*)lock + sizeof(lock_t); + + n_bytes = lock_rec_get_n_bits(lock) / 8; + + ut_ad((lock_rec_get_n_bits(lock) % 8) == 0); + + for (i = 0; i < n_bytes; i++) { + + *ptr = 0; + ptr++; + } +} + +/************************************************************************* +Copies a record lock to heap. */ +static +lock_t* +lock_rec_copy( +/*==========*/ + /* out: copy of lock */ + lock_t* lock, /* in: record lock */ + mem_heap_t* heap) /* in: memory heap */ +{ + lock_t* dupl_lock; + ulint size; + + size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8; + + dupl_lock = mem_heap_alloc(heap, size); + + ut_memcpy(dupl_lock, lock, size); + + return(dupl_lock); +} + +/************************************************************************* +Gets the previous record lock set on a record. */ +static +lock_t* +lock_rec_get_prev( +/*==============*/ + /* out: previous lock on the same record, NULL if + none exists */ + lock_t* in_lock,/* in: record lock */ + ulint heap_no)/* in: heap number of the record */ +{ + lock_t* lock; + ulint space; + ulint page_no; + lock_t* found_lock = NULL; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(lock_get_type(in_lock) == LOCK_REC); + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + + lock = lock_rec_get_first_on_page_addr(space, page_no); + + for (;;) { + ut_ad(lock); + + if (lock == in_lock) { + + return(found_lock); + } + + if (lock_rec_get_nth_bit(lock, heap_no)) { + + found_lock = lock; + } + + lock = lock_rec_get_next_on_page(lock); + } +} + +/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/ + +/************************************************************************* +Checks if a transaction has the specified table lock, or stronger. */ +UNIV_INLINE +lock_t* +lock_table_has( +/*===========*/ + /* out: lock or NULL */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: table */ + ulint mode) /* in: lock mode */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + + /* Look for stronger locks the same trx already has on the table */ + + lock = UT_LIST_GET_LAST(table->locks); + + while (lock != NULL) { + + if ((lock->trx == trx) + && (lock_mode_stronger_or_eq(lock_get_mode(lock), mode))) { + + /* The same trx already has locked the table in + a mode stronger or equal to the mode given */ + + ut_ad(!lock_get_wait(lock)); + + return(lock); + } + + lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + } + + return(NULL); +} + +/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ + +/************************************************************************* +Checks if a transaction has a GRANTED explicit non-gap lock on rec, stronger +or equal to mode. */ +UNIV_INLINE +lock_t* +lock_rec_has_expl( +/*==============*/ + /* out: lock or NULL */ + ulint mode, /* in: lock mode */ + rec_t* rec, /* in: record */ + trx_t* trx) /* in: transaction */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((mode == LOCK_X) || (mode == LOCK_S)); + + lock = lock_rec_get_first(rec); + + while (lock) { + if ((lock->trx == trx) + && lock_mode_stronger_or_eq(lock_get_mode(lock), mode) + && !lock_get_wait(lock) + && !(lock_rec_get_gap(lock) + || page_rec_is_supremum(rec))) { + return(lock); + } + + lock = lock_rec_get_next(rec, lock); + } + + return(NULL); +} + +/************************************************************************* +Checks if some other transaction has an explicit lock request stronger or +equal to mode on rec or gap, waiting or granted, in the lock queue. */ +UNIV_INLINE +lock_t* +lock_rec_other_has_expl_req( +/*========================*/ + /* out: lock or NULL */ + ulint mode, /* in: lock mode */ + ulint gap, /* in: LOCK_GAP if also gap locks are taken + into account, or 0 if not */ + ulint wait, /* in: LOCK_WAIT if also waiting locks are + taken into account, or 0 if not */ + rec_t* rec, /* in: record to look at */ + trx_t* trx) /* in: transaction, or NULL if requests + by any transaction are wanted */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((mode == LOCK_X) || (mode == LOCK_S)); + + lock = lock_rec_get_first(rec); + + while (lock) { + if ((lock->trx != trx) + && (gap || + !(lock_rec_get_gap(lock) || page_rec_is_supremum(rec))) + && (wait || !lock_get_wait(lock)) + && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) { + + return(lock); + } + + lock = lock_rec_get_next(rec, lock); + } + + return(NULL); +} + +/************************************************************************* +Looks for a suitable type record lock struct by the same trx on the same page. +This can be used to save space when a new record lock should be set on a page: +no new struct is needed, if a suitable old is found. */ +UNIV_INLINE +lock_t* +lock_rec_find_similar_on_page( +/*==========================*/ + /* out: lock or NULL */ + ulint type_mode, /* in: lock type_mode field */ + rec_t* rec, /* in: record */ + trx_t* trx) /* in: transaction */ +{ + lock_t* lock; + ulint heap_no; + + ut_ad(mutex_own(&kernel_mutex)); + + heap_no = rec_get_heap_no(rec); + + lock = lock_rec_get_first_on_page(rec); + + while (lock != NULL) { + if ((lock->trx == trx) + && (lock->type_mode == type_mode) + && (lock_rec_get_n_bits(lock) > heap_no)) { + + return(lock); + } + + lock = lock_rec_get_next_on_page(lock); + } + + return(NULL); +} + +/************************************************************************* +Checks if some transaction has an implicit x-lock on a record in a secondary +index. */ + +trx_t* +lock_sec_rec_some_has_impl_off_kernel( +/*==================================*/ + /* out: transaction which has the x-lock, or + NULL */ + rec_t* rec, /* in: user record */ + dict_index_t* index) /* in: secondary index */ +{ + page_t* page; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(!(index->type & DICT_CLUSTERED)); + ut_ad(page_rec_is_user_rec(rec)); + + page = buf_frame_align(rec); + + /* Some transaction may have an implicit x-lock on the record only + if the max trx id for the page >= min trx id for the trx list, or + database recovery is running. We do not write the changes of a page + max trx id to the log, and therefore during recovery, this value + for a page may be incorrect. */ + + if (!(ut_dulint_cmp(page_get_max_trx_id(page), + trx_list_get_min_trx_id()) >= 0) + && !recv_recovery_is_on()) { + + return(NULL); + } + + /* Ok, in this case it is possible that some transaction has an + implicit x-lock. We have to look in the clustered index. */ + + return(row_vers_impl_x_locked_off_kernel(rec, index)); +} + +/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/ + +/************************************************************************* +Creates a new record lock and inserts it to the lock queue. Does NOT check +for deadlocks or lock compatibility! */ +static +lock_t* +lock_rec_create( +/*============*/ + /* out: created lock, NULL if out of memory */ + ulint type_mode,/* in: lock mode and wait flag, type is + ignored and replaced by LOCK_REC */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: index of record */ + trx_t* trx) /* in: transaction */ +{ + page_t* page; + lock_t* lock; + ulint page_no; + ulint heap_no; + ulint space; + ulint n_bits; + ulint n_bytes; + + ut_ad(mutex_own(&kernel_mutex)); + + page = buf_frame_align(rec); + space = buf_frame_get_space_id(page); + page_no = buf_frame_get_page_no(page); + heap_no = rec_get_heap_no(rec); + + /* If rec is the supremum record, then we reset the gap bit, as + all locks on the supremum are automatically of the gap type, and + we try to avoid unnecessary memory consumption of a new record lock + struct for a gap type lock */ + + if (rec == page_get_supremum_rec(page)) { + + type_mode = type_mode & ~LOCK_GAP; + } + + /* Make lock bitmap bigger by a safety margin */ + n_bits = page_header_get_field(page, PAGE_N_HEAP) + + LOCK_PAGE_BITMAP_MARGIN; + n_bytes = 1 + n_bits / 8; + + lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes); + + if (lock == NULL) { + + return(NULL); + } + + UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock); + + lock->trx = trx; + + lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; + lock->index = index; + + lock->un_member.rec_lock.space = space; + lock->un_member.rec_lock.page_no = page_no; + lock->un_member.rec_lock.n_bits = n_bytes * 8; + + /* Reset to zero the bitmap which resides immediately after the + lock struct */ + + lock_rec_bitmap_reset(lock); + + /* Set the bit corresponding to rec */ + lock_rec_set_nth_bit(lock, heap_no); + + HASH_INSERT(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), lock); + if (type_mode & LOCK_WAIT) { + + lock_set_lock_and_trx_wait(lock, trx); + } + + return(lock); +} + +/************************************************************************* +Enqueues a waiting request for a lock which cannot be granted immediately. +Checks for deadlocks. */ +static +ulint +lock_rec_enqueue_waiting( +/*=====================*/ + /* out: DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ + ulint type_mode,/* in: lock mode this transaction is + requesting: LOCK_S or LOCK_X, ORed with + LOCK_GAP if a gap lock is requested */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: index of record */ + que_thr_t* thr) /* in: query thread */ +{ + lock_t* lock; + trx_t* trx; + + ut_ad(mutex_own(&kernel_mutex)); + + /* Test if there already is some other reason to suspend thread: + we do not enqueue a lock request if the query thread should be + stopped anyway */ + + if (que_thr_stop(thr)) { + + return(DB_QUE_THR_SUSPENDED); + } + + trx = thr_get_trx(thr); + + /* Enqueue the lock request that will wait to be granted */ + lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx); + + /* Check if a deadlock occurs: if yes, remove the lock request and + return an error code */ + + if (lock_deadlock_occurs(lock, trx)) { + + lock_reset_lock_and_trx_wait(lock); + lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec)); + + return(DB_DEADLOCK); + } + + trx->que_state = TRX_QUE_LOCK_WAIT; + + ut_a(que_thr_stop(thr)); + + if (lock_print_waits) { + printf("Lock wait for trx %lu in index %s\n", + ut_dulint_get_low(trx->id), index->name); + } + + return(DB_LOCK_WAIT); +} + +/************************************************************************* +Adds a record lock request in the record queue. The request is normally +added as the last in the queue, but if there are no waiting lock requests +on the record, and the request to be added is not a waiting request, we +can reuse a suitable record lock object already existing on the same page, +just setting the appropriate bit in its bitmap. This is a low-level function +which does NOT check for deadlocks or lock compatibility! */ +static +lock_t* +lock_rec_add_to_queue( +/*==================*/ + /* out: lock where the bit was set, NULL if out + of memory */ + ulint type_mode,/* in: lock mode, wait, and gap flags; type + is ignored and replaced by LOCK_REC */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: index of record */ + trx_t* trx) /* in: transaction */ +{ + lock_t* lock; + lock_t* similar_lock = NULL; + ulint heap_no; + page_t* page; + ibool somebody_waits = FALSE; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) + || ((type_mode & LOCK_MODE_MASK) != LOCK_S) + || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, rec, trx)); + ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) + || ((type_mode & LOCK_MODE_MASK) != LOCK_X) + || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, rec, trx)); + + type_mode = type_mode | LOCK_REC; + + page = buf_frame_align(rec); + + /* If rec is the supremum record, then we can reset the gap bit, as + all locks on the supremum are automatically of the gap type, and we + try to avoid unnecessary memory consumption of a new record lock + struct for a gap type lock */ + + if (rec == page_get_supremum_rec(page)) { + + type_mode = type_mode & ~LOCK_GAP; + } + + /* Look for a waiting lock request on the same record, or for a + similar record lock on the same page */ + + heap_no = rec_get_heap_no(rec); + lock = lock_rec_get_first_on_page(rec); + + while (lock != NULL) { + if (lock_get_wait(lock) + && (lock_rec_get_nth_bit(lock, heap_no))) { + + somebody_waits = TRUE; + } + + lock = lock_rec_get_next_on_page(lock); + } + + similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx); + + if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) { + + lock_rec_set_nth_bit(similar_lock, heap_no); + + return(similar_lock); + } + + return(lock_rec_create(type_mode, rec, index, trx)); +} + +/************************************************************************* +This is a fast routine for locking a record in the most common cases: +there are no explicit locks on the page, or there is just one lock, owned +by this transaction, and of the right type_mode. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. */ +UNIV_INLINE +ibool +lock_rec_lock_fast( +/*===============*/ + /* out: TRUE if locking succeeded */ + ibool impl, /* in: if TRUE, no lock is set if no wait + is necessary: we assume that the caller will + set an implicit lock */ + ulint mode, /* in: lock mode */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: index of record */ + que_thr_t* thr) /* in: query thread */ +{ + lock_t* lock; + ulint heap_no; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((mode == LOCK_X) || (mode == LOCK_S)); + + heap_no = rec_get_heap_no(rec); + + lock = lock_rec_get_first_on_page(rec); + + if (lock == NULL) { + if (!impl) { + lock_rec_create(mode, rec, index, thr_get_trx(thr)); + } + + return(TRUE); + } + + if (lock_rec_get_next_on_page(lock)) { + + return(FALSE); + } + + if ((lock->trx != thr_get_trx(thr)) + || (lock->type_mode != (mode | LOCK_REC)) + || (lock_rec_get_n_bits(lock) <= heap_no)) { + return(FALSE); + } + + if (!impl) { + lock_rec_set_nth_bit(lock, heap_no); + } + + return(TRUE); +} + +/************************************************************************* +This is the general, and slower, routine for locking a record. This is a +low-level function which does NOT look at implicit locks! Checks lock +compatibility within explicit locks. */ +static +ulint +lock_rec_lock_slow( +/*===============*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or error + code */ + ibool impl, /* in: if TRUE, no lock is set if no wait is + necessary: we assume that the caller will set + an implicit lock */ + ulint mode, /* in: lock mode */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: index of record */ + que_thr_t* thr) /* in: query thread */ +{ + ulint confl_mode; + trx_t* trx; + ulint err; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((mode == LOCK_X) || (mode == LOCK_S)); + + trx = thr_get_trx(thr); + confl_mode = lock_get_confl_mode(mode); + + ut_ad((mode != LOCK_S) || lock_table_has(trx, index->table, + LOCK_IS)); + ut_ad((mode != LOCK_X) || lock_table_has(trx, index->table, + LOCK_IX)); + if (lock_rec_has_expl(mode, rec, trx)) { + /* The trx already has a strong enough lock on rec: do + nothing */ + + err = DB_SUCCESS; + } else if (lock_rec_other_has_expl_req(confl_mode, 0, LOCK_WAIT, rec, + trx)) { + /* If another transaction has a non-gap conflicting request in + the queue, as this transaction does not have a lock strong + enough already granted on the record, we have to wait. */ + + err = lock_rec_enqueue_waiting(mode, rec, index, thr); + } else { + if (!impl) { + /* Set the requested lock on the record */ + + lock_rec_add_to_queue(LOCK_REC | mode, rec, index, + trx); + } + + err = DB_SUCCESS; + } + + return(err); +} + +/************************************************************************* +Tries to lock the specified record in the mode requested. If not immediately +possible, enqueues a waiting lock request. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. */ + +ulint +lock_rec_lock( +/*==========*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or error + code */ + ibool impl, /* in: if TRUE, no lock is set if no wait is + necessary: we assume that the caller will set + an implicit lock */ + ulint mode, /* in: lock mode */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: index of record */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((mode != LOCK_S) || lock_table_has(thr_get_trx(thr), + index->table, LOCK_IS)); + ut_ad((mode != LOCK_X) || lock_table_has(thr_get_trx(thr), + index->table, LOCK_IX)); + + if (lock_rec_lock_fast(impl, mode, rec, index, thr)) { + + /* We try a simplified and faster subroutine for the most + common cases */ + + err = DB_SUCCESS; + } else { + err = lock_rec_lock_slow(impl, mode, rec, index, thr); + } + + return(err); +} + +/************************************************************************* +Checks if a waiting record lock request still has to wait in a queue. +NOTE that we, for simplicity, ignore the gap bits in locks, and treat +gap type lock requests like non-gap lock requests. */ +static +ibool +lock_rec_has_to_wait_in_queue( +/*==========================*/ + /* out: TRUE if still has to wait */ + lock_t* wait_lock) /* in: waiting record lock */ +{ + lock_t* lock; + ulint space; + ulint page_no; + ulint heap_no; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(lock_get_wait(wait_lock)); + + space = wait_lock->un_member.rec_lock.space; + page_no = wait_lock->un_member.rec_lock.page_no; + heap_no = lock_rec_find_set_bit(wait_lock); + + lock = lock_rec_get_first_on_page_addr(space, page_no); + + while (lock != wait_lock) { + + if (lock_has_to_wait(wait_lock, lock) + && lock_rec_get_nth_bit(lock, heap_no)) { + + return(TRUE); + } + + lock = lock_rec_get_next_on_page(lock); + } + + return(FALSE); +} + +/***************************************************************** +Grants a lock to a waiting lock request and releases the waiting +transaction. */ + +void +lock_grant( +/*=======*/ + lock_t* lock) /* in: waiting lock request */ +{ + ut_ad(mutex_own(&kernel_mutex)); + + lock_reset_lock_and_trx_wait(lock); + + if (lock_print_waits) { + printf("Lock wait for trx %lu ends\n", + ut_dulint_get_low(lock->trx->id)); + } + + trx_end_lock_wait(lock->trx); +} + +/***************************************************************** +Cancels a waiting record lock request and releases the waiting transaction +that requested it. NOTE: does NOT check if waiting lock requests behind this +one can now be granted! */ + +void +lock_rec_cancel( +/*============*/ + lock_t* lock) /* in: waiting record lock request */ +{ + ut_ad(mutex_own(&kernel_mutex)); + + /* Reset the bit in lock bitmap */ + lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); + + /* Reset the wait flag and the back pointer to lock in trx */ + + lock_reset_lock_and_trx_wait(lock); + + /* The following function releases the trx from lock wait */ + + trx_end_lock_wait(lock->trx); +} + +/***************************************************************** +Removes a record lock request, waiting or granted, from the queue and +grants locks to other transactions in the queue if they now are entitled +to a lock. NOTE: all record locks contained in in_lock are removed. */ + +void +lock_rec_dequeue_from_page( +/*=======================*/ + lock_t* in_lock)/* in: record lock object: all record locks which + are contained in this lock object are removed; + transactions waiting behind will get their lock + requests granted, if they are now qualified to it */ +{ + ulint space; + ulint page_no; + lock_t* lock; + trx_t* trx; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(lock_get_type(in_lock) == LOCK_REC); + + trx = in_lock->trx; + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + + HASH_DELETE(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), in_lock); + + UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock); + + /* Check if waiting locks in the queue can now be granted: grant + locks if there are no conflicting locks ahead. */ + + lock = lock_rec_get_first_on_page_addr(space, page_no); + + while (lock != NULL) { + + if (lock_get_wait(lock) + && !lock_rec_has_to_wait_in_queue(lock)) { + + /* Grant the lock */ + lock_grant(lock); + } + + lock = lock_rec_get_next_on_page(lock); + } +} + +/***************************************************************** +Removes record lock objects set on an index page which is discarded. This +function does not move locks, or check for waiting locks, therefore the +lock bitmaps must already be reset when this function is called. */ +static +void +lock_rec_free_all_from_discard_page( +/*================================*/ + page_t* page) /* in: page to be discarded */ +{ + ulint space; + ulint page_no; + lock_t* lock; + lock_t* next_lock; + trx_t* trx; + + ut_ad(mutex_own(&kernel_mutex)); + + space = buf_frame_get_space_id(page); + page_no = buf_frame_get_page_no(page); + + lock = lock_rec_get_first_on_page_addr(space, page_no); + + while (lock != NULL) { + ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED); + ut_ad(!lock_get_wait(lock)); + + next_lock = lock_rec_get_next_on_page(lock); + + HASH_DELETE(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), lock); + trx = lock->trx; + + UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock); + + lock = next_lock; + } +} + +/*============= RECORD LOCK MOVING AND INHERITING ===================*/ + +/***************************************************************** +Resets the lock bits for a single record. Releases transactions waiting for +lock requests here. */ + +void +lock_rec_reset_and_release_wait( +/*============================*/ + rec_t* rec) /* in: record whose locks bits should be reset */ +{ + lock_t* lock; + ulint heap_no; + + ut_ad(mutex_own(&kernel_mutex)); + + heap_no = rec_get_heap_no(rec); + + lock = lock_rec_get_first(rec); + + while (lock != NULL) { + + if (lock_get_wait(lock)) { + lock_rec_cancel(lock); + } else { + lock_rec_reset_nth_bit(lock, heap_no); + } + + lock = lock_rec_get_next(rec, lock); + } +} + +/***************************************************************** +Makes a record to inherit the locks of another record as gap type locks, but +does not reset the lock bits of the other record. Also waiting lock requests +on rec are inherited as GRANTED gap locks. */ + +void +lock_rec_inherit_to_gap( +/*====================*/ + rec_t* heir, /* in: record which inherits */ + rec_t* rec) /* in: record from which inherited; does NOT reset + the locks on this record */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + + lock = lock_rec_get_first(rec); + + while (lock != NULL) { + lock_rec_add_to_queue((lock->type_mode | LOCK_GAP) & ~LOCK_WAIT, + heir, lock->index, lock->trx); + lock = lock_rec_get_next(rec, lock); + } +} + +/***************************************************************** +Moves the locks of a record to another record and resets the lock bits of +the donating record. */ +static +void +lock_rec_move( +/*==========*/ + rec_t* receiver, /* in: record which gets locks; this record + must have no lock requests on it! */ + rec_t* donator) /* in: record which gives locks */ +{ + lock_t* lock; + ulint heap_no; + ulint type_mode; + + ut_ad(mutex_own(&kernel_mutex)); + + heap_no = rec_get_heap_no(donator); + + lock = lock_rec_get_first(donator); + + ut_ad(lock_rec_get_first(receiver) == NULL); + + while (lock != NULL) { + type_mode = lock->type_mode; + + lock_rec_reset_nth_bit(lock, heap_no); + + if (lock_get_wait(lock)) { + lock_reset_lock_and_trx_wait(lock); + } + + /* Note that we FIRST reset the bit, and then set the lock: + the function works also if donator == receiver */ + + lock_rec_add_to_queue(type_mode, receiver, lock->index, + lock->trx); + lock = lock_rec_get_next(donator, lock); + } + + ut_ad(lock_rec_get_first(donator) == NULL); +} + +/***************************************************************** +Updates the lock table when we have reorganized a page. NOTE: we copy +also the locks set on the infimum of the page; the infimum may carry +locks if an update of a record is occurring on the page, and its locks +were temporarily stored on the infimum. */ + +void +lock_move_reorganize_page( +/*======================*/ + page_t* page, /* in: old index page, now reorganized */ + page_t* old_page) /* in: copy of the old, not reorganized page */ +{ + lock_t* lock; + lock_t* old_lock; + page_cur_t cur1; + page_cur_t cur2; + ulint old_heap_no; + UT_LIST_BASE_NODE_T(lock_t) old_locks; + mem_heap_t* heap = NULL; + rec_t* sup; + + lock_mutex_enter_kernel(); + + lock = lock_rec_get_first_on_page(page); + + if (lock == NULL) { + lock_mutex_exit_kernel(); + + return; + } + + heap = mem_heap_create(256); + + /* Copy first all the locks on the page to heap and reset the + bitmaps in the original locks; chain the copies of the locks + using the trx_locks field in them. */ + + UT_LIST_INIT(old_locks); + + while (lock != NULL) { + + /* Make a copy of the lock */ + old_lock = lock_rec_copy(lock, heap); + + UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock); + + /* Reset bitmap of lock */ + lock_rec_bitmap_reset(lock); + + if (lock_get_wait(lock)) { + lock_reset_lock_and_trx_wait(lock); + } + + lock = lock_rec_get_next_on_page(lock); + } + + sup = page_get_supremum_rec(page); + + lock = UT_LIST_GET_FIRST(old_locks); + + while (lock) { + /* NOTE: we copy also the locks set on the infimum and + supremum of the page; the infimum may carry locks if an + update of a record is occurring on the page, and its locks + were temporarily stored on the infimum */ + + page_cur_set_before_first(page, &cur1); + page_cur_set_before_first(old_page, &cur2); + + /* Set locks according to old locks */ + for (;;) { + ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size( + page_cur_get_rec(&cur2)))); + + old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2)); + + if (lock_rec_get_nth_bit(lock, old_heap_no)) { + + /* NOTE that the old lock bitmap could be too + small for the new heap number! */ + + lock_rec_add_to_queue(lock->type_mode, + page_cur_get_rec(&cur1), + lock->index, lock->trx); + + /* if ((page_cur_get_rec(&cur1) == sup) + && lock_get_wait(lock)) { + printf( + "---\n--\n!!!Lock reorg: supr type %lu\n", + lock->type_mode); + } */ + } + + if (page_cur_get_rec(&cur1) == sup) { + + break; + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + + /* Remember that we chained old locks on the trx_locks field: */ + + lock = UT_LIST_GET_NEXT(trx_locks, lock); + } + + lock_mutex_exit_kernel(); + + mem_heap_free(heap); + +/* ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page), + buf_frame_get_page_no(page))); */ +} + +/***************************************************************** +Moves the explicit locks on user records to another page if a record +list end is moved to another page. */ + +void +lock_move_rec_list_end( +/*===================*/ + page_t* new_page, /* in: index page to move to */ + page_t* page, /* in: index page */ + rec_t* rec) /* in: record on page: this is the + first record moved */ +{ + lock_t* lock; + page_cur_t cur1; + page_cur_t cur2; + ulint heap_no; + rec_t* sup; + ulint type_mode; + + lock_mutex_enter_kernel(); + + /* Note: when we move locks from record to record, waiting locks + and possible granted gap type locks behind them are enqueued in + the original order, because new elements are inserted to a hash + table to the end of the hash chain, and lock_rec_add_to_queue + does not reuse locks if there are waiters in the queue. */ + + sup = page_get_supremum_rec(page); + + lock = lock_rec_get_first_on_page(page); + + while (lock != NULL) { + + page_cur_position(rec, &cur1); + + if (page_cur_is_before_first(&cur1)) { + page_cur_move_to_next(&cur1); + } + + page_cur_set_before_first(new_page, &cur2); + page_cur_move_to_next(&cur2); + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (page_cur_get_rec(&cur1) != sup) { + + ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size( + page_cur_get_rec(&cur2)))); + + heap_no = rec_get_heap_no(page_cur_get_rec(&cur1)); + + if (lock_rec_get_nth_bit(lock, heap_no)) { + type_mode = lock->type_mode; + + lock_rec_reset_nth_bit(lock, heap_no); + + if (lock_get_wait(lock)) { + lock_reset_lock_and_trx_wait(lock); + } + + lock_rec_add_to_queue(type_mode, + page_cur_get_rec(&cur2), + lock->index, lock->trx); + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + + lock = lock_rec_get_next_on_page(lock); + } + + lock_mutex_exit_kernel(); + +/* ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page), + buf_frame_get_page_no(page))); + ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page), + buf_frame_get_page_no(new_page))); */ +} + +/***************************************************************** +Moves the explicit locks on user records to another page if a record +list start is moved to another page. */ + +void +lock_move_rec_list_start( +/*=====================*/ + page_t* new_page, /* in: index page to move to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page: this is the + first record NOT copied */ + rec_t* old_end) /* in: old previous-to-last record on + new_page before the records were copied */ +{ + lock_t* lock; + page_cur_t cur1; + page_cur_t cur2; + ulint heap_no; + ulint type_mode; + + ut_ad(new_page); + + lock_mutex_enter_kernel(); + + lock = lock_rec_get_first_on_page(page); + + while (lock != NULL) { + + page_cur_set_before_first(page, &cur1); + page_cur_move_to_next(&cur1); + + page_cur_position(old_end, &cur2); + page_cur_move_to_next(&cur2); + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (page_cur_get_rec(&cur1) != rec) { + + ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + page_cur_get_rec(&cur2), + rec_get_data_size( + page_cur_get_rec(&cur2)))); + + heap_no = rec_get_heap_no(page_cur_get_rec(&cur1)); + + if (lock_rec_get_nth_bit(lock, heap_no)) { + type_mode = lock->type_mode; + + lock_rec_reset_nth_bit(lock, heap_no); + + if (lock_get_wait(lock)) { + lock_reset_lock_and_trx_wait(lock); + } + + lock_rec_add_to_queue(type_mode, + page_cur_get_rec(&cur2), + lock->index, lock->trx); + } + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + + lock = lock_rec_get_next_on_page(lock); + } + + lock_mutex_exit_kernel(); + +/* ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page), + buf_frame_get_page_no(page))); + ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page), + buf_frame_get_page_no(new_page))); */ +} + +/***************************************************************** +Updates the lock table when a page is split to the right. */ + +void +lock_update_split_right( +/*====================*/ + page_t* right_page, /* in: right page */ + page_t* left_page) /* in: left page */ +{ + lock_mutex_enter_kernel(); + + /* Move the locks on the supremum of the left page to the supremum + of the right page */ + + lock_rec_move(page_get_supremum_rec(right_page), + page_get_supremum_rec(left_page)); + + /* Inherit the locks to the supremum of left page from the successor + of the infimum on right page */ + + lock_rec_inherit_to_gap(page_get_supremum_rec(left_page), + page_rec_get_next(page_get_infimum_rec(right_page))); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when a page is merged to the right. */ + +void +lock_update_merge_right( +/*====================*/ + rec_t* orig_succ, /* in: original successor of infimum + on the right page before merge */ + page_t* left_page) /* in: merged index page which will be + discarded */ +{ + lock_mutex_enter_kernel(); + + /* Inherit the locks from the supremum of the left page to the + original successor of infimum on the right page, to which the left + page was merged */ + + lock_rec_inherit_to_gap(orig_succ, page_get_supremum_rec(left_page)); + + /* Reset the locks on the supremum of the left page, releasing + waiting transactions */ + + lock_rec_reset_and_release_wait(page_get_supremum_rec(left_page)); + + lock_rec_free_all_from_discard_page(left_page); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when the root page is copied to another in +btr_root_raise_and_insert. Note that we leave lock structs on the +root page, even though they do not make sense on other than leaf +pages: the reason is that in a pessimistic update the infimum record +of the root page will act as a dummy carrier of the locks of the record +to be updated. */ + +void +lock_update_root_raise( +/*===================*/ + page_t* new_page, /* in: index page to which copied */ + page_t* root) /* in: root page */ +{ + lock_mutex_enter_kernel(); + + /* Move the locks on the supremum of the root to the supremum + of new_page */ + + lock_rec_move(page_get_supremum_rec(new_page), + page_get_supremum_rec(root)); + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when a page is copied to another and the original page +is removed from the chain of leaf pages, except if page is the root! */ + +void +lock_update_copy_and_discard( +/*=========================*/ + page_t* new_page, /* in: index page to which copied */ + page_t* page) /* in: index page; NOT the root! */ +{ + lock_mutex_enter_kernel(); + + /* Move the locks on the supremum of the old page to the supremum + of new_page */ + + lock_rec_move(page_get_supremum_rec(new_page), + page_get_supremum_rec(page)); + lock_rec_free_all_from_discard_page(page); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when a page is split to the left. */ + +void +lock_update_split_left( +/*===================*/ + page_t* right_page, /* in: right page */ + page_t* left_page) /* in: left page */ +{ + lock_mutex_enter_kernel(); + + /* Inherit the locks to the supremum of the left page from the + successor of the infimum on the right page */ + + lock_rec_inherit_to_gap(page_get_supremum_rec(left_page), + page_rec_get_next(page_get_infimum_rec(right_page))); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when a page is merged to the left. */ + +void +lock_update_merge_left( +/*===================*/ + page_t* left_page, /* in: left page to which merged */ + rec_t* orig_pred, /* in: original predecessor of supremum + on the left page before merge */ + page_t* right_page) /* in: merged index page which will be + discarded */ +{ + lock_mutex_enter_kernel(); + + if (page_rec_get_next(orig_pred) != page_get_supremum_rec(left_page)) { + + /* Inherit the locks on the supremum of the left page to the + first record which was moved from the right page */ + + lock_rec_inherit_to_gap(page_rec_get_next(orig_pred), + page_get_supremum_rec(left_page)); + + /* Reset the locks on the supremum of the left page, + releasing waiting transactions */ + + lock_rec_reset_and_release_wait(page_get_supremum_rec( + left_page)); + } + + /* Move the locks from the supremum of right page to the supremum + of the left page */ + + lock_rec_move(page_get_supremum_rec(left_page), + page_get_supremum_rec(right_page)); + + lock_rec_free_all_from_discard_page(right_page); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Resets the original locks on heir and replaces them with gap type locks +inherited from rec. */ + +void +lock_rec_reset_and_inherit_gap_locks( +/*=================================*/ + rec_t* heir, /* in: heir record */ + rec_t* rec) /* in: record */ +{ + mutex_enter(&kernel_mutex); + + lock_rec_reset_and_release_wait(heir); + + lock_rec_inherit_to_gap(heir, rec); + + mutex_exit(&kernel_mutex); +} + +/***************************************************************** +Updates the lock table when a page is discarded. */ + +void +lock_update_discard( +/*================*/ + rec_t* heir, /* in: record which will inherit the locks */ + page_t* page) /* in: index page which will be discarded */ +{ + rec_t* rec; + + lock_mutex_enter_kernel(); + + if (NULL == lock_rec_get_first_on_page(page)) { + /* No locks exist on page, nothing to do */ + + lock_mutex_exit_kernel(); + + return; + } + + /* Inherit all the locks on the page to the record and reset all + the locks on the page */ + + rec = page_get_infimum_rec(page); + + for (;;) { + lock_rec_inherit_to_gap(heir, rec); + + /* Reset the locks on rec, releasing waiting transactions */ + + lock_rec_reset_and_release_wait(rec); + + if (rec == page_get_supremum_rec(page)) { + + break; + } + + rec = page_rec_get_next(rec); + } + + lock_rec_free_all_from_discard_page(page); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when a new user record is inserted. */ + +void +lock_update_insert( +/*===============*/ + rec_t* rec) /* in: the inserted record */ +{ + lock_mutex_enter_kernel(); + + /* Inherit the locks for rec, in gap mode, from the next record */ + + lock_rec_inherit_to_gap(rec, page_rec_get_next(rec)); + + lock_mutex_exit_kernel(); +} + +/***************************************************************** +Updates the lock table when a record is removed. */ + +void +lock_update_delete( +/*===============*/ + rec_t* rec) /* in: the record to be removed */ +{ + lock_mutex_enter_kernel(); + + /* Let the next record inherit the locks from rec, in gap mode */ + + lock_rec_inherit_to_gap(page_rec_get_next(rec), rec); + + /* Reset the lock bits on rec and release waiting transactions */ + + lock_rec_reset_and_release_wait(rec); + + lock_mutex_exit_kernel(); +} + +/************************************************************************* +Stores on the page infimum record the explicit locks of another record. +This function is used to store the lock state of a record when it is +updated and the size of the record changes in the update. The record +is moved in such an update, perhaps to another page. The infimum record +acts as a dummy carrier record, taking care of lock releases while the +actual record is being moved. */ + +void +lock_rec_store_on_page_infimum( +/*===========================*/ + rec_t* rec) /* in: record whose lock state is stored + on the infimum record of the same page; lock + bits are reset on the record */ +{ + page_t* page; + + page = buf_frame_align(rec); + + lock_mutex_enter_kernel(); + + lock_rec_move(page_get_infimum_rec(page), rec); + + lock_mutex_exit_kernel(); +} + +/************************************************************************* +Restores the state of explicit lock requests on a single record, where the +state was stored on the infimum of the page. */ + +void +lock_rec_restore_from_page_infimum( +/*===============================*/ + rec_t* rec, /* in: record whose lock state is restored */ + page_t* page) /* in: page (rec is not necessarily on this page) + whose infimum stored the lock state; lock bits are + reset on the infimum */ +{ + lock_mutex_enter_kernel(); + + lock_rec_move(rec, page_get_infimum_rec(page)); + + lock_mutex_exit_kernel(); +} + +/*=========== DEADLOCK CHECKING ======================================*/ + +/************************************************************************ +Checks if a lock request results in a deadlock. */ +static +ibool +lock_deadlock_occurs( +/*=================*/ + /* out: TRUE if a deadlock was detected */ + lock_t* lock, /* in: lock the transaction is requesting */ + trx_t* trx) /* in: transaction */ +{ + dict_table_t* table; + dict_index_t* index; + ibool ret; + + ut_ad(trx && lock); + ut_ad(mutex_own(&kernel_mutex)); + + ret = lock_deadlock_recursive(trx, trx, lock); + + if (ret) { + if (lock_get_type(lock) == LOCK_TABLE) { + table = lock->un_member.tab_lock.table; + index = NULL; + } else { + index = lock->index; + table = index->table; + } + /* + sess_raise_error_low(trx, DB_DEADLOCK, lock->type_mode, table, + index, NULL, NULL, NULL); + */ + } + + return(ret); +} + +/************************************************************************ +Looks recursively for a deadlock. */ +static +ibool +lock_deadlock_recursive( +/*====================*/ + /* out: TRUE if a deadlock was detected */ + trx_t* start, /* in: recursion starting point */ + trx_t* trx, /* in: a transaction waiting for a lock */ + lock_t* wait_lock) /* in: the lock trx is waiting to be granted */ +{ + lock_t* lock; + ulint bit_no; + trx_t* lock_trx; + + ut_a(trx && start && wait_lock); + ut_ad(mutex_own(&kernel_mutex)); + + lock = wait_lock; + + if (lock_get_type(wait_lock) == LOCK_REC) { + + bit_no = lock_rec_find_set_bit(wait_lock); + + ut_a(bit_no != ULINT_UNDEFINED); + } + + /* Look at the locks ahead of wait_lock in the lock queue */ + + for (;;) { + if (lock_get_type(lock) == LOCK_TABLE) { + + lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + } else { + ut_ad(lock_get_type(lock) == LOCK_REC); + + lock = lock_rec_get_prev(lock, bit_no); + } + + if (lock == NULL) { + + return(FALSE); + } + + if (lock_has_to_wait(wait_lock, lock)) { + + lock_trx = lock->trx; + + if (lock_trx == start) { + if (lock_print_waits) { + printf("Deadlock detected\n"); + } + + return(TRUE); + } + + if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) { + + /* Another trx ahead has requested lock in an + incompatible mode, and is itself waiting for + a lock */ + + if (lock_deadlock_recursive(start, lock_trx, + lock_trx->wait_lock)) { + + return(TRUE); + } + } + } + }/* end of the 'for (;;)'-loop */ +} + +/*========================= TABLE LOCKS ==============================*/ + +/************************************************************************* +Creates a table lock object and adds it as the last in the lock queue +of the table. Does NOT check for deadlocks or lock compatibility. */ +UNIV_INLINE +lock_t* +lock_table_create( +/*==============*/ + /* out, own: new lock object, or NULL if + out of memory */ + dict_table_t* table, /* in: database table in dictionary cache */ + ulint type_mode,/* in: lock mode possibly ORed with + LOCK_WAIT */ + trx_t* trx) /* in: trx */ +{ + lock_t* lock; + + ut_ad(table && trx); + ut_ad(mutex_own(&kernel_mutex)); + + lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t)); + + if (lock == NULL) { + + return(NULL); + } + + UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock); + + lock->type_mode = type_mode | LOCK_TABLE; + lock->trx = trx; + + lock->un_member.tab_lock.table = table; + + UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); + + if (type_mode & LOCK_WAIT) { + + lock_set_lock_and_trx_wait(lock, trx); + } + + return(lock); +} + +/***************************************************************** +Removes a table lock request from the queue and the trx list of locks; +this is a low-level function which does NOT check if waiting requests +can now be granted. */ +UNIV_INLINE +void +lock_table_remove_low( +/*==================*/ + lock_t* lock) /* in: table lock */ +{ + dict_table_t* table; + trx_t* trx; + + ut_ad(mutex_own(&kernel_mutex)); + + table = lock->un_member.tab_lock.table; + trx = lock->trx; + + UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock); + UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); +} + +/************************************************************************* +Enqueues a waiting request for a table lock which cannot be granted +immediately. Checks for deadlocks. */ + +ulint +lock_table_enqueue_waiting( +/*=======================*/ + /* out: DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ + ulint mode, /* in: lock mode this transaction is + requesting */ + dict_table_t* table, /* in: table */ + que_thr_t* thr) /* in: query thread */ +{ + lock_t* lock; + trx_t* trx; + + ut_ad(mutex_own(&kernel_mutex)); + + /* Test if there already is some other reason to suspend thread: + we do not enqueue a lock request if the query thread should be + stopped anyway */ + + if (que_thr_stop(thr)) { + + return(DB_QUE_THR_SUSPENDED); + } + + trx = thr_get_trx(thr); + + /* Enqueue the lock request that will wait to be granted */ + + lock = lock_table_create(table, mode | LOCK_WAIT, trx); + + /* Check if a deadlock occurs: if yes, remove the lock request and + return an error code */ + + if (lock_deadlock_occurs(lock, trx)) { + + lock_reset_lock_and_trx_wait(lock); + lock_table_remove_low(lock); + + return(DB_DEADLOCK); + } + + trx->que_state = TRX_QUE_LOCK_WAIT; + + ut_a(que_thr_stop(thr)); + + return(DB_LOCK_WAIT); +} + +/************************************************************************* +Checks if other transactions have an incompatible mode lock request in +the lock queue. */ +UNIV_INLINE +ibool +lock_table_other_has_incompatible( +/*==============================*/ + trx_t* trx, /* in: transaction, or NULL if all + transactions should be included */ + ulint wait, /* in: LOCK_WAIT if also waiting locks are + taken into account, or 0 if not */ + dict_table_t* table, /* in: table */ + ulint mode) /* in: lock mode */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + + lock = UT_LIST_GET_LAST(table->locks); + + while (lock != NULL) { + + if ((lock->trx != trx) + && (!lock_mode_compatible(lock_get_mode(lock), mode)) + && (wait || !(lock_get_wait(lock)))) { + + return(TRUE); + } + + lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); + } + + return(FALSE); +} + +/************************************************************************* +Locks the specified database table in the mode given. If the lock cannot +be granted immediately, the query thread is put to wait. */ + +ulint +lock_table( +/*=======*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + dict_table_t* table, /* in: database table in dictionary cache */ + ulint mode, /* in: lock mode */ + que_thr_t* thr) /* in: query thread */ +{ + trx_t* trx; + ulint err; + + ut_ad(table && thr); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + trx = thr_get_trx(thr); + + lock_mutex_enter_kernel(); + + /* Look for stronger locks the same trx already has on the table */ + + if (lock_table_has(trx, table, mode)) { + + lock_mutex_exit_kernel(); + + return(DB_SUCCESS); + } + + /* We have to check if the new lock is compatible with any locks + other transactions have in the table lock queue. */ + + if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { + + /* Another trx has request on the table in an incompatible + mode: this trx must wait */ + + err = lock_table_enqueue_waiting(mode, table, thr); + + lock_mutex_exit_kernel(); + + return(err); + } + + lock_table_create(table, mode, trx); + + lock_mutex_exit_kernel(); + + return(DB_SUCCESS); +} + +/************************************************************************* +Checks if there are any locks set on the table. */ + +ibool +lock_is_on_table( +/*=============*/ + /* out: TRUE if there are lock(s) */ + dict_table_t* table) /* in: database table in dictionary cache */ +{ + ibool ret; + + ut_ad(table); + + lock_mutex_enter_kernel(); + + if (UT_LIST_GET_LAST(table->locks)) { + ret = TRUE; + } else { + ret = FALSE; + } + + lock_mutex_exit_kernel(); + + return(ret); +} + +/************************************************************************* +Checks if a waiting table lock request still has to wait in a queue. */ +static +ibool +lock_table_has_to_wait_in_queue( +/*============================*/ + /* out: TRUE if still has to wait */ + lock_t* wait_lock) /* in: waiting table lock */ +{ + dict_table_t* table; + lock_t* lock; + + ut_ad(lock_get_wait(wait_lock)); + + table = wait_lock->un_member.tab_lock.table; + + lock = UT_LIST_GET_FIRST(table->locks); + + while (lock != wait_lock) { + + if (lock_has_to_wait(wait_lock, lock)) { + + return(TRUE); + } + + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); + } + + return(FALSE); +} + +/***************************************************************** +Removes a table lock request, waiting or granted, from the queue and grants +locks to other transactions in the queue, if they now are entitled to a +lock. */ + +void +lock_table_dequeue( +/*===============*/ + lock_t* in_lock)/* in: table lock object; transactions waiting + behind will get their lock requests granted, if + they are now qualified to it */ +{ + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(lock_get_type(in_lock) == LOCK_TABLE); + + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock); + + lock_table_remove_low(in_lock); + + /* Check if waiting locks in the queue can now be granted: grant + locks if there are no conflicting locks ahead. */ + + while (lock != NULL) { + + if (lock_get_wait(lock) + && !lock_table_has_to_wait_in_queue(lock)) { + + /* Grant the lock */ + lock_grant(lock); + } + + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); + } +} + +/*=========================== LOCK RELEASE ==============================*/ + +/************************************************************************* +Releases transaction locks, and releases possible other transactions waiting +because of these locks. */ + +void +lock_release_off_kernel( +/*====================*/ + trx_t* trx) /* in: transaction */ +{ + ulint count; + lock_t* lock; + + ut_ad(mutex_own(&kernel_mutex)); + + lock = UT_LIST_GET_LAST(trx->trx_locks); + + count = 0; + + while (lock != NULL) { + + count++; + + if (lock_get_type(lock) == LOCK_REC) { + + lock_rec_dequeue_from_page(lock); + } else { + ut_ad(lock_get_type(lock) == LOCK_TABLE); + + lock_table_dequeue(lock); + } + + if (count == LOCK_RELEASE_KERNEL_INTERVAL) { + /* Release the kernel mutex for a while, so that we + do not monopolize it */ + + lock_mutex_exit_kernel(); + + lock_mutex_enter_kernel(); + + count = 0; + } + + lock = UT_LIST_GET_LAST(trx->trx_locks); + } + + mem_heap_empty(trx->lock_heap); +} + +/*===================== VALIDATION AND DEBUGGING ====================*/ + +/************************************************************************* +Prints info of a table lock. */ + +void +lock_table_print( +/*=============*/ + lock_t* lock) /* in: table type lock */ +{ + ut_ad(mutex_own(&kernel_mutex)); + ut_a(lock_get_type(lock) == LOCK_TABLE); + + printf("\nTABLE LOCK table %s trx id %lu %lu", + lock->un_member.tab_lock.table->name, + (lock->trx)->id.high, (lock->trx)->id.low); + + if (lock_get_mode(lock) == LOCK_S) { + printf(" lock mode S"); + } else if (lock_get_mode(lock) == LOCK_X) { + printf(" lock_mode X"); + } else if (lock_get_mode(lock) == LOCK_IS) { + printf(" lock_mode IS"); + } else if (lock_get_mode(lock) == LOCK_IX) { + printf(" lock_mode IX"); + } else { + ut_error; + } + + if (lock_get_wait(lock)) { + printf(" waiting"); + } + + printf("\n"); +} + +/************************************************************************* +Prints info of a record lock. */ + +void +lock_rec_print( +/*===========*/ + lock_t* lock) /* in: record type lock */ +{ + page_t* page; + ulint space; + ulint page_no; + ulint i; + mtr_t mtr; + + ut_ad(mutex_own(&kernel_mutex)); + ut_a(lock_get_type(lock) == LOCK_REC); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu", + space, page_no, lock_rec_get_n_bits(lock)); + + printf(" index %s trx id %lu %lu", (lock->index)->name, + (lock->trx)->id.high, (lock->trx)->id.low); + + if (lock_get_mode(lock) == LOCK_S) { + printf(" lock mode S"); + } else if (lock_get_mode(lock) == LOCK_X) { + printf(" lock_mode X"); + } else { + ut_error; + } + + if (lock_rec_get_gap(lock)) { + printf(" gap type lock"); + } + + if (lock_get_wait(lock)) { + printf(" waiting"); + } + + printf("\n"); + + mtr_start(&mtr); + + /* If the page is not in the buffer pool, we cannot load it + because we have the kernel mutex and ibuf operations would + break the latching order */ + + page = buf_page_get_gen(space, page_no, RW_NO_LATCH, + NULL, BUF_GET_IF_IN_POOL, +#ifdef UNIV_SYNC_DEBUG + __FILE__, __LINE__, +#endif + &mtr); + if (page) { + page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr); + } + + if (page) { + buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); + } + + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { + + printf("Record lock, heap no %lu ", i); + + if (page) { + rec_print(page_find_rec_with_heap_no(page, i)); + } + + printf("\n"); + } + } + + mtr_commit(&mtr); +} + +/************************************************************************* +Calculates the number of record lock structs in the record lock hash table. */ +static +ulint +lock_get_n_rec_locks(void) +/*======================*/ +{ + lock_t* lock; + ulint n_locks = 0; + ulint i; + + ut_ad(mutex_own(&kernel_mutex)); + + for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + + lock = HASH_GET_FIRST(lock_sys->rec_hash, i); + + while (lock) { + n_locks++; + + lock = HASH_GET_NEXT(hash, lock); + } + } + + return(n_locks); +} + +/************************************************************************* +Prints info of locks for all transactions. */ + +void +lock_print_info(void) +/*=================*/ +{ + lock_t* lock; + trx_t* trx; + ulint space; + ulint page_no; + page_t* page; + ibool load_page_first = TRUE; + ulint nth_trx = 0; + ulint nth_lock = 0; + ulint i; + mtr_t mtr; + + lock_mutex_enter_kernel(); + + printf("------------------------------------\n"); + printf("LOCK INFO:\n"); + printf("Number of locks in the record hash table %lu\n", + lock_get_n_rec_locks()); +loop: + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + i = 0; + + while (trx && (i < nth_trx)) { + trx = UT_LIST_GET_NEXT(trx_list, trx); + i++; + } + + if (trx == NULL) { + lock_mutex_exit_kernel(); + + lock_validate(); + + return; + } + + if (nth_lock == 0) { + printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", trx->id.high, + trx->id.low); + } + + i = 0; + + lock = UT_LIST_GET_FIRST(trx->trx_locks); + + while (lock && (i < nth_lock)) { + lock = UT_LIST_GET_NEXT(trx_locks, lock); + i++; + } + + if (lock == NULL) { + nth_trx++; + nth_lock = 0; + + goto loop; + } + + if (lock_get_type(lock) == LOCK_REC) { + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + if (load_page_first) { + lock_mutex_exit_kernel(); + + mtr_start(&mtr); + + page = buf_page_get_with_no_latch(space, page_no, &mtr); + + mtr_commit(&mtr); + + load_page_first = FALSE; + + lock_mutex_enter_kernel(); + + goto loop; + } + + lock_rec_print(lock); + } else { + ut_ad(lock_get_type(lock) == LOCK_TABLE); + + lock_table_print(lock); + } + + load_page_first = TRUE; + + nth_lock++; + + goto loop; +} + +/************************************************************************* +Validates the lock queue on a table. */ + +ibool +lock_table_queue_validate( +/*======================*/ + /* out: TRUE if ok */ + dict_table_t* table) /* in: table */ +{ + lock_t* lock; + ibool is_waiting; + + ut_ad(mutex_own(&kernel_mutex)); + + is_waiting = FALSE; + + lock = UT_LIST_GET_FIRST(table->locks); + + while (lock) { + ut_a(((lock->trx)->conc_state == TRX_ACTIVE) + || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY)); + + if (!lock_get_wait(lock)) { + + ut_a(!is_waiting); + + ut_a(!lock_table_other_has_incompatible(lock->trx, 0, + table, lock_get_mode(lock))); + } else { + is_waiting = TRUE; + + ut_a(lock_table_has_to_wait_in_queue(lock)); + } + + lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); + } + + return(TRUE); +} + +/************************************************************************* +Validates the lock queue on a single record. */ + +ibool +lock_rec_queue_validate( +/*====================*/ + /* out: TRUE if ok */ + rec_t* rec, /* in: record to look at */ + dict_index_t* index) /* in: index, or NULL if not known */ +{ + trx_t* impl_trx; + lock_t* lock; + ibool is_waiting; + + ut_a(rec); + + lock_mutex_enter_kernel(); + + if (page_rec_is_supremum(rec) || page_rec_is_infimum(rec)) { + + lock = lock_rec_get_first(rec); + + while (lock) { + ut_a(lock->trx->conc_state == TRX_ACTIVE + || lock->trx->conc_state + == TRX_COMMITTED_IN_MEMORY); + + ut_a(trx_in_trx_list(lock->trx)); + + if (lock_get_wait(lock)) { + ut_a(lock_rec_has_to_wait_in_queue(lock)); + } + + if (index) { + ut_a(lock->index == index); + } + + lock = lock_rec_get_next(rec, lock); + } + + lock_mutex_exit_kernel(); + + return(TRUE); + } + + if (index && index->type & DICT_CLUSTERED) { + + impl_trx = lock_clust_rec_some_has_impl(rec, index); + + if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, + LOCK_WAIT, rec, impl_trx)) { + + ut_a(lock_rec_has_expl(LOCK_X, rec, impl_trx)); + } + } + + if (index && !(index->type & DICT_CLUSTERED)) { + + /* The kernel mutex may get released temporarily in the + next function call: we have to release lock table mutex + to obey the latching order */ + + impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index); + + if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, + LOCK_WAIT, rec, impl_trx)) { + + ut_a(lock_rec_has_expl(LOCK_X, rec, impl_trx)); + } + } + + is_waiting = FALSE; + + lock = lock_rec_get_first(rec); + + while (lock) { + ut_a(lock->trx->conc_state == TRX_ACTIVE + || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY); + ut_a(trx_in_trx_list(lock->trx)); + + if (index) { + ut_a(lock->index == index); + } + + if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { + + ut_a(!is_waiting); + + if (lock_get_mode(lock) == LOCK_S) { + ut_a(!lock_rec_other_has_expl_req(LOCK_X, + 0, 0, rec, + lock->trx)); + } else { + ut_a(!lock_rec_other_has_expl_req(LOCK_S, + 0, 0, rec, + lock->trx)); + } + + } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { + + is_waiting = TRUE; + ut_a(lock_rec_has_to_wait_in_queue(lock)); + } + + lock = lock_rec_get_next(rec, lock); + } + + lock_mutex_exit_kernel(); + + return(TRUE); +} + +/************************************************************************* +Validates the record lock queues on a page. */ + +ibool +lock_rec_validate_page( +/*===================*/ + /* out: TRUE if ok */ + ulint space, /* in: space id */ + ulint page_no)/* in: page number */ +{ + dict_index_t* index; + page_t* page; + lock_t* lock; + rec_t* rec; + ulint nth_lock = 0; + ulint nth_bit = 0; + ulint i; + mtr_t mtr; + + ut_ad(!mutex_own(&kernel_mutex)); + + mtr_start(&mtr); + + page = buf_page_get(space, page_no, RW_X_LATCH, &mtr); + buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); + + lock_mutex_enter_kernel(); +loop: + lock = lock_rec_get_first_on_page_addr(space, page_no); + + if (!lock) { + goto function_exit; + } + + for (i = 0; i < nth_lock; i++) { + + lock = lock_rec_get_next_on_page(lock); + + if (!lock) { + goto function_exit; + } + } + + ut_a(trx_in_trx_list(lock->trx)); + ut_a(((lock->trx)->conc_state == TRX_ACTIVE) + || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY)); + + for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { + + if ((i == 1) || lock_rec_get_nth_bit(lock, i)) { + + index = lock->index; + rec = page_find_rec_with_heap_no(page, i); + + printf("Validating %lu %lu\n", space, page_no); + + lock_mutex_exit_kernel(); + + lock_rec_queue_validate(rec, index); + + lock_mutex_enter_kernel(); + + nth_bit = i + 1; + + goto loop; + } + } + + nth_bit = 0; + nth_lock++; + + goto loop; + +function_exit: + lock_mutex_exit_kernel(); + + mtr_commit(&mtr); + + return(TRUE); +} + +/************************************************************************* +Validates the lock system. */ + +ibool +lock_validate(void) +/*===============*/ + /* out: TRUE if ok */ +{ + lock_t* lock; + trx_t* trx; + dulint limit; + ulint space; + ulint page_no; + ulint i; + + lock_mutex_enter_kernel(); + + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx) { + lock = UT_LIST_GET_FIRST(trx->trx_locks); + + while (lock) { + if (lock_get_type(lock) == LOCK_TABLE) { + + lock_table_queue_validate( + lock->un_member.tab_lock.table); + } + + lock = UT_LIST_GET_NEXT(trx_locks, lock); + } + + trx = UT_LIST_GET_NEXT(trx_list, trx); + } + + for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { + + limit = ut_dulint_zero; + + for (;;) { + lock = HASH_GET_FIRST(lock_sys->rec_hash, i); + + while (lock) { + ut_a(trx_in_trx_list(lock->trx)); + + space = lock->un_member.rec_lock.space; + page_no = lock->un_member.rec_lock.page_no; + + if (ut_dulint_cmp( + ut_dulint_create(space, page_no), + limit) >= 0) { + break; + } + + lock = HASH_GET_NEXT(hash, lock); + } + + if (!lock) { + + break; + } + + lock_mutex_exit_kernel(); + + lock_rec_validate_page(space, page_no); + + lock_mutex_enter_kernel(); + + limit = ut_dulint_create(space, page_no + 1); + } + } + + lock_mutex_exit_kernel(); + + return(TRUE); +} + +/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/ + +/************************************************************************* +Checks if locks of other transactions prevent an immediate insert of +a record. If they do, first tests if the query thread should anyway +be suspended for some reason; if not, then puts the transaction and +the query thread to the lock wait state and inserts a waiting request +for a gap x-lock to the lock queue. */ + +ulint +lock_rec_insert_check_and_lock( +/*===========================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + rec_t* rec, /* in: record after which to insert */ + dict_index_t* index, /* in: index */ + que_thr_t* thr, /* in: query thread */ + ibool* inherit)/* out: set to TRUE if the new inserted + record maybe should inherit LOCK_GAP type + locks from the successor record */ +{ + rec_t* next_rec; + trx_t* trx; + lock_t* lock; + ulint err; + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + ut_ad(rec); + + trx = thr_get_trx(thr); + next_rec = page_rec_get_next(rec); + + *inherit = FALSE; + + lock_mutex_enter_kernel(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + lock = lock_rec_get_first(next_rec); + + if (lock == NULL) { + /* We optimize CPU time usage in the simplest case */ + + lock_mutex_exit_kernel(); + + if (!(index->type & DICT_CLUSTERED)) { + + /* Update the page max trx id field */ + page_update_max_trx_id(buf_frame_align(rec), + thr_get_trx(thr)->id); + } + + return(DB_SUCCESS); + } + + *inherit = TRUE; + + /* If another transaction has an explicit lock request, gap or not, + waiting or granted, on the successor, the insert has to wait */ + + if (lock_rec_other_has_expl_req(LOCK_S, LOCK_GAP, LOCK_WAIT, next_rec, + trx)) { + err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP, next_rec, + index, thr); + } else { + err = DB_SUCCESS; + } + + lock_mutex_exit_kernel(); + + if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) { + + /* Update the page max trx id field */ + page_update_max_trx_id(buf_frame_align(rec), + thr_get_trx(thr)->id); + } + + ut_ad(lock_rec_queue_validate(next_rec, index)); + + return(err); +} + +/************************************************************************* +If a transaction has an implicit x-lock on a record, but no explicit x-lock +set on the record, sets one for it. NOTE that in the case of a secondary +index, the kernel mutex may get temporarily released. */ +static +void +lock_rec_convert_impl_to_expl( +/*==========================*/ + rec_t* rec, /* in: user record on page */ + dict_index_t* index) /* in: index of record */ +{ + trx_t* impl_trx; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(page_rec_is_user_rec(rec)); + + if (index->type & DICT_CLUSTERED) { + impl_trx = lock_clust_rec_some_has_impl(rec, index); + } else { + impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index); + } + + if (impl_trx) { + /* If the transaction has no explicit x-lock set on the + record, set one for it */ + + if (!lock_rec_has_expl(LOCK_X, rec, impl_trx)) { + + lock_rec_add_to_queue(LOCK_REC | LOCK_X, rec, index, + impl_trx); + } + } +} + +/************************************************************************* +Checks if locks of other transactions prevent an immediate modify (update, +delete mark, or delete unmark) of a clustered index record. If they do, +first tests if the query thread should anyway be suspended for some +reason; if not, then puts the transaction and the query thread to the +lock wait state and inserts a waiting request for a record x-lock to the +lock queue. */ + +ulint +lock_clust_rec_modify_check_and_lock( +/*=================================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + rec_t* rec, /* in: record which should be modified */ + dict_index_t* index, /* in: clustered index */ + que_thr_t* thr) /* in: query thread */ +{ + trx_t* trx; + ulint err; + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + ut_ad(index->type & DICT_CLUSTERED); + + trx = thr_get_trx(thr); + + lock_mutex_enter_kernel(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + /* If a transaction has no explicit x-lock set on the record, set one + for it */ + + lock_rec_convert_impl_to_expl(rec, index); + + err = lock_rec_lock(TRUE, LOCK_X, rec, index, thr); + + lock_mutex_exit_kernel(); + + ut_ad(lock_rec_queue_validate(rec, index)); + + return(err); +} + +/************************************************************************* +Checks if locks of other transactions prevent an immediate modify (delete +mark or delete unmark) of a secondary index record. */ + +ulint +lock_sec_rec_modify_check_and_lock( +/*===============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + rec_t* rec, /* in: record which should be modified; + NOTE: as this is a secondary index, we + always have to modify the clustered index + record first: see the comment below */ + dict_index_t* index, /* in: secondary index */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + ut_ad(!(index->type & DICT_CLUSTERED)); + + /* Another transaction cannot have an implicit lock on the record, + because when we come here, we already have modified the clustered + index record, and this would not have been possible if another active + transaction had modified this secondary index record. */ + + lock_mutex_enter_kernel(); + + ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + + err = lock_rec_lock(TRUE, LOCK_X, rec, index, thr); + + lock_mutex_exit_kernel(); + + ut_ad(lock_rec_queue_validate(rec, index)); + + if (err == DB_SUCCESS) { + /* Update the page max trx id field */ + + page_update_max_trx_id(buf_frame_align(rec), + thr_get_trx(thr)->id); + } + + return(err); +} + +/************************************************************************* +Like the counterpart for a clustered index below, but now we read a +secondary index record. */ + +ulint +lock_sec_rec_read_check_and_lock( +/*=============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + rec_t* rec, /* in: user record or page supremum record + which should be read or passed over by a read + cursor */ + dict_index_t* index, /* in: secondary index */ + ulint mode, /* in: mode of the lock which the read cursor + should set on records: LOCK_S or LOCK_X; the + latter is possible in SELECT FOR UPDATE */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + + ut_ad(!(index->type & DICT_CLUSTERED)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + lock_mutex_enter_kernel(); + + ut_ad((mode != LOCK_X) + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((mode != LOCK_S) + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + + /* Some transaction may have an implicit x-lock on the record only + if the max trx id for the page >= min trx id for the trx list or a + database recovery is running. */ + + if (((ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)), + trx_list_get_min_trx_id()) >= 0) + || recv_recovery_is_on()) + && !page_rec_is_supremum(rec)) { + + lock_rec_convert_impl_to_expl(rec, index); + } + + err = lock_rec_lock(FALSE, mode, rec, index, thr); + + lock_mutex_exit_kernel(); + + ut_ad(lock_rec_queue_validate(rec, index)); + + return(err); +} + +/************************************************************************* +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. */ + +ulint +lock_clust_rec_read_check_and_lock( +/*===============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + rec_t* rec, /* in: user record or page supremum record + which should be read or passed over by a read + cursor */ + dict_index_t* index, /* in: clustered index */ + ulint mode, /* in: mode of the lock which the read cursor + should set on records: LOCK_S or LOCK_X; the + latter is possible in SELECT FOR UPDATE */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + + lock_mutex_enter_kernel(); + + ut_ad((mode != LOCK_X) + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); + ut_ad((mode != LOCK_S) + || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); + + if (!page_rec_is_supremum(rec)) { + + lock_rec_convert_impl_to_expl(rec, index); + } + + err = lock_rec_lock(FALSE, mode, rec, index, thr); + + lock_mutex_exit_kernel(); + + ut_ad(lock_rec_queue_validate(rec, index)); + + return(err); +} |